Diffstat (limited to 'src/vppinfra')
-rw-r--r--  src/vppinfra/CMakeLists.txt  107
-rw-r--r--  src/vppinfra/asm_mips.h  351
-rw-r--r--  src/vppinfra/asm_x86.c  1947
-rw-r--r--  src/vppinfra/asm_x86.h  125
-rw-r--r--  src/vppinfra/atomics.h  2
-rw-r--r--  src/vppinfra/backtrace.c  260
-rw-r--r--  src/vppinfra/bihash_12_4.h  89
-rw-r--r--  src/vppinfra/bihash_16_8.h  9
-rw-r--r--  src/vppinfra/bihash_16_8_32.h  9
-rw-r--r--  src/vppinfra/bihash_24_16.h  13
-rw-r--r--  src/vppinfra/bihash_24_8.h  13
-rw-r--r--  src/vppinfra/bihash_32_8.h  13
-rw-r--r--  src/vppinfra/bihash_40_8.h  14
-rw-r--r--  src/vppinfra/bihash_48_8.h  13
-rw-r--r--  src/vppinfra/bihash_8_16.h  11
-rw-r--r--  src/vppinfra/bihash_8_8.h  8
-rw-r--r--  src/vppinfra/bihash_8_8_stats.h  8
-rw-r--r--  src/vppinfra/bihash_doc.h  216
-rw-r--r--  src/vppinfra/bihash_template.c  96
-rw-r--r--  src/vppinfra/bihash_template.h  28
-rw-r--r--  src/vppinfra/bihash_vec8_8.h  8
-rw-r--r--  src/vppinfra/bitmap.h  101
-rw-r--r--  src/vppinfra/bitops.h  207
-rw-r--r--  src/vppinfra/byte_order.h  2
-rw-r--r--  src/vppinfra/cJSON.c  160
-rw-r--r--  src/vppinfra/cJSON.h  19
-rw-r--r--  src/vppinfra/cache.h  84
-rw-r--r--  src/vppinfra/clib.h  174
-rw-r--r--  src/vppinfra/config.h.in  2
-rw-r--r--  src/vppinfra/cpu.c  260
-rw-r--r--  src/vppinfra/cpu.h  213
-rw-r--r--  src/vppinfra/crc32.h  159
-rw-r--r--  src/vppinfra/crypto/aes.h  491
-rw-r--r--  src/vppinfra/crypto/aes_cbc.h  745
-rw-r--r--  src/vppinfra/crypto/aes_ctr.h  190
-rw-r--r--  src/vppinfra/crypto/aes_gcm.h  944
-rw-r--r--  src/vppinfra/crypto/ghash.h  515
-rw-r--r--  src/vppinfra/crypto/poly1305.h  234
-rw-r--r--  src/vppinfra/crypto/sha2.h  715
-rw-r--r--  src/vppinfra/devicetree.c  346
-rw-r--r--  src/vppinfra/devicetree.h  72
-rw-r--r--  src/vppinfra/dlmalloc.c  62
-rw-r--r--  src/vppinfra/dlmalloc.h  2
-rw-r--r--  src/vppinfra/elf.c  8
-rw-r--r--  src/vppinfra/elf.h  8
-rw-r--r--  src/vppinfra/elf_clib.c  25
-rw-r--r--  src/vppinfra/elf_clib.h  2
-rw-r--r--  src/vppinfra/elog.c  4
-rw-r--r--  src/vppinfra/error.c  8
-rw-r--r--  src/vppinfra/error.h  7
-rw-r--r--  src/vppinfra/error_bootstrap.h  5
-rw-r--r--  src/vppinfra/fifo.c  17
-rw-r--r--  src/vppinfra/fifo.h  103
-rw-r--r--  src/vppinfra/file.h  2
-rw-r--r--  src/vppinfra/format.c  12
-rw-r--r--  src/vppinfra/format.h  57
-rw-r--r--  src/vppinfra/format_ansi.h  48
-rw-r--r--  src/vppinfra/format_table.c  295
-rw-r--r--  src/vppinfra/format_table.h  118
-rw-r--r--  src/vppinfra/freebsd/mem.c  471
-rw-r--r--  src/vppinfra/graph.c  182
-rw-r--r--  src/vppinfra/graph.h  127
-rw-r--r--  src/vppinfra/hash.c  262
-rw-r--r--  src/vppinfra/hash.h  40
-rw-r--r--  src/vppinfra/heap.c  25
-rw-r--r--  src/vppinfra/heap.h  44
-rw-r--r--  src/vppinfra/interrupt.c  99
-rw-r--r--  src/vppinfra/interrupt.h  141
-rw-r--r--  src/vppinfra/jsonformat.c  512
-rw-r--r--  src/vppinfra/jsonformat.h  114
-rw-r--r--  src/vppinfra/lb_hash_hash.h  14
-rw-r--r--  src/vppinfra/linux/mem.c  123
-rw-r--r--  src/vppinfra/linux/sysfs.c  46
-rw-r--r--  src/vppinfra/linux/sysfs.h  5
-rw-r--r--  src/vppinfra/longjmp.S  50
-rw-r--r--  src/vppinfra/longjmp.h  3
-rw-r--r--  src/vppinfra/macros.c  9
-rw-r--r--  src/vppinfra/mem.h  214
-rw-r--r--  src/vppinfra/mem_bulk.c  10
-rw-r--r--  src/vppinfra/mem_dlmalloc.c  415
-rw-r--r--  src/vppinfra/memcpy.h  43
-rw-r--r--  src/vppinfra/memcpy_avx2.h  249
-rw-r--r--  src/vppinfra/memcpy_avx512.h  285
-rw-r--r--  src/vppinfra/memcpy_sse3.h  368
-rw-r--r--  src/vppinfra/memcpy_x86_64.h  613
-rw-r--r--  src/vppinfra/mhash.c  56
-rw-r--r--  src/vppinfra/mhash.h  5
-rw-r--r--  src/vppinfra/mpcap.c  2
-rw-r--r--  src/vppinfra/pcap.c  4
-rw-r--r--  src/vppinfra/pcg.h  85
-rw-r--r--  src/vppinfra/perfmon/bundle_core_power.c  48
-rw-r--r--  src/vppinfra/perfmon/bundle_default.c  61
-rw-r--r--  src/vppinfra/perfmon/perfmon.c  230
-rw-r--r--  src/vppinfra/perfmon/perfmon.h  137
-rw-r--r--  src/vppinfra/pmalloc.c  57
-rw-r--r--  src/vppinfra/pool.c  90
-rw-r--r--  src/vppinfra/pool.h  498
-rw-r--r--  src/vppinfra/random_buffer.h  18
-rw-r--r--  src/vppinfra/ring.h  26
-rw-r--r--  src/vppinfra/sanitizer.c  7
-rw-r--r--  src/vppinfra/sanitizer.h  141
-rw-r--r--  src/vppinfra/serialize.c  24
-rw-r--r--  src/vppinfra/sha2.h  637
-rw-r--r--  src/vppinfra/socket.c  620
-rw-r--r--  src/vppinfra/socket.h  56
-rw-r--r--  src/vppinfra/sparse_vec.h  27
-rw-r--r--  src/vppinfra/stack.c  75
-rw-r--r--  src/vppinfra/stack.h  26
-rw-r--r--  src/vppinfra/std-formats.c  166
-rw-r--r--  src/vppinfra/string.h  358
-rw-r--r--  src/vppinfra/test/aes_cbc.c  187
-rw-r--r--  src/vppinfra/test/aes_ctr.c  481
-rw-r--r--  src/vppinfra/test/aes_gcm.c  1177
-rw-r--r--  src/vppinfra/test/array_mask.c (renamed from src/vppinfra/vector/test/array_mask.c)  45
-rw-r--r--  src/vppinfra/test/compress.c  266
-rw-r--r--  src/vppinfra/test/count_equal.c  104
-rw-r--r--  src/vppinfra/test/crc32c.c  54
-rw-r--r--  src/vppinfra/test/index_to_ptr.c  58
-rw-r--r--  src/vppinfra/test/ip_csum.c  169
-rw-r--r--  src/vppinfra/test/mask_compare.c (renamed from src/vppinfra/vector/test/mask_compare.c)  50
-rw-r--r--  src/vppinfra/test/memcpy_x86_64.c  142
-rw-r--r--  src/vppinfra/test/poly1305.c  268
-rw-r--r--  src/vppinfra/test/sha2.c  322
-rw-r--r--  src/vppinfra/test/test.c  259
-rw-r--r--  src/vppinfra/test/test.h  125
-rw-r--r--  src/vppinfra/test/toeplitz.c  514
-rw-r--r--  src/vppinfra/test_bihash_template.c  72
-rw-r--r--  src/vppinfra/test_fifo.c  2
-rw-r--r--  src/vppinfra/test_fpool.c  2
-rw-r--r--  src/vppinfra/test_hash.c  4
-rw-r--r--  src/vppinfra/test_heap.c  13
-rw-r--r--  src/vppinfra/test_interrupt.c  78
-rw-r--r--  src/vppinfra/test_longjmp.c  26
-rw-r--r--  src/vppinfra/test_mhash.c  403
-rw-r--r--  src/vppinfra/test_mheap.c  286
-rw-r--r--  src/vppinfra/test_pool_alloc.c  56
-rw-r--r--  src/vppinfra/test_pool_iterate.c  2
-rw-r--r--  src/vppinfra/test_serialize.c  46
-rw-r--r--  src/vppinfra/test_socket.c  2
-rw-r--r--  src/vppinfra/test_tw_timer.c  32
-rw-r--r--  src/vppinfra/test_vec.c  77
-rw-r--r--  src/vppinfra/time.c  58
-rw-r--r--  src/vppinfra/time.h  9
-rw-r--r--  src/vppinfra/time_range.c  9
-rw-r--r--  src/vppinfra/timing_wheel.c  20
-rw-r--r--  src/vppinfra/tw_timer_template.c  4
-rw-r--r--  src/vppinfra/types.h  62
-rw-r--r--  src/vppinfra/unformat.c  147
-rw-r--r--  src/vppinfra/unix-formats.c  19
-rw-r--r--  src/vppinfra/unix-misc.c  204
-rw-r--r--  src/vppinfra/unix.h  19
-rw-r--r--  src/vppinfra/vec.c  281
-rw-r--r--  src/vppinfra/vec.h  993
-rw-r--r--  src/vppinfra/vec_bootstrap.h  185
-rw-r--r--  src/vppinfra/vector.h  125
-rw-r--r--  src/vppinfra/vector/array_mask.h  117
-rw-r--r--  src/vppinfra/vector/compress.h  287
-rw-r--r--  src/vppinfra/vector/count_equal.h  306
-rw-r--r--  src/vppinfra/vector/index_to_ptr.h  257
-rw-r--r--  src/vppinfra/vector/ip_csum.h  339
-rw-r--r--  src/vppinfra/vector/mask_compare.h  207
-rw-r--r--  src/vppinfra/vector/test/compress.c  81
-rw-r--r--  src/vppinfra/vector/test/test.c  53
-rw-r--r--  src/vppinfra/vector/test/test.h  35
-rw-r--r--  src/vppinfra/vector/toeplitz.c  122
-rw-r--r--  src/vppinfra/vector/toeplitz.h  513
-rw-r--r--  src/vppinfra/vector_avx2.h  118
-rw-r--r--  src/vppinfra/vector_avx512.h  237
-rw-r--r--  src/vppinfra/vector_neon.h  81
-rw-r--r--  src/vppinfra/vector_sse42.h  90
170 files changed, 19338 insertions, 8636 deletions
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index c682d70f6f1..83a8b2a7e57 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -14,6 +14,26 @@
enable_language(ASM)
##############################################################################
+# find libdl
+##############################################################################
+list(APPEND VPPINFRA_LIBS ${CMAKE_DL_LIBS})
+
+##############################################################################
+# find libunwind
+##############################################################################
+vpp_find_path(LIBUNWIND_INCLUDE_DIR unwind.h)
+vpp_find_library(LIBUNWIND_LIB NAMES unwind libunwind)
+
+if (LIBUNWIND_INCLUDE_DIR AND LIBUNWIND_LIB)
+ message(STATUS "libunwind found at ${LIBUNWIND_LIB}")
+ list(APPEND VPPINFRA_LIBS ${LIBUNWIND_LIB})
+ add_definitions(-DHAVE_LIBUNWIND=1)
+else()
+ message(WARNING "libunwind not found - stack traces disabled")
+ add_definitions(-DHAVE_LIBUNWIND=0)
+endif()
+
+##############################################################################
# Generate vppinfra/config.h
##############################################################################
set(LOG2_CACHE_LINE_BYTES ${VPP_LOG2_CACHE_LINE_SIZE})
@@ -32,35 +52,35 @@ configure_file(
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
- DESTINATION include/vppinfra
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vppinfra
COMPONENT vpp-dev
)
add_definitions(-fvisibility=hidden)
# Ensure symbols from cJSON are exported
-set_source_files_properties( cJSON.c PROPERTIES
+set_source_files_properties( cJSON.c jsonformat.c PROPERTIES
COMPILE_DEFINITIONS " CJSON_API_VISIBILITY " )
-
##############################################################################
# vppinfra sources
##############################################################################
set(VPPINFRA_SRCS
- backtrace.c
bitmap.c
bihash_all_vector.c
cpu.c
+ devicetree.c
dlmalloc.c
elf.c
elog.c
error.c
fifo.c
format.c
- graph.c
+ format_table.c
hash.c
heap.c
interrupt.c
+ jsonformat.c
longjmp.S
macros.c
maplog.c
@@ -77,9 +97,9 @@ set(VPPINFRA_SRCS
random.c
random_isaac.c
rbtree.c
- sanitizer.c
serialize.c
socket.c
+ stack.c
std-formats.c
string.c
time.c
@@ -97,11 +117,12 @@ set(VPPINFRA_SRCS
valloc.c
vec.c
vector.c
+ vector/toeplitz.c
cJSON.c
)
set(VPPINFRA_HEADERS
- sanitizer.h
+ bihash_12_4.h
bihash_16_8.h
bihash_24_8.h
bihash_32_8.h
@@ -124,6 +145,14 @@ set(VPPINFRA_HEADERS
clib.h
cpu.h
crc32.h
+ crypto/sha2.h
+ crypto/ghash.h
+ crypto/aes.h
+ crypto/aes_cbc.h
+ crypto/aes_ctr.h
+ crypto/aes_gcm.h
+ crypto/poly1305.h
+ devicetree.h
dlist.h
dlmalloc.h
elf_clib.h
@@ -134,10 +163,12 @@ set(VPPINFRA_HEADERS
fifo.h
file.h
format.h
- graph.h
+ format_ansi.h
+ format_table.h
hash.h
heap.h
interrupt.h
+ jsonformat.h
lb_hash_hash.h
llist.h
lock.h
@@ -146,15 +177,15 @@ set(VPPINFRA_HEADERS
maplog.h
math.h
memcpy.h
- memcpy_avx2.h
- memcpy_avx512.h
- memcpy_sse3.h
+ memcpy_x86_64.h
mem.h
mhash.h
mpcap.h
os.h
pcap.h
pcap_funcs.h
+ pcg.h
+ perfmon/perfmon.h
pmalloc.h
pool.h
ptclosure.h
@@ -163,10 +194,10 @@ set(VPPINFRA_HEADERS
random_isaac.h
rbtree.h
serialize.h
- sha2.h
smp.h
socket.h
sparse_vec.h
+ stack.h
string.h
time.h
time_range.h
@@ -188,9 +219,13 @@ set(VPPINFRA_HEADERS
vector_altivec.h
vector_avx2.h
vector_avx512.h
- vector/mask_compare.h
- vector/compress.h
vector/array_mask.h
+ vector/compress.h
+ vector/count_equal.h
+ vector/index_to_ptr.h
+ vector/ip_csum.h
+ vector/mask_compare.h
+ vector/toeplitz.h
vector.h
vector_neon.h
vector_sse42.h
@@ -205,16 +240,21 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
linux/mem.c
linux/sysfs.c
linux/netns.c
+# TODO: Temporarily don't build perfmon on non-Linux
+ perfmon/bundle_default.c
+ perfmon/bundle_core_power.c
+ perfmon/perfmon.c
+ )
+elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ list(APPEND VPPINFRA_SRCS
+ elf_clib.c
+ freebsd/mem.c
)
endif()
-option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." OFF)
-if(VPP_USE_EXTERNAL_LIBEXECINFO)
- set(EXECINFO_LIB execinfo)
-endif()
add_vpp_library(vppinfra
SOURCES ${VPPINFRA_SRCS}
- LINK_LIBRARIES m ${EXECINFO_LIB}
+ LINK_LIBRARIES m ${VPPINFRA_LIBS}
INSTALL_HEADERS ${VPPINFRA_HEADERS}
COMPONENT libvppinfra
LTO
@@ -235,10 +275,13 @@ if(VPP_BUILD_VPPINFRA_TESTS)
fpool
hash
heap
+ interrupt
longjmp
macros
maplog
+ mhash
pmalloc
+ pool_alloc
pool_iterate
ptclosure
random
@@ -265,23 +308,35 @@ if(VPP_BUILD_VPPINFRA_TESTS)
LINK_LIBRARIES vppinfra Threads::Threads
)
endforeach()
+endif(VPP_BUILD_VPPINFRA_TESTS)
set(test_files
- vector/test/compress.c
- vector/test/mask_compare.c
- vector/test/array_mask.c
+ test/aes_cbc.c
+ test/aes_ctr.c
+ test/aes_gcm.c
+ test/poly1305.c
+ test/array_mask.c
+ test/compress.c
+ test/count_equal.c
+ test/crc32c.c
+ test/index_to_ptr.c
+ test/ip_csum.c
+ test/mask_compare.c
+ test/memcpy_x86_64.c
+ test/sha2.c
+ test/toeplitz.c
)
-add_vpp_executable(test_vector_funcs
+add_vpp_executable(test_infra
SOURCES
- vector/test/test.c
+ test/test.c
${test_files}
LINK_LIBRARIES vppinfra
+ NO_INSTALL
)
-vpp_library_set_multiarch_sources(test_vector_funcs
+vpp_library_set_multiarch_sources(test_infra
SOURCES
${test_files}
)
-endif(VPP_BUILD_VPPINFRA_TESTS)
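A note on the libunwind hookup added above: CMake now defines HAVE_LIBUNWIND to 1 or 0, so consumers can guard unwinding code at compile time. The following is a minimal sketch of such a guard — it is not code from this series, and uses only libunwind's documented local-unwind API (unw_getcontext, unw_init_local, unw_step, unw_get_reg):

/* Hypothetical sketch of a HAVE_LIBUNWIND-gated stack walk; the
 * function name is illustrative, not part of vppinfra. */
#if HAVE_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>

static int
sketch_backtrace (void **ips, int max)
{
  unw_context_t ctx;
  unw_cursor_t cur;
  unw_word_t ip;
  int n = 0;

  unw_getcontext (&ctx);
  unw_init_local (&cur, &ctx);
  /* Walk caller frames, recording one instruction pointer per frame. */
  while (n < max && unw_step (&cur) > 0)
    {
      unw_get_reg (&cur, UNW_REG_IP, &ip);
      ips[n++] = (void *) ip;
    }
  return n;
}
#else
static int
sketch_backtrace (void **ips, int max)
{
  (void) ips;
  (void) max;
  return 0; /* stack traces disabled, matching the CMake warning above */
}
#endif

When libunwind is absent the stub keeps callers compiling, which is consistent with the "stack traces disabled" warning the build now emits.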
diff --git a/src/vppinfra/asm_mips.h b/src/vppinfra/asm_mips.h
deleted file mode 100644
index 7c9e69586f4..00000000000
--- a/src/vppinfra/asm_mips.h
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- Copyright (c) 2004 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef included_asm_mips_h
-#define included_asm_mips_h
-
-/* Encoding of MIPS instructions. */
-/* Encoding of opcode field (op). */
-#define mips_foreach_opcode \
- _(SPECIAL) _(REGIMM) _(j) _(jal) _(beq) _(bne) _(blez) _(bgtz) \
- _(addi) _(addiu) _(slti) _(sltiu) _(andi) _(ori) _(xori) _(lui) \
- _(COP0) _(COP1) _(COP2) _(COP1X) _(beql) _(bnel) _(blezl) _(bgtzl) \
- _(daddi) _(daddiu) _(ldl) _(ldr) _(SPECIAL2) _(jalx) _(MDMX) _(O37) \
- _(lb) _(lh) _(lwl) _(lw) _(lbu) _(lhu) _(lwr) _(lwu) \
- _(sb) _(sh) _(swl) _(sw) _(sdl) _(sdr) _(swr) _(cache) \
- _(ll) _(lwc1) _(lwc2) _(pref) _(lld) _(ldc1) _(ldc2) _(ld) \
- _(sc) _(swc1) _(swc2) _(o73) _(scd) _(sdc1) _(sdc2) _(sd)
-
-/* Encoding of funct field. */
-#define mips_foreach_special_funct \
- _(sll) _(MOVCI) _(srl) _(sra) _(sllv) _(o05) _(srlv) _(srav) \
- _(jr) _(jalr) _(movz) _(movn) _(syscall) _(break) _(o16) _(sync) \
- _(mfhi) _(mthi) _(mflo) _(mtlo) _(dsllv) _(o25) _(dsrlv) _(dsrav) \
- _(mult) _(multu) _(div) _(divu) _(dmult) _(dmultu) _(ddiv) _(ddivu) \
- _(add) _(addu) _(sub) _(subu) _(and) _(or) _(xor) _(nor) \
- _(o50) _(o51) _(slt) _(sltu) _(dadd) _(daddu) _(dsub) _(dsubu) \
- _(tge) _(tgeu) _(tlt) _(tltu) _(teq) _(o65) _(tne) _(o67) \
- _(dsll) _(o71) _(dsrl) _(dsra) _(dsll32) _(o75) _(dsrl32) _(dsra32)
-
-/* SPECIAL2 encoding of funct field. */
-#define mips_foreach_special2_funct \
- _(madd) _(maddu) _(mul) _(o03) _(msub) _(msubu) _(o06) _(o07) \
- _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
- _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
- _(clz) _(clo) _(o42) _(o43) _(dclz) _(dclo) _(o46) _(o47) \
- _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
- _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
- _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(sdbbp)
-
-/* REGIMM encoding of rt field. */
-#define mips_foreach_regimm_rt \
- _(bltz) _(bgez) _(bltzl) _(bgezl) _(o04) _(o05) _(o06) _(o07) \
- _(tgei) _(tgeiu) _(tltiu) _(teqi) _(o14) _(tnei) _(o16) _(o17) \
- _(bltzal) _(bgezal) _(bltzall) _(bgezall) _(o24) _(o25) _(o26) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
-
-/* COP0 encoding of rs field. */
-#define mips_foreach_cop0_rs \
- _(mfc0) _(dmfc0) _(o02) _(o03) _(mtc0) _(dmtc0) _(o06) _(o07) \
- _(o10) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
- _(C0) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
-
-/* COP0 encoding of funct when rs == RS_CO */
-#define mips_foreach_cop0_funct \
- _(o00) _(tlbr) _(tlbwi) _(o03) _(o04) _(o05) _(tlbwr) _(o07) \
- _(tlbp) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
- _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
- _(eret) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(deret) \
- _(wait) _(o41) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \
- _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
- _(o60) _(o61) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
- _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77)
-
-/* COP1 encoding of rs field. */
-#define mips_foreach_cop1_rs \
- _(mfc1) _(dmfc1) _(cfc1) _(o03) _(mtc1) _(dmtc1) _(ctc1) _(o07) \
- _(BC1) _(o11) _(o12) _(o13) _(o14) _(o15) _(o16) _(o17) \
- _(S) _(D) _(o22) _(o23) _(W) _(L) _(o26) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37)
-
-/* COP1 encoding of funct for S and D */
-#define mips_foreach_cop1_funct \
- _(add) _(sub) _(mul) _(div) _(sqrt) _(abs) _(mov) _(neg) \
- _(roundl) _(truncl) _(ceill) _(floorl) _(roundw) _(truncw) _(ceilw) _(floorw) \
- _(o20) _(MOVCF) _(movz) _(movn) _(o24) _(recip) _(rsqrt) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
- _(cvts) _(cvtd) _(o42) _(o43) _(cvtw) _(cvtl) _(o46) _(o47) \
- _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
- _(cf) _(cun) _(ceq) _(cueq) _(colt) _(cult) _(cole) _(cule) \
- _(csf) _(cngle) _(cseq) _(cngl) _(clt) _(cnge) _(cle) _(cngt)
-
-/* COP1X encoding of funct */
-#define mips_foreach_cop1x_funct \
- _(lwxc1) _(ldxc1) _(o02) _(o03) _(o04) _(luxc1) _(o06) _(o07) \
- _(swxc1) _(sdxc1) _(o12) _(o13) _(o14) _(suxc1) _(o16) _(prefx) \
- _(o20) _(o21) _(o22) _(o23) _(o24) _(o25) _(o26) _(o27) \
- _(o30) _(o31) _(o32) _(o33) _(o34) _(o35) _(o36) _(o37) \
- _(madds) _(maddd) _(o42) _(o43) _(o44) _(o45) _(o46) _(o47) \
- _(msubs) _(msubd) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
- _(nmadds) _(nmaddd) _(o62) _(o63) _(o64) _(o65) _(o66) _(o67) \
- _(nmsubs) _(nmsubd) _(o72) _(o73) _(o74) _(o75) _(o76) _(o77)
-
-#define mips_foreach_mdmx_funct \
- _(msgn) _(ceq) _(pickf) _(pickt) _(clt) _(cle) _(min) _(max) \
- _(o10) _(o11) _(sub) _(add) _(and) _(xor) _(or) _(nor) \
- _(sll) _(o21) _(srl) _(sra) _(o24) _(o25) _(o26) _(o27) \
- _(alniob) _(alnvob) _(alniqh) _(alnvqh) _(o34) _(o35) _(o36) _(shfl) \
- _(rzu) _(rnau) _(rneu) _(o43) _(rzs) _(rnas) _(rnes) _(o47) \
- _(o50) _(o51) _(o52) _(o53) _(o54) _(o55) _(o56) _(o57) \
- _(mul) _(o61) _(muls) _(mula) _(o64) _(o65) _(suba) _(adda) \
- _(o70) _(o71) _(o72) _(o73) _(o74) _(o75) _(wac) _(rac)
-
-#define _(f) MIPS_OPCODE_##f,
-typedef enum
-{
- mips_foreach_opcode
-} mips_insn_opcode_t;
-#undef _
-
-#define _(f) MIPS_SPECIAL_FUNCT_##f,
-typedef enum
-{
- mips_foreach_special_funct
-} mips_insn_special_funct_t;
-#undef _
-
-#define _(f) MIPS_SPECIAL2_FUNCT_##f,
-typedef enum
-{
- mips_foreach_special2_funct
-} mips_insn_special2_funct_t;
-#undef _
-
-#define _(f) MIPS_REGIMM_RT_##f,
-typedef enum
-{
- mips_foreach_regimm_rt
-} mips_insn_regimm_rt_t;
-#undef _
-
-#define _(f) MIPS_COP0_RS_##f,
-typedef enum
-{
- mips_foreach_cop0_rs
-} mips_insn_cop0_rs_t;
-#undef _
-
-#define _(f) MIPS_COP0_FUNCT_##f,
-typedef enum
-{
- mips_foreach_cop0_funct
-} mips_insn_cop0_funct_t;
-#undef _
-
-#define _(f) MIPS_COP1_RS_##f,
-typedef enum
-{
- mips_foreach_cop1_rs
-} mips_insn_cop1_rs_t;
-#undef _
-
-#define _(f) MIPS_COP1_FUNCT_##f,
-typedef enum
-{
- mips_foreach_cop1_funct
-} mips_insn_cop1_funct_t;
-#undef _
-
-#define _(f) MIPS_COP1X_FUNCT_##f,
-typedef enum
-{
- mips_foreach_cop1x_funct
-} mips_insn_cop1x_funct_t;
-#undef _
-
-#define _(f) MIPS_MDMX_FUNCT_##f,
-typedef enum
-{
- mips_foreach_mdmx_funct
-} mips_insn_mdmx_funct_t;
-#undef _
-
-always_inline mips_insn_opcode_t
-mips_insn_get_op (u32 insn)
-{
- return (insn >> 26) & 0x3f;
-}
-
-always_inline u32
-mips_insn_get_rs (u32 insn)
-{
- return (insn >> 21) & 0x1f;
-}
-
-always_inline u32
-mips_insn_get_rt (u32 insn)
-{
- return (insn >> 16) & 0x1f;
-}
-
-always_inline u32
-mips_insn_get_rd (u32 insn)
-{
- return (insn >> 11) & 0x1f;
-}
-
-always_inline u32
-mips_insn_get_sa (u32 insn)
-{
- return (insn >> 6) & 0x1f;
-}
-
-always_inline u32
-mips_insn_get_funct (u32 insn)
-{
- return (insn >> 0) & 0x3f;
-}
-
-always_inline i32
-mips_insn_get_immediate (u32 insn)
-{
- return (((i32) insn) << 16) >> 16;
-}
-
-always_inline u32
-mips_insn_encode_i_type (int op, int rs, int rt, int immediate)
-{
- u32 insn;
- insn = immediate;
- insn |= rt << 16;
- insn |= rs << 21;
- insn |= op << 26;
-
- ASSERT (mips_insn_get_immediate (insn) == immediate);
- ASSERT (mips_insn_get_rt (insn) == rt);
- ASSERT (mips_insn_get_rs (insn) == rt);
- ASSERT (mips_insn_get_op (insn) == op);
-
- return insn;
-}
-
-always_inline u32
-mips_insn_encode_j_type (int op, u32 addr)
-{
- u32 insn;
-
- insn = (addr & ((1 << 28) - 1)) / 4;
- insn |= op << 26;
-
- return insn;
-}
-
-always_inline u32
-mips_insn_encode_r_type (int op, int rs, int rt, int rd, int sa, int funct)
-{
- u32 insn;
- insn = funct;
- insn |= sa << 6;
- insn |= rd << 11;
- insn |= rt << 16;
- insn |= rs << 21;
- insn |= op << 26;
-
- ASSERT (mips_insn_get_funct (insn) == funct);
- ASSERT (mips_insn_get_sa (insn) == sa);
- ASSERT (mips_insn_get_rd (insn) == rd);
- ASSERT (mips_insn_get_rt (insn) == rt);
- ASSERT (mips_insn_get_rs (insn) == rt);
- ASSERT (mips_insn_get_op (insn) == op);
-
- return insn;
-}
-
-#define mips_insn_r(op,funct,rd,rs,rt,sa) \
- mips_insn_encode_r_type (MIPS_OPCODE_##op, \
- (rs), (rt), (rd), (sa), \
- MIPS_##op##_FUNCT_##funct)
-
-#define mips_insn_i(op,rs,rt,imm) \
- mips_insn_encode_i_type (MIPS_OPCODE_##op, (rs), (rt), (imm))
-
-#define mips_insn_j(op,target) \
- mips_insn_encode_i_type (MIPS_OPCODE_##op, (rs), (rt), (imm))
-
-/* Generate unsigned load instructions of data of various sizes. */
-always_inline u32
-mips_insn_load (u32 rd, i32 offset, u32 base, u32 log2_bytes)
-{
- int op;
-
- ASSERT (log2_bytes < 4);
- switch (log2_bytes)
- {
- case 0:
- op = MIPS_OPCODE_lbu;
- break;
- case 1:
- op = MIPS_OPCODE_lhu;
- break;
- case 2:
- op = MIPS_OPCODE_lwu;
- break;
- case 3:
- op = MIPS_OPCODE_ld;
- break;
- }
-
- return mips_insn_encode_i_type (op, base, rd, offset);
-}
-
-typedef enum
-{
- MIPS_REG_SP = 29,
- MIPS_REG_RA = 31,
-} mips_reg_t;
-
-#endif /* included_asm_mips_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
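The header removed above packed MIPS R-type instructions by OR-ing six fields into 32 bits: op[31:26], rs[25:21], rt[20:16], rd[15:11], sa[10:6], funct[5:0]. (Incidentally, its encoders asserted mips_insn_get_rs (insn) == rt, comparing the decoded rs field against rt — visible in the deleted lines above.) A self-contained worked example of the same packing, checked against a known encoding rather than field round-trips:

#include <assert.h>
#include <stdint.h>

/* Pack a MIPS R-type instruction, mirroring the field layout of the
 * deleted mips_insn_encode_r_type(). Casts avoid signed overflow when
 * op << 26 reaches bit 31. */
static uint32_t
encode_r_type (int op, int rs, int rt, int rd, int sa, int funct)
{
  return (uint32_t) funct | (uint32_t) sa << 6 | (uint32_t) rd << 11 |
	 (uint32_t) rt << 16 | (uint32_t) rs << 21 | (uint32_t) op << 26;
}

int
main (void)
{
  /* addu $1, $2, $3: op = SPECIAL (0), funct = 0x21,
   * rd = 1, rs = 2, rt = 3 => 0x00430821. */
  assert (encode_r_type (0, 2, 3, 1, 0, 0x21) == 0x00430821);
  return 0;
}
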
diff --git a/src/vppinfra/asm_x86.c b/src/vppinfra/asm_x86.c
deleted file mode 100644
index e6e00ce5543..00000000000
--- a/src/vppinfra/asm_x86.c
+++ /dev/null
@@ -1,1947 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/* FIXME
- opcode name remove to save table space; enum
- x87
- 3dnow
- cbw naming
-*/
-
-#include <vppinfra/error.h>
-#include <vppinfra/byte_order.h>
-#include <vppinfra/asm_x86.h>
-
-#define foreach_x86_gp_register \
- _ (AX) _ (CX) _ (DX) _ (BX) \
- _ (SP) _ (BP) _ (SI) _ (DI)
-
-typedef enum {
-#define _(r) X86_INSN_GP_REG_##r,
- foreach_x86_gp_register
-#undef _
-} x86_insn_gp_register_t;
-
-typedef union {
- struct {
- u8 rm : 3;
- u8 reg : 3;
- u8 mode : 2;
- };
- u8 byte;
-} x86_insn_modrm_byte_t;
-
-typedef union {
- struct {
- u8 base : 3;
- u8 index : 3;
- u8 log2_scale : 2;
- };
- u8 byte;
-} x86_insn_sib_byte_t;
-
-always_inline uword
-x86_insn_has_modrm_byte (x86_insn_t * insn)
-{
- int i;
- for (i = 0; i < ARRAY_LEN (insn->operands); i++)
- switch (insn->operands[i].code)
- {
- case 'G': case 'E': case 'M': case 'R':
- return 1;
- }
- return 0;
-}
-
-always_inline uword
-x86_insn_immediate_type (x86_insn_t * insn)
-{
- int i;
- for (i = 0; i < ARRAY_LEN (insn->operands); i++)
- switch (insn->operands[i].code)
- {
- case 'J':
- case 'I':
- case 'O':
- return insn->operands[i].type;
- }
- return 0;
-}
-
-/* Opcode extension in modrm byte reg field. */
-#define foreach_x86_insn_modrm_reg_group \
- _ (1) _ (1a) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) \
- _ (8) _ (9) _ (10) _ (11) _ (12) _ (13) _ (14) \
- _ (15) _ (16) _ (p)
-
-#define foreach_x86_insn_sse_group \
- _ (10) _ (28) _ (50) _ (58) _ (60) _ (68) _ (70) _ (78) \
- _ (c0) _ (d0) _ (d8) _ (e0) _ (e8) _ (f0) _ (f8)
-
-enum {
-#define _(x) X86_INSN_MODRM_REG_GROUP_##x,
- foreach_x86_insn_modrm_reg_group
-#undef _
-#define _(x) X86_INSN_SSE_GROUP_##x,
- foreach_x86_insn_sse_group
-#undef _
-};
-
-enum {
-#define _(x) \
- X86_INSN_FLAG_MODRM_REG_GROUP_##x \
- = X86_INSN_FLAG_SET_MODRM_REG_GROUP (1 + X86_INSN_MODRM_REG_GROUP_##x),
- foreach_x86_insn_modrm_reg_group
-#undef _
-
-#define _(x) \
- X86_INSN_FLAG_SSE_GROUP_##x \
- = X86_INSN_FLAG_SET_SSE_GROUP (1 + X86_INSN_SSE_GROUP_##x),
- foreach_x86_insn_sse_group
-#undef _
-};
-
-#define foreach_x86_gp_reg \
- _ (AX) _ (CX) _ (DX) _ (BX) \
- _ (SP) _ (BP) _ (SI) _ (DI)
-
-#define foreach_x86_condition \
- _ (o) _ (no) _ (b) _ (nb) \
- _ (z) _ (nz) _ (be) _ (nbe) \
- _ (s) _ (ns) _ (p) _ (np) \
- _ (l) _ (nl) _ (le) _ (nle)
-
-#define _3f(x,f,o0,o1,o2) \
-{ \
- .name = #x, \
- .flags = (f), \
- .operands[0] = { .data = #o0 }, \
- .operands[1] = { .data = #o1 }, \
- .operands[2] = { .data = #o2 }, \
-}
-
-#define _2f(x,f,o0,o1) _3f(x,f,o0,o1,__)
-#define _1f(x,f,o0) _2f(x,f,o0,__)
-#define _0f(x,f) _1f(x,f,__)
-
-#define _3(x,o0,o1,o2) _3f(x,0,o0,o1,o2)
-#define _2(x,o0,o1) _2f(x,0,o0,o1)
-#define _1(x,o0) _1f(x,0,o0)
-#define _0(x) _0f(x,0)
-
-static x86_insn_t x86_insns_one_byte[256] = {
-
-#define _(x) \
- _2 (x, Eb, Gb), \
- _2 (x, Ev, Gv), \
- _2 (x, Gb, Eb), \
- _2 (x, Gv, Ev), \
- _2 (x, AL, Ib), \
- _2 (x, AX, Iz)
-
- /* 0x00 */
- _ (add),
- _0 (push_es),
- _0 (pop_es),
- _ (or),
- _0 (push_cs),
- _0 (escape_two_byte),
-
- /* 0x10 */
- _ (adc),
- _0 (push_ss),
- _0 (pop_ss),
- _ (sbb),
- _0 (push_ds),
- _0 (pop_ds),
-
- /* 0x20 */
- _ (and),
- _0 (segment_es),
- _0 (daa),
- _ (sub),
- _0 (segment_cs),
- _0 (das),
-
- /* 0x30 */
- _ (xor),
- _0 (segment_ss),
- _0 (aaa),
- _ (cmp),
- _0 (segment_ds),
- _0 (aas),
-
-#undef _
-
- /* 0x40 */
-#define _(r) _1 (inc, r),
- foreach_x86_gp_reg
-#undef _
-#define _(r) _1 (dec, r),
- foreach_x86_gp_reg
-#undef _
-
- /* 0x50 */
-#define _(r) _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, r),
- foreach_x86_gp_reg
-#undef _
-#define _(r) _1f (pop, X86_INSN_FLAG_DEFAULT_64_BIT, r),
- foreach_x86_gp_reg
-#undef _
-
- /* 0x60 */
- _0 (pusha),
- _0 (popa),
- _2 (bound, Gv, Ma),
- _2 (movsxd, Gv, Ed),
- _0 (segment_fs),
- _0 (segment_gs),
- _0 (operand_type),
- _0 (address_size),
- _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Iz),
- _3 (imul, Gv, Ev, Iz),
- _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ib),
- _3 (imul, Gv, Ev, Ib),
- _1 (insb, DX),
- _1 (insw, DX),
- _1 (outsb, DX),
- _1 (outsw, DX),
-
- /* 0x70 */
-#define _(x) _1 (j##x, Jb),
- foreach_x86_condition
-#undef _
-
- /* 0x80 */
- _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib),
- _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Iz),
- _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Eb, Ib),
- _2f (modrm_group_1, X86_INSN_FLAG_MODRM_REG_GROUP_1, Ev, Ib),
- _2 (test, Eb, Gb),
- _2 (test, Ev, Gv),
- _2 (xchg, Eb, Gb),
- _2 (xchg, Ev, Gv),
- _2 (mov, Eb, Gb),
- _2 (mov, Ev, Gv),
- _2 (mov, Gb, Eb),
- _2 (mov, Gv, Ev),
- _2 (mov, Ev, Sw),
- _2 (lea, Gv, Ev),
- _2 (mov, Sw, Ew),
- _1f (modrm_group_1a, X86_INSN_FLAG_MODRM_REG_GROUP_1a, Ev),
-
- /* 0x90 */
- _0 (nop),
- _1 (xchg, CX),
- _1 (xchg, DX),
- _1 (xchg, BX),
- _1 (xchg, SP),
- _1 (xchg, BP),
- _1 (xchg, SI),
- _1 (xchg, DI),
- _0 (cbw),
- _0 (cwd),
- _1 (call, Ap),
- _0 (wait),
- _0 (pushf),
- _0 (popf),
- _0 (sahf),
- _0 (lahf),
-
- /* 0xa0 */
- _2 (mov, AL, Ob),
- _2 (mov, AX, Ov),
- _2 (mov, Ob, AL),
- _2 (mov, Ov, AX),
- _0 (movsb),
- _0 (movsw),
- _0 (cmpsb),
- _0 (cmpsw),
- _2 (test, AL, Ib),
- _2 (test, AX, Iz),
- _1 (stosb, AL),
- _1 (stosw, AX),
- _1 (lodsb, AL),
- _1 (lodsw, AX),
- _1 (scasb, AL),
- _1 (scasw, AX),
-
- /* 0xb0 */
- _2 (mov, AL, Ib),
- _2 (mov, CL, Ib),
- _2 (mov, DL, Ib),
- _2 (mov, BL, Ib),
- _2 (mov, AH, Ib),
- _2 (mov, CH, Ib),
- _2 (mov, DH, Ib),
- _2 (mov, BH, Ib),
-#define _(r) _2 (mov, r, Iv),
- foreach_x86_gp_reg
-#undef _
-
- /* 0xc0 */
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, Ib),
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, Ib),
- _1 (ret, Iw),
- _0 (ret),
- _2 (les, Gz, Mp),
- _2 (lds, Gz, Mp),
- _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Eb, Ib),
- _2f (modrm_group_11, X86_INSN_FLAG_MODRM_REG_GROUP_11, Ev, Iz),
- _2 (enter, Iw, Ib),
- _0 (leave),
- _1 (ret, Iw),
- _0 (ret),
- _0 (int3),
- _1 (int, Ib),
- _0 (into),
- _0 (iret),
-
- /* 0xd0 */
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, 1b),
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, 1b),
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Eb, CL),
- _2f (modrm_group_2, X86_INSN_FLAG_MODRM_REG_GROUP_2, Ev, CL),
- _0 (aam),
- _0 (aad),
- _0 (salc),
- _0 (xlat),
- /* FIXME x87 */
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
-
- /* 0xe0 */
- _1 (loopnz, Jb),
- _1 (loopz, Jb),
- _1 (loop, Jb),
- _1 (jcxz, Jb),
- _2 (in, AL, Ib),
- _2 (in, AX, Ib),
- _2 (out, Ib, AL),
- _2 (out, Ib, AX),
- _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Jz),
- _1f ( jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Jz),
- _1 (jmp, Ap),
- _1 (jmp, Jb),
- _2 (in, AL, DX),
- _2 (in, AX, DX),
- _2 (out, DX, AL),
- _2 (out, DX, AX),
-
- /* 0xf0 */
- _0 (lock),
- _0 (int1),
- _0 (repne),
- _0 (rep),
- _0 (hlt),
- _0 (cmc),
- _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3),
- _0f (modrm_group_3, X86_INSN_FLAG_MODRM_REG_GROUP_3),
- _0 (clc),
- _0 (stc),
- _0 (cli),
- _0 (sti),
- _0 (cld),
- _0 (std),
- _1f (modrm_group_4, X86_INSN_FLAG_MODRM_REG_GROUP_4, Eb),
- _0f (modrm_group_5, X86_INSN_FLAG_MODRM_REG_GROUP_5),
-};
-
-static x86_insn_t x86_insns_two_byte[256] = {
- /* 0x00 */
- _0f (modrm_group_6, X86_INSN_FLAG_MODRM_REG_GROUP_6),
- _0f (modrm_group_7, X86_INSN_FLAG_MODRM_REG_GROUP_7),
- _2 (lar, Gv, Ew),
- _2 (lsl, Gv, Ew),
- _0 (bad),
- _0 (syscall),
- _0 (clts),
- _0 (sysret),
- _0 (invd),
- _0 (wbinvd),
- _0 (bad),
- _0 (ud2),
- _0 (bad),
- _0f (modrm_group_p, X86_INSN_FLAG_MODRM_REG_GROUP_p),
- _0 (femms),
- _0 (escape_3dnow),
-
- /* 0x10 */
- _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
- _2f (movups, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
- _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
- _2f (movlps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
- _2f (unpcklps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
- _2f (unpckhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
- _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Ex, Gx),
- _2f (movhps, X86_INSN_FLAG_SSE_GROUP_10, Gx, Ex),
- _0f (modrm_group_16, X86_INSN_FLAG_MODRM_REG_GROUP_16),
- _0 (nop),
- _0 (nop),
- _0 (nop),
- _0 (nop),
- _0 (nop),
- _0 (nop),
- _0 (nop),
-
- /* 0x20 */
- _2 (mov, Rv, Cv),
- _2 (mov, Rv, Dv),
- _2 (mov, Cv, Rv),
- _2 (mov, Dv, Rv),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
- _2f (movaps, X86_INSN_FLAG_SSE_GROUP_28, Ex, Gx),
- _2f (cvtpi2ps, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
- _2f (movntps, X86_INSN_FLAG_SSE_GROUP_28, Mx, Gx),
- _2f (cvttps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
- _2f (cvtps2pi, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
- _2f (ucomiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
- _2f (comiss, X86_INSN_FLAG_SSE_GROUP_28, Gx, Ex),
-
- /* 0x30 */
- _0 (wrmsr),
- _0 (rdtsc),
- _0 (rdmsr),
- _0 (rdpmc),
- _0 (sysenter),
- _0 (sysexit),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
-
- /* 0x40 */
-#define _(x) _2 (cmov##x, Gv, Ev),
- foreach_x86_condition
-#undef _
-
- /* 0x50 */
- _2f (movmskps, X86_INSN_FLAG_SSE_GROUP_50, Gd, Rx),
- _2f (sqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (rsqrtps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (rcpps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (andps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (andnps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (orps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (xorps, X86_INSN_FLAG_SSE_GROUP_50, Gx, Ex),
- _2f (addps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (mulps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (cvtps2pd, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (cvtdq2ps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (subps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (minps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (divps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
- _2f (maxps, X86_INSN_FLAG_SSE_GROUP_58, Gx, Ex),
-
- /* 0x60 */
- _2f (punpcklbw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (punpcklwd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (punpckldq, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (packsswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (pcmpgtb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (pcmpgtw, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (pcmpgtd, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (packuswb, X86_INSN_FLAG_SSE_GROUP_60, Gm, Em),
- _2f (punpckhbw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
- _2f (punpckhwd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
- _2f (punpckhdq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
- _2f (packssdw, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_68),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_68),
- _2f (movd, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
- _2f (movq, X86_INSN_FLAG_SSE_GROUP_68, Gm, Em),
-
- /* 0x70 */
- _3f (pshufw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em, Ib),
- _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12),
- _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13),
- _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14),
- _2f (pcmpeqb, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
- _2f (pcmpeqw, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
- _2f (pcmpeqd, X86_INSN_FLAG_SSE_GROUP_70, Gm, Em),
- _0f (emms, X86_INSN_FLAG_SSE_GROUP_70),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_78),
- _2f (movd, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm),
- _2f (movq, X86_INSN_FLAG_SSE_GROUP_78, Em, Gm),
-
- /* 0x80 */
-#define _(x) _1 (jmp##x, Jz),
- foreach_x86_condition
-#undef _
-
- /* 0x90 */
-#define _(x) _1 (set##x, Eb),
- foreach_x86_condition
-#undef _
-
- /* 0xa0 */
- _0 (push_fs),
- _0 (pop_fs),
- _0 (cpuid),
- _2 (bt, Ev, Gv),
- _3 (shld, Ev, Gv, Ib),
- _3 (shld, Ev, Gv, CL),
- _0 (bad),
- _0 (bad),
- _0 (push_gs),
- _0 (pop_gs),
- _0 (rsm),
- _2 (bts, Ev, Gv),
- _3 (shrd, Ev, Gv, Ib),
- _3 (shrd, Ev, Gv, CL),
- _0f (modrm_group_15, X86_INSN_FLAG_MODRM_REG_GROUP_15),
- _2 (imul, Gv, Ev),
-
- /* 0xb0 */
- _2 (cmpxchg, Eb, Gb),
- _2 (cmpxchg, Ev, Gv),
- _2 (lss, Gz, Mp),
- _2 (btr, Ev, Gv),
- _2 (lfs, Gz, Mp),
- _2 (lgs, Gz, Mp),
- _2 (movzbl, Gv, Eb),
- _2 (movzwl, Gv, Ew),
- _0 (bad),
- _0f (modrm_group_10, X86_INSN_FLAG_MODRM_REG_GROUP_10),
- _2f (modrm_group_8, X86_INSN_FLAG_MODRM_REG_GROUP_8, Ev, Ib),
- _2 (btc, Ev, Gv),
- _2 (bsf, Gv, Ev),
- _2 (bsr, Gv, Ev),
- _2 (movsx, Gv, Eb),
- _2 (movsx, Gv, Ew),
-
- /* 0xc0 */
- _2 (xadd, Eb, Gb),
- _2 (xadd, Ev, Gv),
- _3f (cmpps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib),
- _2 (movnti, Mv, Gv),
- _3f (pinsrw, X86_INSN_FLAG_SSE_GROUP_c0, Gm, Ew, Ib),
- _3f (pextrw, X86_INSN_FLAG_SSE_GROUP_c0, Gd, Rm, Ib),
- _3f (shufps, X86_INSN_FLAG_SSE_GROUP_c0, Gx, Ex, Ib),
- _1f (modrm_group_9, X86_INSN_FLAG_MODRM_REG_GROUP_9, Mx),
-#define _(r) _1 (bswap, r),
- foreach_x86_gp_reg
-#undef _
-
- /* 0xd0 */
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_d0),
- _2f (psrlw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
- _2f (psrld, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
- _2f (psrlq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
- _2f (paddq, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
- _2f (pmullw, X86_INSN_FLAG_SSE_GROUP_d0, Gm, Em),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_d0),
- _2f (pmovmskb, X86_INSN_FLAG_SSE_GROUP_d0, Gd, Rm),
- _2f (psubusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (psubusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (pminub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (pand, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (paddusb, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (paddusw, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (pmaxub, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
- _2f (pandn, X86_INSN_FLAG_SSE_GROUP_d8, Gm, Em),
-
- /* 0xe0 */
- _2f (pavgb, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (psraw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (psrad, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (pavgw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (pmulhuw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (pmulhw, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (bad, X86_INSN_FLAG_SSE_GROUP_e0, Gm, Em),
- _2f (movntq, X86_INSN_FLAG_SSE_GROUP_e0, Mm, Gm),
- _2f (psubsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (psubsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (pminsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (por, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (paddsb, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (paddsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (pmaxsw, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
- _2f (pxor, X86_INSN_FLAG_SSE_GROUP_e8, Gm, Em),
-
- /* 0xf0 */
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_f0),
- _2f (psllw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (pslld, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (psllq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (pmuludq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (pmaddwd, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (psadbw, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (maskmovq, X86_INSN_FLAG_SSE_GROUP_f0, Gm, Em),
- _2f (psubb, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (psubw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (psubd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (psubq, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (paddb, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (paddw, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _2f (paddd, X86_INSN_FLAG_SSE_GROUP_f8, Gm, Em),
- _0f (bad, X86_INSN_FLAG_SSE_GROUP_f8),
-};
-
-typedef struct {
- x86_insn_t insns[8];
-} x86_insn_group8_t;
-
-/* Escape groups are indexed by modrm reg field. */
-static x86_insn_group8_t x86_insn_modrm_reg_groups[] = {
- [X86_INSN_MODRM_REG_GROUP_1].insns = {
- _0 (add), _0 ( or), _0 (adc), _0 (sbb),
- _0 (and), _0 (sub), _0 (xor), _0 (cmp),
- },
-
- [X86_INSN_MODRM_REG_GROUP_1a].insns = {
- _0f (pop, X86_INSN_FLAG_DEFAULT_64_BIT),
- _0 (bad), _0 (bad), _0 (bad),
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_2].insns = {
- _0 (rol), _0 (ror), _0 (rcl), _0 (rcr),
- _0 (shl), _0 (shr), _0 (sal), _0 (sar),
- },
-
- [X86_INSN_MODRM_REG_GROUP_3].insns = {
- _0 (test), _0 (test), _0 (not), _0 (neg),
- _0 (mul), _0 (imul), _0 (div), _0 (idiv),
- },
-
- [X86_INSN_MODRM_REG_GROUP_4].insns = {
- _0 (inc), _0 (dec), _0 (bad), _0 (bad),
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_5].insns = {
- _1 (inc, Ev),
- _1 (dec, Ev),
- _1f (call, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
- _1 (call, Mp),
- _1f (jmp, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
- _1 (jmp, Mp),
- _1f (push, X86_INSN_FLAG_DEFAULT_64_BIT, Ev),
- _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_6].insns = {
- _1 (sldt, Ev),
- _1 (str, Ev),
- _1 (lldt, Ev),
- _1 (ltr, Ev),
- _1 (verr, Ev),
- _1 (verw, Ev),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_7].insns = {
- _1 (sgdt, Mv),
- _1 (sidt, Mv),
- _1 (lgdt, Mv),
- _1 (lidt, Mv),
- _1 (smsw, Ev),
- _0 (bad),
- _1 (lmsw, Ew),
- _1 (invlpg, Mv),
- },
-
- [X86_INSN_MODRM_REG_GROUP_8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (bt, Ev, Ib),
- _2 (bts, Ev, Ib),
- _2 (btr, Ev, Ib),
- _2 (btc, Ev, Ib),
- },
-
- [X86_INSN_MODRM_REG_GROUP_9].insns = {
- _0 (bad),
- _1 (cmpxchg, Mx),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_10].insns = {
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_11].insns = {
- _0 (mov), _0 (bad), _0 (bad), _0 (bad),
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_12].insns = {
- _0 (bad),
- _0 (bad),
- _2 (psrlw, Rm, Ib),
- _0 (bad),
- _2 (psraw, Rm, Ib),
- _0 (bad),
- _2 (psllw, Rm, Ib),
- _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_13].insns = {
- _0 (bad),
- _0 (bad),
- _2 (psrld, Rm, Ib),
- _0 (bad),
- _2 (psrad, Rm, Ib),
- _0 (bad),
- _2 (pslld, Rm, Ib),
- _0 (bad),
- },
-
- [X86_INSN_MODRM_REG_GROUP_14].insns = {
- _0 (bad),
- _0 (bad),
- _2 (psrlq, Rm, Ib),
- _0f (bad, 0),
- _0 (bad),
- _0 (bad),
- _2 (psllq, Rm, Ib),
- _0f (bad, 0),
- },
-
- [X86_INSN_MODRM_REG_GROUP_15].insns = {
- _1 (fxsave, Mv),
- _1 (fxrstor, Mv),
- _1 (ldmxcsr, Mv),
- _1 (stmxcsr, Mv),
- _0 (bad),
- _1 (lfence, Mv),
- _1 (mfence, Mv),
- _1 (sfence, Mv),
- },
-
- [X86_INSN_MODRM_REG_GROUP_16].insns = {
- _1 (prefetch_nta, Mv),
- _1 (prefetch_t0, Mv),
- _1 (prefetch_t1, Mv),
- _1 (prefetch_t2, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- },
-
- [X86_INSN_MODRM_REG_GROUP_p].insns = {
- _1 (prefetch_exclusive, Mv),
- _1 (prefetch_modified, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_modified, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- _1 (prefetch_nop, Mv),
- },
-};
-
-static x86_insn_group8_t x86_insn_sse_groups_repz[] = {
- [X86_INSN_SSE_GROUP_10].insns = {
- _2 (movss, Gx, Ex),
- _2 (movss, Ex, Gx),
- _2 (movsldup, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (movshdup, Gx, Ex),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_28].insns = {
- _0 (bad),
- _0 (bad),
- _2 (cvtsi2ss, Gx, Ev),
- _0 (bad),
- _2 (cvttss2si, Gv, Ex),
- _2 (cvtss2si, Gv, Ex),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_50].insns = {
- _0 (bad),
- _2 (sqrtss, Gx, Ex),
- _2 (rsqrtps, Gx, Ex),
- _2 (rcpss, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_58].insns = {
- _2 (addss, Gx, Ex),
- _2 (mulss, Gx, Ex),
- _2 (cvtss2sd, Gx, Ex),
- _2 (cvttps2dq, Gx, Ex),
- _2 (subss, Gx, Ex),
- _2 (minss, Gx, Ex),
- _2 (divss, Gx, Ex),
- _2 (maxss, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_60].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_68].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (movdqu, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_70].insns = {
- _3 (pshufhw, Gx, Ex, Ib),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_78].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (movq, Gx, Ex),
- _2 (movdqu, Ex, Gx),
- },
-
- [X86_INSN_SSE_GROUP_c0].insns = {
- _0 (bad),
- _0 (bad),
- _3 (cmpss, Gx, Ex, Ib),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_d0].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (movq2dq, Gx, Em),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_d8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_e0].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (cvtdq2pd, Gx, Ex),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_e8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_f0].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_f8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-};
-
-static x86_insn_group8_t x86_insn_sse_groups_operand_size[] = {
- [X86_INSN_SSE_GROUP_10].insns = {
- _2 (movupd, Gx, Ex),
- _2 (movupd, Ex, Gx),
- _2 (movlpd, Gx, Ex),
- _2 (movlpd, Ex, Gx),
- _2 (unpcklpd, Gx, Ex),
- _2 (unpckhpd, Gx, Ex),
- _2 (movhpd, Gx, Mx),
- _2 (movhpd, Mx, Gx),
- },
-
- [X86_INSN_SSE_GROUP_28].insns = {
- _2 (movapd, Gx, Ex),
- _2 (movapd, Ex, Gx),
- _2 (cvtpi2pd, Gx, Ex),
- _2 (movntpd, Mx, Gx),
- _2 (cvttpd2pi, Gx, Mx),
- _2 (cvtpd2pi, Gx, Mx),
- _2 (ucomisd, Gx, Ex),
- _2 (comisd, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_50].insns = {
- _2 (movmskpd, Gd, Rx),
- _2 (sqrtpd, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _2 (andpd, Gx, Ex),
- _2 (andnpd, Gx, Ex),
- _2 (orpd, Gx, Ex),
- _2 (xorpd, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_58].insns = {
- _2 (addpd, Gx, Ex),
- _2 (mulpd, Gx, Ex),
- _2 (cvtpd2ps, Gx, Ex),
- _2 (cvtps2dq, Gx, Ex),
- _2 (subpd, Gx, Ex),
- _2 (minpd, Gx, Ex),
- _2 (divpd, Gx, Ex),
- _2 (maxpd, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_60].insns = {
- _2 (punpcklbw, Gx, Ex),
- _2 (punpcklwd, Gx, Ex),
- _2 (punpckldq, Gx, Ex),
- _2 (packsswb, Gx, Ex),
- _2 (pcmpgtb, Gx, Ex),
- _2 (pcmpgtw, Gx, Ex),
- _2 (pcmpgtd, Gx, Ex),
- _2 (packuswb, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_68].insns = {
- _2 (punpckhbw, Gx, Ex),
- _2 (punpckhwd, Gx, Ex),
- _2 (punpckhdq, Gx, Ex),
- _2 (packssdw, Gx, Ex),
- _2 (punpcklqdq, Gx, Ex),
- _2 (punpckhqdq, Gx, Ex),
- _2 (movd, Gx, Ev),
- _2 (movdqa, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_70].insns = {
- _3 (pshufd, Gx, Ex, Ib),
- _0f (modrm_group_12, X86_INSN_FLAG_MODRM_REG_GROUP_12),
- _0f (modrm_group_13, X86_INSN_FLAG_MODRM_REG_GROUP_13),
- _0f (modrm_group_14, X86_INSN_FLAG_MODRM_REG_GROUP_14),
- _2 (pcmpeqb, Gx, Ex),
- _2 (pcmpeqw, Gx, Ex),
- _2 (pcmpeqd, Gx, Ex),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_78].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (haddpd, Gx, Ex),
- _2 (hsubpd, Gx, Ex),
- _2 (movd, Ev, Gx),
- _2 (movdqa, Ex, Gx),
- },
-
- [X86_INSN_SSE_GROUP_c0].insns = {
- _0 (bad),
- _0 (bad),
- _3 (cmppd, Gx, Ex, Ib),
- _0 (bad),
- _3 (pinsrw, Gx, Ew, Ib),
- _3 (pextrw, Gd, Gx, Ib),
- _3 (shufpd, Gx, Ex, Ib),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_d0].insns = {
- _2 (addsubpd, Gx, Ex),
- _2 (psrlw, Gx, Ex),
- _2 (psrld, Gx, Ex),
- _2 (psrlq, Gx, Ex),
- _2 (paddq, Gx, Ex),
- _2 (pmullw, Gx, Ex),
- _2 (movq, Ex, Gx),
- _2 (pmovmskb, Gd, Rx),
- },
-
- [X86_INSN_SSE_GROUP_d8].insns = {
- _2 (psubusb, Gx, Ex),
- _2 (psubusw, Gx, Ex),
- _2 (pminub, Gx, Ex),
- _2 (pand, Gx, Ex),
- _2 (paddusb, Gx, Ex),
- _2 (paddusw, Gx, Ex),
- _2 (pmaxub, Gx, Ex),
- _2 (pandn, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_e0].insns = {
- _2 (pavgb, Gx, Ex),
- _2 (psraw, Gx, Ex),
- _2 (psrad, Gx, Ex),
- _2 (pavgw, Gx, Ex),
- _2 (pmulhuw, Gx, Ex),
- _2 (pmulhw, Gx, Ex),
- _2 (cvttpd2dq, Gx, Ex),
- _2 (movntdq, Mx, Gx),
- },
-
- [X86_INSN_SSE_GROUP_e8].insns = {
- _2 (psubsb, Gx, Ex),
- _2 (psubsw, Gx, Ex),
- _2 (pminsw, Gx, Ex),
- _2 (por, Gx, Ex),
- _2 (paddsb, Gx, Ex),
- _2 (paddsw, Gx, Ex),
- _2 (pmaxsw, Gx, Ex),
- _2 (pxor, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_f0].insns = {
- _0 (bad),
- _2 (psllw, Gx, Ex),
- _2 (pslld, Gx, Ex),
- _2 (psllq, Gx, Ex),
- _2 (pmuludq, Gx, Ex),
- _2 (pmaddwd, Gx, Ex),
- _2 (psadbw, Gx, Ex),
- _2 (maskmovdqu, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_f8].insns = {
- _2 (psubb, Gx, Ex),
- _2 (psubw, Gx, Ex),
- _2 (psubd, Gx, Ex),
- _2 (psubq, Gx, Ex),
- _2 (paddb, Gx, Ex),
- _2 (paddw, Gx, Ex),
- _2 (paddd, Gx, Ex),
- _0 (bad),
- },
-};
-
-static x86_insn_group8_t x86_insn_sse_groups_repnz[] = {
- [X86_INSN_SSE_GROUP_10].insns = {
- _2 (movsd, Gx, Ex),
- _2 (movsd, Ex, Gx),
- _2 (movddup, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_28].insns = {
- _0 (bad),
- _0 (bad),
- _2 (cvtsi2sd, Gx, Ev),
- _0 (bad),
- _2 (cvttsd2si, Gv, Ex),
- _2 (cvtsd2si, Gv, Ex),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_50].insns = {
- _0 (bad),
- _2 (sqrtsd, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_58].insns = {
- _2 (addsd, Gx, Ex),
- _2 (mulsd, Gx, Ex),
- _2 (cvtsd2ss, Gx, Ex),
- _0 (bad),
- _2 (subsd, Gx, Ex),
- _2 (minsd, Gx, Ex),
- _2 (divsd, Gx, Ex),
- _2 (maxsd, Gx, Ex),
- },
-
- [X86_INSN_SSE_GROUP_60].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_68].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_70].insns = {
- _3 (pshuflw, Gx, Ex, Ib),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_78].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (haddps, Gx, Ex),
- _2 (hsubps, Gx, Ex),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_c0].insns = {
- _0 (bad),
- _0 (bad),
- _3 (cmpsd, Gx, Ex, Ib),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_d0].insns = {
- _2 (addsubps, Gx, Ex),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (movdq2q, Gm, Ex),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_d8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_e0].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _2 (cvtpd2dq, Gx, Ex),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_e8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_f0].insns = {
- _2 (lddqu, Gx, Mx),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-
- [X86_INSN_SSE_GROUP_f8].insns = {
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- _0 (bad),
- },
-};
-
-#undef _
-
-/* Parses memory displacements and immediates. */
-static u8 * x86_insn_parse_number (u32 log2_n_bytes,
- u8 * code, u8 * code_end,
- i64 * result)
-{
- i64 x = 0;
-
- if (code + (1 << log2_n_bytes) > code_end)
- return 0;
-
- switch (log2_n_bytes)
- {
- case 3:
- x = clib_little_to_host_unaligned_mem_u64 ((u64 *) code);
- break;
-
- case 2:
- x = (i32) clib_little_to_host_unaligned_mem_u32 ((u32 *) code);
- break;
-
- case 1:
- x = (i16) clib_little_to_host_unaligned_mem_u16 ((u16 *) code);
- break;
-
- case 0:
- x = (i8) code[0];
- break;
-
- default:
- ASSERT (0);
- }
-
- *result = x;
- return code + (1 << log2_n_bytes);
-}
-
-static u32
-x86_insn_log2_immediate_bytes (x86_insn_parse_t * p, x86_insn_t * insn)
-{
- u32 i = ~0;
- switch (x86_insn_immediate_type (insn))
- {
- case 'b': i = 0; break;
- case 'w': i = 1; break;
- case 'd': i = 2; break;
- case 'q': i = 3; break;
-
- case 'z':
- i = p->log2_effective_operand_bytes;
- if (i > 2) i = 2;
- break;
-
- case 'v':
- i = p->log2_effective_operand_bytes;
- break;
-
- default:
- i = ~0;
- break;
- }
-
- return i;
-}
-
-static u8 *
-x86_insn_parse_modrm_byte (x86_insn_parse_t * x,
- x86_insn_modrm_byte_t modrm,
- u32 parse_flags,
- u8 * code,
- u8 * code_end)
-{
- u8 effective_address_bits;
-
- if (parse_flags & X86_INSN_PARSE_64_BIT)
- effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 32 : 64;
- else if (parse_flags & X86_INSN_PARSE_32_BIT)
- effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 16 : 32;
- else
- effective_address_bits = (x->flags & X86_INSN_ADDRESS_SIZE) ? 32 : 16;
-
- x->log2_effective_address_bytes = 1;
- x->log2_effective_address_bytes += effective_address_bits > 16;
- x->log2_effective_address_bytes += effective_address_bits > 32;
-
- x->regs[0] |= modrm.reg;
- if (modrm.mode == 3)
- x->regs[1] |= modrm.rm;
- else
- {
- u32 log2_disp_bytes = ~0;
-
- x->flags |= X86_INSN_IS_ADDRESS;
-
- if (effective_address_bits != 16)
- {
- u8 has_sib_byte = 0;
-
- switch (modrm.mode)
- {
- case 0:
- /* When base is bp displacement is present for mode 0. */
- if (modrm.rm == X86_INSN_GP_REG_BP)
- {
- log2_disp_bytes = x->log2_effective_address_bytes;
- break;
- }
- else if (modrm.rm == X86_INSN_GP_REG_SP
- && effective_address_bits != 16)
- {
- has_sib_byte = 1;
- break;
- }
- /* fall through */
- case 1:
- case 2:
- x->regs[1] |= modrm.rm;
- x->flags |= X86_INSN_HAS_BASE;
- if (modrm.mode != 0)
- {
- log2_disp_bytes = (modrm.mode == 1
- ? 0
- : x->log2_effective_address_bytes);
- if (log2_disp_bytes > 2)
- log2_disp_bytes = 2;
- }
- break;
- }
-
- if (has_sib_byte)
- {
- x86_insn_sib_byte_t sib;
-
- if (code >= code_end)
- return 0;
- sib.byte = *code++;
-
- x->log2_index_scale = 1 << sib.log2_scale;
- x->regs[1] |= sib.base;
- x->flags |= X86_INSN_HAS_BASE;
-
- if (sib.index != X86_INSN_GP_REG_SP)
- {
- x->regs[2] |= sib.index;
- x->flags |= X86_INSN_HAS_INDEX;
- }
- }
- }
- else
- {
- /* effective_address_bits == 16 */
- switch (modrm.mode)
- {
- case 0:
- if (modrm.rm == 6)
- {
- /* [disp16] */
- log2_disp_bytes = 1;
- break;
- }
- /* fall through */
- case 1:
- case 2:
- switch (modrm.rm)
- {
- case 0: /* [bx + si/di] */
- case 1:
- x->regs[1] = X86_INSN_GP_REG_BX;
- x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
- x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX;
- break;
-
- case 2: /* [bp + si/di] */
- case 3:
- x->regs[1] = X86_INSN_GP_REG_BP;
- x->regs[2] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
- x->flags |= X86_INSN_HAS_BASE | X86_INSN_HAS_INDEX;
- break;
-
- case 4: /* [si/di] */
- case 5:
- x->regs[1] = X86_INSN_GP_REG_SI + (modrm.rm & 1);
- x->flags |= X86_INSN_HAS_BASE;
- break;
-
- case 6: /* [bp + disp] */
- x->regs[1] = X86_INSN_GP_REG_BP;
- x->flags |= X86_INSN_HAS_BASE;
- break;
-
- case 7: /* [bx + disp] */
- x->regs[1] = X86_INSN_GP_REG_BX;
- x->flags |= X86_INSN_HAS_BASE;
- break;
- }
-
- if (modrm.mode != 0)
- log2_disp_bytes = modrm.mode == 1 ? 0 : 1;
- break;
- }
- }
-
- if (log2_disp_bytes != ~0)
- {
- i64 disp;
- code = x86_insn_parse_number (log2_disp_bytes, code, code_end,
- &disp);
- if (code)
- x->displacement = disp;
- }
- }
-
- return code;
-}
-
-u8 * x86_insn_parse (x86_insn_parse_t * p, u8 * code_start)
-{
- u8 i, * code, * code_end;
- x86_insn_t * insn, * group_insn;
- u8 default_operand_bits, effective_operand_bits;
- u32 opcode, parse_flags;
-
- /* Preserve global parse flags. */
- parse_flags = p->flags & (X86_INSN_PARSE_32_BIT | X86_INSN_PARSE_64_BIT);
- clib_memset (p, 0, sizeof (p[0]));
- p->flags = parse_flags;
-
- /* 64 implies 32 bit parsing. */
- if (parse_flags & X86_INSN_PARSE_64_BIT)
- parse_flags |= X86_INSN_PARSE_32_BIT;
-
- /* Instruction must be <= 15 bytes. */
- code = code_start;
- code_end = code + 15;
-
- /* Parse legacy prefixes. */
- while (1)
- {
- if (code >= code_end)
- goto insn_too_long;
- i = code[0];
- code++;
- switch (i)
- {
- default: goto prefix_done;
-
- /* Set flags based on prefix. */
-#define _(x,o) case o: p->flags |= X86_INSN_##x; break;
- foreach_x86_legacy_prefix;
-#undef _
- }
- }
- prefix_done:
-
- /* REX prefix. */
- if ((parse_flags & X86_INSN_PARSE_64_BIT) && i >= 0x40 && i <= 0x4f)
- {
- p->regs[0] |= ((i & (1 << 2)) != 0) << 3; /* r bit */
- p->regs[1] |= ((i & (1 << 0)) != 0) << 3; /* b bit */
- p->regs[2] |= ((i & (1 << 1)) != 0) << 3; /* x bit */
- p->flags |= ((i & (1 << 3)) /* w bit */
- ? X86_INSN_OPERAND_SIZE_64 : 0);
- if (code >= code_end)
- goto insn_too_long;
- i = *code++;
- }
-
- opcode = i;
- if (opcode == 0x0f)
- {
- /* two byte opcode. */;
- if (code >= code_end)
- goto insn_too_long;
- i = *code++;
- opcode = (opcode << 8) | i;
- insn = x86_insns_two_byte + i;
- }
- else
- {
- static x86_insn_t arpl = {
- .name = "arpl",
- .operands[0].data = "Ew",
- .operands[1].data = "Gw",
- };
-
- if (PREDICT_FALSE (i == 0x63
- && ! (parse_flags & X86_INSN_PARSE_64_BIT)))
- insn = &arpl;
- else
- insn = x86_insns_one_byte + i;
- }
-
- if ((i = X86_INSN_FLAG_GET_SSE_GROUP (insn->flags)) != 0)
- {
- x86_insn_group8_t * g8;
-
- if (p->flags & X86_INSN_OPERAND_SIZE)
- g8 = x86_insn_sse_groups_operand_size;
- else if (p->flags & X86_INSN_REPZ)
- g8 = x86_insn_sse_groups_repz;
- else if (p->flags & X86_INSN_REPNZ)
- g8 = x86_insn_sse_groups_repnz;
- else
- g8 = 0;
-
- /* insn flags have 1 + group so != 0 test above can work. */
- ASSERT ((i - 1) < ARRAY_LEN (x86_insn_sse_groups_operand_size));
- if (g8)
- insn = g8[i - 1].insns + (opcode & 7);
- }
-
- /* Parse modrm and displacement if present. */
- if (x86_insn_has_modrm_byte (insn))
- {
- x86_insn_modrm_byte_t modrm;
-
- if (code >= code_end)
- goto insn_too_long;
- modrm.byte = *code++;
-
- /* Handle special 0x0f01 and 0x0fae encodings. */
- if (PREDICT_FALSE (modrm.mode == 3
- && (opcode == 0x0f01
- || opcode == 0x0fae)))
- {
- static x86_insn_t x86_insns_0f01_special[] = {
- _0 (swapgs), _0 (rdtscp), _0 (bad), _0 (bad),
- _0 (bad), _0 (bad), _0 (bad), _0 (bad),
- };
- static x86_insn_t x86_insns_0fae_special[] = {
- _0 (vmrun), _0 (vmmcall), _0 (vmload), _0 (vmsave),
- _0 (stgi), _0 (clgi), _0 (skinit), _0 (invlpga),
- };
-
- if (opcode == 0x0f01)
- insn = x86_insns_0f01_special;
- else
- insn = x86_insns_0fae_special;
- insn += modrm.rm;
- opcode = (opcode << 8) | modrm.byte;
- }
- else
- {
- code = x86_insn_parse_modrm_byte (p, modrm, parse_flags,
- code, code_end);
- if (! code)
- goto insn_too_long;
- }
- }
-
- group_insn = 0;
- if ((i = X86_INSN_FLAG_GET_MODRM_REG_GROUP (insn->flags)) != 0)
- {
- u32 g = i - 1;
- ASSERT (g < ARRAY_LEN (x86_insn_modrm_reg_groups));
- group_insn = x86_insn_modrm_reg_groups[g].insns + (p->regs[0] & 7);
- }
-
- p->insn = insn[0];
- if (group_insn)
- {
- u32 k;
- p->insn.name = group_insn->name;
- p->insn.flags |= group_insn->flags;
- for (k = 0; k < ARRAY_LEN (group_insn->operands); k++)
- if (x86_insn_operand_is_valid (group_insn, k))
- p->insn.operands[k] = group_insn->operands[k];
- }
-
- default_operand_bits
- = ((((parse_flags & X86_INSN_PARSE_32_BIT) != 0)
- ^ ((p->flags & X86_INSN_OPERAND_SIZE) != 0))
- ? BITS (u32) : BITS (u16));
-
- if ((parse_flags & X86_INSN_PARSE_64_BIT)
- && (p->insn.flags & X86_INSN_FLAG_DEFAULT_64_BIT))
- default_operand_bits = BITS (u64);
-
- effective_operand_bits = default_operand_bits;
- if (p->flags & X86_INSN_OPERAND_SIZE_64)
- effective_operand_bits = BITS (u64);
-
- p->log2_effective_operand_bytes = 1;
- p->log2_effective_operand_bytes += effective_operand_bits > 16;
- p->log2_effective_operand_bytes += effective_operand_bits > 32;
-
- /* Parse immediate if present. */
- {
- u32 l = x86_insn_log2_immediate_bytes (p, insn);
- if (l <= 3)
- {
- code = x86_insn_parse_number (l, code, code_end, &p->immediate);
- if (! code)
- goto insn_too_long;
- }
- }
-
- return code;
-
- insn_too_long:
- return 0;
-}
-
-static u8 * format_x86_gp_reg_operand (u8 * s, va_list * va)
-{
- u32 r = va_arg (*va, u32);
- u32 log2_n_bytes = va_arg (*va, u32);
-
- const char names8[8] = "acdbsbsd";
- const char names16[8] = "xxxxppii";
-
- ASSERT (r < 16);
-
- /* Add % register prefix. */
- vec_add1 (s, '%');
-
- switch (log2_n_bytes)
- {
- case 0:
- {
-
- if (r < 8)
- s = format (s, "%c%c", names8[r & 3], (r >> 2) ? 'l' : 'h');
- else
- s = format (s, "r%db", r);
- }
- break;
-
- case 2:
- case 3:
- s = format (s, "%c", log2_n_bytes == 2 ? 'e' : 'r');
- /* fall through */
- case 1:
- if (r < 8)
- s = format (s, "%c%c", names8[r], names16[r]);
- else
- {
- s = format (s, "%d", r);
- if (log2_n_bytes != 3)
- s = format (s, "%c", log2_n_bytes == 1 ? 'w' : 'd');
- }
- break;
-
- default:
- ASSERT (0);
- }
-
- return s;
-}
-
-static u8 * format_x86_reg_operand (u8 * s, va_list * va)
-{
- u32 reg = va_arg (*va, u32);
- u32 log2_n_bytes = va_arg (*va, u32);
- u32 type = va_arg (*va, u32);
-
- switch (type)
- {
- default:
- ASSERT (0);
- break;
-
- case 'x':
- ASSERT (reg < 16);
- return format (s, "%%xmm%d", reg);
-
- case 'm':
- ASSERT (reg < 8);
- return format (s, "%%mm%d", reg);
-
- /* Explicit byte/word/double-word/quad-word */
- case 'b': log2_n_bytes = 0; break;
- case 'w': log2_n_bytes = 1; break;
- case 'd': log2_n_bytes = 2; break;
- case 'q': log2_n_bytes = 3; break;
-
- /* Use effective operand size. */
- case 'v': break;
-
- /* word or double-word depending on effective operand size. */
- case 'z':
- log2_n_bytes = clib_min (log2_n_bytes, 2);
- break;
- }
-
- s = format (s, "%U", format_x86_gp_reg_operand, reg, log2_n_bytes);
- return s;
-}
-
-static u8 * format_x86_mem_operand (u8 * s, va_list * va)
-{
- x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
-
- if (p->displacement != 0)
- s = format (s, "0x%x", p->displacement);
-
- if (p->flags & X86_INSN_HAS_BASE)
- {
- s = format (s, "(%U",
- format_x86_gp_reg_operand, p->regs[1],
- p->log2_effective_address_bytes);
- if (p->flags & X86_INSN_HAS_INDEX)
- {
- s = format (s, ",%U",
- format_x86_gp_reg_operand, p->regs[2],
- p->log2_effective_address_bytes);
- if (p->log2_index_scale != 0)
- s = format (s, ",%d", 1 << p->log2_index_scale);
- }
- s = format (s, ")");
- }
-
- /* [RIP+disp] PC relative addressing in 64 bit mode. */
- else if (p->flags & X86_INSN_PARSE_64_BIT)
- s = format (s, "(%%rip)");
-
- return s;
-}
-
-static u8 * format_x86_insn_operand (u8 * s, va_list * va)
-{
- x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
- x86_insn_t * insn = &p->insn;
- u32 o = va_arg (*va, u32);
- u8 c, t;
-
- ASSERT (o < ARRAY_LEN (insn->operands));
- c = insn->operands[o].code;
- t = insn->operands[o].type;
-
- /* Register encoded in instruction. */
- if (c < 8)
- return format (s, "%U",
- format_x86_gp_reg_operand, c,
- p->log2_effective_operand_bytes);
-
- switch (c)
- {
- /* Memory or reg field from modrm byte. */
- case 'M':
- ASSERT (p->flags & X86_INSN_IS_ADDRESS);
- /* FALLTHROUGH */
- case 'E':
- if (p->flags & X86_INSN_IS_ADDRESS)
- s = format (s, "%U", format_x86_mem_operand, p);
- else
- s = format (s, "%U",
- format_x86_reg_operand, p->regs[1],
- p->log2_effective_operand_bytes, t);
- break;
-
- /* reg field from modrm byte. */
- case 'R':
- case 'G':
- s = format (s, "%U",
- format_x86_reg_operand, p->regs[0],
- p->log2_effective_operand_bytes, t);
- break;
-
- case 'I':
- {
- u32 l = x86_insn_log2_immediate_bytes (p, insn);
- i64 mask = pow2_mask (8ULL << l);
- s = format (s, "$0x%Lx", p->immediate & mask);
- }
- break;
-
- case 'J':
- if (p->immediate < 0)
- s = format (s, "- 0x%Lx", -p->immediate);
- else
- s = format (s, "+ 0x%Lx", p->immediate);
- break;
-
- case 'O':
- s = format (s, "0x%Lx", p->immediate);
- break;
-
- case 'A':
- /* AX/AL */
- s = format (s, "%U",
- format_x86_gp_reg_operand, X86_INSN_GP_REG_AX,
- t == 'L' ? 0 : p->log2_effective_operand_bytes);
- break;
-
- case 'B':
- /* BX/BL/BP */
- s = format (s, "%U",
- format_x86_gp_reg_operand,
- t == 'P' ? X86_INSN_GP_REG_BP : X86_INSN_GP_REG_BX,
- t == 'L' ? 0 : p->log2_effective_operand_bytes);
- break;
-
- case 'C':
- /* CX/CL */
- s = format (s, "%U",
- format_x86_gp_reg_operand, X86_INSN_GP_REG_CX,
- t == 'L' ? 0 : p->log2_effective_operand_bytes);
- break;
-
- case 'D':
- /* DX/DL/DI */
- s = format (s, "%U",
- format_x86_gp_reg_operand,
- t == 'I' ? X86_INSN_GP_REG_DI : X86_INSN_GP_REG_DX,
- t == 'L' ? 0 : p->log2_effective_operand_bytes);
- break;
-
- case 'S':
- /* SI/SP */
- s = format (s, "%U",
- format_x86_gp_reg_operand,
- t == 'I' ? X86_INSN_GP_REG_SI : X86_INSN_GP_REG_SP,
- p->log2_effective_operand_bytes);
- break;
-
- case '1':
- s = format (s, "1");
- break;
-
- default:
- ASSERT (0);
- }
-
- return s;
-}
-
-u8 * format_x86_insn_parse (u8 * s, va_list * va)
-{
- x86_insn_parse_t * p = va_arg (*va, x86_insn_parse_t *);
- x86_insn_t * insn = &p->insn;
- u32 o, i, is_src_dst;
-
- s = format (s, "%s", insn->name);
-
- if (! x86_insn_operand_is_valid (insn, 0))
- goto done;
-
- is_src_dst = x86_insn_operand_is_valid (insn, 1);
-
- /* If instruction has immediate add suffix to opcode to
- indicate operand size. */
- if (is_src_dst)
- {
- u32 b;
-
- b = x86_insn_log2_immediate_bytes (p, insn);
- if (b < p->log2_effective_operand_bytes
- && (p->flags & X86_INSN_IS_ADDRESS))
- s = format (s, "%c", "bwlq"[b]);
- }
-
- for (i = 0; i < ARRAY_LEN (insn->operands); i++)
- {
- o = is_src_dst + i;
- if (! x86_insn_operand_is_valid (insn, o))
- break;
- s = format (s, "%s%U",
- i == 0 ? " " : ", ",
- format_x86_insn_operand, p, o);
- }
-
- if (is_src_dst)
- s = format (s, ", %U",
- format_x86_insn_operand, p, 0);
-
- done:
- return s;
-}
diff --git a/src/vppinfra/asm_x86.h b/src/vppinfra/asm_x86.h
deleted file mode 100644
index dacef61755c..00000000000
--- a/src/vppinfra/asm_x86.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_asm_x86_h
-#define included_asm_x86_h
-
-#include <vppinfra/format.h>
-
-typedef union
-{
- struct
- {
- u8 code;
- u8 type;
- };
- u8 data[2];
-} x86_insn_operand_t;
-
-typedef struct
-{
- /* Instruction name. */
- char *name;
-
- /* X86 instructions may have up to 3 operands. */
- x86_insn_operand_t operands[3];
-
- u16 flags;
-#define X86_INSN_FLAG_DEFAULT_64_BIT (1 << 0)
-#define X86_INSN_FLAG_SET_SSE_GROUP(n) ((n) << 5)
-#define X86_INSN_FLAG_GET_SSE_GROUP(f) (((f) >> 5) & 0x1f)
-#define X86_INSN_FLAG_SET_MODRM_REG_GROUP(n) (((n) & 0x3f) << 10)
-#define X86_INSN_FLAG_GET_MODRM_REG_GROUP(f) (((f) >> 10) & 0x3f)
-} x86_insn_t;
-
-always_inline uword
-x86_insn_operand_is_valid (x86_insn_t * i, uword o)
-{
- ASSERT (o < ARRAY_LEN (i->operands));
- return i->operands[o].code != '_';
-}
-
-#define foreach_x86_legacy_prefix \
- _ (OPERAND_SIZE, 0x66) \
- _ (ADDRESS_SIZE, 0x67) \
- _ (SEGMENT_CS, 0x2e) \
- _ (SEGMENT_DS, 0x3e) \
- _ (SEGMENT_ES, 0x26) \
- _ (SEGMENT_FS, 0x64) \
- _ (SEGMENT_GS, 0x65) \
- _ (SEGMENT_SS, 0x36) \
- _ (LOCK, 0xf0) \
- _ (REPZ, 0xf3) \
- _ (REPNZ, 0xf2)
-
-#define foreach_x86_insn_parse_flag \
- /* Parse in 32/64-bit mode. */ \
- _ (PARSE_32_BIT, 0) \
- _ (PARSE_64_BIT, 0) \
- _ (IS_ADDRESS, 0) \
- /* regs[1/2] is a valid base/index register */ \
- _ (HAS_BASE, 0) \
- _ (HAS_INDEX, 0) \
- /* rex w bit */ \
- _ (OPERAND_SIZE_64, 0)
-
-typedef enum
-{
-#define _(f,o) X86_INSN_FLAG_BIT_##f,
- foreach_x86_insn_parse_flag foreach_x86_legacy_prefix
-#undef _
-} x86_insn_parse_flag_bit_t;
-
-typedef enum
-{
-#define _(f,o) X86_INSN_##f = 1 << X86_INSN_FLAG_BIT_##f,
- foreach_x86_insn_parse_flag foreach_x86_legacy_prefix
-#undef _
-} x86_insn_parse_flag_t;
-
-typedef struct
-{
- /* Registers in instruction.
- [0] is modrm reg field
- [1] is base reg
- [2] is index reg. */
- u8 regs[3];
-
- /* Scale for index register. */
- u8 log2_index_scale:2;
- u8 log2_effective_operand_bytes:3;
- u8 log2_effective_address_bytes:3;
-
- i32 displacement;
-
- /* Parser flags: set of x86_insn_parse_flag_t enums. */
- u32 flags;
-
- i64 immediate;
-
- x86_insn_t insn;
-} x86_insn_parse_t;
-
-u8 *x86_insn_parse (x86_insn_parse_t * p, u8 * code_start);
-format_function_t format_x86_insn_parse;
-
-#endif /* included_asm_x86_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/atomics.h b/src/vppinfra/atomics.h
index 5d3c5f8d601..92c45610391 100644
--- a/src/vppinfra/atomics.h
+++ b/src/vppinfra/atomics.h
@@ -52,6 +52,8 @@
#define clib_atomic_store_rel_n(a, b) __atomic_store_n ((a), (b), __ATOMIC_RELEASE)
#define clib_atomic_store_seq_cst(a, b) \
__atomic_store_n ((a), (b), __ATOMIC_SEQ_CST)
+#define clib_atomic_store_relax_n(a, b) \
+ __atomic_store_n ((a), (b), __ATOMIC_RELAXED)
#define clib_atomic_load_seq_cst(a) __atomic_load_n ((a), __ATOMIC_SEQ_CST)
#define clib_atomic_swap_acq_n(a, b) __atomic_exchange_n ((a), (b), __ATOMIC_ACQUIRE)
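
The new clib_atomic_store_relax_n wrapper issues an atomic store with __ATOMIC_RELAXED semantics: the store itself cannot tear, but it carries no ordering against surrounding memory operations. A minimal sketch of a case where that is sufficient, a single-writer heartbeat value (worker_state_t and worker_tick are hypothetical names, not part of this patch):

    #include <vppinfra/clib.h>
    #include <vppinfra/atomics.h>

    typedef struct { u64 heartbeat; } worker_state_t;

    /* Readers on other threads only need an untorn 64-bit value,
       not any ordering with neighbouring stores, so a relaxed
       store suffices and is cheaper than clib_atomic_store_seq_cst. */
    static inline void
    worker_tick (worker_state_t *ws, u64 now)
    {
      clib_atomic_store_relax_n (&ws->heartbeat, now);
    }
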
diff --git a/src/vppinfra/backtrace.c b/src/vppinfra/backtrace.c
deleted file mode 100644
index e713bae6876..00000000000
--- a/src/vppinfra/backtrace.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- Copyright (c) 2004 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#include <vppinfra/clib.h>
-#include <vppinfra/error.h>
-
-#ifdef __mips__
-
-/* Let code below know we've defined _clib_backtrace */
-#define clib_backtrace_defined
-
-#include <vppinfra/asm_mips.h>
-
-__clib_export uword
-clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip)
-{
- u32 *pc;
- void *sp;
- uword i, saved_pc;
-
- /* Figure current PC, saved PC and stack pointer. */
- asm volatile (".set push\n"
- ".set noat\n" "move %[saved_pc], $31\n" "move %[sp], $29\n"
- /* Fetches current PC. */
- "la $at, 1f\n"
- "jalr %[pc], $at\n"
- "nop\n"
- "1:\n"
- ".set pop\n":[pc] "=r" (pc),
- [saved_pc] "=r" (saved_pc),[sp] "=r" (sp));
-
- /* Also skip current frame. */
- n_frames_to_skip += 1;
-
- for (i = 0; i < max_callers + n_frames_to_skip; i++)
- {
- mips_insn_opcode_t op;
- mips_insn_special_funct_t funct;
- i32 insn, rs, rt, rd, immediate, found_saved_pc;
- u32 *start_pc;
-
- /* Parse instructions until we reach prologue for this
- stack frame. We'll need to figure out where saved
- PC is and where previous stack frame lives. */
- start_pc = pc;
- found_saved_pc = 0;
- while (1)
- {
- insn = *--pc;
- op = mips_insn_get_op (insn);
- funct = mips_insn_get_funct (insn);
- rs = mips_insn_get_rs (insn);
- rt = mips_insn_get_rt (insn);
- rd = mips_insn_get_rd (insn);
- immediate = mips_insn_get_immediate (insn);
-
- switch (op)
- {
- default:
- break;
-
- case MIPS_OPCODE_sd:
- case MIPS_OPCODE_sw:
- /* Trace stores of return address. */
- if (rt == MIPS_REG_RA)
- {
- void *addr = sp + immediate;
-
- /* If RA is stored somewhere other than in the
- stack frame, give up. */
- if (rs != MIPS_REG_SP)
- goto backtrace_done;
-
- ASSERT (immediate % 4 == 0);
- if (op == MIPS_OPCODE_sw)
- saved_pc = ((u32 *) addr)[0];
- else
- saved_pc = ((u64 *) addr)[0];
- found_saved_pc = 1;
- }
- break;
-
- case MIPS_OPCODE_addiu:
- case MIPS_OPCODE_daddiu:
- case MIPS_OPCODE_addi:
- case MIPS_OPCODE_daddi:
- if (rt == MIPS_REG_SP)
- {
- if (rs != MIPS_REG_SP)
- goto backtrace_done;
-
- ASSERT (immediate % 4 == 0);
-
- /* Assume positive offset is part of the epilogue.
- E.g.
- jr ra
- add sp,sp,100
- */
- if (immediate > 0)
- continue;
-
- /* Negative offset means allocate stack space.
- This could either be the prologue or could be due to
- alloca. */
- sp -= immediate;
-
- /* This frame will not save RA. */
- if (i == 0)
- goto found_prologue;
-
- /* Assume that addiu sp,sp,-N without store of ra means
- that we have not found the prologue yet. */
- if (found_saved_pc)
- goto found_prologue;
- }
- break;
-
- case MIPS_OPCODE_slti:
- case MIPS_OPCODE_sltiu:
- case MIPS_OPCODE_andi:
- case MIPS_OPCODE_ori:
- case MIPS_OPCODE_xori:
- case MIPS_OPCODE_lui:
- case MIPS_OPCODE_ldl:
- case MIPS_OPCODE_ldr:
- case MIPS_OPCODE_lb:
- case MIPS_OPCODE_lh:
- case MIPS_OPCODE_lwl:
- case MIPS_OPCODE_lw:
- case MIPS_OPCODE_lbu:
- case MIPS_OPCODE_lhu:
- case MIPS_OPCODE_lwr:
- case MIPS_OPCODE_lwu:
- case MIPS_OPCODE_ld:
- /* Give up when we find anyone setting the stack pointer. */
- if (rt == MIPS_REG_SP)
- goto backtrace_done;
- break;
-
- case MIPS_OPCODE_SPECIAL:
- if (rd == MIPS_REG_SP)
- switch (funct)
- {
- default:
- /* Give up when we find anyone setting the stack pointer. */
- goto backtrace_done;
-
- case MIPS_SPECIAL_FUNCT_break:
- case MIPS_SPECIAL_FUNCT_jr:
- case MIPS_SPECIAL_FUNCT_sync:
- case MIPS_SPECIAL_FUNCT_syscall:
- case MIPS_SPECIAL_FUNCT_tge:
- case MIPS_SPECIAL_FUNCT_tgeu:
- case MIPS_SPECIAL_FUNCT_tlt:
- case MIPS_SPECIAL_FUNCT_tltu:
- case MIPS_SPECIAL_FUNCT_teq:
- case MIPS_SPECIAL_FUNCT_tne:
- /* These instructions can validly have rd == MIPS_REG_SP */
- break;
- }
- break;
- }
- }
-
- found_prologue:
- /* Check sanity of saved pc. */
- if (saved_pc & 3)
- goto backtrace_done;
- if (saved_pc == 0)
- goto backtrace_done;
-
- if (i >= n_frames_to_skip)
- callers[i - n_frames_to_skip] = saved_pc;
- pc = uword_to_pointer (saved_pc, u32 *);
- }
-
-backtrace_done:
- if (i < n_frames_to_skip)
- return 0;
- else
- return i - n_frames_to_skip;
-}
-#endif /* __mips__ */
-
-#ifndef clib_backtrace_defined
-#define clib_backtrace_defined
-
-/* use glibc backtrace for stack trace */
-#include <execinfo.h>
-
-__clib_export uword
-clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip)
-{
- int size;
- void *array[20];
- /* Also skip current frame. */
- n_frames_to_skip += 1;
-
- size = clib_min (ARRAY_LEN (array), max_callers + n_frames_to_skip);
-
- size = backtrace (array, size);
-
- uword i;
-
- for (i = 0; i < max_callers + n_frames_to_skip && i < size; i++)
- {
- if (i >= n_frames_to_skip)
- callers[i - n_frames_to_skip] = pointer_to_uword (array[i]);
- }
-
- if (i < n_frames_to_skip)
- return 0;
- else
- return i - n_frames_to_skip;
-}
-
-
-#endif /* clib_backtrace_defined */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/bihash_12_4.h b/src/vppinfra/bihash_12_4.h
new file mode 100644
index 00000000000..3fdf1847861
--- /dev/null
+++ b/src/vppinfra/bihash_12_4.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_PER_PAGE
+#undef BIHASH_32_64_SVM
+#undef BIHASH_ENABLE_STATS
+#undef BIHASH_KVP_AT_BUCKET_LEVEL
+#undef BIHASH_LAZY_INSTANTIATE
+#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES
+#undef BIHASH_USE_HEAP
+
+#define BIHASH_TYPE _12_4
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_AT_BUCKET_LEVEL 0
+#define BIHASH_LAZY_INSTANTIATE 1
+#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1
+#define BIHASH_USE_HEAP 1
+
+#ifndef __included_bihash_12_4_h__
+#define __included_bihash_12_4_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+typedef union
+{
+ struct
+ {
+ u32 key[3];
+ u32 value;
+ };
+ u64 as_u64[2];
+} clib_bihash_kv_12_4_t;
+
+static inline void
+clib_bihash_mark_free_12_4 (clib_bihash_kv_12_4_t *v)
+{
+ v->value = 0xFEEDFACE;
+}
+
+static inline int
+clib_bihash_is_free_12_4 (const clib_bihash_kv_12_4_t *v)
+{
+ if (v->value == 0xFEEDFACE)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_12_4 (const clib_bihash_kv_12_4_t *v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 12);
+#else
+ u64 tmp = v->as_u64[0] ^ v->key[2];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_12_4 (u8 *s, va_list *args)
+{
+ clib_bihash_kv_12_4_t *v = va_arg (*args, clib_bihash_kv_12_4_t *);
+
+ s = format (s, "key %u %u %u value %u", v->key[0], v->key[1], v->key[2],
+ v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_12_4 (u32 *a, u32 *b)
+{
+#if defined(CLIB_HAVE_VEC128)
+ u32x4 v = (*(u32x4u *) a) ^ (*(u32x4u *) b);
+ v[3] = 0;
+ return u32x4_is_all_zero (v);
+#else
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
+#endif
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_12_4_h__ */
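
The new 12_4 variant (12-byte key, 4-byte value) is instantiated from bihash_template like the other widths. A minimal usage sketch under the usual template naming convention (sizes are arbitrary; as with the other variants, the assumption is that some .c file includes bihash_template.c to emit the _12_4 functions):

    #include <vppinfra/bihash_12_4.h>
    #include <vppinfra/bihash_template.c>

    static void
    bihash_12_4_example (void)
    {
      clib_bihash_12_4_t h;
      clib_bihash_kv_12_4_t kv, result;

      clib_bihash_init_12_4 (&h, "example", 1024 /* nbuckets */,
                             1ULL << 20 /* memory_size */);

      kv.key[0] = 1; kv.key[1] = 2; kv.key[2] = 3;
      kv.value = 42;
      clib_bihash_add_del_12_4 (&h, &kv, 1 /* is_add */);

      if (clib_bihash_search_12_4 (&h, &kv, &result) == 0)
        ASSERT (result.value == 42);

      clib_bihash_free_12_4 (&h);
    }

Note that 0xFEEDFACE is reserved as the free-slot sentinel in the value field (see clib_bihash_mark_free_12_4 above), so it cannot be stored as a live value.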
diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h
index 6b116bcf3e4..67aa678efa9 100644
--- a/src/vppinfra/bihash_16_8.h
+++ b/src/vppinfra/bihash_16_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_16_8_t;
+static inline void
+clib_bihash_mark_free_16_8 (clib_bihash_kv_16_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
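
This hunk is the first of a series applying the same change to every fixed-size bihash header: a free slot used to be recognized by key and value both being all-ones (the template memset free pages to 0xff), which made all-ones keys unusable. Each variant now gets a clib_bihash_mark_free_* hook that writes a sentinel into the value field only, and clib_bihash_is_free_* checks just that sentinel. In sketch form, the template-side contract becomes:

    /* when a slot is freed or a page is initialised: */
    clib_bihash_mark_free_16_8 (&v->kvp[i]);

    /* before comparing keys on the search/add/delete paths: */
    if (clib_bihash_is_free_16_8 (&v->kvp[i]))
      continue; /* slot is empty; its key bytes are meaningless */

The corollary is that the sentinel (0xFEEDFACE8BADF00D) must never be stored as a real value; bihash_template.c below adds an ASSERT to that effect on the add path.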
diff --git a/src/vppinfra/bihash_16_8_32.h b/src/vppinfra/bihash_16_8_32.h
index 9453f88ace7..d899253302c 100644
--- a/src/vppinfra/bihash_16_8_32.h
+++ b/src/vppinfra/bihash_16_8_32.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_16_8_32_t;
+static inline void
+clib_bihash_mark_free_16_8_32 (clib_bihash_kv_16_8_32_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_16_8_32 (clib_bihash_kv_16_8_32_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_24_16.h b/src/vppinfra/bihash_24_16.h
index 4e979b49410..b421ab12edc 100644
--- a/src/vppinfra/bihash_24_16.h
+++ b/src/vppinfra/bihash_24_16.h
@@ -43,11 +43,16 @@ typedef struct
u64 value[2];
} clib_bihash_kv_24_16_t;
+static inline void
+clib_bihash_mark_free_24_16 (clib_bihash_kv_24_16_t *v)
+{
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_24_16 (const clib_bihash_kv_24_16_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value[0] == ~0ULL && v->value[1] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_24_16 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x7) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x7),
+ u64x8_mask_load_zero (b, 0x7));
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
u64x2 v = { a[2] ^ b[2], 0 };
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
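
The AVX-512 key-compare changes (here and in the other widths below) swap the old "full 64-byte unaligned load, XOR, mask the result" sequence for masked loads that never read the lanes beyond the key. Functionally equivalent, but it avoids touching bytes past the end of the kv pair, which matters for page-boundary safety and address sanitizers. In raw intrinsics the new pattern corresponds roughly to the following sketch of what the u64x8_mask_load_zero wrapper expands to (requires AVX-512F):

    #include <immintrin.h>

    /* compare a 24-byte key (3 u64 lanes) without loading lanes 3..7 */
    static inline int
    key_equal_24 (const u64 *a, const u64 *b)
    {
      __m512i va = _mm512_maskz_loadu_epi64 (0x7, a);
      __m512i vb = _mm512_maskz_loadu_epi64 (0x7, b);
      /* masked-off lanes are zeroed in both operands,
         so a full 8-lane compare is safe */
      return _mm512_cmpneq_epu64_mask (va, vb) == 0;
    }
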
diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h
index 2d667ad9aa3..14e5225ccfd 100644
--- a/src/vppinfra/bihash_24_8.h
+++ b/src/vppinfra/bihash_24_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_24_8_t;
+static inline void
+clib_bihash_mark_free_24_8 (clib_bihash_kv_24_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_24_8 (const clib_bihash_kv_24_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_24_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x7) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x7),
+ u64x8_mask_load_zero (b, 0x7));
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
u64x2 v = { a[2] ^ b[2], 0 };
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
diff --git a/src/vppinfra/bihash_32_8.h b/src/vppinfra/bihash_32_8.h
index 0935fcce184..8139a0eab62 100644
--- a/src/vppinfra/bihash_32_8.h
+++ b/src/vppinfra/bihash_32_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_32_8_t;
+static inline void
+clib_bihash_mark_free_32_8 (clib_bihash_kv_32_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_32_8 (const clib_bihash_kv_32_8_t *v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_32_8 (u64 *a, u64 *b)
{
#if defined(CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0xf) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0xf),
+ u64x8_mask_load_zero (b, 0xf));
#elif defined(CLIB_HAVE_VEC256)
u64x4 v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
return u64x4_is_all_zero (v);
diff --git a/src/vppinfra/bihash_40_8.h b/src/vppinfra/bihash_40_8.h
index 1fb344fdeeb..27207a3a69c 100644
--- a/src/vppinfra/bihash_40_8.h
+++ b/src/vppinfra/bihash_40_8.h
@@ -44,11 +44,16 @@ typedef struct
u64 value;
} clib_bihash_kv_40_8_t;
+static inline void
+clib_bihash_mark_free_40_8 (clib_bihash_kv_40_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -78,9 +83,8 @@ static inline int
clib_bihash_key_compare_40_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v;
- v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x1f) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x1f),
+ u64x8_mask_load_zero (b, 0x1f));
#elif defined (CLIB_HAVE_VEC256)
u64x4 v = { a[4] ^ b[4], 0, 0, 0 };
v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h
index 54fd7090e81..dbc92c3df1d 100644
--- a/src/vppinfra/bihash_48_8.h
+++ b/src/vppinfra/bihash_48_8.h
@@ -42,11 +42,16 @@ typedef struct
u64 value;
} clib_bihash_kv_48_8_t;
+static inline void
+clib_bihash_mark_free_48_8 (clib_bihash_kv_48_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -78,8 +83,8 @@ static inline int
clib_bihash_key_compare_48_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x3f) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x3f),
+ u64x8_mask_load_zero (b, 0x3f));
#elif defined (CLIB_HAVE_VEC256)
u64x4 v = { 0 };
v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^
diff --git a/src/vppinfra/bihash_8_16.h b/src/vppinfra/bihash_8_16.h
index b42b32c33d2..36ddda7149b 100644
--- a/src/vppinfra/bihash_8_16.h
+++ b/src/vppinfra/bihash_8_16.h
@@ -44,13 +44,19 @@ typedef struct
u64 value[2]; /**< the value */
} clib_bihash_kv_8_16_t;
+static inline void
+clib_bihash_mark_free_8_16 (clib_bihash_kv_8_16_t *v)
+{
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_16_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_16 (clib_bihash_kv_8_16_t * v)
{
- if (v->key == ~0ULL && v->value[0] == ~0ULL && v->value[1] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -80,8 +86,7 @@ format_bihash_kvp_8_16 (u8 * s, va_list * args)
clib_bihash_kv_8_16_t *v = va_arg (*args, clib_bihash_kv_8_16_t *);
s =
- format (s, "key %llu value [%ll,%llx]u", v->key, v->value[0],
- v->value[1]);
+ format (s, "key %llx value [%llx,%llx]", v->key, v->value[0], v->value[1]);
return s;
}
diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h
index 2fdd2ed7aef..2471871fc81 100644
--- a/src/vppinfra/bihash_8_8.h
+++ b/src/vppinfra/bihash_8_8.h
@@ -44,13 +44,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_8_8_t;
+static inline void
+clib_bihash_mark_free_8_8 (clib_bihash_kv_8_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_8 (clib_bihash_kv_8_8_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_8_8_stats.h b/src/vppinfra/bihash_8_8_stats.h
index 2237a0d624f..14702dfd782 100644
--- a/src/vppinfra/bihash_8_8_stats.h
+++ b/src/vppinfra/bihash_8_8_stats.h
@@ -45,13 +45,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_8_8_stats_t;
+static inline void
+clib_bihash_mark_free_8_8_stats (clib_bihash_kv_8_8_stats_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_8_stats (clib_bihash_kv_8_8_stats_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_doc.h b/src/vppinfra/bihash_doc.h
index 7c7e5179961..f6d32ce0b56 100644
--- a/src/vppinfra/bihash_doc.h
+++ b/src/vppinfra/bihash_doc.h
@@ -90,83 +90,172 @@ static inline void *clib_bihash_get_value (clib_bihash * h, uword offset);
/** Get clib mheap offset given a pointer */
static inline uword clib_bihash_get_offset (clib_bihash * h, void *v);
-/** initialize a bounded index extensible hash table
-
- @param h - the bi-hash table to initialize
- @param name - name of the hash table
- @param nbuckets - the number of buckets, will be rounded up to
-a power of two
- @param memory_size - clib mheap size, in bytes
-*/
-
+/**
+ * initialize a bounded index extensible hash table
+ *
+ * @param h - the bi-hash table to initialize
+ * @param name - name of the hash table
+ * @param nbuckets - the number of buckets, will be rounded up to
+ * a power of two
+ * @param memory_size - clib mheap size, in bytes
+ */
void clib_bihash_init
(clib_bihash * h, char *name, u32 nbuckets, uword memory_size);
-/** Destroy a bounded index extensible hash table
- @param h - the bi-hash table to free
-*/
+/**
+ * initialize a bounded index extensible hash table with arguments passed as
+ * a struct
+ *
+ * @param a - initialization parameters
+ * h - the bi-hash table to initialize
+ * name - name of the hash table
+ * nbuckets - the number of buckets, will be rounded up to a power of two
+ * memory_size - clib mheap size, in bytes
+ * kvp_fmt_fn - format function for the bihash kv pairs
+ * instantiate_immediately - allocate memory right away
+ * dont_add_to_all_bihash_list - don't mention in 'show bihash'
+ */
+void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a);
-void clib_bihash_free (clib_bihash * h);
+/**
+ * Set the formating function for the bihash
+ *
+ * @param h - the bi-hash table
+ * @param kvp_fmt_fn - the format function
+ */
+void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
+ format_function_t *kvp_fmt_fn);
-/** Add or delete a (key,value) pair from a bi-hash table
+/**
+ * Destroy a bounded index extensible hash table
+ *
+ * @param h - the bi-hash table to free
+ */
+void clib_bihash_free (clib_bihash *h);
- @param h - the bi-hash table to search
- @param add_v - the (key,value) pair to add
- @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
- @returns 0 on success, < 0 on error
- @note This function will replace an existing (key,value) pair if the
- new key matches an existing key
-*/
+/**
+ * Add or delete a (key,value) pair from a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
int clib_bihash_add_del (clib_bihash * h, clib_bihash_kv * add_v, int is_add);
+/**
+ * Add or delete a (key,value) pair from a bi-hash table, using a pre-computed
+ * hash
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param hash - the precomputed hash of the key
+ * @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add);
-/** Search a bi-hash table, use supplied hash code
+/**
+ * Add a (key,value) pair to a bi-hash table, trying to free stale entries
+ * on collision using the supplied filter.
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param is_stale_cb - callback receiving a kv pair, returning 1 if the kv is
+ * stale and can be overwritten. This will be called when adding a kv to a
+ * full page, before trying to split & rehash its bucket.
+ * @param arg - opaque argument passed to is_stale_cb
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_or_overwrite_stale) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
- @param h - the bi-hash table to search
- @param hash - the hash code
- @param in_out_kv - (key,value) pair containing the search key
- @returns 0 on success (with in_out_kv set), < 0 on error
-*/
-int clib_bihash_search_inline_with_hash
- (clib_bihash * h, u64 hash, clib_bihash_kv * in_out_kv);
+/**
+ * Add a (key,value) pair to a bi-hash table, calling a callback on overwrite
+ * with the bucket lock held.
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param overwrite_cb - callback called when overwriting a key, allowing
+ * you to clean up the value with the bucket lock held.
+ * @param arg - opaque argument passed to overwrite_cb
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
-/** Search a bi-hash table
+/**
+ * Tells whether the bihash was initialised (i.e. memory allocated by the
+ * first add)
+ *
+ * @param h - the bi-hash table to check
+ */
+int BV (clib_bihash_is_initialised) (const BVT (clib_bihash) * h);
- @param h - the bi-hash table to search
- @param in_out_kv - (key,value) pair containing the search key
- @returns 0 on success (with in_out_kv set), < 0 on error
-*/
-int clib_bihash_search_inline (clib_bihash * h, clib_bihash_kv * in_out_kv);
+/**
+ * Search a bi-hash table, use supplied hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @param in_out_kv - (key,value) pair containing the search key
+ * @returns 0 on success (with in_out_kv set), < 0 on error
+ */
+int clib_bihash_search_inline_with_hash (clib_bihash *h, u64 hash,
+ clib_bihash_kv *in_out_kv);
-/** Prefetch a bi-hash bucket given a hash code
+/**
+ * Search a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param in_out_kv - (key,value) pair containing the search key
+ * @returns 0 on success (with in_out_kv set), < 0 on error
+ */
+int clib_bihash_search_inline (clib_bihash *h, clib_bihash_kv *in_out_kv);
- @param h - the bi-hash table to search
- @param hash - the hash code
- @note see also clib_bihash_hash to compute the code
-*/
+/**
+ * Prefetch a bi-hash bucket given a hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @note see also clib_bihash_hash to compute the code
+ */
void clib_bihash_prefetch_bucket (clib_bihash * h, u64 hash);
-/** Prefetch bi-hash (key,value) data given a hash code
-
- @param h - the bi-hash table to search
- @param hash - the hash code
- @note assumes that the bucket has been prefetched, see
- clib_bihash_prefetch_bucket
-*/
+/**
+ * Prefetch bi-hash (key,value) data given a hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @note assumes that the bucket has been prefetched, see
+ * clib_bihash_prefetch_bucket
+ */
void clib_bihash_prefetch_data (clib_bihash * h, u64 hash);
-/** Search a bi-hash table
-
- @param h - the bi-hash table to search
- @param search_key - (key,value) pair containing the search key
- @param valuep - (key,value) set to search result
- @returns 0 on success (with valuep set), < 0 on error
- @note used in situations where key modification is not desired
-*/
+/**
+ * Search a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param search_key - (key,value) pair containing the search key
+ * @param valuep - (key,value) set to search result
+ * @returns 0 on success (with valuep set), < 0 on error
+ * @note used in situations where key modification is not desired
+ */
int clib_bihash_search_inline_2
(clib_bihash * h, clib_bihash_kv * search_key, clib_bihash_kv * valuep);
-/* Calback function for walking a bihash table
+/**
+ * Callback function for walking a bihash table
*
* @param kv - KV pair visited
* @param ctx - Context passed to the walk
@@ -175,13 +264,14 @@ int clib_bihash_search_inline_2
typedef int (*clib_bihash_foreach_key_value_pair_cb) (clib_bihash_kv * kv,
void *ctx);
-/** Visit active (key,value) pairs in a bi-hash table
-
- @param h - the bi-hash table to search
- @param callback - function to call with each active (key,value) pair
- @param arg - arbitrary second argument passed to the callback function
- First argument is the (key,value) pair to visit
-*/
+/**
+ * Visit active (key,value) pairs in a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param callback - function to call with each active (key,value) pair
+ * @param arg - arbitrary second argument passed to the callback function
+ * First argument is the (key,value) pair to visit
+ */
void clib_bihash_foreach_key_value_pair (clib_bihash * h,
clib_bihash_foreach_key_value_pair_cb
* callback, void *arg);
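
As a concrete illustration of the stale-entry API documented above, here is a hedged sketch of an expiring session table built on the 8_8 variant (the timestamp-in-value scheme and all names other than the bihash calls are invented for the example):

    typedef struct { u64 now; } expiry_ctx_t;

    static int
    session_is_stale (clib_bihash_kv_8_8_t *kv, void *arg)
    {
      expiry_ctx_t *ctx = arg;
      /* in this example the value holds the session's expiry time */
      return kv->value < ctx->now;
    }

    /* on insert, reuse an expired slot instead of splitting the bucket */
    expiry_ctx_t ctx = { .now = current_time };
    clib_bihash_add_or_overwrite_stale_8_8 (&h, &kv,
                                            session_is_stale, &ctx);
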
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index ddaccbdb126..d488b1a659c 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -106,8 +106,10 @@ static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
void *base, *rv;
uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h);
int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;
+#if __linux__
int mmap_flags_huge = (mmap_flags | MAP_HUGETLB | MAP_LOCKED |
BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT);
+#endif /* __linux__ */
/* new allocation is 25% of existing one */
if (alloc_arena_mapped (h) >> 2 > alloc)
@@ -118,7 +120,11 @@ static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h));
+#if __linux__
rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0);
+#elif __FreeBSD__
+ rv = MAP_FAILED;
+#endif /* __linux__ */
/* fallback - maybe we are still able to allocate normal pages */
if (rv == MAP_FAILED || mlock (base, alloc) != 0)
@@ -165,19 +171,23 @@ static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
if (BIHASH_KVP_AT_BUCKET_LEVEL)
{
- int i;
+ int i, j;
BVT (clib_bihash_bucket) * b;
b = h->buckets;
for (i = 0; i < h->nbuckets; i++)
{
+ BVT (clib_bihash_kv) * v;
b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1));
b->refcnt = 1;
/* Mark all elements free */
- clib_memset_u8 ((b + 1), 0xff, BIHASH_KVP_PER_PAGE *
- sizeof (BVT (clib_bihash_kv)));
-
+ v = (void *) (b + 1);
+ for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
/* Compute next bucket start address */
b = (void *) (((uword) b) + sizeof (*b) +
(BIHASH_KVP_PER_PAGE *
@@ -201,6 +211,7 @@ void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
h->log2_nbuckets = max_log2 (a->nbuckets);
h->memory_size = BIHASH_USE_HEAP ? 0 : a->memory_size;
h->instantiated = 0;
+ h->dont_add_to_all_bihash_list = a->dont_add_to_all_bihash_list;
h->fmt_fn = BV (format_bihash);
h->kvp_fmt_fn = a->kvp_fmt_fn;
@@ -425,6 +436,7 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h)
vec_free (h->working_copies);
vec_free (h->working_copy_lengths);
+ clib_mem_free ((void *) h->alloc_lock);
#if BIHASH_32_64_SVM == 0
vec_free (h->freelists);
#else
@@ -435,6 +447,11 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h)
clib_mem_vm_free ((void *) (uword) (alloc_arena (h)),
alloc_arena_size (h));
never_initialized:
+ if (h->dont_add_to_all_bihash_list)
+ {
+ clib_memset_u8 (h, 0, sizeof (*h));
+ return;
+ }
clib_memset_u8 (h, 0, sizeof (*h));
for (i = 0; i < vec_len (clib_all_bihashes); i++)
{
@@ -452,6 +469,7 @@ static
BVT (clib_bihash_value) *
BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
{
+ int i;
BVT (clib_bihash_value) * rv = 0;
ASSERT (h->alloc_lock[0]);
@@ -471,12 +489,15 @@ BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
initialize:
ASSERT (rv);
- /*
- * Latest gcc complains that the length arg is zero
- * if we replace (1<<log2_pages) with vec_len(rv).
- * No clue.
- */
- clib_memset_u8 (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
+
+ BVT (clib_bihash_kv) * v;
+ v = (BVT (clib_bihash_kv) *) rv;
+
+ for (i = 0; i < BIHASH_KVP_PER_PAGE * (1 << log2_pages); i++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
return rv;
}
@@ -665,9 +686,10 @@ BV (split_and_rehash_linear)
return new_values;
}
-static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
- (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, u64 hash, int is_add,
- int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
+static_always_inline int BV (clib_bihash_add_del_inline_with_hash) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, u64 hash, int is_add,
+ int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *is_stale_arg,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *overwrite_arg)
{
BVT (clib_bihash_bucket) * b, tmp_b;
BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
@@ -678,12 +700,10 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
int mark_bucket_linear;
int resplit_once;
- /* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
- /* *INDENT-ON* */
#if BIHASH_LAZY_INSTANTIATE
/*
@@ -705,6 +725,12 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
ASSERT (h->instantiated != 0);
#endif
+ /*
+ * Debug image: make sure that an item being added doesn't accidentally
+ * look like a free item.
+ */
+ ASSERT ((is_add && BV (clib_bihash_is_free) (add_v)) == 0);
+
b = BV (clib_bihash_get_bucket) (h, hash);
BV (clib_bihash_lock_bucket) (b);
@@ -761,6 +787,8 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
*/
for (i = 0; i < limit; i++)
{
+ if (BV (clib_bihash_is_free) (&(v->kvp[i])))
+ continue;
if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
{
/* Add but do not overwrite? */
@@ -769,7 +797,8 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
BV (clib_bihash_unlock_bucket) (b);
return (-2);
}
-
+ if (overwrite_cb)
+ overwrite_cb (&(v->kvp[i]), overwrite_arg);
clib_memcpy_fast (&(v->kvp[i].value),
&add_v->value, sizeof (add_v->value));
BV (clib_bihash_unlock_bucket) (b);
@@ -805,7 +834,7 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
{
for (i = 0; i < limit; i++)
{
- if (is_stale_cb (&(v->kvp[i]), arg))
+ if (is_stale_cb (&(v->kvp[i]), is_stale_arg))
{
clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
CLIB_MEMORY_STORE_BARRIER ();
@@ -821,10 +850,13 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
{
for (i = 0; i < limit; i++)
{
+ /* no sense even looking at this one */
+ if (BV (clib_bihash_is_free) (&(v->kvp[i])))
+ continue;
/* Found the key? Kill it... */
if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
{
- clib_memset_u8 (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
+ BV (clib_bihash_mark_free) (&(v->kvp[i]));
/* Is the bucket empty? */
if (PREDICT_TRUE (b->refcnt > 1))
{
@@ -839,8 +871,13 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
b->linear_search = 0;
b->log2_pages = 0;
/* Clean up the bucket-level kvp array */
- clib_memset_u8 ((b + 1), 0xff, BIHASH_KVP_PER_PAGE *
- sizeof (BVT (clib_bihash_kv)));
+ BVT (clib_bihash_kv) *v = (void *) (b + 1);
+ int j;
+ for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
CLIB_MEMORY_STORE_BARRIER ();
BV (clib_bihash_unlock_bucket) (b);
BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
@@ -987,7 +1024,15 @@ static_always_inline int BV (clib_bihash_add_del_inline)
{
u64 hash = BV (clib_bihash_hash) (add_v);
return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, is_add,
- is_stale_cb, arg);
+ is_stale_cb, arg, 0, 0);
+}
+
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add)
+{
+ return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, is_add, 0,
+ 0, 0, 0);
}
int BV (clib_bihash_add_del)
@@ -1003,6 +1048,15 @@ int BV (clib_bihash_add_or_overwrite_stale)
return BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
}
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
+{
+ u64 hash = BV (clib_bihash_hash) (add_v);
+ return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, 1, 0, 0,
+ overwrite_cb, arg);
+}
+
int BV (clib_bihash_search)
(BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
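
The new clib_bihash_add_with_overwrite_cb entry point covers the case where the value being replaced owns a resource that must be released while the bucket lock is still held, so no reader can observe a half-torn-down value. A minimal sketch (8_8 names again; the free-list bookkeeping is hypothetical):

    static u32 *recycled_indices; /* example free list */

    static void
    recycle_old_value (clib_bihash_kv_8_8_t *kv, void *arg)
    {
      /* runs under the bucket lock, just before the value is
         overwritten: stash the old pool index for later reuse */
      vec_add1 (recycled_indices, (u32) kv->value);
    }

    /* ... */
    clib_bihash_add_with_overwrite_cb_8_8 (&h, &kv,
                                           recycle_old_value, 0);
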
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index da2f684b685..8f5879b4634 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -99,7 +99,6 @@ typedef struct
STATIC_ASSERT_SIZEOF (BVT (clib_bihash_bucket), sizeof (u64));
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* Backing store allocation. Since bihash manages its own
@@ -118,7 +117,6 @@ typedef CLIB_PACKED (struct {
volatile u32 ready;
u64 pad[1];
}) BVT (clib_bihash_shared_header);
-/* *INDENT-ON* */
STATIC_ASSERT_SIZEOF (BVT (clib_bihash_shared_header), 8 * sizeof (u64));
@@ -170,6 +168,7 @@ BVS (clib_bihash)
u64 alloc_arena; /* Base of the allocation arena */
volatile u8 instantiated;
+ u8 dont_add_to_all_bihash_list;
/**
* A custom format function to print the Key and Value of bihash_key instead of default hexdump
@@ -281,9 +280,7 @@ static inline void BV (clib_bihash_alloc_unlock) (BVT (clib_bihash) * h)
static inline void BV (clib_bihash_lock_bucket) (BVT (clib_bihash_bucket) * b)
{
- /* *INDENT-OFF* */
BVT (clib_bihash_bucket) mask = { .lock = 1 };
- /* *INDENT-ON* */
u64 old;
try_again:
@@ -355,12 +352,19 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h);
int BV (clib_bihash_add_del) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * add_v, int is_add);
+
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add);
int BV (clib_bihash_add_or_overwrite_stale) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * add_v,
int (*is_stale_cb) (BVT
(clib_bihash_kv)
*, void *),
void *arg);
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
int BV (clib_bihash_search) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * search_v,
BVT (clib_bihash_kv) * return_v);
@@ -402,16 +406,15 @@ BV (clib_bihash_get_bucket) (BVT (clib_bihash) * h, u64 hash)
static inline int BV (clib_bihash_search_inline_with_hash)
(BVT (clib_bihash) * h, u64 hash, BVT (clib_bihash_kv) * key_result)
{
+ BVT (clib_bihash_kv) rv;
BVT (clib_bihash_value) * v;
BVT (clib_bihash_bucket) * b;
int i, limit;
- /* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
- /* *INDENT-ON* */
#if BIHASH_LAZY_INSTANTIATE
if (PREDICT_FALSE (h->instantiated == 0))
@@ -447,7 +450,10 @@ static inline int BV (clib_bihash_search_inline_with_hash)
{
if (BV (clib_bihash_key_compare) (v->kvp[i].key, key_result->key))
{
- *key_result = v->kvp[i];
+ rv = v->kvp[i];
+ if (BV (clib_bihash_is_free) (&rv))
+ return -1;
+ *key_result = rv;
return 0;
}
}
@@ -501,16 +507,15 @@ static inline int BV (clib_bihash_search_inline_2_with_hash)
(BVT (clib_bihash) * h,
u64 hash, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
{
+ BVT (clib_bihash_kv) rv;
BVT (clib_bihash_value) * v;
BVT (clib_bihash_bucket) * b;
int i, limit;
-/* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
-/* *INDENT-ON* */
ASSERT (valuep);
@@ -548,7 +553,10 @@ static inline int BV (clib_bihash_search_inline_2_with_hash)
{
if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
{
- *valuep = v->kvp[i];
+ rv = v->kvp[i];
+ if (BV (clib_bihash_is_free) (&rv))
+ return -1;
+ *valuep = rv;
return 0;
}
}
diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h
index 15c6d8cebff..822f1bcc51f 100644
--- a/src/vppinfra/bihash_vec8_8.h
+++ b/src/vppinfra/bihash_vec8_8.h
@@ -42,13 +42,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_vec8_8_t;
+static inline void
+clib_bihash_mark_free_vec8_8 (clib_bihash_kv_vec8_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_vec8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_vec8_8 (clib_bihash_kv_vec8_8_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h
index 92205bfc8e8..4ab7bcf7a7c 100644
--- a/src/vppinfra/bitmap.h
+++ b/src/vppinfra/bitmap.h
@@ -45,7 +45,6 @@
#include <vppinfra/vec.h>
#include <vppinfra/random.h>
#include <vppinfra/error.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
typedef uword clib_bitmap_t;
@@ -115,6 +114,24 @@ clib_bitmap_is_equal (uword * a, uword * b)
#define clib_bitmap_validate(v,n_bits) \
clib_bitmap_vec_validate ((v), ((n_bits) - 1) / BITS (uword))
+/** Copy a bitmap
+ @param dst - copy to
+ @param src - copy from
+*/
+static_always_inline void
+clib_bitmap_copy (clib_bitmap_t **dst, const clib_bitmap_t *src)
+{
+ if (vec_len (src))
+ {
+ clib_bitmap_vec_validate (*dst, vec_len (src) - 1);
+ vec_copy (*dst, src);
+ }
+ else
+ {
+ vec_reset_length (*dst);
+ }
+}
+
/* low-level routine to remove trailing zeros from a bitmap */
always_inline uword *
_clib_bitmap_remove_trailing_zeros (uword * a)
@@ -125,7 +142,7 @@ _clib_bitmap_remove_trailing_zeros (uword * a)
for (i = _vec_len (a) - 1; i >= 0; i--)
if (a[i] != 0)
break;
- _vec_len (a) = i + 1;
+ vec_set_len (a, i + 1);
}
return a;
}
@@ -161,7 +178,7 @@ clib_bitmap_set_no_check (uword * a, uword i, uword new_value)
@param ai - pointer to the bitmap
@param i - the bit position to interrogate
@param value - new value for the bit
- @returns the old value of the bit
+ @returns the (possibly reallocated) bitmap object pointer
*/
always_inline uword *
clib_bitmap_set (uword * ai, uword i, uword value)
@@ -188,6 +205,12 @@ clib_bitmap_set (uword * ai, uword i, uword value)
return ai;
}
+always_inline u8
+clib_bitmap_will_expand (uword *ai, uword i)
+{
+ return (i / BITS (ai[0])) >= vec_max_len (ai);
+}
+
/** Gets the ith bit value from a bitmap
@param ai - pointer to the bitmap
@param i - the bit position to interrogate
@@ -222,7 +245,7 @@ clib_bitmap_get_multiple_no_check (uword * ai, uword i, uword n_bits)
uword i0 = i / BITS (ai[0]);
uword i1 = i % BITS (ai[0]);
ASSERT (i1 + n_bits <= BITS (uword));
- return 0 != ((ai[i0] >> i1) & pow2_mask (n_bits));
+ return ((ai[i0] >> i1) & pow2_mask (n_bits));
}
/** Gets the ith through ith + n_bits bit values from a bitmap
@@ -282,7 +305,7 @@ clib_bitmap_set_multiple (uword * bitmap, uword i, uword value, uword n_bits)
i1 = i % BITS (bitmap[0]);
/* Allocate bitmap. */
- clib_bitmap_vec_validate (bitmap, (i + n_bits) / BITS (bitmap[0]));
+ clib_bitmap_vec_validate (bitmap, (i + n_bits - 1) / BITS (bitmap[0]));
l = vec_len (bitmap);
m = ~0;
@@ -316,14 +339,15 @@ clib_bitmap_set_multiple (uword * bitmap, uword i, uword value, uword n_bits)
always_inline uword *
clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
{
- uword a0, a1, b0;
+ uword a0, a1, b0, b1;
uword i_end, mask;
a0 = i / BITS (bitmap[0]);
a1 = i % BITS (bitmap[0]);
- i_end = i + n_bits;
+ i_end = i + n_bits - 1;
b0 = i_end / BITS (bitmap[0]);
+ b1 = i_end % BITS (bitmap[0]);
clib_bitmap_vec_validate (bitmap, b0);
@@ -341,8 +365,7 @@ clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
if (a0 == b0)
{
- word n_bits_left = n_bits - (BITS (bitmap[0]) - a1);
- mask = pow2_mask (n_bits_left);
+ mask = (uword) ~0 >> (BITS (bitmap[0]) - b1 - 1);
if (value)
bitmap[a0] |= mask;
else
@@ -495,37 +518,38 @@ always_inline uword *clib_bitmap_or (uword * ai, uword * bi);
always_inline uword *clib_bitmap_xor (uword * ai, uword * bi);
/* ALU function definition macro for functions taking two bitmaps. */
-#define _(name, body, check_zero) \
-always_inline uword * \
-clib_bitmap_##name (uword * ai, uword * bi) \
-{ \
- uword i, a, b, bi_len, n_trailing_zeros; \
- \
- n_trailing_zeros = 0; \
- bi_len = vec_len (bi); \
- if (bi_len > 0) \
- clib_bitmap_vec_validate (ai, bi_len - 1); \
- for (i = 0; i < vec_len (ai); i++) \
- { \
- a = ai[i]; \
- b = i < bi_len ? bi[i] : 0; \
- do { body; } while (0); \
- ai[i] = a; \
- if (check_zero) \
- n_trailing_zeros = a ? 0 : (n_trailing_zeros + 1); \
- } \
- if (check_zero) \
- _vec_len (ai) -= n_trailing_zeros; \
- return ai; \
-}
+#define _(name, body, check_zero) \
+ always_inline uword *clib_bitmap_##name (uword *ai, uword *bi) \
+ { \
+ uword i, a, b, bi_len, n_trailing_zeros; \
+ \
+ n_trailing_zeros = 0; \
+ bi_len = vec_len (bi); \
+ if (bi_len > 0) \
+ clib_bitmap_vec_validate (ai, bi_len - 1); \
+ for (i = 0; i < vec_len (ai); i++) \
+ { \
+ a = ai[i]; \
+ b = i < bi_len ? bi[i] : 0; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ ai[i] = a; \
+ if (check_zero) \
+ n_trailing_zeros = a ? 0 : (n_trailing_zeros + 1); \
+ } \
+ if (check_zero) \
+ vec_dec_len (ai, n_trailing_zeros); \
+ return ai; \
+ }
/* ALU functions: */
-/* *INDENT-OFF* */
_(and, a = a & b, 1)
_(andnot, a = a & ~b, 1)
_(or, a = a | b, 0)
_(xor, a = a ^ b, 1)
-/* *INDENT-ON* */
#undef _
/** Logical operator across two bitmaps which duplicates the first bitmap
@@ -564,12 +588,10 @@ always_inline uword *clib_bitmap_dup_xor (uword * ai, uword * bi);
clib_bitmap_dup_##name (uword * ai, uword * bi) \
{ return clib_bitmap_##name (clib_bitmap_dup (ai), bi); }
-/* *INDENT-OFF* */
_(and);
_(andnot);
_(or);
_(xor);
-/* *INDENT-ON* */
#undef _
/* ALU function definition macro for functions taking one bitmap and an
@@ -592,12 +614,10 @@ clib_bitmap_##name (uword * ai, uword i) \
}
/* ALU functions immediate: */
-/* *INDENT-OFF* */
_(andi, a = a & b, 1)
_(andnoti, a = a & ~b, 1)
_(ori, a = a | b, 0)
_(xori, a = a ^ b, 1)
-/* *INDENT-ON* */
#undef _
/* ALU function definition macro for functions taking one bitmap and an
@@ -618,13 +638,11 @@ clib_bitmap_##name##_notrim (uword * ai, uword i) \
}
/* ALU functions immediate: */
-/* *INDENT-OFF* */
_(andi, a = a & b)
_(andnoti, a = a & ~b)
_(ori, a = a | b)
_(xori, a = a ^ b)
#undef _
-/* *INDENT-ON* */
/** Return a random bitmap of the requested length
@param ai - pointer to the destination bitmap
@@ -716,8 +734,7 @@ clib_bitmap_next_clear (uword * ai, uword i)
return log2_first_set (t) + i0 * BITS (ai[0]);
}
- /* no clear bit left in bitmap, return bit just beyond bitmap */
- return (i0 * BITS (ai[0])) + 1;
+ return i0 * BITS (ai[0]);
}
return i;
}
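The new return value matters for a fully-set bitmap: with a single all-ones
uword, clib_bitmap_next_clear (ai, 0) now reports the first bit just past the
bitmap instead of overshooting by one. A sketch, assuming 64-bit uwords:

  uword *ai = clib_bitmap_set_multiple (0, 0, ~0, BITS (uword));
  uword i = clib_bitmap_next_clear (ai, 0);   /* i == 64; was 65 before */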
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index 17ad49ffb46..c1122f59ff6 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -38,18 +38,41 @@
#ifndef included_clib_bitops_h
#define included_clib_bitops_h
-#include <vppinfra/clib.h>
+#define SET_BIT(i)    (1 << (i))
+#define GET_BIT(n, i) (((n) >> (i)) & 1U)
+
+static_always_inline uword
+clear_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+ return uword_bits > 32 ? _blsr_u64 (x) : _blsr_u32 (x);
+#else
+ return x & (x - 1);
+#endif
+}
+
+static_always_inline uword
+get_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+ return uword_bits > 32 ? _blsi_u64 (x) : _blsi_u32 (x);
+#else
+ return x & -x;
+#endif
+}
+
+static_always_inline u8
+get_lowest_set_bit_index (uword x)
+{
+ return uword_bits > 32 ? __builtin_ctzll (x) : __builtin_ctz (x);
+}
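Illustrative values for the three helpers above (a sketch, not part of the
patch):

  uword x = 0xb4;                  /* 1011 0100 */
  get_lowest_set_bit (x);          /* 0x04 - isolates bit 2 */
  get_lowest_set_bit_index (x);    /* 2 */
  clear_lowest_set_bit (x);        /* 0xb0 */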
/* Population count from Hacker's Delight. */
always_inline uword
count_set_bits (uword x)
{
#ifdef __POPCNT__
-#if uword_bits == 64
- return __builtin_popcountll (x);
-#else
- return __builtin_popcount (x);
-#endif
+ return uword_bits > 32 ? __builtin_popcountll (x) : __builtin_popcount (x);
#else
#if uword_bits == 64
const uword c1 = 0x5555555555555555;
@@ -81,6 +104,15 @@ count_set_bits (uword x)
#endif
}
+#if uword_bits == 64
+#define count_leading_zeros(x) __builtin_clzll (x)
+#else
+#define count_leading_zeros(x) __builtin_clzl (x)
+#endif
+
+#define count_trailing_zeros(x) get_lowest_set_bit_index (x)
+#define log2_first_set(x) get_lowest_set_bit_index (x)
+
/* Based on "Hacker's Delight" code from GLS. */
typedef struct
{
@@ -163,19 +195,158 @@ next_with_same_number_of_set_bits (uword x)
return ripple | ones;
}
-#define foreach_set_bit(var,mask,body) \
-do { \
- uword _foreach_set_bit_m_##var = (mask); \
- uword _foreach_set_bit_f_##var; \
- while (_foreach_set_bit_m_##var != 0) \
- { \
- _foreach_set_bit_f_##var = first_set (_foreach_set_bit_m_##var); \
- _foreach_set_bit_m_##var ^= _foreach_set_bit_f_##var; \
- (var) = min_log2 (_foreach_set_bit_f_##var); \
- do { body; } while (0); \
- } \
-} while (0)
+#define foreach_set_bit_index(i, v) \
+ for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v)); \
+ _tmp; \
+ i = get_lowest_set_bit_index (_tmp = clear_lowest_set_bit (_tmp)))
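A minimal usage sketch of the new iterator; i receives each set-bit index in
ascending order:

  uword i, mask = 0x29;            /* bits 0, 3 and 5 set */
  foreach_set_bit_index (i, mask)
    printf ("%lu ", (unsigned long) i);   /* prints: 0 3 5 */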
+
+static_always_inline uword
+uword_bitmap_count_set_bits (uword *bmp, uword n_uwords)
+{
+ uword count = 0;
+ while (n_uwords--)
+ count += count_set_bits (bmp++[0]);
+ return count;
+}
+
+static_always_inline uword
+uword_bitmap_is_bit_set (uword *bmp, uword bit_index)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ return (bmp[0] >> bit_index) & 1;
+}
+
+static_always_inline void
+uword_bitmap_set_bits_at_index (uword *bmp, uword bit_index, uword n_bits)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ uword max_bits = uword_bits - bit_index;
+
+ if (n_bits < max_bits)
+ {
+ bmp[0] |= pow2_mask (n_bits) << bit_index;
+ return;
+ }
+
+ bmp++[0] |= pow2_mask (max_bits) << bit_index;
+ n_bits -= max_bits;
+
+ for (; n_bits >= uword_bits; bmp++, n_bits -= uword_bits)
+ bmp[0] = ~0ULL;
+
+ if (n_bits)
+ bmp[0] |= pow2_mask (n_bits);
+}
+
+static_always_inline void
+uword_bitmap_clear_bits_at_index (uword *bmp, uword bit_index, uword n_bits)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ uword max_bits = uword_bits - bit_index;
+
+ if (n_bits < max_bits)
+ {
+ bmp[0] &= ~(pow2_mask (n_bits) << bit_index);
+ return;
+ }
+
+ bmp++[0] &= ~(pow2_mask (max_bits) << bit_index);
+ n_bits -= max_bits;
+
+ for (; n_bits >= uword_bits; bmp++, n_bits -= uword_bits)
+ bmp[0] = 0ULL;
+
+ if (n_bits)
+ bmp[0] &= ~pow2_mask (n_bits);
+}
+
+static_always_inline int
+uword_bitmap_find_first_set (uword *bmp)
+{
+ uword *b = bmp;
+ while (b[0] == 0)
+ b++;
+
+ return (b - bmp) * uword_bits + get_lowest_set_bit_index (b[0]);
+}
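The uword_bitmap_* helpers operate on plain uword arrays rather than vppinfra
vectors, so the caller sizes the storage; note that
uword_bitmap_find_first_set assumes at least one bit is set. A sketch,
assuming 64-bit uwords:

  uword bmp[4] = {};
  uword_bitmap_set_bits_at_index (bmp, 60, 8);   /* spans bmp[0] and bmp[1] */
  /* uword_bitmap_count_set_bits (bmp, 4) == 8  */
  /* uword_bitmap_is_bit_set (bmp, 63) == 1     */
  /* uword_bitmap_find_first_set (bmp) == 60    */
  uword_bitmap_clear_bits_at_index (bmp, 60, 8); /* all zero again */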
+
+static_always_inline u32
+bit_extract_u32 (u32 v, u32 mask)
+{
+#ifdef __BMI2__
+ return _pext_u32 (v, mask);
+#else
+ u32 rv = 0;
+ u32 bit = 1;
+
+ while (mask)
+ {
+ u32 lowest_mask_bit = get_lowest_set_bit (mask);
+ mask ^= lowest_mask_bit;
+ rv |= (v & lowest_mask_bit) ? bit : 0;
+ bit <<= 1;
+ }
+
+ return rv;
+#endif
+}
+
+static_always_inline u64
+bit_extract_u64 (u64 v, u64 mask)
+{
+#ifdef __BMI2__
+ return _pext_u64 (v, mask);
+#else
+ u64 rv = 0;
+ u64 bit = 1;
+
+ while (mask)
+ {
+ u64 lowest_mask_bit = get_lowest_set_bit (mask);
+ mask ^= lowest_mask_bit;
+ rv |= (v & lowest_mask_bit) ? bit : 0;
+ bit <<= 1;
+ }
+
+ return rv;
+#endif
+}
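bit_extract_u32/u64 implement PEXT semantics: the bits of v selected by mask
are packed toward the least-significant end, in order. A worked example:

  /* v = 0xABCD, mask = 0x0F0F selects nibbles 0 and 2 of v;
     nibble 0 (0xD) lands in bits 0-3, nibble 2 (0xB) in bits 4-7 */
  u32 r = bit_extract_u32 (0xABCD, 0x0F0F);   /* r == 0xBD */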
+
+static_always_inline void
+u64_bit_set (u64 *p, u8 bit_index, u8 is_one)
+{
+ u64 val = *p;
+ val &= ~(1ULL << bit_index);
+  val |= (u64) is_one << bit_index;
+ *p = val;
+}
+
+static_always_inline void
+u32_bit_set (u32 *p, u8 bit_index, u8 is_one)
+{
+ u32 val = *p;
+ val &= ~(1U << bit_index);
+ val |= 1U << bit_index;
+ *p = val;
+}
+
+static_always_inline int
+u64_is_bit_set (u64 v, u8 bit_index)
+{
+ return (v & 1ULL << bit_index) != 0;
+}
+
+static_always_inline int
+u32_is_bit_set (u32 v, u8 bit_index)
+{
+ return (v & 1U << bit_index) != 0;
+}
+#else
+#warning "already included"
#endif /* included_clib_bitops_h */
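With the is_one argument applied in u64_bit_set/u32_bit_set above, the
read-modify-write helpers behave as follows (illustrative only):

  u64 flags = 0;
  u64_bit_set (&flags, 5, 1);      /* flags == 0x20 */
  u64_is_bit_set (flags, 5);       /* 1 */
  u64_bit_set (&flags, 5, 0);      /* flags == 0 */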
/*
diff --git a/src/vppinfra/byte_order.h b/src/vppinfra/byte_order.h
index 9beb4470634..7bc26002a2f 100644
--- a/src/vppinfra/byte_order.h
+++ b/src/vppinfra/byte_order.h
@@ -173,12 +173,10 @@ _(i64);
#undef _
/* Dummy endian swap functions for IEEE floating-point numbers */
-/* *INDENT-OFF* */
always_inline f64 clib_net_to_host_f64 (f64 x) { return x; }
always_inline f64 clib_host_to_net_f64 (f64 x) { return x; }
always_inline f32 clib_net_to_host_f32 (f32 x) { return x; }
always_inline f32 clib_host_to_net_f32 (f32 x) { return x; }
-/* *INDENT-ON* */
#endif /* included_clib_byte_order_h */
diff --git a/src/vppinfra/cJSON.c b/src/vppinfra/cJSON.c
index 5b26a4be4e1..24e0110ed08 100644
--- a/src/vppinfra/cJSON.c
+++ b/src/vppinfra/cJSON.c
@@ -20,6 +20,7 @@
THE SOFTWARE.
*/
/* clang-format off */
+
/* cJSON */
/* JSON parser in C. */
@@ -96,9 +97,9 @@ CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void)
return (const char*) (global_error.json + global_error.position);
}
-CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item)
+CJSON_PUBLIC (char *) cJSON_GetStringValue (const cJSON *const item)
{
- if (!cJSON_IsString(item))
+ if (!cJSON_IsString (item))
{
return NULL;
}
@@ -106,9 +107,9 @@ CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item)
return item->valuestring;
}
-CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item)
+CJSON_PUBLIC (double) cJSON_GetNumberValue (const cJSON *const item)
{
- if (!cJSON_IsNumber(item))
+ if (!cJSON_IsNumber (item))
{
return (double) NAN;
}
@@ -117,8 +118,9 @@ CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item)
}
/* This is a safeguard to prevent copy-pasters from using incompatible C and header files */
-#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || (CJSON_VERSION_PATCH != 14)
- #error cJSON.h and cJSON.c have different versions. Make sure that both have the same.
+#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || \
+ (CJSON_VERSION_PATCH != 17)
+#error cJSON.h and cJSON.c have different versions. Make sure that both have the same.
#endif
CJSON_PUBLIC(const char*) cJSON_Version(void)
@@ -157,7 +159,7 @@ typedef struct internal_hooks
{
void *(CJSON_CDECL *allocate)(size_t size);
void (CJSON_CDECL *deallocate)(void *pointer);
- void *(CJSON_CDECL *reallocate)(void *pointer, size_t size);
+ void *(CJSON_CDECL *reallocate) (void *pointer, size_t size);
} internal_hooks;
#if defined(_MSC_VER)
@@ -170,9 +172,10 @@ static void CJSON_CDECL internal_free(void *pointer)
{
free(pointer);
}
-static void * CJSON_CDECL internal_realloc(void *pointer, size_t size)
+static void *CJSON_CDECL
+internal_realloc (void *pointer, size_t size)
{
- return realloc(pointer, size);
+ return realloc (pointer, size);
}
#else
#define internal_malloc malloc
@@ -213,8 +216,8 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks)
/* Reset hooks */
global_hooks.allocate = malloc;
global_hooks.deallocate = free;
- global_hooks.reallocate = realloc;
- return;
+ global_hooks.reallocate = realloc;
+ return;
}
global_hooks.allocate = malloc;
@@ -229,12 +232,11 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks)
global_hooks.deallocate = hooks->free_fn;
}
- /* use realloc only if both free and malloc are used */
- global_hooks.reallocate = NULL;
- if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free))
- {
- global_hooks.reallocate = realloc;
- }
+ global_hooks.reallocate = realloc;
+ if (hooks->realloc_fn != NULL)
+ {
+ global_hooks.reallocate = hooks->realloc_fn;
+ }
}
/* Internal constructor. */
@@ -397,14 +399,22 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number)
return object->valuedouble = number;
}
+/* Note: when passing a NULL valuestring, cJSON_SetValuestring treats this as
+ * an error and returns NULL */
CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring)
{
char *copy = NULL;
/* if object's type is not cJSON_String or is cJSON_IsReference, it should not set valuestring */
- if (!(object->type & cJSON_String) || (object->type & cJSON_IsReference))
- {
- return NULL;
- }
+ if ((object == NULL) || !(object->type & cJSON_String) ||
+ (object->type & cJSON_IsReference))
+ {
+ return NULL;
+ }
+ /* return NULL if the object is corrupted or valuestring is NULL */
+ if (object->valuestring == NULL || valuestring == NULL)
+ {
+ return NULL;
+ }
if (strlen(valuestring) <= strlen(object->valuestring))
{
strcpy(object->valuestring, valuestring);
@@ -487,34 +497,34 @@ static unsigned char* ensure(printbuffer * const p, size_t needed)
}
if (p->hooks.reallocate != NULL)
- {
- /* reallocate with realloc if available */
- newbuffer = (unsigned char*)p->hooks.reallocate(p->buffer, newsize);
- if (newbuffer == NULL)
- {
- p->hooks.deallocate(p->buffer);
- p->length = 0;
- p->buffer = NULL;
-
- return NULL;
- }
- }
+ {
+ /* reallocate with realloc if available */
+ newbuffer = (unsigned char *) p->hooks.reallocate (p->buffer, newsize);
+ if (newbuffer == NULL)
+ {
+ p->hooks.deallocate (p->buffer);
+ p->length = 0;
+ p->buffer = NULL;
+
+ return NULL;
+ }
+ }
else
- {
- /* otherwise reallocate manually */
- newbuffer = (unsigned char*)p->hooks.allocate(newsize);
- if (!newbuffer)
- {
- p->hooks.deallocate(p->buffer);
- p->length = 0;
- p->buffer = NULL;
+ {
+ /* otherwise reallocate manually */
+ newbuffer = (unsigned char *) p->hooks.allocate (newsize);
+ if (!newbuffer)
+ {
+ p->hooks.deallocate (p->buffer);
+ p->length = 0;
+ p->buffer = NULL;
- return NULL;
- }
+ return NULL;
+ }
memcpy (newbuffer, p->buffer, p->offset + 1);
p->hooks.deallocate (p->buffer);
- }
+ }
p->length = newsize;
p->buffer = newbuffer;
@@ -562,6 +572,10 @@ static cJSON_bool print_number(const cJSON * const item, printbuffer * const out
{
length = sprintf((char*)number_buffer, "null");
}
+ else if (d == (double) item->valueint)
+ {
+ length = sprintf ((char *) number_buffer, "%d", item->valueint);
+ }
else
{
/* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */
@@ -1103,7 +1117,7 @@ CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer
}
buffer.content = (const unsigned char*)value;
- buffer.length = buffer_length;
+ buffer.length = buffer_length;
buffer.offset = 0;
buffer.hooks = global_hooks;
@@ -1208,11 +1222,13 @@ static unsigned char *print(const cJSON * const item, cJSON_bool format, const i
/* check if reallocate is available */
if (hooks->reallocate != NULL)
{
- printed = (unsigned char*) hooks->reallocate(buffer->buffer, buffer->offset + 1);
- if (printed == NULL) {
- goto fail;
- }
- buffer->buffer = NULL;
+ printed = (unsigned char *) hooks->reallocate (buffer->buffer,
+ buffer->offset + 1);
+ if (printed == NULL)
+ {
+ goto fail;
+ }
+ buffer->buffer = NULL;
}
else /* otherwise copy the JSON over to a new buffer */
{
@@ -1650,8 +1666,13 @@ static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_bu
current_item = new_item;
}
- /* parse the name of the child */
- input_buffer->offset++;
+ if (cannot_access_at_index (input_buffer, 1))
+ {
+ goto fail; /* nothing comes after the comma */
+ }
+
+ /* parse the name of the child */
+ input_buffer->offset++;
buffer_skip_whitespace(input_buffer);
if (!parse_string(current_item, input_buffer))
{
@@ -2260,10 +2281,10 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON
{
cJSON *after_inserted = NULL;
- if (which < 0)
- {
- return false;
- }
+ if (which < 0 || newitem == NULL)
+ {
+ return false;
+ }
after_inserted = get_array_item(array, (size_t)which);
if (after_inserted == NULL)
@@ -2271,6 +2292,12 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON
return add_item_to_array(array, newitem);
}
+ if (after_inserted != array->child && after_inserted->prev == NULL)
+ {
+ /* return false if after_inserted is a corrupted array item */
+ return false;
+ }
+
newitem->next = after_inserted;
newitem->prev = after_inserted->prev;
after_inserted->prev = newitem;
@@ -2287,7 +2314,8 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON
CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement)
{
- if ((parent == NULL) || (replacement == NULL) || (item == NULL))
+ if ((parent == NULL) || (parent->child == NULL) || (replacement == NULL) ||
+ (item == NULL))
{
return false;
}
@@ -2357,6 +2385,11 @@ static cJSON_bool replace_item_in_object(cJSON *object, const char *string, cJSO
cJSON_free(replacement->string);
}
replacement->string = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks);
+ if (replacement->string == NULL)
+ {
+ return false;
+ }
+
replacement->type &= ~cJSON_StringIsConst;
return cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement);
@@ -2631,9 +2664,9 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count)
for (i = 0; a && (i < (size_t) count); i++)
{
- n = cJSON_CreateNumber(numbers[i]);
- if(!n)
- {
+ n = cJSON_CreateNumber (numbers[i]);
+ if (!n)
+ {
cJSON_Delete(a);
return NULL;
}
@@ -2980,7 +3013,7 @@ CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item)
CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive)
{
- if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF)) || cJSON_IsInvalid(a))
+ if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF)))
{
return false;
}
@@ -3112,3 +3145,8 @@ CJSON_PUBLIC(void) cJSON_free(void *object)
{
global_hooks.deallocate(object);
}
+
+CJSON_PUBLIC (void *) cJSON_realloc (void *object, size_t size)
+{
+ return global_hooks.reallocate (object, size);
+}
diff --git a/src/vppinfra/cJSON.h b/src/vppinfra/cJSON.h
index e97e5f4cdc4..1c98dfac70e 100644
--- a/src/vppinfra/cJSON.h
+++ b/src/vppinfra/cJSON.h
@@ -81,7 +81,7 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ
/* project version */
#define CJSON_VERSION_MAJOR 1
#define CJSON_VERSION_MINOR 7
-#define CJSON_VERSION_PATCH 14
+#define CJSON_VERSION_PATCH 17
#include <stddef.h>
@@ -127,6 +127,7 @@ typedef struct cJSON_Hooks
/* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. */
void *(CJSON_CDECL *malloc_fn)(size_t sz);
void (CJSON_CDECL *free_fn)(void *ptr);
+ void *(CJSON_CDECL *realloc_fn) (void *ptr, size_t sz);
} cJSON_Hooks;
typedef int cJSON_bool;
@@ -254,9 +255,10 @@ CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse);
* case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */
CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive);
-/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') from strings.
- * The input pointer json cannot point to a read-only address area, such as a string constant,
- * but should point to a readable and writable adress area. */
+/* Minify a string, removing blank characters (such as ' ', '\t', '\r', '\n').
+ * The input pointer json cannot point to a read-only address area, such as
+ * a string constant, but should point to a readable and writable address
+ * area. */
CJSON_PUBLIC(void) cJSON_Minify(char *json);
/* Helper functions for creating and adding items to an object at the same time.
@@ -279,12 +281,21 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number);
/* Change the valuestring of a cJSON_String object, only takes effect when type of object is cJSON_String */
CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring);
+/* If the object is not a boolean type this does nothing and returns
+ * cJSON_Invalid; else it returns the new type */
+#define cJSON_SetBoolValue(object, boolValue) \
+ ((object != NULL && ((object)->type & (cJSON_False | cJSON_True))) ? \
+ (object)->type = ((object)->type & (~(cJSON_False | cJSON_True))) | \
+ ((boolValue) ? cJSON_True : cJSON_False) : \
+ cJSON_Invalid)
+
/* Macro for iterating over an array or object */
#define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? (array)->child : NULL; element != NULL; element = element->next)
/* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */
CJSON_PUBLIC(void *) cJSON_malloc(size_t size);
CJSON_PUBLIC(void) cJSON_free(void *object);
+CJSON_PUBLIC (void *) cJSON_realloc (void *object, size_t size);
#ifdef __cplusplus
}
diff --git a/src/vppinfra/cache.h b/src/vppinfra/cache.h
index 04f91e00061..13778a423fd 100644
--- a/src/vppinfra/cache.h
+++ b/src/vppinfra/cache.h
@@ -40,66 +40,88 @@
#include <vppinfra/error_bootstrap.h>
-/*
- * Allow CFLAGS to override the configured / deduced cache line size
- */
-#ifndef CLIB_LOG2_CACHE_LINE_BYTES
-
/* Default cache line size of 64 bytes. */
#ifndef CLIB_LOG2_CACHE_LINE_BYTES
#define CLIB_LOG2_CACHE_LINE_BYTES 6
#endif
-#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */
-
-#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9)
-#error Cache line size 512 bytes or greater
+/* How much data a prefetch instruction prefetches */
+#ifndef CLIB_LOG2_CACHE_PREFETCH_BYTES
+#define CLIB_LOG2_CACHE_PREFETCH_BYTES CLIB_LOG2_CACHE_LINE_BYTES
#endif
-#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
-#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES)))
-#define CLIB_CACHE_LINE_ROUND(x) ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
-
/* Default cache line fill buffers. */
#ifndef CLIB_N_PREFETCHES
#define CLIB_N_PREFETCHES 16
#endif
+#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
+#define CLIB_CACHE_PREFETCH_BYTES (1 << CLIB_LOG2_CACHE_PREFETCH_BYTES)
+#define CLIB_CACHE_LINE_ALIGN_MARK(mark) \
+ u8 mark[0] __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)))
+#define CLIB_CACHE_LINE_ROUND(x) \
+ ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
+
/* Read/write arguments to __builtin_prefetch. */
#define CLIB_PREFETCH_READ 0
#define CLIB_PREFETCH_LOAD 0 /* alias for read */
#define CLIB_PREFETCH_WRITE 1
#define CLIB_PREFETCH_STORE 1 /* alias for write */
-#define _CLIB_PREFETCH(n,size,type) \
- if ((size) > (n)*CLIB_CACHE_LINE_BYTES) \
- __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES, \
- CLIB_PREFETCH_##type, \
- /* locality */ 3);
-
-#define CLIB_PREFETCH(addr,size,type) \
-do { \
- void * _addr = (addr); \
- \
- ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES); \
- _CLIB_PREFETCH (0, size, type); \
- _CLIB_PREFETCH (1, size, type); \
- _CLIB_PREFETCH (2, size, type); \
- _CLIB_PREFETCH (3, size, type); \
-} while (0)
+/* locality arguments to __builtin_prefetch. */
+#define CLIB_PREFETCH_TO_STREAM 0 // NTA
+#define CLIB_PREFETCH_TO_L3 1 // T2
+#define CLIB_PREFETCH_TO_L2 2 // T1
+#define CLIB_PREFETCH_TO_L1 3 // T0
+
+#define _CLIB_TARGETED_PREFETCH(n, size, type, loc) \
+ if ((size) > (n) *CLIB_CACHE_PREFETCH_BYTES) \
+ __builtin_prefetch (_addr + (n) *CLIB_CACHE_PREFETCH_BYTES, \
+ CLIB_PREFETCH_##type, CLIB_PREFETCH_TO_##loc);
+
+#define _CLIB_PREFETCH(n, size, type) \
+ if ((size) > (n) *CLIB_CACHE_PREFETCH_BYTES) \
+ __builtin_prefetch (_addr + (n) *CLIB_CACHE_PREFETCH_BYTES, \
+ CLIB_PREFETCH_##type, /* locality */ 3);
+
+#define CLIB_PREFETCH(addr, size, type) \
+ do \
+ { \
+ void *_addr = (addr); \
+ \
+ ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES); \
+ _CLIB_PREFETCH (0, size, type); \
+ _CLIB_PREFETCH (1, size, type); \
+ _CLIB_PREFETCH (2, size, type); \
+ _CLIB_PREFETCH (3, size, type); \
+ } \
+ while (0)
+
+#define CLIB_TARGETED_PREFETCH(addr, size, type, locality) \
+ do \
+ { \
+ void *_addr = (addr); \
+ \
+ ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES); \
+ _CLIB_TARGETED_PREFETCH (0, size, type, locality); \
+ _CLIB_TARGETED_PREFETCH (1, size, type, locality); \
+ _CLIB_TARGETED_PREFETCH (2, size, type, locality); \
+ _CLIB_TARGETED_PREFETCH (3, size, type, locality); \
+ } \
+ while (0)
#undef _
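A usage sketch of the classic macro next to the new targeted variant; buf is
a hypothetical pointer, and the locality argument maps directly onto the
third parameter of __builtin_prefetch:

  /* pull two prefetch units of buf toward L1 ahead of a read */
  CLIB_PREFETCH (buf, 2 * CLIB_CACHE_PREFETCH_BYTES, LOAD);
  /* non-temporal hint for data we expect to touch only once */
  CLIB_TARGETED_PREFETCH (buf, CLIB_CACHE_PREFETCH_BYTES, LOAD, STREAM);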
static_always_inline void
clib_prefetch_load (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ __builtin_prefetch (p, /* rw */ 0, /* locality */ 3);
}
static_always_inline void
clib_prefetch_store (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, STORE);
+ __builtin_prefetch (p, /* rw */ 1, /* locality */ 3);
}
#endif /* included_clib_cache_h */
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
index ade7e5fa4bb..5348738ec6a 100644
--- a/src/vppinfra/clib.h
+++ b/src/vppinfra/clib.h
@@ -53,6 +53,12 @@
#define CLIB_UNIX
#endif
+#ifdef __linux__
+#define CLIB_LINUX 1
+#else
+#define CLIB_LINUX 0
+#endif
+
#include <vppinfra/types.h>
#include <vppinfra/atomics.h>
@@ -68,6 +74,8 @@
#define BITS(x) (8*sizeof(x))
#define ARRAY_LEN(x) (sizeof (x)/sizeof (x[0]))
+#define FOREACH_ARRAY_ELT(a, b) \
+ for (typeof ((b)[0]) *(a) = (b); (a) - (b) < ARRAY_LEN (b); (a)++)
#define _STRUCT_FIELD(t,f) (((t *) 0)->f)
#define STRUCT_OFFSET_OF(t,f) offsetof(t, f)
@@ -95,15 +103,52 @@
/* Make a string from the macro's argument */
#define CLIB_STRING_MACRO(x) #x
+#define CLIB_STRING_ARRAY(...) \
+ (char *[]) { __VA_ARGS__, 0 }
+
+#define CLIB_SWAP(a, b) \
+ { \
+ typeof (a) __tmp = a; \
+ a = b; \
+ b = __tmp; \
+ }
+
+/* sanitizers */
+#ifdef __has_feature
+#if __has_feature(address_sanitizer)
+#define CLIB_SANITIZE_ADDR 1
+#endif
+#elif defined(__SANITIZE_ADDRESS__)
+#define CLIB_SANITIZE_ADDR 1
+#endif
+
#define __clib_unused __attribute__ ((unused))
#define __clib_weak __attribute__ ((weak))
#define __clib_packed __attribute__ ((packed))
+#define __clib_flatten __attribute__ ((flatten))
#define __clib_constructor __attribute__ ((constructor))
#define __clib_noinline __attribute__ ((noinline))
+#ifdef __clang__
+#define __clib_noclone
+#else
+#define __clib_noclone __attribute__ ((noclone))
+#endif
#define __clib_aligned(x) __attribute__ ((aligned(x)))
#define __clib_section(s) __attribute__ ((section(s)))
#define __clib_warn_unused_result __attribute__ ((warn_unused_result))
#define __clib_export __attribute__ ((visibility("default")))
+#ifdef __clang__
+#define __clib_no_tail_calls __attribute__ ((disable_tail_calls))
+#else
+#define __clib_no_tail_calls \
+ __attribute__ ((optimize ("no-optimize-sibling-calls")))
+#endif
+
+#ifdef CLIB_SANITIZE_ADDR
+#define __clib_nosanitize_addr __attribute__ ((no_sanitize_address))
+#else
+#define __clib_nosanitize_addr
+#endif
#define never_inline __attribute__ ((__noinline__))
@@ -124,10 +169,17 @@
#define PREDICT_FALSE(x) __builtin_expect((x),0)
#define PREDICT_TRUE(x) __builtin_expect((x),1)
#define COMPILE_TIME_CONST(x) __builtin_constant_p (x)
+#define CLIB_ASSUME(x) \
+ do \
+ { \
+ if (!(x)) \
+ __builtin_unreachable (); \
+ } \
+ while (0)
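CLIB_ASSUME turns a condition the caller guarantees into an optimization
hint: if the condition were false, __builtin_unreachable () would make the
behavior undefined, so the compiler is free to assume it holds. A hedged
sketch with hypothetical buffers:

  CLIB_ASSUME (n_bytes <= 64);   /* compiler may now unroll or drop checks */
  for (u32 i = 0; i < n_bytes; i++)
    dst[i] = src[i];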
/*
* Compiler barrier
- * prevent compiler to reorder memory access accross this boundary
+ * prevent compiler to reorder memory access across this boundary
* prevent compiler to cache values in register (force reload)
* Not to be confused with CPU memory barrier below
*/
@@ -136,7 +188,7 @@
/* Full memory barrier (read and write). */
#define CLIB_MEMORY_BARRIER() __sync_synchronize ()
-#if __x86_64__
+#if __SSE__
#define CLIB_MEMORY_STORE_BARRIER() __builtin_ia32_sfence ()
#else
#define CLIB_MEMORY_STORE_BARRIER() __sync_synchronize ()
@@ -152,26 +204,17 @@
decl __attribute ((destructor)); \
decl
-/* Use __builtin_clz if available. */
-#if uword_bits == 64
-#define count_leading_zeros(x) __builtin_clzll (x)
-#define count_trailing_zeros(x) __builtin_ctzll (x)
-#else
-#define count_leading_zeros(x) __builtin_clzl (x)
-#define count_trailing_zeros(x) __builtin_ctzl (x)
-#endif
-
-#if defined (count_leading_zeros)
always_inline uword
-clear_lowest_set_bit (uword x)
+pow2_mask (uword x)
{
#ifdef __BMI2__
- return _blsr_u64 (x);
-#else
- return x ^ (1ULL << count_trailing_zeros (x));
+ return _bzhi_u64 (-1ULL, x);
#endif
+ return ((uword) 1 << x) - (uword) 1;
}
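pow2_mask (x) yields a mask of x one-bits. The BMI2 _bzhi_u64 form is also
well-defined for x == 64, where the shift-based fallback would invoke
undefined behavior:

  pow2_mask (0);    /* 0 */
  pow2_mask (8);    /* 0xff */
  pow2_mask (64);   /* ~0, but only safe on the BMI2 path */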
+#include <vppinfra/bitops.h>
+
always_inline uword
min_log2 (uword x)
{
@@ -179,45 +222,6 @@ min_log2 (uword x)
n = count_leading_zeros (x);
return BITS (uword) - n - 1;
}
-#else
-always_inline uword
-min_log2 (uword x)
-{
- uword a = x, b = BITS (uword) / 2, c = 0, r = 0;
-
- /* Reduce x to 4 bit result. */
-#define _ \
-{ \
- c = a >> b; \
- if (c) a = c; \
- if (c) r += b; \
- b /= 2; \
-}
-
- if (BITS (uword) > 32)
- _;
- _;
- _;
- _;
-#undef _
-
- /* Do table lookup on 4 bit partial. */
- if (BITS (uword) > 32)
- {
- const u64 table = 0x3333333322221104LL;
- uword t = (table >> (4 * a)) & 0xf;
- r = t < 4 ? r + t : ~0;
- }
- else
- {
- const u32 table = 0x22221104;
- uword t = (a & 8) ? 3 : ((table >> (4 * a)) & 0xf);
- r = t < 4 ? r + t : ~0;
- }
-
- return r;
-}
-#endif
always_inline uword
max_log2 (uword x)
@@ -249,12 +253,6 @@ min_log2_u64 (u64 x)
}
always_inline uword
-pow2_mask (uword x)
-{
- return ((uword) 1 << x) - (uword) 1;
-}
-
-always_inline uword
max_pow2 (uword x)
{
word y = (word) 1 << min_log2 (x);
@@ -293,18 +291,6 @@ first_set (uword x)
return x & -x;
}
-always_inline uword
-log2_first_set (uword x)
-{
- uword result;
-#ifdef count_trailing_zeros
- result = count_trailing_zeros (x);
-#else
- result = min_log2 (first_set (x));
-#endif
- return result;
-}
-
always_inline f64
flt_round_down (f64 x)
{
@@ -360,6 +346,44 @@ extract_bits (uword x, int start, int count)
_x < 0 ? -_x : _x; \
})
+static_always_inline u64
+u64_add_with_carry (u64 *carry, u64 a, u64 b)
+{
+#if defined(__x86_64__)
+ unsigned long long v;
+ *carry = _addcarry_u64 (*carry, a, b, &v);
+ return (u64) v;
+#elif defined(__clang__)
+ unsigned long long c;
+ u64 rv = __builtin_addcll (a, b, *carry, &c);
+ *carry = c;
+ return rv;
+#else
+  u64 t = b + *carry;
+  u64 rv = a + t;
+  /* carry out if a + t wraps, or if b + carry itself wrapped */
+  *carry = (rv < a) + (t < b);
+ return rv;
+#endif
+}
+
+static_always_inline u64
+u64_sub_with_borrow (u64 *borrow, u64 x, u64 y)
+{
+#if defined(__x86_64__)
+ unsigned long long v;
+ *borrow = _subborrow_u64 (*borrow, x, y, &v);
+ return (u64) v;
+#elif defined(__clang__)
+ unsigned long long b;
+ u64 rv = __builtin_subcll (x, y, *borrow, &b);
+ *borrow = b;
+ return rv;
+#else
+  u64 t = y + *borrow;
+  u64 rv = x - t;
+  /* borrow out if x < t, or if y + borrow itself wrapped */
+  *borrow = (rv > x) + (t < y);
+ return rv;
+#endif
+}
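The carry/borrow helpers chain naturally for multi-precision arithmetic; a
sketch of a 128-bit add with hypothetical limbs (low limb first):

  u64 carry = 0, r0, r1;
  r0 = u64_add_with_carry (&carry, a0, b0);   /* low limbs */
  r1 = u64_add_with_carry (&carry, a1, b1);   /* high limbs + carry in */
  /* carry now holds the overflow out of the 128-bit result */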
+
/* Standard standalone-only function declarations. */
#ifndef CLIB_UNIX
void clib_standalone_init (void *memory, uword memory_bytes);
@@ -368,10 +392,6 @@ void qsort (void *base, uword n, uword size,
int (*)(const void *, const void *));
#endif
-/* Stack backtrace. */
-uword
-clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip);
-
#include <vppinfra/byte_order.h>
#endif /* included_clib_h */
diff --git a/src/vppinfra/config.h.in b/src/vppinfra/config.h.in
index 7aad027a8b6..c2b804cd590 100644
--- a/src/vppinfra/config.h.in
+++ b/src/vppinfra/config.h.in
@@ -20,7 +20,7 @@
#define CLIB_LOG2_CACHE_LINE_BYTES @LOG2_CACHE_LINE_BYTES@
#endif
-#define CLIB_TARGET_TRIPLET "@CMAKE_C_COMPILER_TARGET@"
+#define CLIB_LIB_DIR "@VPP_LIBRARY_DIR@"
#define CLIB_VECTOR_GROW_BY_ONE @VECTOR_GROW_BY_ONE@
#endif
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c
index 045d1f727f4..385a4e25408 100644
--- a/src/vppinfra/cpu.c
+++ b/src/vppinfra/cpu.c
@@ -17,70 +17,76 @@
#include <vppinfra/format.h>
#include <vppinfra/cpu.h>
-#define foreach_x86_cpu_uarch \
- _(0x06, 0x9e, "Kaby Lake", "Kaby Lake DT/H/S/X") \
- _(0x06, 0x8e, "Kaby Lake", "Kaby Lake Y/U") \
- _(0x06, 0x8c, "Tiger Lake", "Tiger Lake U") \
- _(0x06, 0x86, "Tremont", "Elkhart Lake") \
- _(0x06, 0x85, "Knights Mill", "Knights Mill") \
- _(0x06, 0x7e, "Ice Lake", "Ice Lake U") \
- _(0x06, 0x7d, "Ice Lake", "Ice Lake Y") \
- _(0x06, 0x7a, "Goldmont Plus", "Gemini Lake") \
- _(0x06, 0x6c, "Ice Lake", "Ice Lake SP") \
- _(0x06, 0x6a, "Ice Lake", "Ice Lake DE") \
- _(0x06, 0x66, "Cannon Lake", "Cannon Lake U") \
- _(0x06, 0x5f, "Goldmont", "Denverton") \
- _(0x06, 0x5e, "Skylake", "Skylake DT/H/S") \
- _(0x06, 0x5c, "Goldmont", "Apollo Lake") \
- _(0x06, 0x5a, "Silvermont", "Moorefield") \
- _(0x06, 0x57, "Knights Landing", "Knights Landing") \
- _(0x06, 0x56, "Broadwell", "Broadwell DE") \
- _(0x06, 0x55, "Skylake", "Skylake X/SP") \
- _(0x06, 0x4f, "Broadwell", "Broadwell EP/EX") \
- _(0x06, 0x4e, "Skylake", "Skylake Y/U") \
- _(0x06, 0x4d, "Silvermont", "Rangeley") \
- _(0x06, 0x4c, "Airmont", "Braswell") \
- _(0x06, 0x47, "Broadwell", "Broadwell H") \
- _(0x06, 0x46, "Haswell", "Crystalwell") \
- _(0x06, 0x45, "Haswell", "Haswell ULT") \
- _(0x06, 0x3f, "Haswell", "Haswell E") \
- _(0x06, 0x3e, "Ivy Bridge", "Ivy Bridge E/EN/EP") \
- _(0x06, 0x3d, "Broadwell", "Broadwell U") \
- _(0x06, 0x3c, "Haswell", "Haswell") \
- _(0x06, 0x3a, "Ivy Bridge", "IvyBridge") \
- _(0x06, 0x37, "Silvermont", "BayTrail") \
- _(0x06, 0x36, "Saltwell", "Cedarview,Centerton") \
- _(0x06, 0x35, "Saltwell", "Cloverview") \
- _(0x06, 0x2f, "Westmere", "Westmere EX") \
- _(0x06, 0x2e, "Nehalem", "Nehalem EX") \
- _(0x06, 0x2d, "Sandy Bridge", "SandyBridge E/EN/EP") \
- _(0x06, 0x2c, "Westmere", "Westmere EP/EX,Gulftown") \
- _(0x06, 0x2a, "Sandy Bridge", "Sandy Bridge") \
- _(0x06, 0x27, "Saltwell", "Medfield") \
- _(0x06, 0x26, "Bonnell", "Tunnel Creek") \
- _(0x06, 0x25, "Westmere", "Arrandale,Clarksdale") \
- _(0x06, 0x1e, "Nehalem", "Clarksfield,Lynnfield,Jasper Forest") \
- _(0x06, 0x1d, "Penryn", "Dunnington") \
- _(0x06, 0x1c, "Bonnell", "Pineview,Silverthorne") \
- _(0x06, 0x1a, "Nehalem", "Nehalem EP,Bloomfield)") \
- _(0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown")
+#define foreach_x86_cpu_uarch \
+ _ (0x06, 0x9e, "Kaby Lake", "Kaby Lake DT/H/S/X") \
+ _ (0x06, 0x9c, "Tremont", "Jasper Lake") \
+ _ (0x06, 0x9a, "Alder Lake", "Alder Lake L") \
+ _ (0x06, 0x97, "Alder Lake", "Alder Lake") \
+ _ (0x06, 0x96, "Tremont", "Elkhart Lake") \
+ _ (0x06, 0x8f, "Sapphire Rapids", "Sapphire Rapids X") \
+ _ (0x06, 0x8e, "Kaby Lake", "Kaby Lake Y/U") \
+ _ (0x06, 0x8c, "Tiger Lake", "Tiger Lake U") \
+ _ (0x06, 0x86, "Tremont", "Jacobsville") \
+ _ (0x06, 0x85, "Knights Mill", "Knights Mill") \
+ _ (0x06, 0x7e, "Ice Lake", "Ice Lake U") \
+ _ (0x06, 0x7d, "Ice Lake", "Ice Lake Y") \
+ _ (0x06, 0x7a, "Goldmont Plus", "Gemini Lake") \
+ _ (0x06, 0x6c, "Ice Lake", "Ice Lake SP") \
+ _ (0x06, 0x6a, "Ice Lake", "Ice Lake DE") \
+ _ (0x06, 0x66, "Cannon Lake", "Cannon Lake U") \
+ _ (0x06, 0x5f, "Goldmont", "Denverton") \
+ _ (0x06, 0x5e, "Skylake", "Skylake DT/H/S") \
+ _ (0x06, 0x5c, "Goldmont", "Apollo Lake") \
+ _ (0x06, 0x5a, "Silvermont", "Moorefield") \
+ _ (0x06, 0x57, "Knights Landing", "Knights Landing") \
+ _ (0x06, 0x56, "Broadwell", "Broadwell DE") \
+ _ (0x06, 0x55, "Skylake", "Skylake X/SP") \
+ _ (0x06, 0x4f, "Broadwell", "Broadwell EP/EX") \
+ _ (0x06, 0x4e, "Skylake", "Skylake Y/U") \
+ _ (0x06, 0x4d, "Silvermont", "Rangeley") \
+ _ (0x06, 0x4c, "Airmont", "Braswell") \
+ _ (0x06, 0x47, "Broadwell", "Broadwell H") \
+ _ (0x06, 0x46, "Haswell", "Crystalwell") \
+ _ (0x06, 0x45, "Haswell", "Haswell ULT") \
+ _ (0x06, 0x3f, "Haswell", "Haswell E") \
+ _ (0x06, 0x3e, "Ivy Bridge", "Ivy Bridge E/EN/EP") \
+ _ (0x06, 0x3d, "Broadwell", "Broadwell U") \
+ _ (0x06, 0x3c, "Haswell", "Haswell") \
+ _ (0x06, 0x3a, "Ivy Bridge", "IvyBridge") \
+ _ (0x06, 0x37, "Silvermont", "BayTrail") \
+ _ (0x06, 0x36, "Saltwell", "Cedarview,Centerton") \
+ _ (0x06, 0x35, "Saltwell", "Cloverview") \
+ _ (0x06, 0x2f, "Westmere", "Westmere EX") \
+ _ (0x06, 0x2e, "Nehalem", "Nehalem EX") \
+ _ (0x06, 0x2d, "Sandy Bridge", "SandyBridge E/EN/EP") \
+ _ (0x06, 0x2c, "Westmere", "Westmere EP/EX,Gulftown") \
+ _ (0x06, 0x2a, "Sandy Bridge", "Sandy Bridge") \
+ _ (0x06, 0x27, "Saltwell", "Medfield") \
+ _ (0x06, 0x26, "Bonnell", "Tunnel Creek") \
+ _ (0x06, 0x25, "Westmere", "Arrandale,Clarksdale") \
+ _ (0x06, 0x1e, "Nehalem", "Clarksfield,Lynnfield,Jasper Forest") \
+ _ (0x06, 0x1d, "Penryn", "Dunnington") \
+ _ (0x06, 0x1c, "Bonnell", "Pineview,Silverthorne") \
+  _ (0x06, 0x1a, "Nehalem", "Nehalem EP,Bloomfield")                       \
+ _ (0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown")
/* _(implementor-id, part-id, vendor-name, cpu-name, show CPU pass as string) */
-#define foreach_aarch64_cpu_uarch \
- _(0x41, 0xd03, "ARM", "Cortex-A53", 0) \
- _(0x41, 0xd07, "ARM", "Cortex-A57", 0) \
- _(0x41, 0xd08, "ARM", "Cortex-A72", 0) \
- _(0x41, 0xd09, "ARM", "Cortex-A73", 0) \
- _(0x41, 0xd0a, "ARM", "Cortex-A75", 0) \
- _(0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
- _(0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
- _(0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
- _(0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
- _(0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
- _(0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \
- _(0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \
- _(0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \
- _(0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
+#define foreach_aarch64_cpu_uarch \
+ _ (0x41, 0xd03, "ARM", "Cortex-A53", 0) \
+ _ (0x41, 0xd07, "ARM", "Cortex-A57", 0) \
+ _ (0x41, 0xd08, "ARM", "Cortex-A72", 0) \
+ _ (0x41, 0xd09, "ARM", "Cortex-A73", 0) \
+ _ (0x41, 0xd0a, "ARM", "Cortex-A75", 0) \
+ _ (0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
+ _ (0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
+ _ (0x41, 0xd49, "ARM", "Neoverse-N2", 0) \
+ _ (0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
+ _ (0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
+ _ (0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
+ _ (0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \
+ _ (0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \
+ _ (0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \
+ _ (0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
__clib_export u8 *
format_cpu_uarch (u8 * s, va_list * args)
@@ -88,13 +94,34 @@ format_cpu_uarch (u8 * s, va_list * args)
#if __x86_64__
u32 __attribute__ ((unused)) eax, ebx, ecx, edx;
u8 model, family, stepping;
+ u8 amd_vendor = 0;
+
+ if (__get_cpuid (0, &eax, &ebx, &ecx, &edx) == 0)
+ return format (s, "unknown (missing cpuid)");
+
+ if (amd_vendor (ebx, ecx, edx))
+ amd_vendor = 1;
if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
return format (s, "unknown (missing cpuid)");
- model = ((eax >> 4) & 0x0f) | ((eax >> 12) & 0xf0);
- family = (eax >> 8) & 0x0f;
stepping = eax & 0x0f;
+ if (amd_vendor)
+ {
+ family = ((eax >> 8) & 0x0f);
+ model = ((eax >> 4) & 0x0f);
+ if (family >= 0xf)
+ {
+ family = family + ((eax >> 20) & 0xf);
+ model = (model | ((eax >> 12) & 0xf0));
+ }
+ return format (s, "Zen (family 0x%02x model 0x%02x)", family, model);
+ }
+ else
+ {
+ model = ((eax >> 4) & 0x0f) | ((eax >> 12) & 0xf0);
+ family = (eax >> 8) & 0x0f;
+ }
#define _(f,m,a,c) if ((model == m) && (family == f)) return \
format(s, "[0x%x] %s ([0x%02x] %s) stepping 0x%x", f, a, m, c, stepping);
@@ -103,30 +130,28 @@ format(s, "[0x%x] %s ([0x%02x] %s) stepping 0x%x", f, a, m, c, stepping);
return format (s, "unknown (family 0x%02x model 0x%02x)", family, model);
#elif __aarch64__
- int fd;
unformat_input_t input;
u32 implementer, primary_part_number, variant, revision;
- fd = open ("/proc/cpuinfo", 0);
- if (fd < 0)
- return format (s, "unknown");
-
- unformat_init_clib_file (&input, fd);
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_init_file (&input, "/proc/cpuinfo"))
{
- if (unformat (&input, "CPU implementer%_: 0x%x", &implementer))
- ;
- else if (unformat (&input, "CPU part%_: 0x%x", &primary_part_number))
- ;
- else if (unformat (&input, "CPU variant%_: 0x%x", &variant))
- ;
- else if (unformat (&input, "CPU revision%_: %u", &revision))
- ;
- else
- unformat_skip_line (&input);
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "CPU implementer%_: 0x%x", &implementer))
+ ;
+ else if (unformat (&input, "CPU part%_: 0x%x", &primary_part_number))
+ ;
+ else if (unformat (&input, "CPU variant%_: 0x%x", &variant))
+ ;
+ else if (unformat (&input, "CPU revision%_: %u", &revision))
+ ;
+ else
+ unformat_skip_line (&input);
+ }
+ unformat_free (&input);
}
- unformat_free (&input);
- close (fd);
+ else
+ return format (s, "unknown");
#define _(i,p,a,c,_format) if ((implementer == i) && (primary_part_number == p)){ \
if (_format)\
@@ -193,7 +218,7 @@ format_cpu_model_name (u8 * s, va_list * args)
#endif
}
-
+#if defined(__x86_64__) || defined(__aarch64__)
static inline char const *
flag_skip_prefix (char const *flag, const char *pfx, int len)
{
@@ -201,20 +226,22 @@ flag_skip_prefix (char const *flag, const char *pfx, int len)
return flag + len - 1;
return flag;
}
+#endif
__clib_export u8 *
-format_cpu_flags (u8 * s, va_list * args)
+format_cpu_flags (u8 *s, va_list *args)
{
#if defined(__x86_64__)
-#define _(flag, func, reg, bit) \
- if (clib_cpu_supports_ ## flag()) \
- s = format (s, "%s ", flag_skip_prefix(#flag, "x86_", sizeof("x86_")));
+#define _(flag, func, reg, bit) \
+ if (clib_cpu_supports_##flag ()) \
+ s = format (s, "%s ", flag_skip_prefix (#flag, "x86_", sizeof ("x86_")));
foreach_x86_64_flags return s;
#undef _
#elif defined(__aarch64__)
-#define _(flag, bit) \
- if (clib_cpu_supports_ ## flag()) \
- s = format (s, "%s ", flag_skip_prefix(#flag, "aarch64_", sizeof("aarch64_")));
+#define _(flag, bit) \
+ if (clib_cpu_supports_##flag ()) \
+ s = format (s, "%s ", \
+ flag_skip_prefix (#flag, "aarch64_", sizeof ("aarch64_")));
foreach_aarch64_flags return s;
#undef _
#else /* ! ! __x86_64__ && ! __aarch64__ */
@@ -225,17 +252,25 @@ format_cpu_flags (u8 * s, va_list * args)
__clib_export u32
clib_get_current_cpu_id ()
{
+#ifdef __linux__
unsigned cpu, node;
syscall (__NR_getcpu, &cpu, &node, 0);
return cpu;
+#else
+ return 0;
+#endif /* __linux__ */
}
__clib_export u32
clib_get_current_numa_node ()
{
+#ifdef __linux__
unsigned cpu, node;
syscall (__NR_getcpu, &cpu, &node, 0);
return node;
+#else
+ return 0;
+#endif /* __linux__ */
}
__clib_export u8 *
@@ -250,10 +285,39 @@ format_march_variant (u8 *s, va_list *args)
return format (s, "%s", variants[t]);
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+#ifdef __aarch64__
+
+__clib_export const clib_cpu_info_t *
+clib_get_cpu_info ()
+{
+ static int first_run = 1;
+ static clib_cpu_info_t info = {};
+ if (first_run)
+ {
+ FILE *fp = fopen ("/proc/cpuinfo", "r");
+ char buf[128];
+
+ if (!fp)
+ return 0;
+
+ while (!feof (fp))
+ {
+ if (!fgets (buf, sizeof (buf), fp))
+ break;
+ buf[127] = '\0';
+ if (strstr (buf, "CPU part"))
+ info.aarch64.part_num =
+ strtol (memchr (buf, ':', 128) + 2, NULL, 0);
+
+ if (strstr (buf, "CPU implementer"))
+ info.aarch64.implementer =
+ strtol (memchr (buf, ':', 128) + 2, NULL, 0);
+ }
+ fclose (fp);
+
+ first_run = 0;
+ }
+ return &info;
+}
+
+#endif
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index c1f2e9e8248..b3743d4c26d 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -21,21 +21,31 @@
#if defined(__x86_64__)
#define foreach_march_variant \
+ _ (scalar, "Generic (SIMD disabled)") \
_ (hsw, "Intel Haswell") \
_ (trm, "Intel Tremont") \
_ (skx, "Intel Skylake (server) / Cascade Lake") \
- _ (icl, "Intel Ice Lake")
+ _ (icl, "Intel Ice Lake") \
+ _ (adl, "Intel Alder Lake") \
+ _ (spr, "Intel Sapphire Rapids") \
+ _ (znver3, "AMD Milan") \
+ _ (znver4, "AMD Genoa")
#elif defined(__aarch64__)
#define foreach_march_variant \
_ (octeontx2, "Marvell Octeon TX2") \
_ (thunderx2t99, "Marvell ThunderX2 T99") \
_ (qdf24xx, "Qualcomm CentriqTM 2400") \
_ (cortexa72, "ARM Cortex-A72") \
- _ (neoversen1, "ARM Neoverse N1")
+ _ (neoversen1, "ARM Neoverse N1") \
+ _ (neoversen2, "ARM Neoverse N2")
#else
#define foreach_march_variant
#endif
+#define amd_vendor(t1, t2, t3) \
+ ((t1 == 0x68747541) && /* htuA */ \
+ (t2 == 0x444d4163) && /* DMAc */ \
+ (t3 == 0x69746e65)) /* itne */
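The three constants are the little-endian dwords of the CPUID leaf-0 vendor
string "AuthenticAMD" (ebx = "Auth", edx = "enti", ecx = "cAMD"); note the
macro takes its arguments in ebx, ecx, edx order. A sketch:

  u32 eax, ebx, ecx, edx;
  if (__get_cpuid (0, &eax, &ebx, &ecx, &edx))
    {
      /* AMD: ebx == 0x68747541, edx == 0x69746e65, ecx == 0x444d4163 */
      int is_amd = amd_vendor (ebx, ecx, edx);
    }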
typedef enum
{
CLIB_MARCH_VARIANT_TYPE = 0,
@@ -84,6 +94,9 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r)
#define CLIB_MARCH_FN_POINTER(fn) \
(__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations);
+#define CLIB_MARCH_FN_VOID_POINTER(fn) \
+ clib_march_select_fn_ptr (fn##_march_fn_registrations);
+
#define _CLIB_MARCH_FN_REGISTRATION(fn) \
static clib_march_fn_registration \
CLIB_MARCH_SFX(fn##_march_fn_registration) = \
@@ -120,6 +133,7 @@ _CLIB_MARCH_FN_REGISTRATION(fn)
_ (avx, 1, ecx, 28) \
_ (rdrand, 1, ecx, 30) \
_ (avx2, 7, ebx, 5) \
+ _ (bmi2, 7, ebx, 8) \
_ (rtm, 7, ebx, 11) \
_ (pqm, 7, ebx, 12) \
_ (pqe, 7, ebx, 15) \
@@ -134,7 +148,11 @@ _CLIB_MARCH_FN_REGISTRATION(fn)
_ (avx512_vpopcntdq, 7, ecx, 14) \
_ (movdiri, 7, ecx, 27) \
_ (movdir64b, 7, ecx, 28) \
- _ (invariant_tsc, 0x80000007, edx, 8)
+ _ (enqcmd, 7, ecx, 29) \
+ _ (avx512_fp16, 7, edx, 23) \
+ _ (aperfmperf, 0x00000006, ecx, 0) \
+ _ (invariant_tsc, 0x80000007, edx, 8) \
+ _ (monitorx, 0x80000001, ecx, 29)
#define foreach_aarch64_flags \
_ (fp, 0) \
@@ -161,8 +179,10 @@ _ (asimddp, 20) \
_ (sha512, 21) \
_ (sve, 22)
-u32 clib_get_current_cpu_id ();
-u32 clib_get_current_numa_node ();
+u32 clib_get_current_cpu_id (void);
+u32 clib_get_current_numa_node (void);
+
+typedef int (*clib_cpu_supports_func_t) (void);
#if defined(__x86_64__)
#include "cpuid.h"
@@ -179,8 +199,6 @@ clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
return 1;
}
-typedef int (*clib_cpu_supports_func_t) ();
-
#define _(flag, func, reg, bit) \
static inline int \
clib_cpu_supports_ ## flag() \
@@ -234,6 +252,20 @@ clib_cpu_supports_aes ()
}
static inline int
+clib_cpu_march_priority_scalar ()
+{
+ return 1;
+}
+
+static inline int
+clib_cpu_march_priority_spr ()
+{
+ if (clib_cpu_supports_enqcmd ())
+ return 300;
+ return -1;
+}
+
+static inline int
clib_cpu_march_priority_icl ()
{
if (clib_cpu_supports_avx512_bitalg ())
@@ -242,6 +274,14 @@ clib_cpu_march_priority_icl ()
}
static inline int
+clib_cpu_march_priority_adl ()
+{
+ if (clib_cpu_supports_movdiri () && clib_cpu_supports_avx2 ())
+ return 150;
+ return -1;
+}
+
+static inline int
clib_cpu_march_priority_skx ()
{
if (clib_cpu_supports_avx512f ())
@@ -253,7 +293,7 @@ static inline int
clib_cpu_march_priority_trm ()
{
if (clib_cpu_supports_movdiri ())
- return 60;
+ return 40;
return -1;
}
@@ -265,116 +305,149 @@ clib_cpu_march_priority_hsw ()
return -1;
}
-static inline u32
-clib_cpu_implementer ()
+static inline int
+clib_cpu_march_priority_znver4 ()
{
- char buf[128];
- static u32 implementer = -1;
-
- if (-1 != implementer)
- return implementer;
-
- FILE *fp = fopen ("/proc/cpuinfo", "r");
- if (!fp)
- return implementer;
-
- while (!feof (fp))
- {
- if (!fgets (buf, sizeof (buf), fp))
- break;
- buf[127] = '\0';
- if (strstr (buf, "CPU implementer"))
- implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
- if (-1 != implementer)
- break;
- }
- fclose (fp);
-
- return implementer;
+ if (clib_cpu_supports_avx512_bitalg () && clib_cpu_supports_monitorx ())
+ return 250;
+ return -1;
}
-static inline u32
-clib_cpu_part ()
+static inline int
+clib_cpu_march_priority_znver3 ()
{
- char buf[128];
- static u32 part = -1;
+ if (clib_cpu_supports_avx2 () && clib_cpu_supports_monitorx ())
+ return 70;
+ return -1;
+}
- if (-1 != part)
- return part;
+#define X86_CPU_ARCH_PERF_FUNC 0xA
- FILE *fp = fopen ("/proc/cpuinfo", "r");
- if (!fp)
- return part;
+static inline int
+clib_get_pmu_counter_count (u8 *fixed, u8 *general)
+{
+#if defined(__x86_64__)
+ u32 __clib_unused eax = 0, ebx = 0, ecx = 0, edx = 0;
+ clib_get_cpuid (X86_CPU_ARCH_PERF_FUNC, &eax, &ebx, &ecx, &edx);
- while (!feof (fp))
- {
- if (!fgets (buf, sizeof (buf), fp))
- break;
- buf[127] = '\0';
- if (strstr (buf, "CPU part"))
- part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
- if (-1 != part)
- break;
- }
- fclose (fp);
+ *general = (eax & 0xFF00) >> 8;
+ *fixed = (edx & 0xF);
- return part;
+ return 1;
+#else
+ return 0;
+#endif
}
+typedef struct
+{
+ struct
+ {
+ u8 implementer;
+ u16 part_num;
+ } aarch64;
+} clib_cpu_info_t;
+
+const clib_cpu_info_t *clib_get_cpu_info ();
+
+/* ARM */
+#define AARCH64_CPU_IMPLEMENTER_ARM 0x41
+#define AARCH64_CPU_PART_CORTEXA72 0xd08
+#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
+#define AARCH64_CPU_PART_NEOVERSEN2 0xd49
+
+/* Cavium */
#define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43
#define AARCH64_CPU_PART_THUNDERX2 0x0af
#define AARCH64_CPU_PART_OCTEONTX2T96 0x0b2
#define AARCH64_CPU_PART_OCTEONTX2T98 0x0b1
-#define AARCH64_CPU_IMPLEMENTER_QDF24XX 0x51
+
+/* Qualcomm */
+#define AARCH64_CPU_IMPLEMENTER_QUALCOMM 0x51
#define AARCH64_CPU_PART_QDF24XX 0xc00
-#define AARCH64_CPU_IMPLEMENTER_CORTEXA72 0x41
-#define AARCH64_CPU_PART_CORTEXA72 0xd08
-#define AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 0x41
-#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
static inline int
clib_cpu_march_priority_octeontx2 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
- ((AARCH64_CPU_PART_OCTEONTX2T96 == clib_cpu_part ())
- || AARCH64_CPU_PART_OCTEONTX2T98 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T96 ||
+ info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T98)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_thunderx2t99 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_THUNDERX2 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_THUNDERX2)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_qdf24xx ()
{
- if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_QUALCOMM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_QDF24XX)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_cortexa72 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_CORTEXA72)
return 10;
+
return -1;
}
static inline int
clib_cpu_march_priority_neoversen1 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_NEOVERSEN1 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN1)
+ return 10;
+
+ return -1;
+}
+
+static inline int
+clib_cpu_march_priority_neoversen2 ()
+{
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN2)
return 10;
+
return -1;
}
diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h
index fec67cd9757..5c5e548401a 100644
--- a/src/vppinfra/crc32.h
+++ b/src/vppinfra/crc32.h
@@ -21,67 +21,156 @@
#if __SSE4_2__
#define clib_crc32c_uses_intrinsics
#include <x86intrin.h>
-
-#define crc32_u64 _mm_crc32_u64
-#define crc32_u32 _mm_crc32_u32
-
static_always_inline u32
-clib_crc32c (u8 * s, int len)
+clib_crc32c_u8 (u32 last, u8 data)
{
- u32 v = 0;
-
-#if defined(__x86_64__)
- for (; len >= 8; len -= 8, s += 8)
- v = _mm_crc32_u64 (v, *((u64 *) s));
-#else
- /* workaround weird GCC bug when using _mm_crc32_u32
- which happens with -O2 optimization */
-#if !defined (__i686__)
- asm volatile ("":::"memory");
-#endif
-#endif
-
- for (; len >= 4; len -= 4, s += 4)
- v = _mm_crc32_u32 (v, *((u32 *) s));
+ return _mm_crc32_u8 (last, data);
+}
- for (; len >= 2; len -= 2, s += 2)
- v = _mm_crc32_u16 (v, *((u16 *) s));
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+ return _mm_crc32_u16 (last, data);
+}
- for (; len >= 1; len -= 1, s += 1)
- v = _mm_crc32_u8 (v, *((u16 *) s));
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+ return _mm_crc32_u32 (last, data);
+}
- return v;
+static_always_inline u32
+clib_crc32c_u64 (u32 last, u64 data)
+{
+ return _mm_crc32_u64 (last, data);
}
+#endif
-#elif __ARM_FEATURE_CRC32
+#if __ARM_FEATURE_CRC32
#define clib_crc32c_uses_intrinsics
#include <arm_acle.h>
+static_always_inline u32
+clib_crc32c_u8 (u32 last, u8 data)
+{
+ return __crc32cb (last, data);
+}
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+ return __crc32ch (last, data);
+}
-#define crc32_u64 __crc32cd
-#define crc32_u32 __crc32cw
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+ return __crc32cw (last, data);
+}
static_always_inline u32
-clib_crc32c (u8 * s, int len)
+clib_crc32c_u64 (u32 last, u64 data)
{
- u32 v = 0;
+ return __crc32cd (last, data);
+}
+#endif
+#ifdef clib_crc32c_uses_intrinsics
+static_always_inline u32
+clib_crc32c_with_init (u8 *s, int len, u32 last)
+{
for (; len >= 8; len -= 8, s += 8)
- v = __crc32cd (v, *((u64 *) s));
+ last = clib_crc32c_u64 (last, *((u64u *) s));
for (; len >= 4; len -= 4, s += 4)
- v = __crc32cw (v, *((u32 *) s));
+ last = clib_crc32c_u32 (last, *((u32u *) s));
for (; len >= 2; len -= 2, s += 2)
- v = __crc32ch (v, *((u16 *) s));
+ last = clib_crc32c_u16 (last, *((u16u *) s));
for (; len >= 1; len -= 1, s += 1)
- v = __crc32cb (v, *((u8 *) s));
+ last = clib_crc32c_u8 (last, *((u8 *) s));
- return v;
+ return last;
}
+static_always_inline u32
+clib_crc32c (u8 *s, int len)
+{
+ return clib_crc32c_with_init (s, len, 0);
+}
+#else
+
+static_always_inline u32
+_clib_crc32c (u32 crc, const u8 *p, uword len)
+{
+ static const u32 clib_crc32c_table[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,
+ 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,
+ 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,
+ 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L,
+ 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL,
+ 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,
+ 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,
+ 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L,
+ 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L,
+ 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL,
+ 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,
+ 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L,
+ 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL,
+ 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,
+ 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L,
+ 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,
+ 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L,
+ 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL,
+ 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL,
+ 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL,
+ 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,
+ 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L,
+ 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL,
+ 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL,
+ 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L,
+ 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL,
+ 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,
+ 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L,
+ 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL,
+ 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L,
+ 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L,
+ 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,
+ 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L,
+ 0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,
+ 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L,
+ 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L,
+ 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,
+ 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L,
+ 0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L,
+ 0xAD7D5351L
+ };
+
+ while (len--)
+ crc = (crc >> 8) ^ clib_crc32c_table[(u8) crc ^ p++[0]];
+
+ return crc;
+}
+
+static_always_inline u32
+clib_crc32c (const u8 *p, uword len)
+{
+ return _clib_crc32c (0, p, len);
+}
#endif
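Note that clib_crc32c starts from 0 and applies no final inversion, so it is
the raw reflected CRC-32C update (well suited to hashing, but not the
customary checksum with ~0 init/xorout). On the intrinsics path,
clib_crc32c_with_init lets callers chain across split buffers (part1/part2
are hypothetical):

  u32 h = clib_crc32c (part1, len1);
  h = clib_crc32c_with_init (part2, len2, h);  /* == one contiguous pass */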
+
#endif /* __included_crc32_h__ */
/*
diff --git a/src/vppinfra/crypto/aes.h b/src/vppinfra/crypto/aes.h
new file mode 100644
index 00000000000..9e80e3b0318
--- /dev/null
+++ b/src/vppinfra/crypto/aes.h
@@ -0,0 +1,491 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __aes_h__
+#define __aes_h__
+
+typedef enum
+{
+ AES_KEY_128 = 0,
+ AES_KEY_192 = 1,
+ AES_KEY_256 = 2,
+} aes_key_size_t;
+
+#define AES_KEY_ROUNDS(x) (10 + x * 2)
+#define AES_KEY_BYTES(x) (16 + x * 8)
+
+static_always_inline u8x16
+aes_block_load (u8 * p)
+{
+ return *(u8x16u *) p;
+}
+
+static_always_inline u8x16
+aes_enc_round_x1 (u8x16 a, u8x16 k)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
+#endif
+}
+
+#if defined(__VAES__) && defined(__AVX512F__)
+static_always_inline u8x64
+aes_enc_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_enc_last_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_dec_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_dec_last_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
+}
+#endif
+
+#ifdef __VAES__
+static_always_inline u8x32
+aes_enc_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_enc_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
+}
+#endif
+
+static_always_inline u8x16
+aes_enc_last_round_x1 (u8x16 a, u8x16 k)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
+#endif
+}
+
+#ifdef __x86_64__
+
+static_always_inline u8x16
+aes_dec_round_x1 (u8x16 a, u8x16 k)
+{
+ return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
+}
+
+static_always_inline u8x16
+aes_dec_last_round_x1 (u8x16 a, u8x16 k)
+{
+ return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
+}
+#endif
+
+static_always_inline void
+aes_block_store (u8 * p, u8x16 r)
+{
+ *(u8x16u *) p = r;
+}
+
+static_always_inline u8x16
+aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ block ^= round_keys[0];
+ for (int i = 1; i < rounds; i += 1)
+ block = aes_enc_round_x1 (block, round_keys[i]);
+ return aes_enc_last_round_x1 (block, round_keys[rounds]);
+}
+
+static_always_inline u8x16
+aes_inv_mix_column (u8x16 a)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesimc_si128 ((__m128i) a);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaesimcq_u8 (a);
+#endif
+}
+
+#ifdef __x86_64__
+#define aes_keygen_assist(a, b) \
+ (u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
+
+/* AES-NI based AES key expansion, following the code samples from the
+   Intel(R) Advanced Encryption Standard (AES) New Instructions White Paper
+   (323641-001) */
+
+static_always_inline void
+aes128_key_assist (u8x16 * rk, u8x16 r)
+{
+ u8x16 t = rk[-1];
+ t ^= u8x16_word_shift_left (t, 4);
+ t ^= u8x16_word_shift_left (t, 4);
+ t ^= u8x16_word_shift_left (t, 4);
+ rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
+}
+
+static_always_inline void
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
+ aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
+ aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
+ aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
+ aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
+ aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
+ aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
+ aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
+ aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
+ aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
+}
+
+static_always_inline void
+aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
+{
+ u8x16 t;
+ r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
+ r1[0] ^= t = u8x16_word_shift_left (t, 4);
+ r1[0] ^= u8x16_word_shift_left (t, 4);
+ r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
+ r2[0] ^= u8x16_word_shift_left (r2[0], 4);
+ r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
+}
+
+static_always_inline void
+aes192_key_expand (u8x16 * rk, u8x16u const *k)
+{
+ u8x16 r1, r2;
+
+ rk[0] = r1 = k[0];
+ rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
+ rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
+ rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
+ rk[3] = r1;
+ rk[4] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
+ rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
+ rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
+ rk[6] = r1;
+ rk[7] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
+ rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
+ rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
+ rk[9] = r1;
+ rk[10] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
+ rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
+ rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
+ rk[12] = r1;
+}
+
+static_always_inline void
+aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
+{
+ u8x16 r, t;
+ rk += i;
+ r = rk[-2];
+ r ^= t = u8x16_word_shift_left (r, 4);
+ r ^= t = u8x16_word_shift_left (t, 4);
+ r ^= u8x16_word_shift_left (t, 4);
+ r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
+ rk[0] = r;
+
+ if (i >= 14)
+ return;
+
+ key_assist = aes_keygen_assist (rk[0], 0x0);
+ r = rk[-1];
+ r ^= t = u8x16_word_shift_left (r, 4);
+ r ^= t = u8x16_word_shift_left (t, 4);
+ r ^= u8x16_word_shift_left (t, 4);
+ r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
+ rk[1] = r;
+}
+
+static_always_inline void
+aes256_key_expand (u8x16 * rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ rk[1] = k[1];
+ aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
+ aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
+ aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
+ aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
+ aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
+ aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
+ aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
+}
+#endif
+
+#ifdef __aarch64__
+
+static const u8x16 aese_prep_mask1 =
+ { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
+static const u8x16 aese_prep_mask2 =
+ { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
+
+static_always_inline void
+aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
+{
+ u8x16 r, t, last_round = rk[-1], z = { };
+ r = vqtbl1q_u8 (last_round, aese_prep_mask1);
+ r = vaeseq_u8 (r, z);
+ r ^= (u8x16) vdupq_n_u32 (rcon);
+ r ^= last_round;
+ r ^= t = vextq_u8 (z, last_round, 12);
+ r ^= t = vextq_u8 (z, t, 12);
+ r ^= vextq_u8 (z, t, 12);
+ rk[0] = r;
+}
+
+static_always_inline void
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ aes128_key_expand_round_neon (rk + 1, 0x01);
+ aes128_key_expand_round_neon (rk + 2, 0x02);
+ aes128_key_expand_round_neon (rk + 3, 0x04);
+ aes128_key_expand_round_neon (rk + 4, 0x08);
+ aes128_key_expand_round_neon (rk + 5, 0x10);
+ aes128_key_expand_round_neon (rk + 6, 0x20);
+ aes128_key_expand_round_neon (rk + 7, 0x40);
+ aes128_key_expand_round_neon (rk + 8, 0x80);
+ aes128_key_expand_round_neon (rk + 9, 0x1b);
+ aes128_key_expand_round_neon (rk + 10, 0x36);
+}
+
+static_always_inline void
+aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
+{
+ u8x8 r, last_round = rk[-1], z = { };
+ u8x16 r2, z2 = { };
+
+ r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
+ r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
+ r2 = vaeseq_u8 (r2, z2);
+ r2 ^= (u8x16) vdupq_n_u32 (rcon);
+
+ r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
+ r ^= rk[-3];
+ r ^= vext_u8 (z, rk[-3], 4);
+ rk[0] = r;
+
+ r = rk[-2] ^ vext_u8 (r, z, 4);
+ r ^= vext_u8 (z, r, 4);
+ rk[1] = r;
+
+ if (rcon == 0x80)
+ return;
+
+ r = rk[-1] ^ vext_u8 (r, z, 4);
+ r ^= vext_u8 (z, r, 4);
+ rk[2] = r;
+}
+
+static_always_inline void
+aes192_key_expand (u8x16 * ek, const u8x16u * k)
+{
+ u8x8 *rk = (u8x8 *) ek;
+ ek[0] = k[0];
+ rk[2] = *(u8x8u *) (k + 1);
+ aes192_key_expand_round_neon (rk + 3, 0x01);
+ aes192_key_expand_round_neon (rk + 6, 0x02);
+ aes192_key_expand_round_neon (rk + 9, 0x04);
+ aes192_key_expand_round_neon (rk + 12, 0x08);
+ aes192_key_expand_round_neon (rk + 15, 0x10);
+ aes192_key_expand_round_neon (rk + 18, 0x20);
+ aes192_key_expand_round_neon (rk + 21, 0x40);
+ aes192_key_expand_round_neon (rk + 24, 0x80);
+}
+
+static_always_inline void
+aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
+{
+ u8x16 r, t, z = { };
+
+ r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
+ r = vaeseq_u8 (r, z);
+ if (rcon)
+ r ^= (u8x16) vdupq_n_u32 (rcon);
+ r ^= rk[-2];
+ r ^= t = vextq_u8 (z, rk[-2], 12);
+ r ^= t = vextq_u8 (z, t, 12);
+ r ^= vextq_u8 (z, t, 12);
+ rk[0] = r;
+}
+
+static_always_inline void
+aes256_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ rk[1] = k[1];
+ aes256_key_expand_round_neon (rk + 2, 0x01);
+ aes256_key_expand_round_neon (rk + 3, 0);
+ aes256_key_expand_round_neon (rk + 4, 0x02);
+ aes256_key_expand_round_neon (rk + 5, 0);
+ aes256_key_expand_round_neon (rk + 6, 0x04);
+ aes256_key_expand_round_neon (rk + 7, 0);
+ aes256_key_expand_round_neon (rk + 8, 0x08);
+ aes256_key_expand_round_neon (rk + 9, 0);
+ aes256_key_expand_round_neon (rk + 10, 0x10);
+ aes256_key_expand_round_neon (rk + 11, 0);
+ aes256_key_expand_round_neon (rk + 12, 0x20);
+ aes256_key_expand_round_neon (rk + 13, 0);
+ aes256_key_expand_round_neon (rk + 14, 0x40);
+}
+
+#endif
+
+static_always_inline void
+aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
+{
+ switch (ks)
+ {
+ case AES_KEY_128:
+ aes128_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ case AES_KEY_192:
+ aes192_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ case AES_KEY_256:
+ aes256_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ }
+}
+
+static_always_inline void
+aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+
+ kd[rounds] = ke[0];
+ kd[0] = ke[rounds];
+
+ for (int i = 1; i < (rounds / 2); i++)
+ {
+ kd[rounds - i] = aes_inv_mix_column (ke[i]);
+ kd[i] = aes_inv_mix_column (ke[rounds - i]);
+ }
+
+ kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
+}
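A minimal sketch of how the two helpers above combine to derive a decryption
schedule (the key bytes are a placeholder): the round keys are reversed and the
inner ones are run through InvMixColumns, i.e. the Equivalent Inverse Cipher of
FIPS-197.

    u8x16 ke[15], kd[15];
    u8 key[32] = {}; /* placeholder key material */

    aes_key_expand (ke, key, AES_KEY_256);    /* 15 round keys */
    aes_key_enc_to_dec (ke, kd, AES_KEY_256); /* decryption schedule in kd[] */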
+#if defined(__VAES__) && defined(__AVX512F__)
+#define N_AES_LANES 4
+#define aes_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x64_reflect_u8x16 (r)
+typedef u8x64 aes_data_t;
+typedef u8x64u aes_mem_t;
+typedef u32x16 aes_counter_t;
+#elif defined(__VAES__)
+#define N_AES_LANES 2
+#define aes_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x32_reflect_u8x16 (r)
+typedef u8x32 aes_data_t;
+typedef u8x32u aes_mem_t;
+typedef u32x8 aes_counter_t;
+#else
+#define N_AES_LANES 1
+#define aes_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x16_reflect (r)
+typedef u8x16 aes_data_t;
+typedef u8x16u aes_mem_t;
+typedef u32x4 aes_counter_t;
+#endif
+
+#define N_AES_BYTES (N_AES_LANES * 16)
+
+typedef union
+{
+ u8x16 x1;
+ u8x32 x2;
+ u8x64 x4;
+ u8x16 lanes[4];
+} aes_expaned_key_t;
+
+static_always_inline void
+aes_enc_round (aes_data_t *r, const aes_expaned_key_t *ek, uword n_blocks)
+{
+ for (int i = 0; i < n_blocks; i++)
+#if N_AES_LANES == 4
+ r[i] = aes_enc_round_x4 (r[i], ek->x4);
+#elif N_AES_LANES == 2
+ r[i] = aes_enc_round_x2 (r[i], ek->x2);
+#else
+ r[i] = aes_enc_round_x1 (r[i], ek->x1);
+#endif
+}
+
+static_always_inline void
+aes_enc_last_round (aes_data_t *r, aes_data_t *d, const aes_expaned_key_t *ek,
+ uword n_blocks)
+{
+ for (int i = 0; i < n_blocks; i++)
+#if N_AES_LANES == 4
+ d[i] ^= r[i] = aes_enc_last_round_x4 (r[i], ek->x4);
+#elif N_AES_LANES == 2
+ d[i] ^= r[i] = aes_enc_last_round_x2 (r[i], ek->x2);
+#else
+ d[i] ^= r[i] = aes_enc_last_round_x1 (r[i], ek->x1);
+#endif
+}
+
+#endif /* __aes_h__ */
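Putting the single-block helpers together, a sketch of encrypting one AES-128
block using only APIs defined in this header (key and plaintext are
placeholders):

    u8x16 rk[11]; /* AES_KEY_ROUNDS (AES_KEY_128) + 1 round keys */
    u8 key[16] = {}, pt[16] = {}, ct[16];

    aes_key_expand (rk, key, AES_KEY_128);
    aes_block_store (ct, aes_encrypt_block (aes_block_load (pt), rk,
					    AES_KEY_128));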
diff --git a/src/vppinfra/crypto/aes_cbc.h b/src/vppinfra/crypto/aes_cbc.h
new file mode 100644
index 00000000000..ee9263df260
--- /dev/null
+++ b/src/vppinfra/crypto/aes_cbc.h
@@ -0,0 +1,745 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_cbc_h__
+#define __crypto_aes_cbc_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/crypto/aes.h>
+
+typedef struct
+{
+ const u8x16 encrypt_key[15];
+ const u8x16 decrypt_key[15];
+} aes_cbc_key_data_t;
+
+static_always_inline void
+clib_aes_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *src, uword len,
+ const u8 *iv, aes_key_size_t ks, u8 *dst)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ u8x16 r, *k = (u8x16 *) kd->encrypt_key;
+
+ r = *(u8x16u *) iv;
+
+ for (int i = 0; i < len; i += 16)
+ {
+ int j;
+ r = u8x16_xor3 (r, *(u8x16u *) (src + i), k[0]);
+ for (j = 1; j < rounds; j++)
+ r = aes_enc_round_x1 (r, k[j]);
+ r = aes_enc_last_round_x1 (r, k[rounds]);
+ *(u8x16u *) (dst + i) = r;
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_128, ciphertext);
+}
+
+static_always_inline void
+clib_aes192_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_192, ciphertext);
+}
+
+static_always_inline void
+clib_aes256_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_256, ciphertext);
+}
+
+static_always_inline void __clib_unused
+aes_cbc_dec (const u8x16 *k, u8x16u *src, u8x16u *dst, u8x16u *iv, int count,
+ int rounds)
+{
+ u8x16 r[4], c[4], f;
+
+ f = iv[0];
+ while (count >= 64)
+ {
+ c[0] = r[0] = src[0];
+ c[1] = r[1] = src[1];
+ c[2] = r[2] = src[2];
+ c[3] = r[3] = src[3];
+
+#if __x86_64__
+ r[0] ^= k[0];
+ r[1] ^= k[0];
+ r[2] ^= k[0];
+ r[3] ^= k[0];
+
+ for (int i = 1; i < rounds; i++)
+ {
+ r[0] = aes_dec_round_x1 (r[0], k[i]);
+ r[1] = aes_dec_round_x1 (r[1], k[i]);
+ r[2] = aes_dec_round_x1 (r[2], k[i]);
+ r[3] = aes_dec_round_x1 (r[3], k[i]);
+ }
+
+ r[0] = aes_dec_last_round_x1 (r[0], k[rounds]);
+ r[1] = aes_dec_last_round_x1 (r[1], k[rounds]);
+ r[2] = aes_dec_last_round_x1 (r[2], k[rounds]);
+ r[3] = aes_dec_last_round_x1 (r[3], k[rounds]);
+#else
+ for (int i = 0; i < rounds - 1; i++)
+ {
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
+ r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
+ r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
+ }
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+ r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
+ r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
+ r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ dst[1] = r[1] ^ c[0];
+ dst[2] = r[2] ^ c[1];
+ dst[3] = r[3] ^ c[2];
+ f = c[3];
+
+ count -= 64;
+ src += 4;
+ dst += 4;
+ }
+
+ while (count > 0)
+ {
+ c[0] = r[0] = src[0];
+#if __x86_64__
+ r[0] ^= k[0];
+ for (int i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x1 (r[0], k[i]);
+ r[0] = aes_dec_last_round_x1 (r[0], k[rounds]);
+#else
+ for (int i = 0; i < rounds - 1; i++)
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ f = c[0];
+
+ count -= 16;
+ src += 1;
+ dst += 1;
+ }
+}
+
+#if __x86_64__
+#if defined(__VAES__) && defined(__AVX512F__)
+
+static_always_inline u8x64
+aes_block_load_x4 (u8 *src[], int i)
+{
+ u8x64 r = {};
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3);
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x4 (u8 *dst[], int i, u8x64 r)
+{
+ aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0));
+ aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1));
+ aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2));
+ aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3));
+}
+
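+/* concatenate (a, b) and shift right by one 128-bit block, yielding
+   { a[3], b[0], b[1], b[2] } - the preceding ciphertext block for each
+   lane of b, as needed for the CBC xor */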
+static_always_inline u8x64
+aes4_cbc_dec_permute (u8x64 a, u8x64 b)
+{
+ return (u8x64) u64x8_shuffle2 (a, b, 6, 7, 8, 9, 10, 11, 12, 13);
+}
+
+static_always_inline void
+aes4_cbc_dec (const u8x16 *k, u8x64u *src, u8x64u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x64 f, k4, r[4], c[4] = {};
+ __mmask8 m;
+ int i, n_blocks = count >> 4;
+
+ f = u8x64_insert_u8x16 (u8x64_zero (), *iv, 3);
+
+ while (n_blocks >= 16)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+ r[3] = c[3] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ r[3] = aes_dec_round_x4 (r[3], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+ r[3] = aes_dec_last_round_x4 (r[3], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes4_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 16;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 12)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 12;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 8)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 8;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 4)
+ {
+ c[0] = src[0];
+
+ r[0] = c[0] ^ u8x64_splat_u8x16 (k[0]);
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 4;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ m = (1 << (n_blocks * 2)) - 1;
+ c[0] =
+ (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m, (__m512i *) src);
+ f = aes4_cbc_dec_permute (f, c[0]);
+ r[0] = c[0] ^ k4;
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
+ }
+}
+#elif defined(__VAES__)
+
+static_always_inline u8x32
+aes_block_load_x2 (u8 *src[], int i)
+{
+ u8x32 r = {};
+ r = u8x32_insert_lo (r, aes_block_load (src[0] + i));
+ r = u8x32_insert_hi (r, aes_block_load (src[1] + i));
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x2 (u8 *dst[], int i, u8x32 r)
+{
+ aes_block_store (dst[0] + i, u8x32_extract_lo (r));
+ aes_block_store (dst[1] + i, u8x32_extract_hi (r));
+}
+
+static_always_inline u8x32
+aes2_cbc_dec_permute (u8x32 a, u8x32 b)
+{
+ return (u8x32) u64x4_shuffle2 ((u64x4) a, (u64x4) b, 2, 3, 4, 5);
+}
+
+static_always_inline void
+aes2_cbc_dec (const u8x16 *k, u8x32u *src, u8x32u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x32 k2, f = {}, r[4], c[4] = {};
+ int i, n_blocks = count >> 4;
+
+ f = u8x32_insert_hi (f, *iv);
+
+ while (n_blocks >= 8)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+ r[3] = c[3] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ r[3] = aes_dec_round_x2 (r[3], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+ r[3] = aes_dec_last_round_x2 (r[3], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes2_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 8;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 6)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 6;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 4)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 4;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 2)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ r[0] = c[0] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 2;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ u8x16 rl = *(u8x16u *) src ^ k[0];
+ for (i = 1; i < rounds; i++)
+ rl = aes_dec_round_x1 (rl, k[i]);
+ rl = aes_dec_last_round_x1 (rl, k[i]);
+ *(u8x16u *) dst = rl ^ u8x32_extract_hi (f);
+ }
+}
+#endif
+#endif
+
+static_always_inline void
+clib_aes_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 e[15], d[15];
+ aes_key_expand (e, key, ks);
+ aes_key_enc_to_dec (e, d, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ {
+ ((u8x16 *) kd->decrypt_key)[i] = d[i];
+ ((u8x16 *) kd->encrypt_key)[i] = e[i];
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_128);
+}
+static_always_inline void
+clib_aes192_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_192);
+}
+static_always_inline void
+clib_aes256_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_256);
+}
+
+static_always_inline void
+clib_aes_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, aes_key_size_t ks,
+ u8 *plaintext)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+#if defined(__VAES__) && defined(__AVX512F__)
+ aes4_cbc_dec (kd->decrypt_key, (u8x64u *) ciphertext, (u8x64u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#elif defined(__VAES__)
+ aes2_cbc_dec (kd->decrypt_key, (u8x32u *) ciphertext, (u8x32u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#else
+ aes_cbc_dec (kd->decrypt_key, (u8x16u *) ciphertext, (u8x16u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#endif
+}
+
+static_always_inline void
+clib_aes128_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_128, plaintext);
+}
+
+static_always_inline void
+clib_aes192_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_192, plaintext);
+}
+
+static_always_inline void
+clib_aes256_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_256, plaintext);
+}
+
+#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
+#pragma GCC optimize("O3")
+#endif
+
+#if defined(__VAES__) && defined(__AVX512F__)
+#define u8xN u8x64
+#define u32xN u32x16
+#define u32xN_min_scalar u32x16_min_scalar
+#define u32xN_is_all_zero u32x16_is_all_zero
+#define u32xN_splat u32x16_splat
+#elif defined(__VAES__)
+#define u8xN u8x32
+#define u32xN u32x8
+#define u32xN_min_scalar u32x8_min_scalar
+#define u32xN_is_all_zero u32x8_is_all_zero
+#define u32xN_splat u32x8_splat
+#else
+#define u8xN u8x16
+#define u32xN u32x4
+#define u32xN_min_scalar u32x4_min_scalar
+#define u32xN_is_all_zero u32x4_is_all_zero
+#define u32xN_splat u32x4_splat
+#endif
+
+static_always_inline u32
+clib_aes_cbc_encrypt_multi (aes_cbc_key_data_t **key_data,
+ const uword *key_indices, u8 **plaintext,
+ const uword *oplen, u8 **iv, aes_key_size_t ks,
+ u8 **ciphertext, uword n_ops)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ u8 placeholder[8192];
+ u32 i, j, count, n_left = n_ops;
+ u32xN placeholder_mask = {};
+ u32xN len = {};
+ u32 key_index[4 * N_AES_LANES];
+ u8 *src[4 * N_AES_LANES] = {};
+ u8 *dst[4 * N_AES_LANES] = {};
+ u8xN r[4] = {};
+ u8xN k[15][4] = {};
+
+ for (i = 0; i < 4 * N_AES_LANES; i++)
+ key_index[i] = ~0;
+
+more:
+ for (i = 0; i < 4 * N_AES_LANES; i++)
+ if (len[i] == 0)
+ {
+ if (n_left == 0)
+ {
+	      /* no more work to enqueue, so enqueue the placeholder buffer */
+ src[i] = dst[i] = placeholder;
+ len[i] = sizeof (placeholder);
+ placeholder_mask[i] = 0;
+ }
+ else
+ {
+ u8x16 t = aes_block_load (iv[0]);
+ ((u8x16 *) r)[i] = t;
+
+ src[i] = plaintext[0];
+ dst[i] = ciphertext[0];
+ len[i] = oplen[0];
+ placeholder_mask[i] = ~0;
+ if (key_index[i] != key_indices[0])
+ {
+ aes_cbc_key_data_t *kd;
+ key_index[i] = key_indices[0];
+ kd = key_data[key_index[i]];
+ for (j = 0; j < rounds + 1; j++)
+ ((u8x16 *) k[j])[i] = kd->encrypt_key[j];
+ }
+ n_left--;
+ iv++;
+ ciphertext++;
+ plaintext++;
+ key_indices++;
+ oplen++;
+ }
+ }
+
+ count = u32xN_min_scalar (len);
+
+ ASSERT (count % 16 == 0);
+
+ for (i = 0; i < count; i += 16)
+ {
+#if defined(__VAES__) && defined(__AVX512F__)
+ r[0] = u8x64_xor3 (r[0], aes_block_load_x4 (src, i), k[0][0]);
+ r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src + 4, i), k[0][1]);
+ r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src + 8, i), k[0][2]);
+ r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src + 12, i), k[0][3]);
+
+ for (j = 1; j < rounds; j++)
+ {
+ r[0] = aes_enc_round_x4 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x4 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x4 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x4 (r[3], k[j][3]);
+ }
+ r[0] = aes_enc_last_round_x4 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x4 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x4 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x4 (r[3], k[j][3]);
+
+ aes_block_store_x4 (dst, i, r[0]);
+ aes_block_store_x4 (dst + 4, i, r[1]);
+ aes_block_store_x4 (dst + 8, i, r[2]);
+ aes_block_store_x4 (dst + 12, i, r[3]);
+#elif defined(__VAES__)
+ r[0] = u8x32_xor3 (r[0], aes_block_load_x2 (src, i), k[0][0]);
+ r[1] = u8x32_xor3 (r[1], aes_block_load_x2 (src + 2, i), k[0][1]);
+ r[2] = u8x32_xor3 (r[2], aes_block_load_x2 (src + 4, i), k[0][2]);
+ r[3] = u8x32_xor3 (r[3], aes_block_load_x2 (src + 6, i), k[0][3]);
+
+ for (j = 1; j < rounds; j++)
+ {
+ r[0] = aes_enc_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x2 (r[3], k[j][3]);
+ }
+ r[0] = aes_enc_last_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x2 (r[3], k[j][3]);
+
+ aes_block_store_x2 (dst, i, r[0]);
+ aes_block_store_x2 (dst + 2, i, r[1]);
+ aes_block_store_x2 (dst + 4, i, r[2]);
+ aes_block_store_x2 (dst + 6, i, r[3]);
+#else
+#if __x86_64__
+ r[0] = u8x16_xor3 (r[0], aes_block_load (src[0] + i), k[0][0]);
+ r[1] = u8x16_xor3 (r[1], aes_block_load (src[1] + i), k[0][1]);
+ r[2] = u8x16_xor3 (r[2], aes_block_load (src[2] + i), k[0][2]);
+ r[3] = u8x16_xor3 (r[3], aes_block_load (src[3] + i), k[0][3]);
+
+ for (j = 1; j < rounds; j++)
+ {
+ r[0] = aes_enc_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x1 (r[3], k[j][3]);
+ }
+
+ r[0] = aes_enc_last_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x1 (r[3], k[j][3]);
+
+ aes_block_store (dst[0] + i, r[0]);
+ aes_block_store (dst[1] + i, r[1]);
+ aes_block_store (dst[2] + i, r[2]);
+ aes_block_store (dst[3] + i, r[3]);
+#else
+ r[0] ^= aes_block_load (src[0] + i);
+ r[1] ^= aes_block_load (src[1] + i);
+ r[2] ^= aes_block_load (src[2] + i);
+ r[3] ^= aes_block_load (src[3] + i);
+ for (j = 0; j < rounds - 1; j++)
+ {
+ r[0] = vaesmcq_u8 (vaeseq_u8 (r[0], k[j][0]));
+ r[1] = vaesmcq_u8 (vaeseq_u8 (r[1], k[j][1]));
+ r[2] = vaesmcq_u8 (vaeseq_u8 (r[2], k[j][2]));
+ r[3] = vaesmcq_u8 (vaeseq_u8 (r[3], k[j][3]));
+ }
+ r[0] = vaeseq_u8 (r[0], k[j][0]) ^ k[rounds][0];
+ r[1] = vaeseq_u8 (r[1], k[j][1]) ^ k[rounds][1];
+ r[2] = vaeseq_u8 (r[2], k[j][2]) ^ k[rounds][2];
+ r[3] = vaeseq_u8 (r[3], k[j][3]) ^ k[rounds][3];
+ aes_block_store (dst[0] + i, r[0]);
+ aes_block_store (dst[1] + i, r[1]);
+ aes_block_store (dst[2] + i, r[2]);
+ aes_block_store (dst[3] + i, r[3]);
+#endif
+#endif
+ }
+
+ len -= u32xN_splat (count);
+
+ for (i = 0; i < 4 * N_AES_LANES; i++)
+ {
+ src[i] += count;
+ dst[i] += count;
+ }
+
+ if (n_left > 0)
+ goto more;
+
+ if (!u32xN_is_all_zero (len & placeholder_mask))
+ goto more;
+
+ return n_ops;
+}
+
+#undef u8xN
+#undef u32xN
+#undef u32xN_min_scalar
+#undef u32xN_is_all_zero
+#undef u32xN_splat
+
+#endif /* __crypto_aes_cbc_h__ */
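A round-trip sketch for the single-buffer CBC API above; key, IV and data are
placeholders, and since the API does no padding, the length is kept a multiple
of the 16-byte block size:

    aes_cbc_key_data_t kd;
    u8 key[16] = {}, iv[16] = {};
    u8 pt[64] = {}, ct[64], out[64];

    clib_aes128_cbc_key_expand (&kd, key);
    clib_aes128_cbc_encrypt (&kd, pt, sizeof (pt), iv, ct);
    clib_aes128_cbc_decrypt (&kd, ct, sizeof (ct), iv, out);
    /* out[] now equals pt[] */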
diff --git a/src/vppinfra/crypto/aes_ctr.h b/src/vppinfra/crypto/aes_ctr.h
new file mode 100644
index 00000000000..74a9f96d90d
--- /dev/null
+++ b/src/vppinfra/crypto/aes_ctr.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_ctr_h__
+#define __crypto_aes_ctr_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+#include <vppinfra/crypto/aes.h>
+
+typedef struct
+{
+ const aes_expaned_key_t exp_key[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+} aes_ctr_key_data_t;
+
+typedef struct
+{
+ const aes_expaned_key_t exp_key[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_counter_t ctr; /* counter (reflected) */
+ u8 keystream_bytes[N_AES_BYTES]; /* keystream leftovers */
+ u32 n_keystream_bytes; /* number of keystream leftovers */
+} aes_ctr_ctx_t;
+
+static_always_inline aes_counter_t
+aes_ctr_one_block (aes_ctr_ctx_t *ctx, aes_counter_t ctr, const u8 *src,
+ u8 *dst, u32 n_parallel, u32 n_bytes, int rounds, int last)
+{
+ u32 __clib_aligned (N_AES_BYTES)
+ inc[] = { N_AES_LANES, 0, 0, 0, N_AES_LANES, 0, 0, 0,
+ N_AES_LANES, 0, 0, 0, N_AES_LANES, 0, 0, 0 };
+ const aes_expaned_key_t *k = ctx->exp_key;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ aes_data_t d[4], t[4];
+ u32 r;
+
+ n_bytes -= (n_parallel - 1) * N_AES_BYTES;
+
+ /* AES First Round */
+ for (int i = 0; i < n_parallel; i++)
+ {
+#if N_AES_LANES == 4
+ t[i] = k[0].x4 ^ (u8x64) aes_reflect ((u8x64) ctr);
+#elif N_AES_LANES == 2
+ t[i] = k[0].x2 ^ (u8x32) aes_reflect ((u8x32) ctr);
+#else
+ t[i] = k[0].x1 ^ (u8x16) aes_reflect ((u8x16) ctr);
+#endif
+ ctr += *(aes_counter_t *) inc;
+ }
+
+ /* Load Data */
+ for (int i = 0; i < n_parallel - last; i++)
+ d[i] = sv[i];
+
+ if (last)
+ d[n_parallel - 1] =
+ aes_load_partial ((u8 *) (sv + n_parallel - 1), n_bytes);
+
+ /* AES Intermediate Rounds */
+ for (r = 1; r < rounds; r++)
+ aes_enc_round (t, k + r, n_parallel);
+
+ /* AES Last Round */
+ aes_enc_last_round (t, d, k + r, n_parallel);
+
+ /* Store Data */
+ for (int i = 0; i < n_parallel - last; i++)
+ dv[i] = d[i];
+
+ if (last)
+ {
+ aes_store_partial (d[n_parallel - 1], dv + n_parallel - 1, n_bytes);
+ *(aes_data_t *) ctx->keystream_bytes = t[n_parallel - 1];
+ ctx->n_keystream_bytes = N_AES_BYTES - n_bytes;
+ }
+
+ return ctr;
+}
+
+static_always_inline void
+clib_aes_ctr_init (aes_ctr_ctx_t *ctx, const aes_ctr_key_data_t *kd,
+ const u8 *iv, aes_key_size_t ks)
+{
+ u32x4 ctr = (u32x4) u8x16_reflect (*(u8x16u *) iv);
+#if N_AES_LANES == 4
+ ctx->ctr = (aes_counter_t) u32x16_splat_u32x4 (ctr) +
+ (u32x16){ 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0 };
+#elif N_AES_LANES == 2
+ ctx->ctr = (aes_counter_t) u32x8_splat_u32x4 (ctr) +
+ (u32x8){ 0, 0, 0, 0, 1, 0, 0, 0 };
+#else
+ ctx->ctr = ctr;
+#endif
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ ((aes_expaned_key_t *) ctx->exp_key)[i] = kd->exp_key[i];
+ ctx->n_keystream_bytes = 0;
+}
+
+static_always_inline void
+clib_aes_ctr_transform (aes_ctr_ctx_t *ctx, const u8 *src, u8 *dst,
+ u32 n_bytes, aes_key_size_t ks)
+{
+ int r = AES_KEY_ROUNDS (ks);
+ aes_counter_t ctr = ctx->ctr;
+
+ if (ctx->n_keystream_bytes)
+ {
+      u8 *kb = ctx->keystream_bytes + N_AES_BYTES - ctx->n_keystream_bytes;
+
+ if (ctx->n_keystream_bytes >= n_bytes)
+ {
+ for (int i = 0; i < n_bytes; i++)
+	    dst[i] = src[i] ^ kb[i];
+ ctx->n_keystream_bytes -= n_bytes;
+ return;
+ }
+
+ for (int i = 0; i < ctx->n_keystream_bytes; i++)
+	dst++[0] = src++[0] ^ kb[i];
+
+ n_bytes -= ctx->n_keystream_bytes;
+ ctx->n_keystream_bytes = 0;
+ }
+
+ /* main loop */
+ for (int n = 4 * N_AES_BYTES; n_bytes >= n; n_bytes -= n, dst += n, src += n)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 4, n, r, 0);
+
+ if (n_bytes)
+ {
+ if (n_bytes > 3 * N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 4, n_bytes, r, 1);
+ else if (n_bytes > 2 * N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 3, n_bytes, r, 1);
+ else if (n_bytes > N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 2, n_bytes, r, 1);
+ else
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 1, n_bytes, r, 1);
+ }
+ else
+ ctx->n_keystream_bytes = 0;
+
+ ctx->ctr = ctr;
+}
+
+static_always_inline void
+clib_aes_ctr_key_expand (aes_ctr_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_expaned_key_t *k = (aes_expaned_key_t *) kd->exp_key;
+
+ /* expand AES key */
+ aes_key_expand (ek, key, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ k[i].lanes[0] = k[i].lanes[1] = k[i].lanes[2] = k[i].lanes[3] = ek[i];
+}
+
+static_always_inline void
+clib_aes128_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_128);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_128);
+}
+
+static_always_inline void
+clib_aes192_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_192);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_192);
+}
+
+static_always_inline void
+clib_aes256_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_256);
+}
+
+#endif /* __crypto_aes_ctr_h__ */
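A usage sketch for the one-shot CTR helpers above (placeholder key, IV and
data); CTR mode only xors the data with a keystream, so the identical call
performs decryption, and lengths need not be block-aligned:

    aes_ctr_key_data_t kd;
    u8 key[16] = {}, iv[16] = {};
    u8 pt[100] = {}, ct[100], out[100];

    clib_aes_ctr_key_expand (&kd, key, AES_KEY_128);
    clib_aes128_ctr (&kd, pt, sizeof (pt), iv, ct);  /* encrypt */
    clib_aes128_ctr (&kd, ct, sizeof (ct), iv, out); /* decrypt: out == pt */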
diff --git a/src/vppinfra/crypto/aes_gcm.h b/src/vppinfra/crypto/aes_gcm.h
new file mode 100644
index 00000000000..5b628c87745
--- /dev/null
+++ b/src/vppinfra/crypto/aes_gcm.h
@@ -0,0 +1,944 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_gcm_h__
+#define __crypto_aes_gcm_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+#include <vppinfra/crypto/aes.h>
+#include <vppinfra/crypto/ghash.h>
+
+#define NUM_HI 36
+#if N_AES_LANES == 4
+typedef u8x64u aes_ghash_t;
+#define aes_gcm_splat(v) u8x64_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
+#elif N_AES_LANES == 2
+typedef u8x32u aes_ghash_t;
+#define aes_gcm_splat(v) u8x32_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
+#else
+typedef u8x16 aes_ghash_t;
+#define aes_gcm_splat(v) u8x16_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
+#endif
+
+typedef enum
+{
+  AES_GCM_OP_UNKNOWN = 0,
+ AES_GCM_OP_ENCRYPT,
+ AES_GCM_OP_DECRYPT,
+ AES_GCM_OP_GMAC
+} aes_gcm_op_t;
+
+typedef struct
+{
+ /* pre-calculated hash key values */
+ const u8x16 Hi[NUM_HI];
+ /* extracted AES key */
+ const aes_expaned_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+} aes_gcm_key_data_t;
+
+typedef struct
+{
+ aes_gcm_op_t operation;
+ int last;
+ u8 rounds;
+ uword data_bytes;
+ uword aad_bytes;
+
+ u8x16 T;
+
+ /* hash */
+ const u8x16 *Hi;
+ const aes_ghash_t *next_Hi;
+
+  /* expanded keys */
+ const aes_expaned_key_t *Ke;
+
+ /* counter */
+ u32 counter;
+ u8x16 EY0;
+ aes_counter_t Y;
+
+ /* ghash */
+ ghash_ctx_t gd;
+} aes_gcm_ctx_t;
+
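+/* the final GHASH block: 64-bit byte counts of data and AAD, scaled to bits */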
+static_always_inline u8x16
+aes_gcm_final_block (aes_gcm_ctx_t *ctx)
+{
+ return (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
+{
+ uword hash_offset = NUM_HI - n_lanes;
+ ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
+#if N_AES_LANES == 4
+ u8x64 tag4 = {};
+ tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
+ ghash4_mul_first (&ctx->gd, aes_reflect (data) ^ tag4, *ctx->next_Hi++);
+#elif N_AES_LANES == 2
+ u8x32 tag2 = {};
+ tag2 = u8x32_insert_lo (tag2, ctx->T);
+ ghash2_mul_first (&ctx->gd, aes_reflect (data) ^ tag2, *ctx->next_Hi++);
+#else
+ ghash_mul_first (&ctx->gd, aes_reflect (data) ^ ctx->T, *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
+{
+#if N_AES_LANES == 4
+ ghash4_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#elif N_AES_LANES == 2
+ ghash2_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#else
+ ghash_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_final_block (aes_gcm_ctx_t *ctx)
+{
+#if N_AES_LANES == 4
+ u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
+ u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), aes_gcm_final_block (ctx), 0);
+ ghash4_mul_next (&ctx->gd, r4, h);
+#elif N_AES_LANES == 2
+ u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
+ u8x32 r2 = u8x32_insert_lo (u8x32_zero (), aes_gcm_final_block (ctx));
+ ghash2_mul_next (&ctx->gd, r2, h);
+#else
+ ghash_mul_next (&ctx->gd, aes_gcm_final_block (ctx), ctx->Hi[NUM_HI - 1]);
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
+{
+ if (aes_round == 0)
+ ctx->EY0 ^= ctx->Ke[0].x1;
+ else if (aes_round == ctx->rounds)
+ ctx->EY0 = aes_enc_last_round_x1 (ctx->EY0, ctx->Ke[aes_round].x1);
+ else
+ ctx->EY0 = aes_enc_round_x1 (ctx->EY0, ctx->Ke[aes_round].x1);
+}
+
+static_always_inline void
+aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
+{
+ uword i;
+ aes_data_t r = {};
+ const aes_mem_t *d = (aes_mem_t *) data;
+
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, d += 8)
+ {
+ if (ctx->operation == AES_GCM_OP_GMAC && n_left == n)
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_AES_LANES + 1);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ goto done;
+ }
+
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_AES_LANES);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+
+ if (n_left > 0)
+ {
+ int n_lanes = (n_left + 15) / 16;
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ n_lanes++;
+
+ if (n_left < N_AES_BYTES)
+ {
+ clib_memcpy_fast (&r, d, n_left);
+ aes_gcm_ghash_mul_first (ctx, r, n_lanes);
+ }
+ else
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+ n_left -= N_AES_BYTES;
+ i = 1;
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 2]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 3]);
+ n_left -= 4 * N_AES_BYTES;
+ i += 4;
+ }
+ if (n_left >= 2 * N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ n_left -= 2 * N_AES_BYTES;
+ i += 2;
+ }
+
+ if (n_left >= N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ n_left -= N_AES_BYTES;
+ i += 1;
+ }
+
+ if (n_left)
+ {
+ clib_memcpy_fast (&r, d + i, n_left);
+ aes_gcm_ghash_mul_next (ctx, r);
+ }
+ }
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+ else if (ctx->operation == AES_GCM_OP_GMAC)
+ ctx->T =
+ ghash_mul (aes_gcm_final_block (ctx) ^ ctx->T, ctx->Hi[NUM_HI - 1]);
+
+done:
+ /* encrypt counter 0 E(Y0, k) */
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ for (int i = 0; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
+{
+ const aes_expaned_key_t Ke0 = ctx->Ke[0];
+ uword i = 0;
+
+  /* As the counter is stored in network byte order for performance reasons,
+     we increment only its least significant byte, except when that byte
+     would overflow. As we process four 128-, 256- or 512-bit blocks in
+     parallel except in the last round, overflow can happen only when
+     n_blocks == 4 */
+
+#if N_AES_LANES == 4
+ const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
+ 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };
+
+ const u32x16 ctr_4444 = {
+ 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
+ };
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_4444;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
+ {
+ u32x16 Yr = (u32x16) aes_reflect ((u8x64) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ Yr += ctr_4444;
+ ctx->Y = (u32x16) aes_reflect ((u8x64) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_4444;
+ }
+ }
+ ctx->counter += n_blocks * 4;
+#elif N_AES_LANES == 2
+ const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
+ const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_22;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
+ {
+ u32x8 Yr = (u32x8) aes_reflect ((u8x32) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ Yr += ctr_22;
+ ctx->Y = (u32x8) aes_reflect ((u8x32) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_22;
+ }
+ }
+ ctx->counter += n_blocks * 2;
+#else
+ const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
+
+ if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_1;
+ }
+ ctx->counter += n_blocks;
+ }
+ else
+ {
+ r[i++] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_1;
+ ctx->counter += 1;
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->counter++;
+ ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
+ }
+ }
+#endif
+}
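The counter handling above can be illustrated on the single-lane fast path: Y
keeps the 32-bit block counter in network byte order, so adding `1 << 24` to
the last element bumps only its low-order byte; a sketch, assuming the low
byte is not about to wrap:

    u32x4 Y = { 0, 0, 0, 2 << 24 };   /* big-endian counter == 2 */
    Y += (u32x4){ 0, 0, 0, 1 << 24 }; /* big-endian counter == 3 */
    /* once the low byte reaches 0xfe, the code above falls back to
       clib_host_to_net_u32 (ctx->counter) so the carry propagates */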
+
+static_always_inline void
+aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
+ const aes_expaned_key_t *Ke, uword n_blocks)
+{
+  /* additional rounds for AES-192 and AES-256 */
+ for (int i = 10; i < ctx->rounds; i++)
+ aes_enc_round (r, Ke + i, n_blocks);
+
+ aes_enc_last_round (r, d, Ke + ctx->rounds, n_blocks);
+}
+
+static_always_inline void
+aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
+ u32 n_bytes, int with_ghash)
+{
+ const aes_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ uword ghash_blocks, gc = 1;
+ aes_data_t r[4];
+ u32 i, n_lanes;
+
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ ghash_blocks = 4;
+ n_lanes = N_AES_LANES * 4;
+ }
+ else
+ {
+ ghash_blocks = n;
+ n_lanes = n * N_AES_LANES;
+#if N_AES_LANES != 1
+ if (ctx->last)
+ n_lanes = (n_bytes + 15) / 16;
+#endif
+ }
+
+ n_bytes -= (n - 1) * N_AES_BYTES;
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, n);
+ aes_enc_round (r, k + 1, n);
+
+ /* load data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_load_partial ((u8 *) (sv + n - 1), n_bytes);
+ }
+
+ /* GHASH multiply block 0 */
+ if (with_ghash)
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, n);
+ aes_enc_round (r, k + 3, n);
+
+ /* GHASH multiply block 1 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, n);
+ aes_enc_round (r, k + 5, n);
+
+ /* GHASH multiply block 2 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, n);
+ aes_enc_round (r, k + 7, n);
+
+ /* GHASH multiply block 3 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+  /* load data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_load_partial (sv + n - 1, n_bytes);
+ }
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, n);
+ aes_enc_round (r, k + 9, n);
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, n);
+
+ /* store data */
+ for (i = 0; i < n - ctx->last; i++)
+ dv[i] = d[i];
+
+ if (ctx->last)
+ aes_store_partial (d[n - 1], dv + n - 1, n_bytes);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* GHASH reduce 2nd step */
+ if (with_ghash)
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* GHASH final step */
+ if (with_ghash)
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst)
+{
+ const aes_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ aes_data_t r[4];
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_enc_round (r, k + 1, 4);
+
+ /* load 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* GHASH multiply block 0 */
+ aes_gcm_ghash_mul_first (ctx, d[0], N_AES_LANES * 8);
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, 4);
+ aes_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 1 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, 4);
+ aes_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 2 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, 4);
+ aes_enc_round (r, k + 7, 4);
+
+ /* GHASH multiply block 3 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, 4);
+ aes_enc_round (r, k + 9, 4);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store 4 blocks of data */
+ for (int i = 0; i < 4; i++)
+ dv[i] = d[i];
+
+  /* load next 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* GHASH multiply block 4 */
+ aes_gcm_ghash_mul_next (ctx, (d[0]));
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_enc_round (r, k + 1, 4);
+
+ /* GHASH multiply block 5 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, 4);
+ aes_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 6 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, 4);
+ aes_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 7 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, 4);
+ aes_enc_round (r, k + 7, 4);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, 4);
+ aes_enc_round (r, k + 9, 4);
+
+ /* GHASH reduce 2nd step */
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store data */
+ for (int i = 0; i < 4; i++)
+ dv[i + 4] = d[i];
+
+ /* GHASH final step */
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
+{
+ const union
+ {
+ u8 b[64];
+ aes_data_t r;
+ } scale = {
+ .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+ };
+
+ d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
+}
+
+static_always_inline void
+aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
+ u32 n_bytes)
+{
+ int n_lanes = (N_AES_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
+ n_bytes -= (n_blocks - 1) * N_AES_BYTES;
+ int i;
+
+ aes_gcm_enc_ctr0_round (ctx, 0);
+ aes_gcm_enc_ctr0_round (ctx, 1);
+
+ if (n_bytes != N_AES_BYTES)
+ aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);
+
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ aes_gcm_enc_ctr0_round (ctx, 2);
+ aes_gcm_enc_ctr0_round (ctx, 3);
+
+ if (n_blocks > 1)
+ aes_gcm_ghash_mul_next (ctx, d[1]);
+
+ aes_gcm_enc_ctr0_round (ctx, 4);
+ aes_gcm_enc_ctr0_round (ctx, 5);
+
+ if (n_blocks > 2)
+ aes_gcm_ghash_mul_next (ctx, d[2]);
+
+ aes_gcm_enc_ctr0_round (ctx, 6);
+ aes_gcm_enc_ctr0_round (ctx, 7);
+
+ if (n_blocks > 3)
+ aes_gcm_ghash_mul_next (ctx, d[3]);
+
+ aes_gcm_enc_ctr0_round (ctx, 8);
+ aes_gcm_enc_ctr0_round (ctx, 9);
+
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+
+ for (i = 10; i < ctx->rounds; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+
+ aes_gcm_ghash_reduce2 (ctx);
+
+ aes_gcm_ghash_final (ctx);
+
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
+{
+ aes_data_t d[4];
+
+ if (PREDICT_FALSE (n_left == 0))
+ {
+ int i;
+ for (i = 0; i < ctx->rounds + 1; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+ return;
+ }
+
+ if (n_left < 4 * N_AES_BYTES)
+ {
+ ctx->last = 1;
+ if (n_left > 3 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+ return;
+ }
+
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 0);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += 4 * N_AES_BYTES;
+ src += 4 * N_AES_BYTES;
+
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, src += n, dst += n)
+ aes_gcm_calc_double (ctx, d, src, dst);
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += 4 * N_AES_BYTES;
+ src += 4 * N_AES_BYTES;
+ }
+
+ if (n_left == 0)
+ {
+ aes_gcm_calc_last (ctx, d, 4, 4 * N_AES_BYTES);
+ return;
+ }
+
+ ctx->last = 1;
+
+ if (n_left > 3 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+}
+
+static_always_inline void
+aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
+{
+ aes_data_t d[4] = {};
+ ghash_ctx_t gd;
+
+  /* main decryption loop */
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, dst += n, src += n)
+ aes_gcm_calc_double (ctx, d, src, dst);
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += N_AES_BYTES * 4;
+ src += N_AES_BYTES * 4;
+ }
+
+ if (n_left)
+ {
+ ctx->last = 1;
+
+ if (n_left > 3 * N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ else if (n_left > 2 * N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ else if (n_left > N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ else
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+ }
+
+ /* interleaved counter 0 encryption E(Y0, k) and ghash of final GCM
+ * (bit length) block */
+
+ aes_gcm_enc_ctr0_round (ctx, 0);
+ aes_gcm_enc_ctr0_round (ctx, 1);
+
+ ghash_mul_first (&gd, aes_gcm_final_block (ctx) ^ ctx->T,
+ ctx->Hi[NUM_HI - 1]);
+
+ aes_gcm_enc_ctr0_round (ctx, 2);
+ aes_gcm_enc_ctr0_round (ctx, 3);
+
+ ghash_reduce (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 4);
+ aes_gcm_enc_ctr0_round (ctx, 5);
+
+ ghash_reduce2 (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 6);
+ aes_gcm_enc_ctr0_round (ctx, 7);
+
+ ctx->T = ghash_final (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 8);
+ aes_gcm_enc_ctr0_round (ctx, 9);
+
+ for (int i = 10; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline int
+aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
+ u32 data_bytes, u32 aad_bytes, u8 tag_len,
+ const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
+{
+ u8 *addt = (u8 *) aad;
+ u32x4 Y0;
+
+ aes_gcm_ctx_t _ctx = { .counter = 2,
+ .rounds = aes_rounds,
+ .operation = op,
+ .data_bytes = data_bytes,
+ .aad_bytes = aad_bytes,
+ .Ke = kd->Ke,
+ .Hi = kd->Hi },
+ *ctx = &_ctx;
+
+  /* initialize counter */
+ Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
+ Y0[2] = *(u32u *) (ivp + 8);
+ Y0[3] = 1 << 24;
+ ctx->EY0 = (u8x16) Y0;
+
+#if N_AES_LANES == 4
+ ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
+ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
+ };
+#elif N_AES_LANES == 2
+ ctx->Y =
+ u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
+#else
+ ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
+#endif
+
+ /* calculate ghash for AAD */
+ aes_gcm_ghash (ctx, addt, aad_bytes);
+
+  /* ghash and encrypt/decrypt */
+ if (op == AES_GCM_OP_ENCRYPT)
+ aes_gcm_enc (ctx, src, dst, data_bytes);
+ else if (op == AES_GCM_OP_DECRYPT)
+ aes_gcm_dec (ctx, src, dst, data_bytes);
+
+  /* final tag: T = reflect (GHASH) ^ E(K, Y0) */
+ ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;
+
+  /* map tag_len 16 -> 0; below, 0 means a full 16-byte tag */
+ tag_len &= 0xf;
+
+ if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
+ {
+ /* store tag */
+ if (tag_len)
+ u8x16_store_partial (ctx->T, tag, tag_len);
+ else
+ ((u8x16u *) tag)[0] = ctx->T;
+ }
+ else
+ {
+ /* check tag */
+ if (tag_len)
+ {
+ u16 mask = pow2_mask (tag_len);
+ u8x16 expected = u8x16_load_partial (tag, tag_len);
+ if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
+ return 1;
+ }
+ else
+ {
+ if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static_always_inline void
+clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 H;
+ u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_expaned_key_t *Ke = (aes_expaned_key_t *) kd->Ke;
+
+ /* expand AES key */
+ aes_key_expand (ek, key, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];
+
+ /* pre-calculate H */
+ H = aes_encrypt_block (u8x16_zero (), ek, ks);
+ H = u8x16_reflect (H);
+ ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
+}
+
+static_always_inline void
+clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline void
+clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline int
+clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline int
+clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline void
+clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);
+}
+
+static_always_inline void
+clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);
+}
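+
+/*
+ * Usage sketch (illustrative only; key, pt, aad and iv are hypothetical
+ * caller-provided buffers):
+ *
+ *   aes_gcm_key_data_t kd;
+ *   u8 ct[64], tag[16];
+ *   int rv;
+ *
+ *   clib_aes_gcm_key_expand (&kd, key, AES_KEY_128);
+ *   clib_aes128_gcm_enc (&kd, pt, 64, aad, aad_len, iv, 16, ct, tag);
+ *   rv = clib_aes128_gcm_dec (&kd, ct, 64, aad, aad_len, iv, tag, 16, pt);
+ *   // rv is 1 if the tag verified, 0 otherwise
+ */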
+
+#endif /* __crypto_aes_gcm_h__ */
diff --git a/src/vppinfra/crypto/ghash.h b/src/vppinfra/crypto/ghash.h
new file mode 100644
index 00000000000..66e3f6a673a
--- /dev/null
+++ b/src/vppinfra/crypto/ghash.h
@@ -0,0 +1,515 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ *------------------------------------------------------------------
+ * Copyright(c) 2018, Intel Corporation All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Based on work by: Shay Gueron, Michael E. Kounavis, Erdinc Ozturk,
+ * Vinodh Gopal, James Guilford, Tomasz Kantecki
+ *
+ * References:
+ * [1] Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation on
+ * Intel Architecture Processors. August, 2010
+ * [2] Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode on
+ * Intel Architecture Processors. October, 2012.
+ * [3] intel-ipsec-mb library, https://github.com/01org/intel-ipsec-mb.git
+ *
+ * Definitions:
+ * GF Galois Extension Field GF(2^128) - finite field where elements are
+ * represented as polynomials with coefficients in GF(2) with the
+ * highest degree of 127. Polynomials are represented as 128-bit binary
+ * numbers where each bit represents one coefficient.
+ * e.g. polynomial x^5 + x^3 + x + 1 is represented in binary 101011.
+ * H hash key (128 bit)
+ * POLY irreducible polynomial x^128 + x^7 + x^2 + x + 1
+ * RPOLY irreducible polynomial x^128 + x^127 + x^126 + x^121 + 1
+ * + addition in GF, which equals to XOR operation
+ * * multiplication in GF
+ *
+ * GF multiplication consists of 2 steps:
+ * - carry-less multiplication of two 128-bit operands into 256-bit result
+ * - reduction of 256-bit result into 128-bit with modulo POLY
+ *
+ * GHash is calculated on 128-bit blocks of data according to the following
+ * formula:
+ * GH = (GH + data) * hash_key
+ *
+ * To avoid bit-reflection of data, this code uses GF multiplication
+ * with the reversed polynomial:
+ * a * b * x^-127 mod RPOLY
+ *
+ * To improve computation speed, the table Hi is precomputed with powers of
+ * H', where H' is calculated as H<<1 mod RPOLY. This allows performance to
+ * be improved by deferring reduction. For example, to calculate the ghash
+ * of 4 128-bit blocks of data (b0, b1, b2, b3), we can do:
+ *
+ * u8x16 Hi[4];
+ * ghash_precompute (H, Hi, 4);
+ *
+ * ghash_ctx_t _gd, *gd = &_gd;
+ * ghash_mul_first (gd, GH ^ b0, Hi[3]);
+ * ghash_mul_next (gd, b1, Hi[2]);
+ * ghash_mul_next (gd, b2, Hi[1]);
+ * ghash_mul_next (gd, b3, Hi[0]);
+ * ghash_reduce (gd);
+ * ghash_reduce2 (gd);
+ * GH = ghash_final (gd);
+ *
+ * The reduction step is split into 3 functions so it can be better
+ * interleaved with other code (e.g. with the AES computation).
+ */
+
+#ifndef __ghash_h__
+#define __ghash_h__
+
+static_always_inline u8x16
+gmul_lo_lo (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
+ (poly64_t) vget_low_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_hi_lo (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a),
+ (poly64_t) vget_low_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_lo_hi (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
+ (poly64_t) vget_high_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_hi_hi (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b);
+#endif
+}
+
+typedef struct
+{
+ u8x16 mid, hi, lo, tmp_lo, tmp_hi;
+ u8x32 hi2, lo2, mid2, tmp_lo2, tmp_hi2;
+ u8x64 hi4, lo4, mid4, tmp_lo4, tmp_hi4;
+ int pending;
+} ghash_ctx_t;
+
+static const u8x16 ghash_poly = {
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
+};
+
+static const u8x16 ghash_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
+};
+
+static_always_inline void
+ghash_mul_first (ghash_ctx_t *gd, u8x16 a, u8x16 b)
+{
+ /* a1 * b1 */
+ gd->hi = gmul_hi_hi (a, b);
+ /* a0 * b0 */
+ gd->lo = gmul_lo_lo (a, b);
+ /* a0 * b1 ^ a1 * b0 */
+ gd->mid = gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b);
+
+  /* set gd->pending to 0 so the next invocation of ghash_mul_next () knows
+     that there is no pending data in tmp_lo and tmp_hi */
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash_mul_next (ghash_ctx_t *gd, u8x16 a, u8x16 b)
+{
+ /* a1 * b1 */
+ u8x16 hi = gmul_hi_hi (a, b);
+ /* a0 * b0 */
+ u8x16 lo = gmul_lo_lo (a, b);
+
+  /* this branch will be optimized out by the compiler, and it allows us to
+     reduce the number of XOR operations by using ternary logic */
+ if (gd->pending)
+ {
+      /* there is pending data from the previous invocation so we can XOR */
+ gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, hi);
+ gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+      /* no pending data from the previous invocation so we postpone the XOR */
+ gd->tmp_hi = hi;
+ gd->tmp_lo = lo;
+ gd->pending = 1;
+ }
+
+ /* gd->mid ^= a0 * b1 ^ a1 * b0 */
+ gd->mid = u8x16_xor3 (gd->mid, gmul_hi_lo (a, b), gmul_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash_reduce (ghash_ctx_t *gd)
+{
+ u8x16 r;
+
+ /* Final combination:
+ gd->lo ^= gd->mid << 64
+ gd->hi ^= gd->mid >> 64 */
+ u8x16 midl = u8x16_word_shift_left (gd->mid, 8);
+ u8x16 midr = u8x16_word_shift_right (gd->mid, 8);
+
+ if (gd->pending)
+ {
+ gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, midl);
+ gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, midr);
+ }
+ else
+ {
+ gd->lo ^= midl;
+ gd->hi ^= midr;
+ }
+ r = gmul_hi_lo (ghash_poly2, gd->lo);
+ gd->lo ^= u8x16_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash_reduce2 (ghash_ctx_t *gd)
+{
+ gd->tmp_lo = gmul_lo_lo (ghash_poly2, gd->lo);
+ gd->tmp_hi = gmul_lo_hi (ghash_poly2, gd->lo);
+}
+
+static_always_inline u8x16
+ghash_final (ghash_ctx_t *gd)
+{
+ return u8x16_xor3 (gd->hi, u8x16_word_shift_right (gd->tmp_lo, 4),
+ u8x16_word_shift_left (gd->tmp_hi, 4));
+}
+
+static_always_inline u8x16
+ghash_mul (u8x16 a, u8x16 b)
+{
+ ghash_ctx_t _gd, *gd = &_gd;
+ ghash_mul_first (gd, a, b);
+ ghash_reduce (gd);
+ ghash_reduce2 (gd);
+ return ghash_final (gd);
+}
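+
+/* Illustrative note: ghash_mul () is the one-shot GF(2^128) multiply; e.g.
+ * ghash_precompute () below uses it to build consecutive powers of H':
+ *
+ *   Hi[i] = ghash_mul (H, Hi[i + 1]);
+ */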
+
+#if defined(__VPCLMULQDQ__) && defined(__AVX512F__)
+
+static const u8x64 ghash4_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+};
+
+static_always_inline u8x64
+gmul4_lo_lo (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x00);
+}
+
+static_always_inline u8x64
+gmul4_hi_lo (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x01);
+}
+
+static_always_inline u8x64
+gmul4_lo_hi (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x10);
+}
+
+static_always_inline u8x64
+gmul4_hi_hi (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11);
+}
+
+static_always_inline void
+ghash4_mul_first (ghash_ctx_t *gd, u8x64 a, u8x64 b)
+{
+ gd->hi4 = gmul4_hi_hi (a, b);
+ gd->lo4 = gmul4_lo_lo (a, b);
+ gd->mid4 = gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b);
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash4_mul_next (ghash_ctx_t *gd, u8x64 a, u8x64 b)
+{
+ u8x64 hi = gmul4_hi_hi (a, b);
+ u8x64 lo = gmul4_lo_lo (a, b);
+
+ if (gd->pending)
+ {
+      /* there is pending data from the previous invocation so we can XOR */
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, hi);
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+      /* no pending data from the previous invocation so we postpone the XOR */
+ gd->tmp_hi4 = hi;
+ gd->tmp_lo4 = lo;
+ gd->pending = 1;
+ }
+ gd->mid4 = u8x64_xor3 (gd->mid4, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash4_reduce (ghash_ctx_t *gd)
+{
+ u8x64 r;
+
+ /* Final combination:
+ gd->lo4 ^= gd->mid4 << 64
+ gd->hi4 ^= gd->mid4 >> 64 */
+
+ u8x64 midl = u8x64_word_shift_left (gd->mid4, 8);
+ u8x64 midr = u8x64_word_shift_right (gd->mid4, 8);
+
+ if (gd->pending)
+ {
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, midl);
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, midr);
+ }
+ else
+ {
+ gd->lo4 ^= midl;
+ gd->hi4 ^= midr;
+ }
+
+ r = gmul4_hi_lo (ghash4_poly2, gd->lo4);
+ gd->lo4 ^= u8x64_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash4_reduce2 (ghash_ctx_t *gd)
+{
+ gd->tmp_lo4 = gmul4_lo_lo (ghash4_poly2, gd->lo4);
+ gd->tmp_hi4 = gmul4_lo_hi (ghash4_poly2, gd->lo4);
+}
+
+static_always_inline u8x16
+ghash4_final (ghash_ctx_t *gd)
+{
+ u8x64 r;
+ u8x32 t;
+
+ r = u8x64_xor3 (gd->hi4, u8x64_word_shift_right (gd->tmp_lo4, 4),
+ u8x64_word_shift_left (gd->tmp_hi4, 4));
+
+ /* horizontal XOR of 4 128-bit lanes */
+ t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r);
+ return u8x32_extract_hi (t) ^ u8x32_extract_lo (t);
+}
+#endif
+
+#if defined(__VPCLMULQDQ__)
+
+static const u8x32 ghash2_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+};
+
+static_always_inline u8x32
+gmul2_lo_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x00);
+}
+
+static_always_inline u8x32
+gmul2_hi_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x01);
+}
+
+static_always_inline u8x32
+gmul2_lo_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x10);
+}
+
+static_always_inline u8x32
+gmul2_hi_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x11);
+}
+
+static_always_inline void
+ghash2_mul_first (ghash_ctx_t *gd, u8x32 a, u8x32 b)
+{
+ gd->hi2 = gmul2_hi_hi (a, b);
+ gd->lo2 = gmul2_lo_lo (a, b);
+ gd->mid2 = gmul2_hi_lo (a, b) ^ gmul2_lo_hi (a, b);
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash2_mul_next (ghash_ctx_t *gd, u8x32 a, u8x32 b)
+{
+ u8x32 hi = gmul2_hi_hi (a, b);
+ u8x32 lo = gmul2_lo_lo (a, b);
+
+ if (gd->pending)
+ {
+      /* there is pending data from the previous invocation so we can XOR */
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, hi);
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+      /* no pending data from the previous invocation so we postpone the XOR */
+ gd->tmp_hi2 = hi;
+ gd->tmp_lo2 = lo;
+ gd->pending = 1;
+ }
+ gd->mid2 = u8x32_xor3 (gd->mid2, gmul2_hi_lo (a, b), gmul2_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash2_reduce (ghash_ctx_t *gd)
+{
+ u8x32 r;
+
+ /* Final combination:
+ gd->lo2 ^= gd->mid2 << 64
+ gd->hi2 ^= gd->mid2 >> 64 */
+
+ u8x32 midl = u8x32_word_shift_left (gd->mid2, 8);
+ u8x32 midr = u8x32_word_shift_right (gd->mid2, 8);
+
+ if (gd->pending)
+ {
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, midl);
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, midr);
+ }
+ else
+ {
+ gd->lo2 ^= midl;
+ gd->hi2 ^= midr;
+ }
+
+ r = gmul2_hi_lo (ghash2_poly2, gd->lo2);
+ gd->lo2 ^= u8x32_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash2_reduce2 (ghash_ctx_t *gd)
+{
+ gd->tmp_lo2 = gmul2_lo_lo (ghash2_poly2, gd->lo2);
+ gd->tmp_hi2 = gmul2_lo_hi (ghash2_poly2, gd->lo2);
+}
+
+static_always_inline u8x16
+ghash2_final (ghash_ctx_t *gd)
+{
+ u8x32 r;
+
+ r = u8x32_xor3 (gd->hi2, u8x32_word_shift_right (gd->tmp_lo2, 4),
+ u8x32_word_shift_left (gd->tmp_hi2, 4));
+
+ /* horizontal XOR of 2 128-bit lanes */
+ return u8x32_extract_hi (r) ^ u8x32_extract_lo (r);
+}
+#endif
+
+static_always_inline void
+ghash_precompute (u8x16 H, u8x16 * Hi, int n)
+{
+ u8x16 r8;
+ u32x4 r32;
+  /* calculate H<<1 mod poly from the hash key */
+ r8 = (u8x16) ((u64x2) H >> 63);
+ H = (u8x16) ((u64x2) H << 1);
+ H |= u8x16_word_shift_left (r8, 8);
+ r32 = (u32x4) u8x16_word_shift_right (r8, 8);
+#ifdef __SSE2__
+ r32 = u32x4_shuffle (r32, 0, 1, 2, 0);
+#else
+ r32[3] = r32[0];
+#endif
+ r32 = r32 == (u32x4) {1, 0, 0, 1};
+ Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly);
+
+ /* calculate H^(i + 1) */
+ for (int i = n - 2; i >= 0; i--)
+ Hi[i] = ghash_mul (H, Hi[i + 1]);
+}
+
+#endif /* __ghash_h__ */
+
diff --git a/src/vppinfra/crypto/poly1305.h b/src/vppinfra/crypto/poly1305.h
new file mode 100644
index 00000000000..cd6ea60cdf7
--- /dev/null
+++ b/src/vppinfra/crypto/poly1305.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __clib_poly1305_h__
+#define __clib_poly1305_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+
+/* implementation of DJB's poly1305 using 64-bit arithmetic */
+
+typedef struct
+{
+ const u64 r[3], s[2];
+ u64 h[3];
+
+ /* partial data */
+ union
+ {
+ u8 as_u8[16];
+ u64 as_u64[2];
+ } partial;
+
+ size_t n_partial_bytes;
+} clib_poly1305_ctx;
+
+static_always_inline void
+clib_poly1305_init (clib_poly1305_ctx *ctx, const u8 key[32])
+{
+ u64u *k = (u64u *) key;
+ u64 *h = (u64 *) ctx->h;
+ u64 *r = (u64 *) ctx->r;
+ u64 *s = (u64 *) ctx->s;
+
+ /* initialize accumulator */
+ h[0] = h[1] = h[2] = 0;
+
+ /* clamp 1st half of the key and store it into r[] */
+ r[0] = k[0] & 0x0ffffffc0fffffff;
+ r[1] = k[1] & 0x0ffffffc0ffffffc;
+ s[0] = k[2];
+ s[1] = k[3];
+
+ /* precompute (r[1] >> 2) * 5 */
+ r[2] = r[1] + (r[1] >> 2);
+
+ ctx->n_partial_bytes = 0;
+}
+
+static_always_inline void
+_clib_poly1305_multiply_and_reduce (u64 h[3], const u64 r[3])
+{
+ union
+ {
+ struct
+ {
+ u64 lo, hi;
+ };
+ u128 n;
+ } l0, l1, l2;
+ u64 c;
+
+  /*
+        h2       h1       h0
+    x            r1       r0
+    ---------------------------------------
+      r0 x h2  r0 x h1  r0 x h0
+    + r1 x h2  r1 x h1  r1 x h0
+    ---------------------------------------
+
+    for p = 2^130-5 the following applies,
+    because 2^130 mod p == 5:
+    (r * 2^130) mod p == (r * 5) mod p
+
+    so bits above 130 can be shifted right (divided by 2^130)
+    and multiplied by 5 per the equation above
+
+        h2                  h1                 h0
+    x                       r1                 r0
+    ----------------------------------------------
+      r0 x h2             r0 x h1            r0 x h0
+                                           + r1 x h0
+    + 5x (r1 >> 2) x h2  5x (r1 >> 2) x h1
+    ----------------------------------------------
+      [0:l2.lo]          [l1.hi:l1.lo]      [l0.hi:l0.lo]
+   */
+
+ l0.n = l1.n = l2.n = 0;
+ /* u64 x u64 = u128 multiplications */
+ l0.n += (u128) h[0] * r[0];
+ l0.n += (u128) h[1] * r[2]; /* r[2] holds precomputed (r[1] >> 2) * 5 */
+ l1.n += (u128) h[0] * r[1];
+ l1.n += (u128) h[1] * r[0];
+
+  /* u64 x u64 = u64 multiplications, as h[2] may have only the lower 2 bits
+   * set and r[1] has bits 60-63 clamped */
+ l1.n += (u128) (h[2] * r[2]);
+ l2.n += (u128) (h[2] * r[0]);
+
+ /* propagate upper 64 bits to higher limb */
+ c = 0;
+ l1.lo = u64_add_with_carry (&c, l1.lo, l0.hi);
+ l2.lo = u64_add_with_carry (&c, l2.lo, l1.hi);
+
+ l2.hi = l2.lo;
+ /* keep bits [128:129] */
+ l2.lo &= 3;
+
+  /* multiply bits 130 and above by 5 and store the result in l2.hi */
+ l2.hi -= l2.lo;
+ l2.hi += l2.hi >> 2;
+
+ /* add l2.hi to l0.lo with carry propagation and store result to h2:h1:h0 */
+ c = 0;
+ h[0] = u64_add_with_carry (&c, l0.lo, l2.hi);
+ h[1] = u64_add_with_carry (&c, l1.lo, 0);
+ h[2] = u64_add_with_carry (&c, l2.lo, 0);
+}
+
+static_always_inline u32
+_clib_poly1305_add_blocks (clib_poly1305_ctx *ctx, const u8 *msg,
+ uword n_bytes, const u32 bit17)
+{
+ u64 r[3], h[3];
+
+ for (int i = 0; i < 3; i++)
+ {
+ h[i] = ctx->h[i];
+ r[i] = ctx->r[i];
+ }
+
+ for (const u64u *m = (u64u *) msg; n_bytes >= 16; n_bytes -= 16, m += 2)
+ {
+ u64 c = 0;
+
+ /* h += m */
+ h[0] = u64_add_with_carry (&c, h[0], m[0]);
+ h[1] = u64_add_with_carry (&c, h[1], m[1]);
+ h[2] = u64_add_with_carry (&c, h[2], bit17 ? 1 : 0);
+
+ /* h = (h * r) mod p */
+ _clib_poly1305_multiply_and_reduce (h, r);
+ }
+
+ for (int i = 0; i < 3; i++)
+ ctx->h[i] = h[i];
+
+ return n_bytes;
+}
+
+static_always_inline void
+clib_poly1305_update (clib_poly1305_ctx *ctx, const u8 *msg, uword len)
+{
+ uword n_left = len;
+
+ if (n_left == 0)
+ return;
+
+ if (ctx->n_partial_bytes)
+ {
+ u16 missing_bytes = 16 - ctx->n_partial_bytes;
+ if (PREDICT_FALSE (n_left < missing_bytes))
+ {
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ n_left);
+ ctx->n_partial_bytes += n_left;
+ return;
+ }
+
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ missing_bytes);
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 1);
+ ctx->n_partial_bytes = 0;
+ n_left -= missing_bytes;
+ msg += missing_bytes;
+ }
+
+  /* leftover bytes are the tail of the region starting at msg, which may
+   * already have been advanced past the start of the original buffer */
+  uword n_before = n_left;
+  n_left = _clib_poly1305_add_blocks (ctx, msg, n_left, 1);
+
+  if (n_left)
+    {
+      ctx->partial.as_u64[0] = ctx->partial.as_u64[1] = 0;
+      clib_memcpy_fast (ctx->partial.as_u8, msg + n_before - n_left, n_left);
+      ctx->n_partial_bytes = n_left;
+    }
+}
+
+static_always_inline void
+clib_poly1305_final (clib_poly1305_ctx *ctx, u8 *out)
+{
+  const u64 p[] = { 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 3 }; /* 2^130-5 */
+ const u64 *s = ctx->s;
+ u64u *t = (u64u *) out;
+ u64 h0, h1, t0, t1;
+ u64 c;
+
+ if (ctx->n_partial_bytes)
+ {
+ ctx->partial.as_u8[ctx->n_partial_bytes] = 1;
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 0);
+ }
+
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+
+  /* h may not be fully reduced, try to subtract 2^130-5 */
+ c = 0;
+ t0 = u64_sub_with_borrow (&c, h0, p[0]);
+ t1 = u64_sub_with_borrow (&c, h1, p[1]);
+ u64_sub_with_borrow (&c, ctx->h[2], p[2]);
+
+ if (!c)
+ {
+ h0 = t0;
+ h1 = t1;
+ }
+
+ c = 0;
+ t[0] = u64_add_with_carry (&c, h0, s[0]);
+ t[1] = u64_add_with_carry (&c, h1, s[1]);
+}
+
+static_always_inline void
+clib_poly1305 (const u8 *key, const u8 *msg, uword len, u8 *out)
+{
+ clib_poly1305_ctx ctx;
+ clib_poly1305_init (&ctx, key);
+ clib_poly1305_update (&ctx, msg, len);
+ clib_poly1305_final (&ctx, out);
+}
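+
+/* Usage sketch (illustrative only; key and msg are hypothetical buffers,
+ * key must point to 32 bytes):
+ *
+ *   u8 tag[16];
+ *   clib_poly1305 (key, msg, msg_len, tag);
+ */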
+
+#endif /* __clib_poly1305_h__ */
diff --git a/src/vppinfra/crypto/sha2.h b/src/vppinfra/crypto/sha2.h
new file mode 100644
index 00000000000..69a24a2d087
--- /dev/null
+++ b/src/vppinfra/crypto/sha2.h
@@ -0,0 +1,715 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef included_sha2_h
+#define included_sha2_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/string.h>
+
+#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y)))
+#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c))
+#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
+#define SHA256_CSIGMA0(x) \
+  (SHA256_ROTR (x, 2) ^ SHA256_ROTR (x, 13) ^ SHA256_ROTR (x, 22))
+#define SHA256_CSIGMA1(x) \
+  (SHA256_ROTR (x, 6) ^ SHA256_ROTR (x, 11) ^ SHA256_ROTR (x, 25))
+#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ SHA256_ROTR (x, 18) ^ (x >> 3))
+#define SHA256_SSIGMA1(x) \
+ (SHA256_ROTR (x, 17) ^ SHA256_ROTR (x, 19) ^ (x >> 10))
+
+#define SHA256_MSG_SCHED(w, j) \
+ { \
+ w[j] = w[j - 7] + w[j - 16]; \
+ w[j] += SHA256_SSIGMA0 (w[j - 15]); \
+ w[j] += SHA256_SSIGMA1 (w[j - 2]); \
+ }
+
+#define SHA256_TRANSFORM(s, w, i, k) \
+ { \
+ __typeof__ (s[0]) t1, t2; \
+ t1 = k + w[i] + s[7]; \
+ t1 += SHA256_CSIGMA1 (s[4]); \
+ t1 += SHA256_CH (s[4], s[5], s[6]); \
+ t2 = SHA256_CSIGMA0 (s[0]); \
+ t2 += SHA256_MAJ (s[0], s[1], s[2]); \
+ s[7] = s[6]; \
+ s[6] = s[5]; \
+ s[5] = s[4]; \
+ s[4] = s[3] + t1; \
+ s[3] = s[2]; \
+ s[2] = s[1]; \
+ s[1] = s[0]; \
+ s[0] = t1 + t2; \
+ }
+
+#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y)))
+#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c))
+#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
+#define SHA512_CSIGMA0(x) \
+ (SHA512_ROTR (x, 28) ^ SHA512_ROTR (x, 34) ^ SHA512_ROTR (x, 39))
+#define SHA512_CSIGMA1(x) \
+ (SHA512_ROTR (x, 14) ^ SHA512_ROTR (x, 18) ^ SHA512_ROTR (x, 41))
+#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ SHA512_ROTR (x, 8) ^ (x >> 7))
+#define SHA512_SSIGMA1(x) \
+ (SHA512_ROTR (x, 19) ^ SHA512_ROTR (x, 61) ^ (x >> 6))
+
+#define SHA512_MSG_SCHED(w, j) \
+ { \
+ w[j] = w[j - 7] + w[j - 16]; \
+ w[j] += SHA512_SSIGMA0 (w[j - 15]); \
+ w[j] += SHA512_SSIGMA1 (w[j - 2]); \
+ }
+
+#define SHA512_TRANSFORM(s, w, i, k) \
+ { \
+ __typeof__ (s[0]) t1, t2; \
+ t1 = k + w[i] + s[7]; \
+ t1 += SHA512_CSIGMA1 (s[4]); \
+ t1 += SHA512_CH (s[4], s[5], s[6]); \
+ t2 = SHA512_CSIGMA0 (s[0]); \
+ t2 += SHA512_MAJ (s[0], s[1], s[2]); \
+ s[7] = s[6]; \
+ s[6] = s[5]; \
+ s[5] = s[4]; \
+ s[4] = s[3] + t1; \
+ s[3] = s[2]; \
+ s[2] = s[1]; \
+ s[1] = s[0]; \
+ s[0] = t1 + t2; \
+ }
+
+#if defined(__SHA__) && defined(__x86_64__)
+#define CLIB_SHA256_ISA_INTEL
+#define CLIB_SHA256_ISA
+#endif
+
+#ifdef __ARM_FEATURE_SHA2
+#define CLIB_SHA256_ISA_ARM
+#define CLIB_SHA256_ISA
+#endif
+
+static const u32 sha224_h[8] = { 0xc1059ed8, 0x367cd507, 0x3070dd17,
+ 0xf70e5939, 0xffc00b31, 0x68581511,
+ 0x64f98fa7, 0xbefa4fa4 };
+
+static const u32 sha256_h[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372,
+ 0xa54ff53a, 0x510e527f, 0x9b05688c,
+ 0x1f83d9ab, 0x5be0cd19 };
+
+static const u32 clib_sha2_256_k[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+static const u64 sha384_h[8] = { 0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
+ 0x9159015a3070dd17, 0x152fecd8f70e5939,
+ 0x67332667ffc00b31, 0x8eb44a8768581511,
+ 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4 };
+
+static const u64 sha512_h[8] = { 0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
+ 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
+ 0x510e527fade682d1, 0x9b05688c2b3e6c1f,
+ 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 };
+
+static const u64 sha512_224_h[8] = { 0x8c3d37c819544da2, 0x73e1996689dcd4d6,
+ 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
+ 0x0f6d2b697bd44da8, 0x77e36f7304c48942,
+ 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1 };
+
+static const u64 sha512_256_h[8] = { 0x22312194fc2bf72c, 0x9f555fa3c84c64c2,
+ 0x2393b86b6f53b151, 0x963877195940eabd,
+ 0x96283ee2a88effe3, 0xbe5e1e2553863992,
+ 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2 };
+
+static const u64 clib_sha2_512_k[80] = {
+ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f,
+ 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019,
+ 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242,
+ 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235,
+ 0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+ 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 0x2de92c6f592b0275,
+ 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f,
+ 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+ 0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc,
+ 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6,
+ 0x92722c851482353b, 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+ 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218,
+ 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
+ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99,
+ 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+ 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc,
+ 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915,
+ 0xc67178f2e372532b, 0xca273eceea26619c, 0xd186b8c721c0c207,
+ 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba,
+ 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
+ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc,
+ 0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+ 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+};
+
+typedef enum
+{
+ CLIB_SHA2_224,
+ CLIB_SHA2_256,
+ CLIB_SHA2_384,
+ CLIB_SHA2_512,
+ CLIB_SHA2_512_224,
+ CLIB_SHA2_512_256,
+} clib_sha2_type_t;
+
+#define CLIB_SHA2_256_BLOCK_SIZE 64
+#define CLIB_SHA2_512_BLOCK_SIZE 128
+#define SHA2_MAX_BLOCK_SIZE CLIB_SHA2_512_BLOCK_SIZE
+#define SHA2_MAX_DIGEST_SIZE 64
+
+static const struct
+{
+ u8 block_size;
+ u8 digest_size;
+ const u32 *h32;
+ const u64 *h64;
+} clib_sha2_variants[] = {
+ [CLIB_SHA2_224] = {
+ .block_size = CLIB_SHA2_256_BLOCK_SIZE,
+ .digest_size = 28,
+ .h32 = sha224_h,
+ },
+ [CLIB_SHA2_256] = {
+ .block_size = CLIB_SHA2_256_BLOCK_SIZE,
+ .digest_size = 32,
+ .h32 = sha256_h,
+ },
+ [CLIB_SHA2_384] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 48,
+ .h64 = sha384_h,
+ },
+ [CLIB_SHA2_512] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 64,
+ .h64 = sha512_h,
+ },
+ [CLIB_SHA2_512_224] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 28,
+ .h64 = sha512_224_h,
+ },
+ [CLIB_SHA2_512_256] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 32,
+ .h64 = sha512_256_h,
+ },
+};
+
+typedef union
+{
+ u32 h32[8];
+ u64 h64[8];
+#ifdef CLIB_SHA256_ISA
+ u32x4 h32x4[2];
+#endif
+} clib_sha2_h_t;
+
+typedef struct
+{
+ u64 total_bytes;
+ u16 n_pending;
+ clib_sha2_h_t h;
+ union
+ {
+ u8 as_u8[SHA2_MAX_BLOCK_SIZE];
+ u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)];
+ uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
+ } pending;
+} clib_sha2_state_t;
+
+typedef struct
+{
+ clib_sha2_type_t type;
+ u8 block_size;
+ u8 digest_size;
+ clib_sha2_state_t state;
+} clib_sha2_ctx_t;
+
+static_always_inline void
+clib_sha2_state_init (clib_sha2_state_t *state, clib_sha2_type_t type)
+{
+ clib_sha2_state_t st = {};
+
+ if (clib_sha2_variants[type].block_size == CLIB_SHA2_256_BLOCK_SIZE)
+ for (int i = 0; i < 8; i++)
+ st.h.h32[i] = clib_sha2_variants[type].h32[i];
+ else
+ for (int i = 0; i < 8; i++)
+ st.h.h64[i] = clib_sha2_variants[type].h64[i];
+
+ *state = st;
+}
+
+static_always_inline void
+clib_sha2_init (clib_sha2_ctx_t *ctx, clib_sha2_type_t type)
+{
+ clib_sha2_state_init (&ctx->state, type);
+ ctx->block_size = clib_sha2_variants[type].block_size;
+ ctx->digest_size = clib_sha2_variants[type].digest_size;
+ ctx->type = type;
+}
+
+#ifdef CLIB_SHA256_ISA
+static inline void
+clib_sha256_vec_cycle_w (u32x4 w[], u8 i)
+{
+ u8 j = (i + 1) % 4;
+ u8 k = (i + 2) % 4;
+ u8 l = (i + 3) % 4;
+#ifdef CLIB_SHA256_ISA_INTEL
+ w[i] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) w[i], (__m128i) w[j]);
+ w[i] += (u32x4) _mm_alignr_epi8 ((__m128i) w[l], (__m128i) w[k], 4);
+ w[i] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) w[i], (__m128i) w[l]);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ w[i] = vsha256su1q_u32 (vsha256su0q_u32 (w[i], w[j]), w[k], w[l]);
+#endif
+}
+
+static inline void
+clib_sha256_vec_4_rounds (u32x4 w, u8 n, u32x4 s[])
+{
+#ifdef CLIB_SHA256_ISA_INTEL
+ u32x4 r = *(u32x4 *) (clib_sha2_256_k + 4 * n) + w;
+ s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1],
+ (__m128i) r);
+ r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r);
+ s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0],
+ (__m128i) r);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ u32x4 r0, s0;
+ const u32x4u *k = (u32x4u *) clib_sha2_256_k;
+
+ r0 = w + k[n];
+ s0 = s[0];
+ s[0] = vsha256hq_u32 (s[0], s[1], r0);
+ s[1] = vsha256h2q_u32 (s[1], s0, r0);
+#endif
+}
+#endif
+
+#if defined(CLIB_SHA256_ISA)
+static inline u32x4
+clib_sha256_vec_load (u32x4 r)
+{
+#if defined(CLIB_SHA256_ISA_INTEL)
+ return u32x4_byte_swap (r);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ return vreinterpretq_u32_u8 (vrev32q_u8 (vreinterpretq_u8_u32 (r)));
+#endif
+}
+
+static inline void
+clib_sha256_vec_shuffle (u32x4 d[2])
+{
+#if defined(CLIB_SHA256_ISA_INTEL)
+ /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */
+ u32x4 r;
+ r = (u32x4) _mm_shuffle_ps ((__m128) d[1], (__m128) d[0], 0xbb);
+ d[1] = (u32x4) _mm_shuffle_ps ((__m128) d[1], (__m128) d[0], 0x11);
+ d[0] = r;
+#endif
+}
+#endif
+
+static inline void
+clib_sha256_block (clib_sha2_state_t *st, const u8 *msg, uword n_blocks)
+{
+#if defined(CLIB_SHA256_ISA)
+ u32x4 h[2];
+ u32x4u *m = (u32x4u *) msg;
+
+ h[0] = st->h.h32x4[0];
+ h[1] = st->h.h32x4[1];
+
+ clib_sha256_vec_shuffle (h);
+
+ for (; n_blocks; m += 4, n_blocks--)
+ {
+ u32x4 s[2], w[4];
+
+ s[0] = h[0];
+ s[1] = h[1];
+
+ w[0] = clib_sha256_vec_load (m[0]);
+ w[1] = clib_sha256_vec_load (m[1]);
+ w[2] = clib_sha256_vec_load (m[2]);
+ w[3] = clib_sha256_vec_load (m[3]);
+
+ clib_sha256_vec_4_rounds (w[0], 0, s);
+ clib_sha256_vec_4_rounds (w[1], 1, s);
+ clib_sha256_vec_4_rounds (w[2], 2, s);
+ clib_sha256_vec_4_rounds (w[3], 3, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 4, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 5, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 6, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 7, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 8, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 9, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 10, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 11, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 12, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 13, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 14, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 15, s);
+
+ h[0] += s[0];
+ h[1] += s[1];
+ }
+
+ clib_sha256_vec_shuffle (h);
+
+ st->h.h32x4[0] = h[0];
+ st->h.h32x4[1] = h[1];
+#else
+ u32 w[64], s[8], i;
+ clib_sha2_h_t h;
+
+ h = st->h;
+
+ for (; n_blocks; msg += CLIB_SHA2_256_BLOCK_SIZE, n_blocks--)
+ {
+ for (i = 0; i < 8; i++)
+ s[i] = h.h32[i];
+
+ for (i = 0; i < 16; i++)
+ {
+ w[i] = clib_net_to_host_u32 ((((u32u *) msg)[i]));
+ SHA256_TRANSFORM (s, w, i, clib_sha2_256_k[i]);
+ }
+
+ for (i = 16; i < 64; i++)
+ {
+ SHA256_MSG_SCHED (w, i);
+ SHA256_TRANSFORM (s, w, i, clib_sha2_256_k[i]);
+ }
+
+ for (i = 0; i < 8; i++)
+ h.h32[i] += s[i];
+ }
+
+ st->h = h;
+#endif
+}
+
+static_always_inline void
+clib_sha512_block (clib_sha2_state_t *st, const u8 *msg, uword n_blocks)
+{
+ u64 w[80], s[8], i;
+ clib_sha2_h_t h;
+
+ h = st->h;
+
+ for (; n_blocks; msg += CLIB_SHA2_512_BLOCK_SIZE, n_blocks--)
+ {
+ for (i = 0; i < 8; i++)
+ s[i] = h.h64[i];
+
+ for (i = 0; i < 16; i++)
+ {
+ w[i] = clib_net_to_host_u64 ((((u64u *) msg)[i]));
+ SHA512_TRANSFORM (s, w, i, clib_sha2_512_k[i]);
+ }
+
+ for (i = 16; i < 80; i++)
+ {
+ SHA512_MSG_SCHED (w, i);
+ SHA512_TRANSFORM (s, w, i, clib_sha2_512_k[i]);
+ }
+
+ for (i = 0; i < 8; i++)
+ h.h64[i] += s[i];
+ }
+
+ st->h = h;
+}
+
+static_always_inline void
+clib_sha2_update_internal (clib_sha2_state_t *st, u8 block_size, const u8 *msg,
+ uword n_bytes)
+{
+ uword n_blocks;
+ if (st->n_pending)
+ {
+ uword n_left = block_size - st->n_pending;
+ if (n_bytes < n_left)
+ {
+ clib_memcpy_fast (st->pending.as_u8 + st->n_pending, msg, n_bytes);
+ st->n_pending += n_bytes;
+ return;
+ }
+ else
+ {
+ clib_memcpy_fast (st->pending.as_u8 + st->n_pending, msg, n_left);
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ else
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ st->n_pending = 0;
+ st->total_bytes += block_size;
+ n_bytes -= n_left;
+ msg += n_left;
+ }
+ }
+
+ if ((n_blocks = n_bytes / block_size))
+ {
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, msg, n_blocks);
+ else
+ clib_sha256_block (st, msg, n_blocks);
+ n_bytes -= n_blocks * block_size;
+ msg += n_blocks * block_size;
+ st->total_bytes += n_blocks * block_size;
+ }
+
+ if (n_bytes)
+ {
+ clib_memset_u8 (st->pending.as_u8, 0, block_size);
+ clib_memcpy_fast (st->pending.as_u8, msg, n_bytes);
+ st->n_pending = n_bytes;
+ }
+ else
+ st->n_pending = 0;
+}
+
+static_always_inline void
+clib_sha2_update (clib_sha2_ctx_t *ctx, const u8 *msg, uword n_bytes)
+{
+ clib_sha2_update_internal (&ctx->state, ctx->block_size, msg, n_bytes);
+}
+
+static_always_inline void
+clib_sha2_final_internal (clib_sha2_state_t *st, u8 block_size, u8 digest_size,
+ u8 *digest)
+{
+ int i;
+
+ st->total_bytes += st->n_pending;
+ if (st->n_pending == 0)
+ {
+ clib_memset (st->pending.as_u8, 0, block_size);
+ st->pending.as_u8[0] = 0x80;
+ }
+ else if (st->n_pending + sizeof (u64) + sizeof (u8) > block_size)
+ {
+ st->pending.as_u8[st->n_pending] = 0x80;
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ else
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ clib_memset (st->pending.as_u8, 0, block_size);
+ }
+ else
+ st->pending.as_u8[st->n_pending] = 0x80;
+
+ st->pending.as_u64[block_size / 8 - 1] =
+ clib_net_to_host_u64 (st->total_bytes * 8);
+
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ {
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ for (i = 0; i < digest_size / sizeof (u64); i++)
+ ((u64 *) digest)[i] = clib_net_to_host_u64 (st->h.h64[i]);
+
+ /* sha512-224 case - write half of u64 */
+ if (i * sizeof (u64) < digest_size)
+ ((u32 *) digest)[2 * i] = clib_net_to_host_u32 (st->h.h64[i] >> 32);
+ }
+ else
+ {
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ for (i = 0; i < digest_size / sizeof (u32); i++)
+ *((u32 *) digest + i) = clib_net_to_host_u32 (st->h.h32[i]);
+ }
+}
+
+static_always_inline void
+clib_sha2_final (clib_sha2_ctx_t *ctx, u8 *digest)
+{
+ clib_sha2_final_internal (&ctx->state, ctx->block_size, ctx->digest_size,
+ digest);
+}
+
+static_always_inline void
+clib_sha2 (clib_sha2_type_t type, const u8 *msg, uword len, u8 *digest)
+{
+ clib_sha2_ctx_t ctx;
+ clib_sha2_init (&ctx, type);
+ clib_sha2_update (&ctx, msg, len);
+ clib_sha2_final (&ctx, digest);
+}
+
+#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__)
+#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__)
+#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__)
+#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__)
+#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
+#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
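+
+/* Usage sketch (illustrative only; msg is a hypothetical buffer):
+ *
+ *   u8 digest[32];
+ *   clib_sha256 (msg, msg_len, digest);
+ *   // expands to clib_sha2 (CLIB_SHA2_256, msg, msg_len, digest)
+ */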
+
+/*
+ * HMAC
+ */
+
+typedef struct
+{
+ clib_sha2_h_t ipad_h;
+ clib_sha2_h_t opad_h;
+} clib_sha2_hmac_key_data_t;
+
+typedef struct
+{
+ clib_sha2_type_t type;
+ u8 block_size;
+ u8 digest_size;
+ clib_sha2_state_t ipad_state;
+ clib_sha2_state_t opad_state;
+} clib_sha2_hmac_ctx_t;
+
+static_always_inline void
+clib_sha2_hmac_key_data (clib_sha2_type_t type, const u8 *key, uword key_len,
+ clib_sha2_hmac_key_data_t *kd)
+{
+ u8 block_size = clib_sha2_variants[type].block_size;
+ u8 data[SHA2_MAX_BLOCK_SIZE] = {};
+ u8 ikey[SHA2_MAX_BLOCK_SIZE];
+ u8 okey[SHA2_MAX_BLOCK_SIZE];
+ clib_sha2_state_t ipad_state;
+ clib_sha2_state_t opad_state;
+
+ /* key */
+ if (key_len > block_size)
+ {
+ /* key is longer than block, calculate hash of key */
+ clib_sha2_ctx_t ctx;
+ clib_sha2_init (&ctx, type);
+ clib_sha2_update (&ctx, key, key_len);
+ clib_sha2_final (&ctx, (u8 *) data);
+ }
+ else
+ clib_memcpy_fast (data, key, key_len);
+
+ for (int i = 0, w = 0; w < block_size; w += sizeof (uword), i++)
+ {
+ ((uwordu *) ikey)[i] = ((uwordu *) data)[i] ^ 0x3636363636363636UL;
+ ((uwordu *) okey)[i] = ((uwordu *) data)[i] ^ 0x5c5c5c5c5c5c5c5cUL;
+ }
+
+ clib_sha2_state_init (&ipad_state, type);
+ clib_sha2_state_init (&opad_state, type);
+
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ {
+ clib_sha512_block (&ipad_state, ikey, 1);
+ clib_sha512_block (&opad_state, okey, 1);
+ }
+ else
+ {
+ clib_sha256_block (&ipad_state, ikey, 1);
+ clib_sha256_block (&opad_state, okey, 1);
+ }
+
+ kd->ipad_h = ipad_state.h;
+ kd->opad_h = opad_state.h;
+}
+
+static_always_inline void
+clib_sha2_hmac_init (clib_sha2_hmac_ctx_t *ctx, clib_sha2_type_t type,
+ clib_sha2_hmac_key_data_t *kd)
+{
+ u8 block_size = clib_sha2_variants[type].block_size;
+ u8 digest_size = clib_sha2_variants[type].digest_size;
+
+ *ctx = (clib_sha2_hmac_ctx_t) {
+ .type = type,
+ .block_size = block_size,
+ .digest_size = digest_size,
+ .ipad_state = {
+ .h = kd->ipad_h,
+ .total_bytes = block_size,
+ },
+ .opad_state = {
+ .h = kd->opad_h,
+ .total_bytes = block_size,
+ },
+ };
+}
+
+static_always_inline void
+clib_sha2_hmac_update (clib_sha2_hmac_ctx_t *ctx, const u8 *msg, uword len)
+{
+ clib_sha2_update_internal (&ctx->ipad_state, ctx->block_size, msg, len);
+}
+
+static_always_inline void
+clib_sha2_hmac_final (clib_sha2_hmac_ctx_t *ctx, u8 *digest)
+{
+ u8 i_digest[SHA2_MAX_DIGEST_SIZE];
+
+ clib_sha2_final_internal (&ctx->ipad_state, ctx->block_size,
+ ctx->digest_size, i_digest);
+ clib_sha2_update_internal (&ctx->opad_state, ctx->block_size, i_digest,
+ ctx->digest_size);
+ clib_sha2_final_internal (&ctx->opad_state, ctx->block_size,
+ ctx->digest_size, digest);
+}
+
+static_always_inline void
+clib_sha2_hmac (clib_sha2_type_t type, const u8 *key, uword key_len,
+ const u8 *msg, uword len, u8 *digest)
+{
+ clib_sha2_hmac_ctx_t _ctx, *ctx = &_ctx;
+ clib_sha2_hmac_key_data_t kd;
+
+ clib_sha2_hmac_key_data (type, key, key_len, &kd);
+ clib_sha2_hmac_init (ctx, type, &kd);
+ clib_sha2_hmac_update (ctx, msg, len);
+ clib_sha2_hmac_final (ctx, digest);
+}
+
+#define clib_hmac_sha224(...) clib_sha2_hmac (CLIB_SHA2_224, __VA_ARGS__)
+#define clib_hmac_sha256(...) clib_sha2_hmac (CLIB_SHA2_256, __VA_ARGS__)
+#define clib_hmac_sha384(...) clib_sha2_hmac (CLIB_SHA2_384, __VA_ARGS__)
+#define clib_hmac_sha512(...) clib_sha2_hmac (CLIB_SHA2_512, __VA_ARGS__)
+#define clib_hmac_sha512_224(...) \
+ clib_sha2_hmac (CLIB_SHA2_512_224, __VA_ARGS__)
+#define clib_hmac_sha512_256(...) \
+ clib_sha2_hmac (CLIB_SHA2_512_256, __VA_ARGS__)
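+
+/* Usage sketch (illustrative only; key and msg are hypothetical buffers):
+ *
+ *   u8 digest[32];
+ *   clib_hmac_sha256 (key, key_len, msg, msg_len, digest);
+ */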
+
+#endif /* included_sha2_h */
diff --git a/src/vppinfra/devicetree.c b/src/vppinfra/devicetree.c
new file mode 100644
index 00000000000..b725d5a0aed
--- /dev/null
+++ b/src/vppinfra/devicetree.c
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/devicetree.h>
+
+#ifdef __linux
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+static_always_inline clib_dt_node_t *
+clib_dt_node_add_child (clib_dt_main_t *dm, clib_dt_node_t *n, char *name)
+{
+ clib_dt_node_t *cn;
+
+ cn = clib_mem_alloc (sizeof (clib_dt_node_t));
+ *cn = (clib_dt_node_t){ .parent = n, .depth = n ? n->depth + 1 : 0 };
+ vec_add1 (dm->nodes, cn);
+
+ if (n == 0)
+ {
+ ASSERT (dm->root == 0);
+ dm->root = cn;
+ return cn;
+ }
+
+ vec_add1 (n->child_nodes, cn);
+ cn->path = format (0, "%v/%s", n->path, name);
+ cn->dt_main = dm;
+ hash_set_mem (dm->node_by_path, cn->path, cn);
+ if (vec_len (n->child_nodes) > 1)
+ {
+ clib_dt_node_t *prev = n->child_nodes[vec_len (n->child_nodes) - 2];
+ prev->next = cn;
+ cn->prev = prev;
+ }
+
+ return cn;
+}
+#endif
+
+void
+clib_dt_main_free (clib_dt_main_t *dm)
+{
+ vec_foreach_pointer (n, dm->nodes)
+ {
+ vec_foreach_pointer (p, n->properties)
+ clib_mem_free (p);
+ vec_free (n->child_nodes);
+ vec_free (n->path);
+ vec_free (n->properties);
+ }
+
+ vec_free (dm->nodes);
+ hash_free (dm->node_by_path);
+ hash_free (dm->node_by_phandle);
+}
+
+#ifdef __linux
+__clib_export clib_error_t *
+clib_dt_read_from_sysfs (clib_dt_main_t *dm)
+{
+ DIR *dir, **dir_stack = 0;
+ struct dirent *e;
+ clib_dt_node_t *n;
+ u8 *path = 0;
+ u32 path_prefix_len;
+ clib_error_t *err = 0;
+
+ path = format (0, CLIB_DT_LINUX_PREFIX);
+ path_prefix_len = vec_len (path);
+ vec_add1 (path, 0);
+
+ dir = opendir ((char *) path);
+ if (!dir)
+ {
+ err = clib_error_return (0, "'%s' opendir failed", path);
+ goto done;
+ }
+
+ dm->node_by_path = hash_create_vec (0, sizeof (u8), sizeof (uword));
+ dm->node_by_phandle = hash_create (0, sizeof (uword));
+ vec_set_len (path, path_prefix_len);
+ n = clib_dt_node_add_child (dm, 0, 0);
+
+ while (1)
+ {
+ e = readdir (dir);
+
+ if (!e)
+ {
+ closedir (dir);
+ if (vec_len (dir_stack) == 0)
+ break;
+
+ dir = dir_stack[vec_len (dir_stack) - 1];
+ vec_pop (dir_stack);
+ n = n->parent;
+ continue;
+ }
+
+ if (e->d_type == DT_REG)
+ {
+ path = format (path, "%v/%s%c", n->path, e->d_name, 0);
+ int fd = open ((char *) path, 0);
+ if (fd >= 0)
+ {
+ struct stat st;
+ if (fstat (fd, &st) == 0)
+ {
+ u32 sz = sizeof (clib_dt_property_t) + st.st_size;
+ clib_dt_property_t *p = clib_mem_alloc (sz);
+ clib_memset (p, 0, sz);
+
+ if (read (fd, p->data, st.st_size) == st.st_size)
+ {
+ strncpy (p->name, e->d_name, sizeof (p->name));
+ p->size = st.st_size;
+ vec_add1 (n->properties, p);
+ if (strncmp ("name", p->name, 5) == 0)
+ n->name = p;
+ if ((strncmp ("phandle", p->name, 8) == 0) &&
+ (p->size == 4))
+ {
+ u32 phandle =
+ clib_net_to_host_u32 (*(u32u *) p->data);
+ hash_set (dm->node_by_phandle, phandle, n);
+ }
+ }
+ else
+ {
+ clib_mem_free (p);
+ err = clib_error_return (0, "'%s' read failed", path);
+ close (fd);
+ goto done;
+ }
+ }
+ else
+ {
+ err = clib_error_return (0, "'%s' fstat failed", path);
+ close (fd);
+ goto done;
+ }
+ close (fd);
+ }
+ else
+ {
+ err = clib_error_return (0, "'%s' open failed", path);
+ goto done;
+ }
+
+ vec_set_len (path, path_prefix_len);
+ }
+ else if (e->d_type == DT_DIR)
+ {
+ DIR *subdir;
+ if (strncmp (".", e->d_name, 2) == 0 ||
+ strncmp ("..", e->d_name, 3) == 0)
+ continue;
+
+ path = format (path, "%v/%s%c", n->path, e->d_name, 0);
+ subdir = opendir ((char *) path);
+ vec_set_len (path, path_prefix_len);
+ if (subdir)
+ {
+ vec_add1 (dir_stack, dir);
+ dir = subdir;
+ n = clib_dt_node_add_child (dm, n, e->d_name);
+ }
+ else
+ {
+ err = clib_error_return (0, "'%s' opendir failed", path);
+ goto done;
+ }
+ }
+ else
+ err =
+ clib_error_return (0, "unknown entry %s [%u]", e->d_name, e->d_type);
+ }
+
+done:
+ if (err)
+ clib_dt_main_free (dm);
+ while (vec_len (dir_stack))
+ closedir (vec_pop (dir_stack));
+ vec_free (dir_stack);
+ vec_free (path);
+ return err;
+}
+#endif
+
+clib_dt_node_t *
+clib_dt_get_child_node (clib_dt_node_t *n, char *name)
+{
+ vec_foreach_pointer (cn, n->child_nodes)
+ {
+ u8 *p = cn->path + vec_len (cn->path) - 1;
+ u32 i = 0;
+
+ while (p > cn->path && p[-1] != '/')
+ p--;
+
+ if (p[-1] != '/')
+ continue;
+
+ while (p[i] == name[i] && name[i] != 0)
+ i++;
+
+ if (name[i] != 0)
+ continue;
+
+ return cn;
+ }
+
+ return 0;
+}
+
+__clib_export clib_dt_node_t *
+clib_dt_get_node_with_path (clib_dt_main_t *dm, char *fmt, ...)
+{
+ u8 *s;
+ uword *p;
+
+ va_list va;
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ if (s[0] != '/')
+ return 0;
+
+ p = hash_get_mem (dm->node_by_path, s);
+ if (p)
+ return (clib_dt_node_t *) p[0];
+
+ return 0;
+}
+
+__clib_export clib_dt_property_t *
+clib_dt_get_node_property_by_name (clib_dt_node_t *n, char *name)
+{
+ vec_foreach_pointer (p, n->properties)
+ if (strncmp (name, p->name, sizeof (p->name)) == 0)
+ return p;
+ return 0;
+}
+
+__clib_export int
+clib_dt_node_is_compatible (clib_dt_node_t *n, char *comp)
+{
+ clib_dt_property_t *p;
+ char *s;
+
+ p = clib_dt_get_node_property_by_name (n, "compatible");
+
+ if (!p)
+ return 0;
+
+ s = (char *) p->data;
+ for (u32 i = 1, len = 1; i <= p->size; i++)
+ {
+ if (p->data[i - 1] == 0)
+ {
+ if (strncmp (comp, s, len) == 0)
+ return 1;
+ s = (char *) p->data + i;
+ len = 1;
+ }
+ else
+ len++;
+ }
+
+ return 0;
+}
+
+__clib_export u8 *
+format_clib_dt_property_data (u8 *s, va_list *args)
+{
+ clib_dt_property_t *p = va_arg (*args, clib_dt_property_t *);
+ u32 sz = p->size, is_printable = 0;
+ u32 n_nulls = 0;
+
+ if (sz > 2 && p->data[sz - 1] == 0 && p->data[0] != 0)
+ {
+ is_printable = 1;
+ for (u32 i = 1; i < sz - 1; i++)
+ {
+ u8 c = p->data[i];
+ if (c == 0)
+ {
+ if (p->data[i - 1] == 0)
+ {
+ is_printable = 0;
+ break;
+ }
+ n_nulls++;
+ }
+ else if ((c < 0x20) || (c > 0x7f))
+ {
+ is_printable = 0;
+ break;
+ }
+ }
+ }
+
+ if (is_printable)
+ {
+ s = format (s, "'%s'", p->data);
+ if (n_nulls)
+ {
+ for (u32 i = 2; i < p->size; i++)
+ if (((u8 *) p->data)[i - 1] == 0)
+ s = format (s, ", '%s'", ((u8 *) p->data) + i);
+ }
+ }
+ else
+ {
+ s = format (s, "< %02x", p->data[0]);
+ for (u32 i = 0; i < p->size; i++)
+ s = format (s, " %02x", p->data[i]);
+ s = format (s, " >");
+ }
+ return s;
+}
+
+__clib_export clib_dt_node_t *
+clib_dt_dereference_node (clib_dt_node_t *n, char *name)
+{
+ clib_dt_property_t *p;
+ uword *h;
+
+ p = clib_dt_get_node_property_by_name (n, name);
+ if (!p || (p->size != sizeof (u32)))
+ return 0;
+
+ h = hash_get (n->dt_main->node_by_phandle,
+ clib_net_to_host_u32 (*(u32u *) p->data));
+
+ if (h)
+ return (clib_dt_node_t *) h[0];
+
+ return 0;
+}
diff --git a/src/vppinfra/devicetree.h b/src/vppinfra/devicetree.h
new file mode 100644
index 00000000000..21c2e0f7006
--- /dev/null
+++ b/src/vppinfra/devicetree.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef CLIB_DEVICETREE_H_
+#define CLIB_DEVICETREE_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/vlib.h>
+
+#ifdef __linux
+#define CLIB_DT_LINUX_PREFIX "/sys/firmware/devicetree/base"
+#endif
+
+typedef struct
+{
+ char name[32];
+ u32 size;
+ u8 data[];
+} clib_dt_property_t;
+
+typedef struct clib_dt_main clib_dt_main_t;
+
+typedef struct clib_dt_node
+{
+ u8 *path;
+ struct clib_dt_node *parent;
+ struct clib_dt_node *prev;
+ struct clib_dt_node *next;
+ struct clib_dt_node **child_nodes;
+ u8 depth;
+ clib_dt_property_t *name;
+ clib_dt_property_t **properties;
+ clib_dt_main_t *dt_main;
+} clib_dt_node_t;
+
+typedef struct clib_dt_main
+{
+ clib_dt_node_t **nodes;
+ clib_dt_node_t *root;
+ uword *node_by_path;
+ uword *node_by_phandle;
+} clib_dt_main_t;
+
+clib_dt_node_t *clib_dt_get_node_with_path (clib_dt_main_t *dm, char *fmt,
+ ...);
+clib_dt_property_t *clib_dt_get_node_property_by_name (clib_dt_node_t *,
+ char *);
+int clib_dt_node_is_compatible (clib_dt_node_t *, char *);
+clib_dt_node_t *clib_dt_dereference_node (clib_dt_node_t *, char *);
+#ifdef __linux
+clib_error_t *clib_dt_read_from_sysfs (clib_dt_main_t *dm);
+#endif
+
+format_function_t format_clib_dt_desc;
+format_function_t format_clib_dt_property_data;
+
+static_always_inline int
+clib_dt_proprerty_is_u32 (clib_dt_property_t *p)
+{
+ if (p == 0 || p->size != 4)
+ return 0;
+ return 1;
+}
+
+static_always_inline u32
+clib_dt_proprerty_get_u32 (clib_dt_property_t *p)
+{
+ return clib_net_to_host_u32 (*(u32u *) p->data);
+}
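+
+/* Usage sketch (illustrative only; the node path and compatible string are
+ * hypothetical):
+ *
+ *   clib_dt_main_t dm = {};
+ *   clib_dt_node_t *n;
+ *
+ *   if (clib_dt_read_from_sysfs (&dm) == 0)
+ *     {
+ *       n = clib_dt_get_node_with_path (&dm, "/soc/ethernet@0");
+ *       if (n && clib_dt_node_is_compatible (n, "snps,dwmac"))
+ *         { ... }
+ *       clib_dt_main_free (&dm);
+ *     }
+ */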
+
+#endif /* CLIB_DEVICETREE_H_ */
diff --git a/src/vppinfra/dlmalloc.c b/src/vppinfra/dlmalloc.c
index 36c80b09b87..5cdc6f6cc13 100644
--- a/src/vppinfra/dlmalloc.c
+++ b/src/vppinfra/dlmalloc.c
@@ -5,8 +5,8 @@
comments, complaints, performance data, etc to dl@cs.oswego.edu
*/
+#include <vppinfra/clib.h>
#include <vppinfra/dlmalloc.h>
-#include <vppinfra/sanitizer.h>
/*------------------------------ internal #includes ---------------------- */
@@ -460,7 +460,7 @@ static FORCEINLINE void x86_clear_lock(int* sl) {
#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
/* Plain spin locks use single word (embedded in malloc_states) */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int spin_acquire_lock(int *sl) {
int spins = 0;
while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {
@@ -1286,7 +1286,7 @@ static struct malloc_state _gm_;
((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
/* Return segment holding given address */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static msegmentptr segment_holding(mstate m, char* addr) {
msegmentptr sp = &m->seg;
for (;;) {
@@ -1298,7 +1298,7 @@ static msegmentptr segment_holding(mstate m, char* addr) {
}
/* Return true if segment contains a segment link */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int has_segment_link(mstate m, msegmentptr ss) {
msegmentptr sp = &m->seg;
for (;;) {
@@ -1616,7 +1616,7 @@ static size_t traverse_and_check(mstate m);
#if (FOOTERS && !INSECURE)
/* Check if (alleged) mstate m has expected magic field */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static inline int
ok_magic (const mstate m)
{
@@ -2083,7 +2083,7 @@ static void do_check_malloc_state(mstate m) {
/* ----------------------------- statistics ------------------------------ */
#if !NO_MALLINFO
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static struct dlmallinfo internal_mallinfo(mstate m) {
struct dlmallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
ensure_initialization();
@@ -2493,7 +2493,7 @@ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) {
/* -------------------------- mspace management -------------------------- */
/* Initialize top chunk and its size */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void init_top(mstate m, mchunkptr p, size_t psize) {
/* Ensure alignment */
size_t offset = align_offset(chunk2mem(p));
@@ -2538,7 +2538,7 @@ static void reset_on_error(mstate m) {
#endif /* PROCEED_ON_ERROR */
/* Allocate chunk and prepend remainder with chunk in successor base. */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
size_t nb) {
mchunkptr p = align_as_chunk(newbase);
@@ -2581,7 +2581,7 @@ static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
}
/* Add a segment to hold a new noncontiguous region */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
/* Determine locations and sizes of segment, fenceposts, old top */
char* old_top = (char*)m->top;
@@ -2596,7 +2596,7 @@ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
msegmentptr ss = (msegmentptr)(chunk2mem(sp));
mchunkptr tnext = chunk_plus_offset(sp, ssize);
mchunkptr p = tnext;
- int nfences = 0;
+ int __attribute__((unused)) nfences = 0;
/* reset top to new space */
init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
@@ -2637,7 +2637,7 @@ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
/* -------------------------- System allocation -------------------------- */
/* Get memory from system using MORECORE or MMAP */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* sys_alloc(mstate m, size_t nb) {
char* tbase = CMFAIL;
size_t tsize = 0;
@@ -2852,7 +2852,7 @@ static void* sys_alloc(mstate m, size_t nb) {
/* ----------------------- system deallocation -------------------------- */
/* Unmap and unlink any mmapped segments that don't contain used chunks */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static size_t release_unused_segments(mstate m) {
size_t released = 0;
int nsegs = 0;
@@ -2900,7 +2900,7 @@ static size_t release_unused_segments(mstate m) {
return released;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int sys_trim(mstate m, size_t pad) {
size_t released = 0;
ensure_initialization();
@@ -2969,7 +2969,7 @@ static int sys_trim(mstate m, size_t pad) {
/* Consolidate and bin a chunk. Differs from exported versions
of free mainly in that the chunk need not be marked as inuse.
*/
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
mchunkptr next = chunk_plus_offset(p, psize);
if (!pinuse(p)) {
@@ -3041,7 +3041,7 @@ static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
/* ---------------------------- malloc --------------------------- */
/* allocate a large request from the best fitting chunk in a treebin */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* tmalloc_large(mstate m, size_t nb) {
tchunkptr v = 0;
size_t rsize = -nb; /* Unsigned negation */
@@ -3113,7 +3113,7 @@ static void* tmalloc_large(mstate m, size_t nb) {
}
/* allocate a small request from the best fitting chunk in a treebin */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* tmalloc_small(mstate m, size_t nb) {
tchunkptr t, v;
size_t rsize;
@@ -3420,7 +3420,7 @@ void* dlcalloc(size_t n_elements, size_t elem_size) {
/* ------------ Internal support for realloc, memalign, etc -------------- */
/* Try to realloc; only in-place unless can_move true */
-static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
+static __clib_nosanitize_addr mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
int can_move) {
mchunkptr newp = 0;
size_t oldsize = chunksize(p);
@@ -3499,7 +3499,7 @@ static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
return newp;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
void* mem = 0;
if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
@@ -4082,7 +4082,7 @@ int mspace_track_large_chunks(mspace msp, int enable) {
return ret;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
size_t destroy_mspace(mspace msp) {
size_t freed = 0;
mstate ms = (mstate)msp;
@@ -4118,7 +4118,7 @@ void mspace_get_address_and_size (mspace msp, char **addrp, size_t *sizep)
*sizep = this_seg->size;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
int mspace_is_heap_object (mspace msp, void *p)
{
msegment *this_seg;
@@ -4144,7 +4144,7 @@ int mspace_is_heap_object (mspace msp, void *p)
return 0;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void *mspace_least_addr (mspace msp)
{
mstate ms = (mstate) msp;
@@ -4158,7 +4158,7 @@ void mspace_disable_expand (mspace msp)
disable_expand (ms);
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
int mspace_enable_disable_trace (mspace msp, int enable)
{
mstate ms = (mstate)msp;
@@ -4175,7 +4175,7 @@ int mspace_enable_disable_trace (mspace msp, int enable)
return (was_enabled);
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
int mspace_is_traced (mspace msp)
{
mstate ms = (mstate)msp;
@@ -4185,7 +4185,7 @@ int mspace_is_traced (mspace msp)
return 0;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
void* mspace_get_aligned (mspace msp,
unsigned long n_user_data_bytes,
unsigned long align,
@@ -4265,7 +4265,7 @@ void* mspace_get_aligned (mspace msp,
return (void *) searchp;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
void mspace_put (mspace msp, void *p_arg)
{
char *object_header;
@@ -4315,7 +4315,7 @@ void mspace_put_no_offset (mspace msp, void *p_arg)
mspace_free (msp, p_arg);
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
size_t mspace_usable_size_with_delta (const void *p)
{
size_t usable_size;
@@ -4341,7 +4341,7 @@ size_t mspace_usable_size_with_delta (const void *p)
versions. This is not so nice but better than the alternatives.
*/
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void* mspace_malloc(mspace msp, size_t bytes) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4456,7 +4456,7 @@ void* mspace_malloc(mspace msp, size_t bytes) {
return 0;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void mspace_free(mspace msp, void* mem) {
if (mem != 0) {
mchunkptr p = mem2chunk(mem);
@@ -4623,6 +4623,7 @@ void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
return mem;
}
+__clib_nosanitize_addr
void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) {
void* mem = 0;
if (oldmem != 0) {
@@ -4655,6 +4656,7 @@ void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) {
return mem;
}
+__clib_nosanitize_addr
void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4794,7 +4796,7 @@ size_t mspace_set_footprint_limit(mspace msp, size_t bytes) {
}
#if !NO_MALLINFO
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
struct dlmallinfo mspace_mallinfo(mspace msp) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4804,7 +4806,7 @@ struct dlmallinfo mspace_mallinfo(mspace msp) {
}
#endif /* NO_MALLINFO */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
size_t mspace_usable_size(const void* mem) {
if (mem != 0) {
mchunkptr p = mem2chunk(mem);
diff --git a/src/vppinfra/dlmalloc.h b/src/vppinfra/dlmalloc.h
index b8adf74831d..5fcaf7c30ca 100644
--- a/src/vppinfra/dlmalloc.h
+++ b/src/vppinfra/dlmalloc.h
@@ -1447,6 +1447,8 @@ DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
*/
DLMALLOC_EXPORT int mspace_mallopt(int, int);
+DLMALLOC_EXPORT void* mspace_realloc_in_place (mspace msp, void *oldmem, size_t bytes);
+
DLMALLOC_EXPORT void* mspace_get_aligned (mspace msp,
unsigned long n_user_data_bytes,
unsigned long align,
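
A short sketch of the newly exported mspace_realloc_in_place, using the standard dlmalloc mspace calls (sizes are arbitrary); it returns 0 when the chunk cannot be grown where it sits, so a moving realloc is the usual fallback:

#include <vppinfra/dlmalloc.h>

static void
realloc_in_place_demo (void)
{
  mspace ms = create_mspace (1 << 20, /* locked */ 1);
  void *p = mspace_malloc (ms, 128);

  /* try to grow the allocation without moving it */
  if (mspace_realloc_in_place (ms, p, 256) == 0)
    p = mspace_realloc (ms, p, 256); /* fall back to a moving realloc */

  mspace_free (ms, p);
  destroy_mspace (ms);
}
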
diff --git a/src/vppinfra/elf.c b/src/vppinfra/elf.c
index 11fac45b27e..f660195e101 100644
--- a/src/vppinfra/elf.c
+++ b/src/vppinfra/elf.c
@@ -1357,7 +1357,7 @@ elf_read_file (elf_main_t * em, char *file_name)
goto done;
}
- CLIB_MEM_UNPOISON (data, mmap_length);
+ clib_mem_unpoison (data, mmap_length);
em->file_name = file_name;
@@ -1455,7 +1455,6 @@ static void
layout_sections (elf_main_t * em)
{
elf_section_t *s;
- u32 n_sections_with_changed_exec_address = 0;
u32 *deferred_symbol_and_string_sections = 0;
u32 n_deleted_sections = 0;
/* note: rebuild is always zero. Intent lost in the sands of time */
@@ -1614,7 +1613,6 @@ layout_sections (elf_main_t * em)
if (s->header.flags & ELF_SECTION_FLAG_ALLOC)
{
s->exec_address_change = exec_address - s->header.exec_address;
- n_sections_with_changed_exec_address += s->exec_address_change != 0;
s->header.exec_address = exec_address;
}
@@ -1704,7 +1702,6 @@ layout_sections (elf_main_t * em)
continue;
s_lo = s_hi = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (si, g->section_index_bitmap) {
u64 lo, hi;
@@ -1729,7 +1726,6 @@ layout_sections (elf_main_t * em)
s_hi = hi;
}
}
- /* *INDENT-ON* */
if (n_sections == 0)
continue;
@@ -1977,7 +1973,7 @@ elf_create_section_with_contents (elf_main_t * em,
if ((p = hash_get_mem (em->section_by_name, section_name)))
{
s = vec_elt_at_index (em->sections, p[0]);
- _vec_len (s->contents) = 0;
+ vec_set_len (s->contents, 0);
c = s->contents;
}
else
diff --git a/src/vppinfra/elf.h b/src/vppinfra/elf.h
index cceb13e256b..56869f1b9c7 100644
--- a/src/vppinfra/elf.h
+++ b/src/vppinfra/elf.h
@@ -966,12 +966,10 @@ elf_get_section_contents (elf_main_t * em,
result = 0;
if (vec_len (s->contents) > 0)
{
+ vec_attr_t va = { .elt_sz = elt_size };
/* Make vector copy of contents with given element size. */
- result = _vec_resize (result,
- vec_len (s->contents) / elt_size,
- vec_len (s->contents),
- /* header_bytes */ 0,
- /* align */ 0);
+ result =
+ _vec_realloc_internal (result, vec_len (s->contents) / elt_size, &va);
clib_memcpy (result, s->contents, vec_len (s->contents));
}
diff --git a/src/vppinfra/elf_clib.c b/src/vppinfra/elf_clib.c
index d2865f800e3..d4d511e0fba 100644
--- a/src/vppinfra/elf_clib.c
+++ b/src/vppinfra/elf_clib.c
@@ -319,20 +319,33 @@ symbol_by_address_or_name (char *by_name,
return 0;
}
-uword
-clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t * s)
+__clib_export uword
+clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t *s)
{
return symbol_by_address_or_name (by_name, /* by_address */ 0, s);
}
-uword
-clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t * s)
+__clib_export uword
+clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t *s)
{
return symbol_by_address_or_name ( /* by_name */ 0, by_address, s);
}
-u8 *
-format_clib_elf_symbol (u8 * s, va_list * args)
+__clib_export const char *
+clib_elf_symbol_name (clib_elf_symbol_t *s)
+{
+ clib_elf_main_t *cem = &clib_elf_main;
+ elf_main_t *em;
+ elf_symbol_table_t *t;
+
+ em = vec_elt_at_index (cem->elf_mains, s->elf_main_index);
+ t = vec_elt_at_index (em->symbol_tables, s->symbol_table_index);
+
+ return (const char *) elf_symbol_name (t, &s->symbol);
+}
+
+__clib_export u8 *
+format_clib_elf_symbol (u8 *s, va_list *args)
{
clib_elf_main_t *cem = &clib_elf_main;
clib_elf_symbol_t *sym = va_arg (*args, clib_elf_symbol_t *);
diff --git a/src/vppinfra/elf_clib.h b/src/vppinfra/elf_clib.h
index 25b928c22a5..4e5d4d72165 100644
--- a/src/vppinfra/elf_clib.h
+++ b/src/vppinfra/elf_clib.h
@@ -131,6 +131,8 @@ typedef struct
uword clib_elf_symbol_by_name (char *name, clib_elf_symbol_t * result);
uword clib_elf_symbol_by_address (uword address, clib_elf_symbol_t * result);
+const char *clib_elf_symbol_name (clib_elf_symbol_t *result);
+
format_function_t format_clib_elf_symbol, format_clib_elf_symbol_with_address;
#endif /* included_clib_elf_self_h */
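
A sketch of the new clib_elf_symbol_name accessor paired with the existing address lookup; it assumes the ELF machinery has already been initialized for the running executable (e.g. via clib_elf_main_init):

#include <vppinfra/elf_clib.h>

static void
print_own_symbol (void)
{
  clib_elf_symbol_t sym;
  uword addr = pointer_to_uword (&print_own_symbol);

  if (clib_elf_symbol_by_address (addr, &sym))
    fformat (stdout, "symbol: %s\n", clib_elf_symbol_name (&sym));
}
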
diff --git a/src/vppinfra/elog.c b/src/vppinfra/elog.c
index 8ae752eb6af..a0e5712aaab 100644
--- a/src/vppinfra/elog.c
+++ b/src/vppinfra/elog.c
@@ -494,7 +494,7 @@ elog_alloc_internal (elog_main_t * em, u32 n_events, int free_ring)
em->event_ring_size = n_events = max_pow2 (n_events);
vec_validate_aligned (em->event_ring, n_events, CLIB_CACHE_LINE_BYTES);
- _vec_len (em->event_ring) = n_events;
+ vec_set_len (em->event_ring, n_events);
}
__clib_export void
@@ -1198,7 +1198,7 @@ elog_write_file_not_inline (elog_main_t * em, char *clib_file, int flush_ring)
__clib_export clib_error_t *
elog_read_file_not_inline (elog_main_t * em, char *clib_file)
{
- serialize_main_t m;
+ serialize_main_t m = { 0 };
clib_error_t *error;
error = unserialize_open_clib_file (&m, clib_file);
diff --git a/src/vppinfra/error.c b/src/vppinfra/error.c
index b2b1a83e552..374b8b5256a 100644
--- a/src/vppinfra/error.c
+++ b/src/vppinfra/error.c
@@ -109,8 +109,8 @@ dispatch_message (u8 * msg)
}
__clib_export void
-_clib_error (int how_to_die,
- char *function_name, uword line_number, char *fmt, ...)
+_clib_error (int how_to_die, const char *function_name, uword line_number,
+ const char *fmt, ...)
{
u8 *msg = 0;
va_list va;
@@ -146,8 +146,8 @@ _clib_error (int how_to_die,
}
__clib_export clib_error_t *
-_clib_error_return (clib_error_t * errors,
- any code, uword flags, char *where, char *fmt, ...)
+_clib_error_return (clib_error_t *errors, any code, uword flags,
+ const char *where, const char *fmt, ...)
{
clib_error_t *e;
va_list va;
diff --git a/src/vppinfra/error.h b/src/vppinfra/error.h
index e0e2d4726b2..9eae8ea6818 100644
--- a/src/vppinfra/error.h
+++ b/src/vppinfra/error.h
@@ -85,10 +85,9 @@ extern void *clib_error_free_vector (clib_error_t * errors);
#define clib_error_free(e) e = clib_error_free_vector(e)
-extern clib_error_t *_clib_error_return (clib_error_t * errors,
- any code,
- uword flags,
- char *where, char *fmt, ...);
+extern clib_error_t *_clib_error_return (clib_error_t *errors, any code,
+ uword flags, const char *where,
+ const char *fmt, ...);
#define clib_error_return_code(e,code,flags,args...) \
_clib_error_return((e),(code),(flags),(char *)clib_error_function,args)
diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h
index 185f4c6c4af..ae23d1bcca8 100644
--- a/src/vppinfra/error_bootstrap.h
+++ b/src/vppinfra/error_bootstrap.h
@@ -62,9 +62,8 @@ enum
/* Low level error reporting function.
Code specifies whether to call exit, abort or nothing at
all (for non-fatal warnings). */
-extern void _clib_error (int code,
- char *function_name,
- uword line_number, char *format, ...);
+extern void _clib_error (int code, const char *function_name,
+ uword line_number, const char *format, ...);
#define ASSERT(truth) \
do { \
diff --git a/src/vppinfra/fifo.c b/src/vppinfra/fifo.c
index 52d65ae1f37..2b1cfea6fe0 100644
--- a/src/vppinfra/fifo.c
+++ b/src/vppinfra/fifo.c
@@ -77,12 +77,16 @@
*/
__clib_export void *
-_clib_fifo_resize (void *v_old, uword n_new_elts, uword elt_bytes)
+_clib_fifo_resize (void *v_old, uword n_new_elts, uword align, uword elt_bytes)
{
- void *v_new, *end, *head;
- uword n_old_elts, header_bytes;
+ void *end, *head;
+ u8 *v_new = 0;
+ uword n_old_elts;
uword n_copy_bytes, n_zero_bytes;
clib_fifo_header_t *f_new, *f_old;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (clib_fifo_header_t),
+ .align = align };
n_old_elts = clib_fifo_elts (v_old);
n_new_elts += n_old_elts;
@@ -91,15 +95,10 @@ _clib_fifo_resize (void *v_old, uword n_new_elts, uword elt_bytes)
else
n_new_elts = max_pow2 (n_new_elts);
- header_bytes = vec_header_bytes (sizeof (clib_fifo_header_t));
-
- v_new = clib_mem_alloc_no_fail (n_new_elts * elt_bytes + header_bytes);
- v_new += header_bytes;
-
+ v_new = _vec_alloc_internal (n_new_elts, &va);
f_new = clib_fifo_header (v_new);
f_new->head_index = 0;
f_new->tail_index = n_old_elts;
- _vec_len (v_new) = n_new_elts;
/* Copy old -> new. */
n_copy_bytes = n_old_elts * elt_bytes;
diff --git a/src/vppinfra/fifo.h b/src/vppinfra/fifo.h
index 5dc1b4512cf..b6a8b8f5c3b 100644
--- a/src/vppinfra/fifo.h
+++ b/src/vppinfra/fifo.h
@@ -54,7 +54,7 @@ typedef struct
always_inline clib_fifo_header_t *
clib_fifo_header (void *f)
{
- return vec_header (f, sizeof (clib_fifo_header_t));
+ return vec_header (f);
}
/* Aliases. */
@@ -91,31 +91,31 @@ clib_fifo_reset (void *v)
if (v)
{
f->head_index = f->tail_index = 0;
- _vec_len (v) = 0;
+ vec_set_len (v, 0);
}
}
/* External resize function. */
-void *_clib_fifo_resize (void *v, uword n_elts, uword elt_bytes);
+void *_clib_fifo_resize (void *v, uword n_elts, uword align, uword elt_bytes);
-#define clib_fifo_resize(f,n_elts) \
- f = _clib_fifo_resize ((f), (n_elts), sizeof ((f)[0]))
+#define clib_fifo_resize(f, n_elts) \
+ f = _clib_fifo_resize ((f), (n_elts), _vec_align (f, 0), _vec_elt_sz (f))
always_inline void *
-_clib_fifo_validate (void *v, uword n_elts, uword elt_bytes)
+_clib_fifo_validate (void *v, uword n_elts, uword align, uword elt_bytes)
{
if (clib_fifo_free_elts (v) < n_elts)
- v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ v = _clib_fifo_resize (v, n_elts, align, elt_bytes);
return v;
}
-#define clib_fifo_validate(f,n_elts) \
- f = _clib_fifo_validate ((f), (n_elts), sizeof (f[0]))
+#define clib_fifo_validate(f, n_elts) \
+ f = _clib_fifo_validate ((f), (n_elts), _vec_align (f, 0), _vec_elt_sz (f))
/* Advance tail pointer by N_ELTS which can be either positive or negative. */
always_inline void *
-_clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
- uword * tail_return)
+_clib_fifo_advance_tail (void *v, word n_elts, uword align, uword elt_bytes,
+ uword *tail_return)
{
word i, l, n_free;
clib_fifo_header_t *f;
@@ -123,7 +123,7 @@ _clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
n_free = clib_fifo_free_elts (v);
if (n_free < n_elts)
{
- v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ v = _clib_fifo_resize (v, n_elts, align, elt_bytes);
n_free = clib_fifo_free_elts (v);
}
@@ -158,12 +158,13 @@ _clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
return v;
}
-#define clib_fifo_advance_tail(f,n_elts) \
-({ \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), (n_elts), sizeof ((f)[0]), &_i); \
- (f) + _i; \
-})
+#define clib_fifo_advance_tail(f, n_elts) \
+ ({ \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), (n_elts), _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (f) + _i; \
+ })
always_inline uword
clib_fifo_advance_head (void *v, uword n_elts)
@@ -189,36 +190,46 @@ clib_fifo_advance_head (void *v, uword n_elts)
}
/* Add given element to fifo. */
-#define clib_fifo_add1(f,e) \
-do { \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
- (f)[_i] = (e); \
-} while (0)
+#define clib_fifo_add1(f, e) \
+ do \
+ { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (f)[_i] = (e); \
+ } \
+ while (0)
/* Add element to fifo; return pointer to new element. */
-#define clib_fifo_add2(f,p) \
-do { \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
- (p) = (f) + _i; \
-} while (0)
+#define clib_fifo_add2(f, p) \
+ do \
+ { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (p) = (f) + _i; \
+ } \
+ while (0)
/* Add several elements to fifo. */
-#define clib_fifo_add(f,e,n) \
-do { \
- uword _i, _l; word _n0, _n1; \
- \
- _n0 = (n); \
- (f) = _clib_fifo_advance_tail ((f), _n0, sizeof ((f)[0]), &_i); \
- _l = clib_fifo_len (f); \
- _n1 = _i + _n0 - _l; \
- _n1 = _n1 < 0 ? 0 : _n1; \
- _n0 -= _n1; \
- clib_memcpy_fast ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
- if (_n1) \
- clib_memcpy_fast ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
-} while (0)
+#define clib_fifo_add(f, e, n) \
+ do \
+ { \
+ uword _i, _l; \
+ word _n0, _n1; \
+ \
+ _n0 = (n); \
+ (f) = _clib_fifo_advance_tail ((f), _n0, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ _l = clib_fifo_len (f); \
+ _n1 = _i + _n0 - _l; \
+ _n1 = _n1 < 0 ? 0 : _n1; \
+ _n0 -= _n1; \
+ clib_memcpy_fast ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
+ if (_n1) \
+ clib_memcpy_fast ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
+ } \
+ while (0)
/* Subtract element from fifo. */
#define clib_fifo_sub1(f,e) \
@@ -254,7 +265,7 @@ clib_fifo_tail_index (void *v)
#define clib_fifo_head(v) ((v) + clib_fifo_head_index (v))
#define clib_fifo_tail(v) ((v) + clib_fifo_tail_index (v))
-#define clib_fifo_free(f) vec_free_h((f),sizeof(clib_fifo_header_t))
+#define clib_fifo_free(f) vec_free ((f))
always_inline uword
clib_fifo_elt_index (void *v, uword i)
@@ -274,7 +285,7 @@ clib_fifo_elt_index (void *v, uword i)
return result;
}
-#define clib_fifo_elt_at_index(v,i) ((v) + clib_fifo_elt_index (v, (i)))
+#define clib_fifo_elt_at_index(v, i) ((v) + (i))
#define clib_fifo_foreach(v,f,body) \
do { \
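
A minimal sketch of the fifo macros after this change, which now derive alignment and element size from the vector itself; element type and values are arbitrary:

#include <vppinfra/fifo.h>

static u32
fifo_sum_demo (void)
{
  u32 *f = 0, e, sum = 0;

  clib_fifo_add1 (f, 1); /* first add allocates the fifo */
  clib_fifo_add1 (f, 2);
  clib_fifo_add1 (f, 3);

  while (clib_fifo_elts (f))
    {
      clib_fifo_sub1 (f, e); /* pops from the head */
      sum += e;
    }

  clib_fifo_free (f);
  return sum; /* 1 + 2 + 3 = 6 */
}
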
diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h
index 09dd2fd0496..71956137665 100644
--- a/src/vppinfra/file.h
+++ b/src/vppinfra/file.h
@@ -163,6 +163,8 @@ clib_file_write (clib_file_t * f)
return f->write_function (f);
}
+u8 *clib_file_get_resolved_basename (char *fmt, ...);
+
#endif /* included_clib_file_h */
/*
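
Judging by its name, the new helper formats a path, resolves it, and returns the basename as a u8 * vector; the snippet below is a guess at its use (the /proc path is illustrative, and freeing the returned vector is assumed to be the caller's job):

#include <unistd.h>
#include <vppinfra/file.h>
#include <vppinfra/format.h>

static void
print_exe_name (void)
{
  u8 *name = clib_file_get_resolved_basename ("/proc/%d/exe", getpid ());
  if (name)
    {
      fformat (stdout, "executable: %v\n", name); /* %v prints a u8 vector */
      vec_free (name);
    }
}
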
diff --git a/src/vppinfra/format.c b/src/vppinfra/format.c
index ccd999e582f..642d3e20654 100644
--- a/src/vppinfra/format.c
+++ b/src/vppinfra/format.c
@@ -114,7 +114,7 @@ justify (u8 * s, format_info_t * fi, uword s_len_orig)
l0 = l1;
if (l1 > l0)
- _vec_len (s) = l0;
+ vec_set_len (s, l0);
else if (l0 > l1)
{
uword n = l0 - l1;
@@ -833,6 +833,16 @@ done:
return s;
}
+__clib_export char *
+format_c_string (u8 *s, const char *fmt, ...)
+{
+ va_list args;
+ va_start (args, fmt);
+ s = va_format (s, fmt, &args);
+ va_end (args);
+ vec_add1 (s, '\0');
+ return (char *) s;
+}
/*
* fd.io coding-style-patch-verification: ON
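
A quick sketch of the new format_c_string helper: it behaves like format but NUL-terminates the result, so the returned pointer can be handed to C string APIs while still being a vector that vec_free releases:

#include <string.h>
#include <vppinfra/format.h>

static void
c_string_demo (void)
{
  char *s = format_c_string (0, "eth%u", 0);

  ASSERT (strcmp (s, "eth0") == 0); /* usable as a plain C string */
  vec_free (s);			    /* still a vector underneath */
}
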
diff --git a/src/vppinfra/format.h b/src/vppinfra/format.h
index 70882adac99..14bac869f89 100644
--- a/src/vppinfra/format.h
+++ b/src/vppinfra/format.h
@@ -98,6 +98,7 @@ _(format_hex_bytes_no_wrap);
_(format_white_space);
_(format_f64);
_(format_time_interval);
+_ (format_duration);
#ifdef CLIB_UNIX
/* Unix specific formats. */
@@ -132,8 +133,11 @@ typedef struct _unformat_input_t
(and argument). */
uword (*fill_buffer) (struct _unformat_input_t * i);
- /* Return values for fill buffer function which indicate whether not
- input has been exhausted. */
+  /* User's function, called by unformat_free to release resources */
+  void (*free) (struct _unformat_input_t *i);
+
+  /* Return values for fill buffer function which indicate whether or not
+     input has been exhausted. */
#define UNFORMAT_END_OF_INPUT (~0)
#define UNFORMAT_MORE_INPUT 0
@@ -154,6 +158,8 @@ unformat_init (unformat_input_t * i,
always_inline void
unformat_free (unformat_input_t * i)
{
+ if (i->free)
+ i->free (i);
vec_free (i->buffer);
vec_free (i->buffer_marks);
clib_memset (i, 0, sizeof (i[0]));
@@ -199,6 +205,22 @@ unformat_put_input (unformat_input_t * input)
input->index -= 1;
}
+always_inline uword
+is_white_space (uword c)
+{
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* Peek current input character without advancing. */
always_inline uword
unformat_peek_input (unformat_input_t * input)
@@ -242,8 +264,8 @@ uword va_unformat (unformat_input_t * i, const char *fmt, va_list * args);
void unformat_init_command_line (unformat_input_t * input, char *argv[]);
/* Setup for unformat of given string. */
-void unformat_init_string (unformat_input_t * input,
- char *string, int string_len);
+void unformat_init_string (unformat_input_t *input, const char *string,
+ int string_len);
always_inline void
unformat_init_cstring (unformat_input_t * input, char *string)
@@ -254,6 +276,12 @@ unformat_init_cstring (unformat_input_t * input, char *string)
/* Setup for unformat of given vector string; vector will be freed by unformat_string. */
void unformat_init_vector (unformat_input_t * input, u8 * vector_string);
+/* Unformat u8 */
+unformat_function_t unformat_u8;
+
+/* Unformat u16 */
+unformat_function_t unformat_u16;
+
/* Format function for unformat input usable when an unformat error
has occurred. */
u8 *format_unformat_error (u8 * s, va_list * va);
@@ -287,6 +315,16 @@ unformat_function_t unformat_eof;
/* Parse memory size e.g. 100, 100k, 100m, 100g. */
unformat_function_t unformat_memory_size;
+/* Unformat C string array; takes the array length as 2nd argument */
+unformat_function_t unformat_c_string_array;
+
+/* Unformat single- and double-quoted strings */
+unformat_function_t unformat_single_quoted_string;
+unformat_function_t unformat_double_quoted_string;
+
+/* Format base 10 e.g. 100, 100K, 100M, 100G */
+u8 *format_base10 (u8 *s, va_list *va);
+
/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
u8 *format_memory_size (u8 * s, va_list * va);
@@ -301,12 +339,21 @@ u8 *format_c_identifier (u8 * s, va_list * va);
/* Format hexdump with both hex and printable chars - compatible with text2pcap */
u8 *format_hexdump (u8 * s, va_list * va);
+u8 *format_hexdump_u16 (u8 *s, va_list *va);
+u8 *format_hexdump_u32 (u8 *s, va_list *va);
+u8 *format_hexdump_u64 (u8 *s, va_list *va);
+
+/* Format a bitmap given as an array of uword numbers */
+u8 *format_uword_bitmap (u8 *s, va_list *va);
/* Unix specific formats. */
#ifdef CLIB_UNIX
/* Setup input from Unix file. */
void unformat_init_clib_file (unformat_input_t * input, int file_descriptor);
+/* Setup input from a filesystem path. */
+uword unformat_init_file (unformat_input_t *input, char *fmt, ...);
+
/* Take input from Unix environment variable; returns
1 if variable exists zero otherwise. */
uword unformat_init_unix_env (unformat_input_t * input, char *var);
@@ -325,6 +372,8 @@ int test_unformat_main (unformat_input_t * input);
created circular dependency problems. */
int test_vec_main (unformat_input_t * input);
+char *format_c_string (u8 *s, const char *fmt, ...);
+
#endif /* included_format_h */
/*
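
A sketch combining two of the additions above: unformat_init_file to read from a file and unformat_u8 to parse a value. It assumes unformat_init_file returns non-zero on success and registers the new free hook so unformat_free closes the file; the file name is hypothetical:

#include <vppinfra/format.h>

static void
parse_file_demo (void)
{
  unformat_input_t in;
  u8 x = 0;

  if (unformat_init_file (&in, "/tmp/%s", "config.txt") == 0)
    return;

  /* parses input such as "value 7" */
  if (unformat (&in, "value %U", unformat_u8, &x))
    fformat (stdout, "value = %u\n", x);

  unformat_free (&in); /* assumed to also close the underlying file */
}
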
diff --git a/src/vppinfra/format_ansi.h b/src/vppinfra/format_ansi.h
new file mode 100644
index 00000000000..c35406aacf7
--- /dev/null
+++ b/src/vppinfra/format_ansi.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef __FORMAT_ANSI_H__
+#define __FORMAT_ANSI_H__
+
+#define ANSI_RESET "\x1b[0m"
+#define ANSI_BOLD "\x1b[1m"
+#define ANSI_ITALIC "\x1b[3m"
+#define ANSI_UNDERLINE "\x1b[4m"
+#define ANSI_BLINK "\x1b[5m"
+#define ANSI_FG_BLACK "\x1b[30m"
+#define ANSI_FG_RED "\x1b[31m"
+#define ANSI_FG_GREEN "\x1b[32m"
+#define ANSI_FG_YELLOW "\x1b[33m"
+#define ANSI_FG_BLUE "\x1b[34m"
+#define ANSI_FG_MAGENTA "\x1b[35m"
+#define ANSI_FG_CYAN "\x1b[36m"
+#define ANSI_FG_WHITE "\x1b[37m"
+#define ANSI_FG_DEFAULT "\x1b[39m"
+#define ANSI_BG_BLACK "\x1b[40m"
+#define ANSI_BG_RED "\x1b[41m"
+#define ANSI_BG_GREEN "\x1b[42m"
+#define ANSI_BG_YELLOW "\x1b[43m"
+#define ANSI_BG_BLUE "\x1b[44m"
+#define ANSI_BG_MAGENTA "\x1b[45m"
+#define ANSI_BG_CYAN "\x1b[46m"
+#define ANSI_BG_WHITE "\x1b[47m"
+#define ANSI_BG_DEFAULT "\x1b[49m"
+#define ANSI_FG_BR_BLACK "\x1b[90m"
+#define ANSI_FG_BR_RED "\x1b[91m"
+#define ANSI_FG_BR_GREEN "\x1b[92m"
+#define ANSI_FG_BR_YELLOW "\x1b[93m"
+#define ANSI_FG_BR_BLUE "\x1b[94m"
+#define ANSI_FG_BR_MAGENTA "\x1b[95m"
+#define ANSI_FG_BR_CYAN "\x1b[96m"
+#define ANSI_FG_BR_WHITE "\x1b[97m"
+#define ANSI_BG_BR_BLACK "\x1b[100m"
+#define ANSI_BG_BR_RED "\x1b[101m"
+#define ANSI_BG_BR_GREEN "\x1b[102m"
+#define ANSI_BG_BR_YELLOW "\x1b[103m"
+#define ANSI_BG_BR_BLUE "\x1b[104m"
+#define ANSI_BG_BR_MAGENTA "\x1b[105m"
+#define ANSI_BG_BR_CYAN "\x1b[106m"
+#define ANSI_BG_BR_WHITE "\x1b[107m"
+
+#endif /* __FORMAT_ANSI_H__ */
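
These macros are plain escape-sequence string literals, so they concatenate with adjacent literals at compile time; a one-line sketch:

#include <vppinfra/format.h>
#include <vppinfra/format_ansi.h>

static void
warn (char *what)
{
  /* literals concatenate into "\x1b[31m\x1b[1merror:\x1b[0m %s\n" */
  fformat (stderr, ANSI_FG_RED ANSI_BOLD "error:" ANSI_RESET " %s\n", what);
}
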
diff --git a/src/vppinfra/format_table.c b/src/vppinfra/format_table.c
new file mode 100644
index 00000000000..dd92e417acd
--- /dev/null
+++ b/src/vppinfra/format_table.c
@@ -0,0 +1,295 @@
+/*
+ Copyright (c) 2020 Damjan Marion
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vppinfra/format.h>
+#include <vppinfra/format_table.h>
+
+static table_text_attr_t default_title = {
+ .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
+ .fg_color = TTAC_YELLOW,
+ .align = TTAA_CENTER,
+};
+
+static table_text_attr_t default_body = {
+ .align = TTAA_RIGHT,
+};
+
+static table_text_attr_t default_header_col = {
+ .flags = TTAF_FG_COLOR_SET,
+ .fg_color = TTAC_YELLOW,
+ .align = TTAA_CENTER,
+};
+
+static table_text_attr_t default_header_row = {
+ .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
+ .fg_color = TTAC_GREEN,
+ .align = TTAA_LEFT,
+};
+
+u8 *
+format_text_cell (table_t *t, u8 *s, table_cell_t *c, table_text_attr_t *def,
+ int size)
+{
+ table_text_attr_t _a = {}, *a = &_a;
+
+ if (c == 0)
+ return format (s, t->no_ansi ? "" : "\x1b[0m");
+
+ clib_memcpy (a, def, sizeof (table_text_attr_t));
+
+ if (t->no_ansi == 0)
+ {
+ int *codes = 0;
+ if (c->attr.flags & TTAF_FG_COLOR_SET)
+ {
+ a->fg_color = c->attr.fg_color;
+ a->flags |= TTAF_FG_COLOR_SET;
+ a->flags |= c->attr.flags & TTAF_FG_COLOR_BRIGHT;
+ }
+
+ if (c->attr.flags & TTAF_BG_COLOR_SET)
+ {
+ a->bg_color = c->attr.bg_color;
+ a->flags |= TTAF_BG_COLOR_SET;
+ a->flags |= c->attr.flags & TTAF_BG_COLOR_BRIGHT;
+ }
+
+ if (a->flags & TTAF_RESET)
+ vec_add1 (codes, 0);
+
+ if (a->flags & TTAF_BOLD)
+ vec_add1 (codes, 1);
+
+ if (a->flags & TTAF_DIM)
+ vec_add1 (codes, 2);
+
+ if (a->flags & TTAF_UNDERLINE)
+ vec_add1 (codes, 4);
+
+ if (a->flags & TTAF_FG_COLOR_SET)
+ vec_add1 (codes,
+ (a->flags & TTAF_FG_COLOR_BRIGHT ? 90 : 30) + a->fg_color);
+
+ if (a->flags & TTAF_BG_COLOR_SET)
+ vec_add1 (codes,
+ (a->flags & TTAF_BG_COLOR_BRIGHT ? 100 : 40) + a->bg_color);
+
+ if (codes)
+ {
+ s = format (s, "\x1b[");
+ for (int i = 0; i < vec_len (codes); i++)
+ s = format (s, "%s%u", i ? ";" : "", codes[i]);
+ s = format (s, "m");
+ vec_free (codes);
+ }
+ }
+
+ u8 *fmt = 0;
+ table_text_attr_align_t align = c->attr.align;
+ if (align == TTAA_DEFAULT)
+ align = a->align;
+ if (align == TTAA_LEFT)
+ fmt = format (fmt, "%%-%uv%c", size, 0);
+ else if (align == TTAA_CENTER)
+ fmt = format (fmt, "%%=%uv%c", size, 0);
+ else
+ fmt = format (fmt, "%%%uv%c", size, 0);
+ s = format (s, (char *) fmt, c->text);
+ vec_free (fmt);
+ return format (s, t->no_ansi ? "" : "\x1b[0m");
+}
+
+u8 *
+format_table (u8 *s, va_list *args)
+{
+ table_t *t = va_arg (*args, table_t *);
+ table_cell_t title_cell = { .text = t->title };
+ int table_width = 0;
+ u32 indent = format_get_indent (s);
+ for (int i = 0; i < vec_len (t->row_sizes); i++)
+ table_width += t->row_sizes[i];
+
+ if (t->title)
+ {
+ table_text_attr_t *title_default;
+ title_default =
+ t->default_title.as_u32 ? &t->default_title : &default_title;
+ s = format_text_cell (t, s, &title_cell, title_default, table_width);
+ s = format (s, "\n%U", format_white_space, indent);
+ }
+
+ for (int c = 0; c < vec_len (t->cells); c++)
+ {
+ table_text_attr_t *col_default;
+
+ if (c < t->n_header_cols)
+ col_default = t->default_header_col.as_u32 ? &t->default_header_col :
+ &default_header_col;
+ else
+ col_default =
+ t->default_body.as_u32 ? &t->default_body : &default_body;
+
+ for (int r = 0; r < vec_len (t->cells[c]); r++)
+ {
+ table_text_attr_t *row_default = col_default;
+ if (r)
+ s = format (s, " ");
+ if (r < t->n_header_rows && c >= t->n_header_cols)
+ row_default = t->default_header_row.as_u32 ?
+ &t->default_header_row :
+ &default_header_row;
+ s = format_text_cell (t, s, &t->cells[c][r], row_default,
+ t->row_sizes[r]);
+ }
+ if (c + 1 < vec_len (t->cells))
+ s = format (s, "\n%U", format_white_space, indent);
+ }
+
+ return s;
+}
+
+void
+table_format_title (table_t *t, char *fmt, ...)
+{
+ va_list va;
+
+ va_start (va, fmt);
+ t->title = va_format (t->title, fmt, &va);
+ va_end (va);
+}
+
+static table_cell_t *
+table_get_cell (table_t *t, int c, int r)
+{
+ c += t->n_header_cols;
+ r += t->n_header_rows;
+
+ /* grow table if needed */
+ vec_validate (t->cells, c);
+ for (int i = 0; i < vec_len (t->cells); i++)
+ vec_validate (t->cells[i], r);
+ return &t->cells[c][r];
+}
+
+void
+table_format_cell (table_t *t, int c, int r, char *fmt, ...)
+{
+ table_cell_t *cell = table_get_cell (t, c, r);
+ va_list va;
+
+ c += t->n_header_cols;
+ r += t->n_header_rows;
+
+ va_start (va, fmt);
+ cell->text = va_format (t->cells[c][r].text, fmt, &va);
+ va_end (va);
+
+ vec_validate (t->row_sizes, r);
+ t->row_sizes[r] = clib_max (t->row_sizes[r], vec_len (t->cells[c][r].text));
+}
+
+void
+table_set_cell_align (table_t *t, int c, int r, table_text_attr_align_t a)
+{
+ table_cell_t *cell = table_get_cell (t, c, r);
+ cell->attr.align = a;
+}
+
+void
+table_set_cell_fg_color (table_t *t, int c, int r, table_text_attr_color_t v)
+{
+ table_cell_t *cell = table_get_cell (t, c, r);
+ cell->attr.fg_color = v & 0x7;
+ cell->attr.flags |= TTAF_FG_COLOR_SET;
+ if (v & 8)
+ cell->attr.flags |= TTAF_FG_COLOR_BRIGHT;
+ else
+ cell->attr.flags &= ~TTAF_FG_COLOR_BRIGHT;
+}
+
+void
+table_set_cell_bg_color (table_t *t, int c, int r, table_text_attr_color_t v)
+{
+ table_cell_t *cell = table_get_cell (t, c, r);
+ cell->attr.bg_color = v & 0x7;
+ cell->attr.flags |= TTAF_BG_COLOR_SET;
+ if (v & 8)
+ cell->attr.flags |= TTAF_BG_COLOR_BRIGHT;
+ else
+ cell->attr.flags &= ~TTAF_BG_COLOR_BRIGHT;
+}
+
+void
+table_free (table_t *t)
+{
+ for (int c = 0; c < vec_len (t->cells); c++)
+ {
+ for (int r = 0; r < vec_len (t->cells[c]); r++)
+ vec_free (t->cells[c][r].text);
+ vec_free (t->cells[c]);
+ }
+ vec_free (t->cells);
+ vec_free (t->row_sizes);
+ vec_free (t->title);
+ clib_memset (t, 0, sizeof (table_t));
+}
+
+void
+table_add_header_col (table_t *t, int n_strings, ...)
+{
+ va_list arg;
+ int r, c = t->n_header_cols++;
+ int n_rows;
+
+ vec_insert (t->cells, 1, c);
+ n_rows = clib_max (n_strings, 1);
+ n_rows = clib_max (vec_len (t->row_sizes), n_rows);
+ vec_validate (t->cells[c], n_rows - 1);
+
+ va_start (arg, n_strings);
+ for (r = 0; r < n_rows; r++)
+ {
+ if (n_strings-- > 0)
+ table_format_cell (t, -1, r - t->n_header_rows, "%s",
+ va_arg (arg, char *));
+ }
+ va_end (arg);
+}
+
+void
+table_add_header_row (table_t *t, int n_strings, ...)
+{
+ va_list arg;
+ int c, r = t->n_header_rows++;
+
+ vec_validate (t->cells, n_strings + t->n_header_cols - 1);
+
+ va_start (arg, n_strings);
+ for (c = t->n_header_cols; c < vec_len (t->cells); c++)
+ {
+ vec_insert (t->cells[c + t->n_header_cols], 1, r);
+ if (n_strings-- > 0)
+ table_format_cell (t, c, -1, "%s", va_arg (arg, char *));
+ }
+ va_end (arg);
+}
diff --git a/src/vppinfra/format_table.h b/src/vppinfra/format_table.h
new file mode 100644
index 00000000000..4f4a7724b0e
--- /dev/null
+++ b/src/vppinfra/format_table.h
@@ -0,0 +1,118 @@
+/*
+ Copyright (c) 2020 Damjan Marion
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __format_table_h__
+#define __format_table_h__
+
+typedef enum
+{
+ TTAF_RESET = (1 << 0),
+ TTAF_BOLD = (1 << 1),
+ TTAF_DIM = (1 << 2),
+ TTAF_UNDERLINE = (1 << 3),
+ TTAF_FG_COLOR_SET = (1 << 4),
+ TTAF_BG_COLOR_SET = (1 << 5),
+ TTAF_FG_COLOR_BRIGHT = (1 << 6),
+ TTAF_BG_COLOR_BRIGHT = (1 << 7),
+} table_text_attr_flags_t;
+
+typedef enum
+{
+ TTAC_BLACK = 0,
+ TTAC_RED = 1,
+ TTAC_GREEN = 2,
+ TTAC_YELLOW = 3,
+ TTAC_BLUE = 4,
+ TTAC_MAGENTA = 5,
+ TTAC_CYAN = 6,
+ TTAC_WHITE = 7,
+ TTAC_BRIGHT_BLACK = 8,
+ TTAC_BRIGHT_RED = 9,
+ TTAC_BRIGHT_GREEN = 10,
+ TTAC_BRIGHT_YELLOW = 11,
+ TTAC_BRIGHT_BLUE = 12,
+ TTAC_BRIGHT_MAGENTA = 13,
+ TTAC_BRIGHT_CYAN = 14,
+ TTAC_BRIGHT_WHITE = 15,
+} table_text_attr_color_t;
+
+typedef enum
+{
+ TTAA_DEFAULT = 0,
+ TTAA_LEFT = 1,
+ TTAA_RIGHT = 2,
+ TTAA_CENTER = 3,
+} table_text_attr_align_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ table_text_attr_flags_t flags : 16;
+ table_text_attr_color_t fg_color : 4;
+ table_text_attr_color_t bg_color : 4;
+ table_text_attr_align_t align : 4;
+ };
+ u32 as_u32;
+ };
+} table_text_attr_t;
+
+typedef struct
+{
+ table_text_attr_t attr;
+ u8 *text;
+} table_cell_t;
+
+typedef struct
+{
+ u8 no_ansi : 1;
+ u8 *title;
+ table_cell_t **cells;
+ int *row_sizes;
+ int n_header_cols;
+ int n_header_rows;
+ int n_footer_cols;
+ table_text_attr_t default_title;
+ table_text_attr_t default_body;
+ table_text_attr_t default_header_col;
+ table_text_attr_t default_header_row;
+} table_t;
+
+__clib_export format_function_t format_table;
+
+__clib_export void table_format_title (table_t *t, char *fmt, ...);
+__clib_export void table_format_cell (table_t *t, int c, int r, char *fmt,
+ ...);
+__clib_export void table_set_cell_align (table_t *t, int c, int r,
+ table_text_attr_align_t a);
+__clib_export void table_set_cell_fg_color (table_t *t, int c, int r,
+ table_text_attr_color_t v);
+__clib_export void table_set_cell_bg_color (table_t *t, int c, int r,
+ table_text_attr_color_t v);
+__clib_export void table_free (table_t *t);
+__clib_export void table_add_header_col (table_t *t, int n_strings, ...);
+__clib_export void table_add_header_row (table_t *t, int n_strings, ...);
+
+#endif
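
A sketch that builds a small table with the API above and renders it through format_table; the column names and values are arbitrary:

#include <vppinfra/format.h>
#include <vppinfra/format_table.h>

static u8 *
format_small_report (u8 *s)
{
  table_t t = {};

  table_format_title (&t, "Interfaces");
  table_add_header_row (&t, 2, "name", "rx-packets");
  table_format_cell (&t, 0, 0, "eth0");
  table_format_cell (&t, 1, 0, "%u", 12345);
  table_set_cell_align (&t, 0, 0, TTAA_LEFT);

  s = format (s, "%U", format_table, &t);
  table_free (&t); /* frees all cell text and the table's vectors */
  return s;
}
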
diff --git a/src/vppinfra/freebsd/mem.c b/src/vppinfra/freebsd/mem.c
new file mode 100644
index 00000000000..7d27a0dc169
--- /dev/null
+++ b/src/vppinfra/freebsd/mem.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ * Copyright(c) 2024 Tom Jones <thj@freebsd.org>
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/memrange.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/lock.h>
+#include <vppinfra/time.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/clib_error.h>
+
+#ifndef F_FBSD_SPECIFIC_BASE
+#define F_FBSD_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_FBSD_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_FBSD_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#ifndef MFD_HUGETLB
+#define MFD_HUGETLB 0x0004U
+#endif
+
+#ifndef MAP_HUGE_SHIFT
+#define MAP_HUGE_SHIFT 26
+#endif
+
+#ifndef MFD_HUGE_SHIFT
+#define MFD_HUGE_SHIFT 26
+#endif
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED
+#endif
+
+static void
+map_lock ()
+{
+ while (clib_atomic_test_and_set (&clib_mem_main.map_lock))
+ CLIB_PAUSE ();
+}
+
+static void
+map_unlock ()
+{
+ clib_atomic_release (&clib_mem_main.map_lock);
+}
+
+void
+clib_mem_main_init (void)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ long sysconf_page_size;
+ uword page_size;
+ void *va;
+
+ if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
+ return;
+
+ /* system page size */
+ sysconf_page_size = sysconf (_SC_PAGESIZE);
+ if (sysconf_page_size < 0)
+ {
+ clib_panic ("Could not determine the page size");
+ }
+ page_size = sysconf_page_size;
+ mm->log2_page_sz = min_log2 (page_size);
+
+ mm->log2_default_hugepage_sz = min_log2 (page_size);
+ mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
+
+ /* numa nodes */
+ va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (va == MAP_FAILED)
+ return;
+
+ if (mlock (va, page_size))
+ goto done;
+
+ /*
+   * TODO: In linux/mem.c we can move pages to numa domains; this isn't an
+   * option on FreeBSD yet.
+ */
+
+done:
+ munmap (va, page_size);
+}
+
+__clib_export u64
+clib_mem_get_fd_page_size (int fd)
+{
+ struct stat st = { 0 };
+ if (fstat (fd, &st) == -1)
+ return 0;
+ return st.st_blksize;
+}
+
+__clib_export clib_mem_page_sz_t
+clib_mem_get_fd_log2_page_size (int fd)
+{
+ uword page_size = clib_mem_get_fd_page_size (fd);
+ return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
+}
+
+__clib_export void
+clib_mem_vm_randomize_va (uword *requested_va,
+ clib_mem_page_sz_t log2_page_size)
+{
+ /* TODO: Not yet implemented */
+}
+
+__clib_export int
+clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ int fd;
+ unsigned int memfd_flags;
+ va_list va;
+ u8 *s = 0;
+
+ if (log2_page_size == mm->log2_page_sz)
+ log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
+ else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
+ log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
+
+ switch (log2_page_size)
+ {
+ case CLIB_MEM_PAGE_SZ_UNKNOWN:
+ return CLIB_MEM_ERROR;
+ case CLIB_MEM_PAGE_SZ_DEFAULT:
+ memfd_flags = MFD_ALLOW_SEALING;
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+ memfd_flags = MFD_HUGETLB;
+ break;
+ default:
+ memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
+ }
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ /* memfd_create maximum string size is 249 chars without trailing zero */
+ if (vec_len (s) > 249)
+ vec_set_len (s, 249);
+ vec_add1 (s, 0);
+
+ fd = memfd_create ((char *) s, memfd_flags);
+ if (fd == -1)
+ {
+ vec_reset_length (mm->error);
+ mm->error = clib_error_return_unix (mm->error, "memfd_create");
+ vec_free (s);
+ return CLIB_MEM_ERROR;
+ }
+
+ vec_free (s);
+
+ if ((memfd_flags & MFD_ALLOW_SEALING) &&
+ ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
+ {
+ vec_reset_length (mm->error);
+ mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
+ close (fd);
+ return CLIB_MEM_ERROR;
+ }
+
+ return fd;
+}
+
+uword
+clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword pagesize = 1ULL << log2_page_sz;
+ uword sys_page_sz = 1ULL << mm->log2_page_sz;
+ uword n_bytes;
+ void *base = 0, *p;
+
+ size = round_pow2 (size, pagesize);
+
+  /* in addition to the requested reservation, we also reserve one system
+   * page (typically 4K) adjacent to the start of the reservation */
+
+ if (start)
+ {
+      /* start address is provided, so we just need to make sure we are not
+       * replacing an existing mapping */
+ if (start & pow2_mask (log2_page_sz))
+ return ~0;
+ base = (void *) start - sys_page_sz;
+ base = mmap (base, size + sys_page_sz, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+
+ return (base == MAP_FAILED) ? ~0 : start;
+ }
+
+  /* to make sure that we get a reservation aligned to page_size we need to
+   * request one additional page, as mmap will return an address which is
+   * aligned only to the system page size */
+ base =
+ mmap (0, size + pagesize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (base == MAP_FAILED)
+ return ~0;
+
+ /* return additional space at the end of allocation */
+ p = base + size + pagesize;
+ n_bytes = (uword) p & pow2_mask (log2_page_sz);
+ if (n_bytes)
+ {
+ p -= n_bytes;
+ munmap (p, n_bytes);
+ }
+
+ /* return additional space at the start of allocation */
+ n_bytes = pagesize - sys_page_sz - n_bytes;
+ if (n_bytes)
+ {
+ munmap (base, n_bytes);
+ base += n_bytes;
+ }
+
+ return (uword) base + sys_page_sz;
+}
+
+__clib_export clib_mem_vm_map_hdr_t *
+clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t *hdr)
+{
+ /* TODO: Not yet implemented */
+ return NULL;
+}
+
+void *
+clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
+ uword size, int fd, uword offset, char *name)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ clib_mem_vm_map_hdr_t *hdr;
+ uword sys_page_sz = 1ULL << mm->log2_page_sz;
+ int mmap_flags = MAP_FIXED, is_huge = 0;
+
+ if (fd != -1)
+ {
+ mmap_flags |= MAP_SHARED;
+ log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
+ if (log2_page_sz > mm->log2_page_sz)
+ is_huge = 1;
+ }
+ else
+ {
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ if (log2_page_sz == mm->log2_page_sz)
+ log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+ switch (log2_page_sz)
+ {
+ case CLIB_MEM_PAGE_SZ_UNKNOWN:
+ /* will fail later */
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT:
+ log2_page_sz = mm->log2_page_sz;
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+ /* We shouldn't be selecting HUGETLB on FreeBSD */
+ log2_page_sz = CLIB_MEM_PAGE_SZ_UNKNOWN;
+ break;
+ default:
+ log2_page_sz = mm->log2_page_sz;
+ break;
+ }
+ }
+
+ size = round_pow2 (size, 1ULL << log2_page_sz);
+
+ base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
+
+ if (base == (void *) ~0)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
+
+ if (base == MAP_FAILED)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ if (is_huge && (mlock (base, size) != 0))
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+
+ if (hdr != base - sys_page_sz)
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ map_lock ();
+
+ if (mm->last_map)
+ {
+ mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
+ mm->last_map->next = hdr;
+ mprotect (mm->last_map, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr;
+
+ clib_mem_unpoison (hdr, sys_page_sz);
+ hdr->next = 0;
+ hdr->prev = mm->last_map;
+ snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
+ mm->last_map = hdr;
+
+ hdr->base_addr = (uword) base;
+ hdr->log2_page_sz = log2_page_sz;
+ hdr->num_pages = size >> log2_page_sz;
+ hdr->fd = fd;
+ hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
+ mprotect (hdr, sys_page_sz, PROT_NONE);
+
+ map_unlock ();
+
+ clib_mem_unpoison (base, size);
+ return base;
+}
+
+__clib_export int
+clib_mem_vm_unmap (void *base)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword size, sys_page_sz = 1ULL << mm->log2_page_sz;
+  clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;
+
+ map_lock ();
+ if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
+ goto out;
+
+ size = hdr->num_pages << hdr->log2_page_sz;
+ if (munmap ((void *) hdr->base_addr, size) != 0)
+ goto out;
+
+ if (hdr->next)
+ {
+ mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->next->prev = hdr->prev;
+ mprotect (hdr->next, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->last_map = hdr->prev;
+
+ if (hdr->prev)
+ {
+ mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->prev->next = hdr->next;
+ mprotect (hdr->prev, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr->next;
+
+ map_unlock ();
+
+ if (munmap (hdr, sys_page_sz) != 0)
+ return CLIB_MEM_ERROR;
+
+ return 0;
+out:
+ map_unlock ();
+ return CLIB_MEM_ERROR;
+}
+
+__clib_export void
+clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
+ uword n_pages, clib_mem_page_stats_t *stats)
+{
+ int i, *status = 0;
+ void **ptr = 0;
+
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
+ vec_validate (status, n_pages - 1);
+ vec_validate (ptr, n_pages - 1);
+
+ for (i = 0; i < n_pages; i++)
+ ptr[i] = start + (i << log2_page_size);
+
+ clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
+ stats->total = n_pages;
+ stats->log2_page_sz = log2_page_size;
+
+ /*
+   * TODO: Until FreeBSD has support for tracking pages in NUMA domains, just
+   * report all pages as unknown in the statistics.
+ */
+ stats->unknown = n_pages;
+
+ vec_free (status);
+ vec_free (ptr);
+}
+
+__clib_export u64 *
+clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
+ int n_pages)
+{
+ struct mem_extract meme;
+ int pagesize = sysconf (_SC_PAGESIZE);
+ int fd;
+ int i;
+ u64 *r = 0;
+
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
+ if ((fd = open ((char *) "/dev/mem", O_RDONLY)) == -1)
+ return 0;
+
+ for (i = 0; i < n_pages; i++)
+ {
+ meme.me_vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
+
+ if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1)
+ goto done;
+ vec_add1 (r, meme.me_paddr * pagesize);
+ }
+
+done:
+ close (fd);
+ if (vec_len (r) != n_pages)
+ {
+ vec_free (r);
+ return 0;
+ }
+ return r;
+}
+
+__clib_export int
+clib_mem_set_numa_affinity (u8 numa_node, int force)
+{
+ /* TODO: Not yet implemented */
+ return CLIB_MEM_ERROR;
+}
+
+__clib_export int
+clib_mem_set_default_numa_affinity ()
+{
+ /* TODO: Not yet implemented */
+ return 0;
+}
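
The over-reserve-and-trim trick used by clib_mem_vm_reserve above also works in isolation: map one extra alignment unit, then unmap the misaligned head and the leftover tail. A standalone sketch with plain mmap, independent of the vppinfra types:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

/* reserve 'size' bytes aligned to 'align' (a power of two no smaller
   than the system page size) by over-mapping and trimming the excess */
static void *
reserve_aligned (size_t size, size_t align)
{
  uint8_t *base, *aligned;

  base = mmap (0, size + align, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
	       -1, 0);
  if (base == MAP_FAILED)
    return 0;

  aligned = (uint8_t *) (((uintptr_t) base + align - 1) &
			 ~(uintptr_t) (align - 1));

  if (aligned > base) /* unmap the misaligned head */
    munmap (base, aligned - base);
  if (base + align > aligned) /* unmap the leftover tail */
    munmap (aligned + size, base + align - aligned);

  return aligned;
}
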
diff --git a/src/vppinfra/graph.c b/src/vppinfra/graph.c
deleted file mode 100644
index 4c92f8ef45f..00000000000
--- a/src/vppinfra/graph.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/graph.h>
-
-/* Set link distance, creating link if not found. */
-u32
-graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance)
-{
- graph_node_t *src_node, *dst_node;
- graph_link_t *l;
- u32 old_distance;
-
- /* The following validate will not work if src or dst are on the
- pool free list. */
- if (src < vec_len (g->nodes))
- ASSERT (!pool_is_free_index (g->nodes, src));
- if (dst < vec_len (g->nodes))
- ASSERT (!pool_is_free_index (g->nodes, dst));
-
- /* Make new (empty) nodes to make src and dst valid. */
- pool_validate_index (g->nodes, clib_max (src, dst));
-
- src_node = pool_elt_at_index (g->nodes, src);
- dst_node = pool_elt_at_index (g->nodes, dst);
-
- l = graph_dir_get_link_to_node (&src_node->next, dst);
- if (l)
- {
- old_distance = l->distance;
- l->distance = distance;
-
- l = graph_dir_get_link_to_node (&dst_node->prev, src);
- l->distance = distance;
- }
- else
- {
- uword li_next, li_prev;
-
- old_distance = ~0;
-
- li_next = graph_dir_add_link (&src_node->next, dst, distance);
- li_prev = graph_dir_add_link (&dst_node->prev, src, distance);
-
- l = vec_elt_at_index (src_node->next.links, li_next);
- l->link_to_self_index = li_prev;
-
- l = vec_elt_at_index (dst_node->prev.links, li_prev);
- l->link_to_self_index = li_next;
- }
-
- return old_distance;
-}
-
-void
-graph_del_link (graph_t * g, u32 src, u32 dst)
-{
- graph_node_t *src_node, *dst_node;
-
- src_node = pool_elt_at_index (g->nodes, src);
- dst_node = pool_elt_at_index (g->nodes, dst);
-
- graph_dir_del_link (&src_node->next, dst);
- graph_dir_del_link (&dst_node->next, src);
-}
-
-/* Delete source node and all links from other nodes from/to source. */
-uword
-graph_del_node (graph_t * g, u32 src)
-{
- graph_node_t *src_node, *n;
- uword index;
- graph_link_t *l;
-
- src_node = pool_elt_at_index (g->nodes, src);
-
- vec_foreach (l, src_node->next.links)
- {
- n = pool_elt_at_index (g->nodes, l->node_index);
- graph_dir_del_link (&n->prev, src);
- }
-
- vec_foreach (l, src_node->prev.links)
- {
- n = pool_elt_at_index (g->nodes, l->node_index);
- graph_dir_del_link (&n->next, src);
- }
-
- graph_dir_free (&src_node->next);
- graph_dir_free (&src_node->prev);
-
- index = src_node - g->nodes;
- pool_put (g->nodes, src_node);
- clib_memset (src_node, ~0, sizeof (src_node[0]));
-
- return index;
-}
-
-uword
-unformat_graph (unformat_input_t * input, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- typedef struct
- {
- u32 src, dst, distance;
- } T;
- T *links = 0, *l;
- uword result;
-
- while (1)
- {
- vec_add2 (links, l, 1);
- if (!unformat (input, "%d%d%d", &l->src, &l->dst, &l->distance))
- break;
- }
- _vec_len (links) -= 1;
- result = vec_len (links) > 0;
- vec_foreach (l, links)
- {
- graph_set_link (g, l->src, l->dst, l->distance);
- graph_set_link (g, l->dst, l->src, l->distance);
- }
-
- vec_free (links);
- return result;
-}
-
-u8 *
-format_graph_node (u8 * s, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- u32 node_index = va_arg (*args, u32);
-
- if (g->format_node)
- s = format (s, "%U", g->format_node, g, node_index);
- else
- s = format (s, "%d", node_index);
-
- return s;
-}
-
-u8 *
-format_graph (u8 * s, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- graph_node_t *n;
- graph_link_t *l;
- u32 indent = format_get_indent (s);
-
- s = format (s, "graph %d nodes", pool_elts (g->nodes));
- /* *INDENT-OFF* */
- pool_foreach (n, g->nodes) {
- s = format (s, "\n%U", format_white_space, indent + 2);
- s = format (s, "%U -> ", format_graph_node, g, n - g->nodes);
- vec_foreach (l, n->next.links)
- s = format (s, "%U (%d), ",
- format_graph_node, g, l->node_index,
- l->distance);
- }
- /* *INDENT-ON* */
-
- return s;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/graph.h b/src/vppinfra/graph.h
deleted file mode 100644
index 1c26118f76c..00000000000
--- a/src/vppinfra/graph.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_clib_graph_h
-#define included_clib_graph_h
-
-#include <vppinfra/format.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/pool.h>
-
-/* Generic graphs. */
-typedef struct
-{
- /* Next node along this link. */
- u32 node_index;
-
- /* Other direction link index to reach back to current node. */
- u32 link_to_self_index;
-
- /* Distance to next node. */
- u32 distance;
-} graph_link_t;
-
-/* Direction on graph: either next or previous. */
-typedef struct
-{
- /* Vector of links. */
- graph_link_t *links;
-
- /* Hash mapping node index to link which visits this node. */
- uword *link_index_by_node_index;
-} graph_dir_t;
-
-always_inline void
-graph_dir_free (graph_dir_t * d)
-{
- vec_free (d->links);
- hash_free (d->link_index_by_node_index);
-}
-
-always_inline graph_link_t *
-graph_dir_get_link_to_node (graph_dir_t * d, u32 node_index)
-{
- uword *p = hash_get (d->link_index_by_node_index, node_index);
- return p ? vec_elt_at_index (d->links, p[0]) : 0;
-}
-
-always_inline uword
-graph_dir_add_link (graph_dir_t * d, u32 node_index, u32 distance)
-{
- graph_link_t *l;
- ASSERT (!graph_dir_get_link_to_node (d, node_index));
- vec_add2 (d->links, l, 1);
- l->node_index = node_index;
- l->distance = distance;
- hash_set (d->link_index_by_node_index, node_index, l - d->links);
- return l - d->links;
-}
-
-always_inline void
-graph_dir_del_link (graph_dir_t * d, u32 node_index)
-{
- graph_link_t *l = graph_dir_get_link_to_node (d, node_index);
- uword li = l - d->links;
- uword n_links = vec_len (d->links);
-
- ASSERT (l != 0);
- hash_unset (d->link_index_by_node_index, node_index);
- n_links -= 1;
- if (li < n_links)
- d->links[li] = d->links[n_links];
- _vec_len (d->links) = n_links;
-}
-
-typedef struct
-{
- /* Nodes we are connected to plus distances. */
- graph_dir_t next, prev;
-} graph_node_t;
-
-typedef struct
-{
- /* Pool of nodes. */
- graph_node_t *nodes;
-
- void *opaque;
-
- format_function_t *format_node;
-} graph_t;
-
-/* Set link distance, creating link if not found. */
-u32 graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance);
-
-always_inline void
-graph_set_bidirectional_link (graph_t * g, u32 src, u32 dst, u32 distance)
-{
- graph_set_link (g, src, dst, distance);
- graph_set_link (g, dst, src, distance);
-}
-
-void graph_del_link (graph_t * g, u32 src, u32 dst);
-uword graph_del_node (graph_t * g, u32 src);
-
-unformat_function_t unformat_graph;
-format_function_t format_graph;
-format_function_t format_graph_node;
-
-#endif /* included_clib_graph_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/hash.c b/src/vppinfra/hash.c
index fc6c4518048..0e650e67a90 100644
--- a/src/vppinfra/hash.c
+++ b/src/vppinfra/hash.c
@@ -77,237 +77,53 @@ set_is_user (void *v, uword i, uword is_user)
static u8 *hash_format_pair_default (u8 * s, va_list * args);
-#if uword_bits == 64
-
-static inline u64
-zap64 (u64 x, word n)
-{
-#define _(n) (((u64) 1 << (u64) (8*(n))) - (u64) 1)
- static u64 masks_little_endian[] = {
- 0, _(1), _(2), _(3), _(4), _(5), _(6), _(7),
- };
- static u64 masks_big_endian[] = {
- 0, ~_(7), ~_(6), ~_(5), ~_(4), ~_(3), ~_(2), ~_(1),
- };
-#undef _
- if (clib_arch_is_big_endian)
- return x & masks_big_endian[n];
- else
- return x & masks_little_endian[n];
-}
-
-/**
- * make address-sanitizer skip this:
- * clib_mem_unaligned + zap64 casts its input as u64, computes a mask
- * according to the input length, and returns the casted masked value.
- * Therefore all the 8 Bytes of the u64 are systematically read, which
- * rightfully causes address-sanitizer to raise an error on smaller inputs.
- *
- * However the invalid Bytes are discarded within zap64(), which is why
- * this can be silenced safely.
- *
- * The above is true *unless* the extra bytes cross a page boundary
- * into unmapped or no-access space, hence the boundary crossing check.
- */
-static inline u64
-hash_memory64 (void *p, word n_bytes, u64 state)
+__clib_export uword
+hash_memory (void *p, word n_bytes, uword state)
{
- u64 *q = p;
+ uword last[3] = {};
+ uwordu *q = p;
u64 a, b, c, n;
- int page_boundary_crossing;
- u64 start_addr, end_addr;
- union
- {
- u8 as_u8[8];
- u64 as_u64;
- } tmp;
-
- /*
- * If the request crosses a 4k boundary, it's not OK to assume
- * that the zap64 game is safe. 4k is the minimum known page size.
- */
- start_addr = (u64) p;
- end_addr = start_addr + n_bytes + 7;
- page_boundary_crossing = (start_addr >> 12) != (end_addr >> 12);
-
- a = b = 0x9e3779b97f4a7c13LL;
- c = state;
- n = n_bytes;
-
- while (n >= 3 * sizeof (u64))
- {
- a += clib_mem_unaligned (q + 0, u64);
- b += clib_mem_unaligned (q + 1, u64);
- c += clib_mem_unaligned (q + 2, u64);
- hash_mix64 (a, b, c);
- n -= 3 * sizeof (u64);
- q += 3;
- }
-
- c += n_bytes;
- switch (n / sizeof (u64))
- {
- case 2:
- a += clib_mem_unaligned (q + 0, u64);
- b += clib_mem_unaligned (q + 1, u64);
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- c +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 2, u64), q + 2, sizeof (u64)),
- n % sizeof (u64)) << 8;
- else
- {
- clib_memcpy_fast (tmp.as_u8, q + 2, n % sizeof (u64));
- c += zap64 (tmp.as_u64, n % sizeof (u64)) << 8;
- }
- }
- break;
-
- case 1:
- a += clib_mem_unaligned (q + 0, u64);
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- b +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 1, u64), q + 1, sizeof (u64)),
- n % sizeof (u64));
- else
- {
- clib_memcpy_fast (tmp.as_u8, q + 1, n % sizeof (u64));
- b += zap64 (tmp.as_u64, n % sizeof (u64));
- }
- }
- break;
- case 0:
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- a +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 0, u64), q + 0, sizeof (u64)),
- n % sizeof (u64));
- else
- {
- clib_memcpy_fast (tmp.as_u8, q, n % sizeof (u64));
- a += zap64 (tmp.as_u64, n % sizeof (u64));
- }
- }
- break;
- }
-
- hash_mix64 (a, b, c);
-
- return c;
-}
-
-#else /* if uword_bits == 64 */
-
-static inline u32
-zap32 (u32 x, word n)
-{
-#define _(n) (((u32) 1 << (u32) (8*(n))) - (u32) 1)
- static u32 masks_little_endian[] = {
- 0, _(1), _(2), _(3),
- };
- static u32 masks_big_endian[] = {
- 0, ~_(3), ~_(2), ~_(1),
- };
-#undef _
- if (clib_arch_is_big_endian)
- return x & masks_big_endian[n];
- else
- return x & masks_little_endian[n];
-}
-
-static inline u32
-hash_memory32 (void *p, word n_bytes, u32 state)
-{
- u32 *q = p;
- u32 a, b, c, n;
-
- a = b = 0x9e3779b9;
+ a = b = (uword_bits == 64) ? 0x9e3779b97f4a7c13LL : 0x9e3779b9;
c = state;
n = n_bytes;
- while (n >= 3 * sizeof (u32))
+ while (n >= 3 * sizeof (uword))
{
- a += clib_mem_unaligned (q + 0, u32);
- b += clib_mem_unaligned (q + 1, u32);
- c += clib_mem_unaligned (q + 2, u32);
- hash_mix32 (a, b, c);
- n -= 3 * sizeof (u32);
+ a += q[0];
+ b += q[1];
+ c += q[2];
+ hash_mix (a, b, c);
+ n -= 3 * sizeof (uword);
q += 3;
}
c += n_bytes;
- switch (n / sizeof (u32))
- {
- case 2:
- a += clib_mem_unaligned (q + 0, u32);
- b += clib_mem_unaligned (q + 1, u32);
- if (n % sizeof (u32))
- c += zap32 (clib_mem_unaligned (q + 2, u32), n % sizeof (u32)) << 8;
- break;
-
- case 1:
- a += clib_mem_unaligned (q + 0, u32);
- if (n % sizeof (u32))
- b += zap32 (clib_mem_unaligned (q + 1, u32), n % sizeof (u32));
- break;
- case 0:
- if (n % sizeof (u32))
- a += zap32 (clib_mem_unaligned (q + 0, u32), n % sizeof (u32));
- break;
+ if (n > 0)
+ {
+ clib_memcpy_fast (&last, q, n);
+ a += last[0];
+ b += last[1];
+ c += last[2];
}
- hash_mix32 (a, b, c);
+ hash_mix (a, b, c);
return c;
}
-#endif
-
-__clib_export uword
-hash_memory (void *p, word n_bytes, uword state)
-{
- uword *q = p;
-
-#if uword_bits == 64
- return hash_memory64 (q, n_bytes, state);
-#else
- return hash_memory32 (q, n_bytes, state);
-#endif
-}
-#if uword_bits == 64
always_inline uword
hash_uword (uword x)
{
- u64 a, b, c;
+ uword a, b, c;
- a = b = 0x9e3779b97f4a7c13LL;
+ a = b = (uword_bits == 64) ? 0x9e3779b97f4a7c13LL : 0x9e3779b9;
c = 0;
a += x;
- hash_mix64 (a, b, c);
+ hash_mix (a, b, c);
return c;
}
-#else
-always_inline uword
-hash_uword (uword x)
-{
- u32 a, b, c;
-
- a = b = 0x9e3779b9;
- c = 0;
- a += x;
- hash_mix32 (a, b, c);
- return c;
-}
-#endif
/* Call sum function. Hash code will be sum function value
modulo the prime length of the hash table. */
@@ -469,9 +285,7 @@ set_indirect (void *v, hash_pair_indirect_t * pi, uword key,
new_len = len + 1;
if (new_len * hash_pair_bytes (h) > (1ULL << log2_bytes))
{
- pi->pairs = clib_mem_realloc (pi->pairs,
- 1ULL << (log2_bytes + 1),
- 1ULL << log2_bytes);
+ pi->pairs = clib_mem_realloc (pi->pairs, 1ULL << (log2_bytes + 1));
log2_bytes++;
}
@@ -528,7 +342,7 @@ unset_indirect (void *v, uword i, hash_pair_t * q)
else
zero_pair (h, q);
if (is_vec)
- _vec_len (pi->pairs) -= 1;
+ vec_dec_len (pi->pairs, 1);
else
indirect_pair_set (pi, indirect_pair_get_log2_bytes (pi), len - 1);
}
@@ -734,6 +548,7 @@ _hash_create (uword elts, hash_t * h_user)
hash_t *h;
uword log2_pair_size;
void *v;
+ vec_attr_t va = { .hdr_sz = sizeof (h[0]), .align = sizeof (hash_pair_t) };
/* Size of hash is power of 2 >= ELTS and larger than
number of bits in is_user bitmap elements. */
@@ -744,19 +559,19 @@ _hash_create (uword elts, hash_t * h_user)
if (h_user)
log2_pair_size = h_user->log2_pair_size;
- v = _vec_resize ((void *) 0,
- /* vec len: */ elts,
- /* data bytes: */
- (elts << log2_pair_size) * sizeof (hash_pair_t),
- /* header bytes: */
- sizeof (h[0]) +
- (elts / BITS (h->is_user[0])) * sizeof (h->is_user[0]),
- /* alignment */ sizeof (hash_pair_t));
+ va.elt_sz = (1 << log2_pair_size) * sizeof (hash_pair_t),
+ v = _vec_alloc_internal (elts, &va);
h = hash_header (v);
if (h_user)
- h[0] = h_user[0];
+ {
+ h[0] = h_user[0];
+ h->is_user = 0;
+ }
+ vec_validate_aligned (
+ h->is_user, ((elts / BITS (h->is_user[0])) * sizeof (h->is_user[0])) - 1,
+ CLIB_CACHE_LINE_BYTES);
h->log2_pair_size = log2_pair_size;
h->elts = 0;
@@ -796,6 +611,7 @@ _hash_free (void *v)
clib_mem_free (p->indirect.pairs);
}
+ vec_free (h->is_user);
vec_free_header (h);
return 0;
@@ -812,11 +628,9 @@ hash_resize_internal (void *old, uword new_size, uword free_old)
{
hash_t *h = old ? hash_header (old) : 0;
new = _hash_create (new_size, h);
- /* *INDENT-OFF* */
hash_foreach_pair (p, old, {
new = _hash_set3 (new, p->key, &p->value[0], 0);
});
- /* *INDENT-ON* */
}
if (free_old)
@@ -824,7 +638,7 @@ hash_resize_internal (void *old, uword new_size, uword free_old)
return new;
}
-void *
+__clib_export void *
hash_resize (void *old, uword new_size)
{
return hash_resize_internal (old, new_size, 1);
@@ -999,7 +813,7 @@ hash_bytes (void *v)
if (!v)
return 0;
- bytes = vec_capacity (v, hash_header_bytes (v));
+ bytes = vec_mem_size (v);
for (i = 0; i < hash_capacity (v); i++)
{
@@ -1009,7 +823,7 @@ hash_bytes (void *v)
if (h->log2_pair_size > 0)
bytes += 1 << indirect_pair_get_log2_bytes (&p->indirect);
else
- bytes += vec_capacity (p->indirect.pairs, 0);
+ bytes += vec_mem_size (p->indirect.pairs);
}
}
return bytes;
@@ -1068,11 +882,9 @@ format_hash (u8 *s, va_list *va)
if (verbose)
{
- /* *INDENT-OFF* */
hash_foreach_pair (p, v, {
s = format (s, " %U\n", h->format_pair, h->format_pair_arg, v, p);
});
- /* *INDENT-ON* */
}
return s;
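
Note on the hash.c rework above: the old zap64()/page-boundary machinery existed
only so the final partial word could be loaded with a full 8-byte read and then
masked. The new hash_memory() sidesteps the hazard by memcpy'ing the 0..23
trailing bytes into a zero-filled scratch array. Below is a minimal standalone
sketch of that tail-handling pattern in plain C, with the Bob Jenkins mixer
stubbed out as a placeholder; the real code reads the 24-byte blocks through an
unaligned-tolerant uwordu pointer, so the direct loads here assume aligned input.

#include <stdint.h>
#include <string.h>

/* placeholder mixer; the real code uses hash_mix64/hash_mix32 */
#define MIX(a, b, c) ((a) ^= (b) >> 13, (b) += (c), (c) ^= (a) << 7)

uint64_t
hash_tail_safe (const void *p, size_t n_bytes, uint64_t state)
{
  const uint64_t *q = p;
  uint64_t a = 0x9e3779b97f4a7c13ULL, b = a, c = state;
  size_t n = n_bytes;

  while (n >= 3 * sizeof (uint64_t))
    {
      a += q[0];
      b += q[1];
      c += q[2];
      MIX (a, b, c);
      n -= 3 * sizeof (uint64_t);
      q += 3;
    }

  c += n_bytes;
  if (n > 0)
    {
      /* memcpy never reads past the n valid bytes, so there is no
         page-boundary hazard and nothing for address-sanitizer to flag;
         the zero fill makes the old explicit masking implicit */
      uint64_t last[3] = { 0, 0, 0 };
      memcpy (last, q, n);
      a += last[0];
      b += last[1];
      c += last[2];
    }
  MIX (a, b, c);
  return c;
}
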
diff --git a/src/vppinfra/hash.h b/src/vppinfra/hash.h
index e4a65d21e65..3c754c8e29f 100644
--- a/src/vppinfra/hash.h
+++ b/src/vppinfra/hash.h
@@ -93,24 +93,14 @@ typedef struct hash_header
/* Bit i is set if pair i is a user object (as opposed to being
either zero or an indirect array of pairs). */
- uword is_user[0];
+ uword *is_user;
} hash_t;
-/* Hash header size in bytes */
-always_inline uword
-hash_header_bytes (void *v)
-{
- hash_t *h;
- uword is_user_bytes =
- (sizeof (h->is_user[0]) * vec_len (v)) / BITS (h->is_user[0]);
- return sizeof (h[0]) + is_user_bytes;
-}
-
/* Returns a pointer to the hash header given the vector pointer */
always_inline hash_t *
hash_header (void *v)
{
- return vec_header (v, hash_header_bytes (v));
+ return vec_header (v);
}
/* Number of elements in the hash table */
@@ -133,8 +123,9 @@ always_inline uword
hash_is_user (void *v, uword i)
{
hash_t *h = hash_header (v);
- uword i0 = i / BITS (h->is_user[0]);
- uword i1 = i % BITS (h->is_user[0]);
+ uword bits = BITS (h->is_user[0]);
+ uword i0 = i / bits;
+ uword i1 = i % bits;
return (h->is_user[i0] & ((uword) 1 << i1)) != 0;
}
@@ -278,9 +269,20 @@ uword hash_bytes (void *v);
always_inline void
hash_set_mem_alloc (uword ** h, const void *key, uword v)
{
+ int objsize = __builtin_object_size (key, 0);
size_t ksz = hash_header (*h)->user;
- void *copy = clib_mem_alloc (ksz);
- clib_memcpy_fast (copy, key, ksz);
+ void *copy;
+ if (objsize > 0)
+ {
+ ASSERT (objsize == ksz);
+ copy = clib_mem_alloc (objsize);
+ clib_memcpy_fast (copy, key, objsize);
+ }
+ else
+ {
+ copy = clib_mem_alloc (ksz);
+ clib_memcpy_fast (copy, key, ksz);
+ }
hash_set_mem (*h, copy, v);
}
@@ -526,6 +528,12 @@ do { \
#define hash_mix64_step_3(a,b,c) hash_mix_step(a,b,c,35,49,11)
#define hash_mix64_step_4(a,b,c) hash_mix_step(a,b,c,12,18,22)
+#if uword_bits == 64
+#define hash_mix(a, b, c) hash_mix64 (a, b, c)
+#else
+#define hash_mix(a, b, c) hash_mix32 (a, b, c)
+#endif
+
/* Hash function based on that of Bob Jenkins (bob_jenkins@compuserve.com).
Thanks, Bob. */
#define hash_mix64(a0,b0,c0) \
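
On the hash_set_mem_alloc() change above: __builtin_object_size (key, 0) yields
the allocation size when the compiler can prove it, and (size_t) -1 otherwise,
so the new code can assert that a compile-time-visible key really matches the
table's configured key size and silently fall back to the stored size when it
cannot. A hedged sketch of the same pattern outside vppinfra (copy_key is a
hypothetical name):

#include <assert.h>
#include <stdlib.h>
#include <string.h>

static void *
copy_key (const void *key, size_t ksz)
{
  /* (size_t) -1 when the allocation is not visible to the compiler */
  size_t objsize = __builtin_object_size (key, 0);
  void *copy;

  if (objsize != (size_t) -1)
    assert (objsize == ksz); /* catch mismatched key types early */

  copy = malloc (ksz);
  memcpy (copy, key, ksz);
  return copy;
}
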
diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c
index bc22da1d8f6..9920528732d 100644
--- a/src/vppinfra/heap.c
+++ b/src/vppinfra/heap.c
@@ -139,7 +139,7 @@ elt_delete (heap_header_t * h, heap_elt_t * e)
if (e < l)
vec_add1 (h->free_elts, e - h->elts);
else
- _vec_len (h->elts)--;
+ vec_dec_len (h->elts, 1);
}
/*
@@ -200,7 +200,7 @@ elt_new (heap_header_t * h)
if ((l = vec_len (h->free_elts)) > 0)
{
e = elt_at (h, h->free_elts[l - 1]);
- _vec_len (h->free_elts) -= 1;
+ vec_dec_len (h->free_elts, 1);
}
else
vec_add2 (h->elts, e, 1);
@@ -276,7 +276,7 @@ remove_free_block (void *v, uword b, uword i)
h->free_lists[b][i] = t;
set_free_elt (v, elt_at (h, t), i);
}
- _vec_len (h->free_lists[b]) = l - 1;
+ vec_set_len (h->free_lists[b], l - 1);
}
static heap_elt_t *
@@ -413,6 +413,9 @@ _heap_alloc (void *v,
if (!e)
{
uword max_len;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (h[0]),
+ .align = HEAP_DATA_ALIGN };
offset = vec_len (v);
max_len = heap_get_max_len (v);
@@ -422,12 +425,9 @@ _heap_alloc (void *v,
h = heap_header (v);
if (!v || !(h->flags & HEAP_IS_STATIC))
- v = _vec_resize (v,
- align_size,
- (offset + align_size) * elt_bytes,
- sizeof (h[0]), HEAP_DATA_ALIGN);
+ v = _vec_realloc_internal (v, offset + align_size, &va);
else
- _vec_len (v) += align_size;
+ vec_inc_len (v, align_size);
if (offset == 0)
{
@@ -624,7 +624,7 @@ _heap_free (void *v)
vec_free (h->free_elts);
vec_free (h->small_free_elt_free_index);
if (!(h->flags & HEAP_IS_STATIC))
- vec_free_h (v, sizeof (h[0]));
+ vec_free (v);
return v;
}
@@ -640,10 +640,10 @@ heap_bytes (void *v)
bytes = sizeof (h[0]);
bytes += vec_len (v) * sizeof (h->elt_bytes);
for (b = 0; b < vec_len (h->free_lists); b++)
- bytes += vec_capacity (h->free_lists[b], 0);
+ bytes += vec_mem_size (h->free_lists[b]);
bytes += vec_bytes (h->free_lists);
- bytes += vec_capacity (h->elts, 0);
- bytes += vec_capacity (h->free_elts, 0);
+ bytes += vec_mem_size (h->elts);
+ bytes += vec_mem_size (h->free_elts);
bytes += vec_bytes (h->used_elt_bitmap);
return bytes;
@@ -680,6 +680,7 @@ debug_elt (u8 * s, void *v, word i, word n)
i = -n / 2;
for (e = e0; 1; e = heap_next (e))
{
+ s = format (s, " ");
if (heap_is_free (e))
s = format (s, "index %4d, free\n", e - h->elts);
else if (h->format_elt)
diff --git a/src/vppinfra/heap.h b/src/vppinfra/heap.h
index 22fc335c072..45f3131a45b 100644
--- a/src/vppinfra/heap.h
+++ b/src/vppinfra/heap.h
@@ -160,13 +160,7 @@ typedef struct
always_inline heap_header_t *
heap_header (void *v)
{
- return vec_header (v, sizeof (heap_header_t));
-}
-
-always_inline uword
-heap_header_bytes ()
-{
- return vec_header_bytes (sizeof (heap_header_t));
+ return vec_header (v);
}
always_inline void
@@ -191,6 +185,9 @@ always_inline void *
_heap_dup (void *v_old, uword v_bytes)
{
heap_header_t *h_old, *h_new;
+ vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+ .hdr_sz = sizeof (heap_header_t),
+ .elt_sz = 1 };
void *v_new;
h_old = heap_header (v_old);
@@ -198,10 +195,7 @@ _heap_dup (void *v_old, uword v_bytes)
if (!v_old)
return v_old;
- v_new = 0;
- v_new =
- _vec_resize (v_new, _vec_len (v_old), v_bytes, sizeof (heap_header_t),
- HEAP_DATA_ALIGN);
+ v_new = _vec_alloc_internal (_vec_len (v_old), &va);
h_new = heap_header (v_new);
heap_dup_header (h_old, h_new);
clib_memcpy_fast (v_new, v_old, v_bytes);
@@ -220,9 +214,10 @@ uword heap_bytes (void *v);
always_inline void *
_heap_new (u32 len, u32 n_elt_bytes)
{
- void *v = _vec_resize ((void *) 0, len, (uword) len * n_elt_bytes,
- sizeof (heap_header_t),
- HEAP_DATA_ALIGN);
+ vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+ .hdr_sz = sizeof (heap_header_t),
+ .elt_sz = n_elt_bytes };
+ void *v = _vec_alloc_internal (len, &va);
heap_header (v)->elt_bytes = n_elt_bytes;
return v;
}
@@ -249,27 +244,6 @@ heap_get_max_len (void *v)
return v ? heap_header (v)->max_len : 0;
}
-/* Create fixed size heap with given block of memory. */
-always_inline void *
-heap_create_from_memory (void *memory, uword max_len, uword elt_bytes)
-{
- heap_header_t *h;
- void *v;
-
- if (max_len * elt_bytes < sizeof (h[0]))
- return 0;
-
- h = memory;
- clib_memset (h, 0, sizeof (h[0]));
- h->max_len = max_len;
- h->elt_bytes = elt_bytes;
- h->flags = HEAP_IS_STATIC;
-
- v = (void *) (memory + heap_header_bytes ());
- _vec_len (v) = 0;
- return v;
-}
-
/* Execute BODY for each allocated heap element. */
#define heap_foreach(var,len,heap,body) \
do { \
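
The heap changes above follow the same migration seen in hash.c: the old
positional _vec_resize (len, data_bytes, hdr_bytes, align) calls become a
designated-initializer vec_attr_t plus _vec_alloc_internal or
_vec_realloc_internal, which keeps the argument roles self-describing. The
shape of a call, sketched with a hypothetical header type my_hdr_t:

/* allocate a 64-element vector of u32 with a private header,
   using the attribute-struct API introduced in this series */
vec_attr_t va = { .elt_sz = sizeof (u32),
                  .hdr_sz = sizeof (my_hdr_t), /* hypothetical header */
                  .align = CLIB_CACHE_LINE_BYTES };
u32 *v = _vec_alloc_internal (64, &va);
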
diff --git a/src/vppinfra/interrupt.c b/src/vppinfra/interrupt.c
index 20b7450ceed..c9f0078c5e4 100644
--- a/src/vppinfra/interrupt.c
+++ b/src/vppinfra/interrupt.c
@@ -1,43 +1,33 @@
-
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
*/
#include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
-#include <vppinfra/vec.h>
#include <vppinfra/interrupt.h>
-#include <vppinfra/format.h>
__clib_export void
-clib_interrupt_init (void **data, uword n_int)
+clib_interrupt_init (void **data, u32 n_int)
{
clib_interrupt_header_t *h;
- uword sz = sizeof (clib_interrupt_header_t);
- uword data_size = round_pow2 (n_int, CLIB_CACHE_LINE_BYTES * 8) / 8;
+ const u32 bits_in_cl = 8 << CLIB_LOG2_CACHE_LINE_BYTES;
+ u32 sz = sizeof (clib_interrupt_header_t);
+ u32 n_cl = round_pow2 (n_int, bits_in_cl) / bits_in_cl;
- sz += 2 * data_size;
+ sz += 2 * n_cl * CLIB_CACHE_LINE_BYTES;
h = data[0] = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
clib_memset (data[0], 0, sz);
h->n_int = n_int;
- h->n_uword_alloc = (data_size * 8) >> log2_uword_bits;
+ h->uwords_allocated = n_cl * bits_in_cl / uword_bits;
+ h->uwords_used = round_pow2 (n_int, uword_bits) / uword_bits;
+ h->local = (uword *) (h + 1);
+ h->remote = h->local + h->uwords_allocated;
}
__clib_export void
-clib_interrupt_resize (void **data, uword n_int)
+clib_interrupt_resize (void **data, u32 n_int)
{
clib_interrupt_header_t *h = data[0];
+ u32 new_n_uwords, i;
if (data[0] == 0)
{
@@ -45,48 +35,37 @@ clib_interrupt_resize (void **data, uword n_int)
return;
}
- if (n_int < h->n_int)
+ if (n_int == h->n_int)
+ return;
+
+ new_n_uwords = round_pow2 (n_int, uword_bits) / uword_bits;
+
+ if (new_n_uwords > h->uwords_allocated)
{
- uword *old_bmp, *old_abp, v;
- old_bmp = clib_interrupt_get_bitmap (data[0]);
- old_abp = clib_interrupt_get_atomic_bitmap (data[0]);
- for (uword i = 0; i < h->n_uword_alloc; i++)
- {
- v = old_abp[i];
- old_abp[i] = 0;
- if (n_int > ((i + 1) * uword_bits))
- old_bmp[i] |= v;
- else if (n_int > (i * uword_bits))
- old_bmp[i] = (old_bmp[i] | v) & pow2_mask (n_int - i * uword_bits);
- else
- old_bmp[i] = 0;
- }
+ clib_interrupt_header_t *nh;
+ clib_interrupt_init ((void **) &nh, n_int);
+ for (int i = 0; i < h->uwords_used; i++)
+ nh->local[i] = h->local[i] | h->remote[i];
+ clib_mem_free (data[0]);
+ data[0] = nh;
+ return;
}
- else if (n_int > h->n_uword_alloc * uword_bits)
- {
- void *old = data[0];
- uword *old_bmp, *old_abp, *new_bmp;
- uword n_uwords = round_pow2 (h->n_int, uword_bits) / uword_bits;
- clib_interrupt_init (data, n_int);
- h = data[0];
+ h->n_int = n_int;
+ h->uwords_used = new_n_uwords;
- new_bmp = clib_interrupt_get_bitmap (data[0]);
- old_bmp = clib_interrupt_get_bitmap (old);
- old_abp = clib_interrupt_get_atomic_bitmap (old);
+ for (i = 0; i < new_n_uwords; i++)
+ h->local[i] |= h->remote[i];
- for (uword i = 0; i < n_uwords; i++)
- new_bmp[i] = old_bmp[i] | old_abp[i];
+ for (i = 0; i < h->uwords_allocated; i++)
+ h->remote[i] = 0;
- clib_mem_free (old);
- }
- h->n_int = n_int;
+ for (i = new_n_uwords; i < h->uwords_allocated; i++)
+ h->local[i] = 0;
+
+ n_int &= pow2_mask (log2_uword_bits);
+
+ if (n_int)
+ h->local[n_int >> log2_uword_bits] &= pow2_mask (n_int);
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/interrupt.h b/src/vppinfra/interrupt.h
index 60c01fa0248..b0d7dde272a 100644
--- a/src/vppinfra/interrupt.h
+++ b/src/vppinfra/interrupt.h
@@ -1,34 +1,25 @@
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
*/
#ifndef included_clib_interrupt_h
#define included_clib_interrupt_h
#include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
#include <vppinfra/vec.h>
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- int n_int;
- uword n_uword_alloc;
+ u32 n_int;
+ u32 uwords_allocated;
+ u32 uwords_used;
+ uword *local;
+ uword *remote;
} clib_interrupt_header_t;
-void clib_interrupt_init (void **data, uword n_interrupts);
-void clib_interrupt_resize (void **data, uword n_interrupts);
+void clib_interrupt_init (void **data, u32 n_interrupts);
+void clib_interrupt_resize (void **data, u32 n_interrupts);
static_always_inline void
clib_interrupt_free (void **data)
@@ -49,94 +40,98 @@ clib_interrupt_get_n_int (void *d)
return 0;
}
-static_always_inline uword *
-clib_interrupt_get_bitmap (void *d)
-{
- return d + sizeof (clib_interrupt_header_t);
-}
-
-static_always_inline uword *
-clib_interrupt_get_atomic_bitmap (void *d)
-{
- clib_interrupt_header_t *h = d;
- return clib_interrupt_get_bitmap (d) + h->n_uword_alloc;
-}
-
static_always_inline void
clib_interrupt_set (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- bmp += int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- *bmp |= mask;
+ h->local[off] |= bit;
}
static_always_inline void
clib_interrupt_set_atomic (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_atomic_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- bmp += int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- __atomic_fetch_or (bmp, mask, __ATOMIC_RELAXED);
+ __atomic_fetch_or (h->remote + off, bit, __ATOMIC_RELAXED);
}
static_always_inline void
clib_interrupt_clear (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword *abm = clib_interrupt_get_atomic_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- uword off = int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
+ uword *loc = h->local;
+ uword *rem = h->remote;
+ uword v;
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp[off] &= ~mask;
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v & ~bit;
}
static_always_inline int
-clib_interrupt_get_next (void *in, int last)
+clib_interrupt_get_next_and_clear (void *in, int last)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword *abm = clib_interrupt_get_atomic_bitmap (in);
clib_interrupt_header_t *h = in;
- uword bmp_uword, off;
+ uword bit, v;
+ uword *loc = h->local;
+ uword *rem = h->remote;
+ u32 off, n_uwords = h->uwords_used;
- ASSERT (last >= -1 && last < h->n_int);
+ ASSERT (last >= -1 && last < (int) h->n_int);
off = (last + 1) >> log2_uword_bits;
- last -= off << log2_uword_bits;
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp_uword = bmp[off] & ~pow2_mask (last + 1);
+ if (off >= n_uwords)
+ return -1;
-next:
- if (bmp_uword)
- return (off << log2_uword_bits) + count_trailing_zeros (bmp_uword);
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v;
- off++;
+ v &= ~pow2_mask ((last + 1) & pow2_mask (log2_uword_bits));
- if (off > h->n_int >> log2_uword_bits)
- return -1;
+ while (v == 0)
+ {
+ if (++off == n_uwords)
+ return -1;
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp_uword = bmp[off];
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v;
+ }
- goto next;
+ bit = get_lowest_set_bit (v);
+ loc[off] &= ~bit;
+ return get_lowest_set_bit_index (bit) + (int) (off << log2_uword_bits);
}
-#endif /* included_clib_interrupt_h */
+static_always_inline int
+clib_interrupt_is_any_pending (void *in)
+{
+ clib_interrupt_header_t *h = in;
+ u32 n_uwords = h->uwords_used;
+ uword *loc = h->local;
+ uword *rem = h->remote;
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+ for (u32 i = 0; i < n_uwords; i++)
+ if (loc[i])
+ return 1;
+
+ for (u32 i = 0; i < n_uwords; i++)
+ if (rem[i])
+ return 1;
+
+ return 0;
+}
+
+#endif /* included_clib_interrupt_h */
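
The interrupt rework above replaces the implicit bitmap pair with explicit
local (owner thread, plain stores) and remote (cross-thread, atomic or-in)
words that are merged and drained on read. A hedged usage sketch, assuming a
single consumer thread; handle_interrupt is a hypothetical callback:

extern void handle_interrupt (int int_num); /* hypothetical handler */

void *ints = 0;

clib_interrupt_init (&ints, 64);        /* interrupts 0..63 */

clib_interrupt_set (ints, 3);           /* owner thread: plain store */
clib_interrupt_set_atomic (ints, 17);   /* any other thread: atomic or */

/* drain pending interrupts in ascending order; each call merges the
   remote words into local and clears the bit it returns */
for (int i = clib_interrupt_get_next_and_clear (ints, -1); i != -1;
     i = clib_interrupt_get_next_and_clear (ints, i))
  handle_interrupt (i);

clib_interrupt_free (&ints);
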
diff --git a/src/vppinfra/jsonformat.c b/src/vppinfra/jsonformat.c
new file mode 100644
index 00000000000..73cb94769d8
--- /dev/null
+++ b/src/vppinfra/jsonformat.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/cJSON.h>
+#include <vnet/ethernet/mac_address.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_format_fns.h>
+#include <vpp/api/types.h>
+#include "jsonformat.h"
+
+#define _(T) \
+ int vl_api_##T##_fromjson (cJSON *o, T *d) \
+ { \
+ if (!cJSON_IsNumber (o)) \
+ return -1; \
+ d[0] = (T) cJSON_GetNumberValue (o); \
+ return 0; \
+ }
+foreach_type_fromjson
+#undef _
+
+int
+vl_api_bool_fromjson (cJSON *o, bool *d)
+{
+  if (!cJSON_IsBool (o))
+    return -1;
+ *d = o->valueint ? true : false;
+ return 0;
+}
+
+int vl_api_u8_string_fromjson(cJSON *o, u8 *s, int len)
+{
+ unformat_input_t input;
+ char *p = cJSON_GetStringValue(o);
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "0x%U", unformat_hex_string, s))
+ return -1;
+ return 0;
+}
+
+u8 *
+u8string_fromjson(cJSON *o, char *fieldname)
+{
+ u8 *s = 0;
+ unformat_input_t input;
+ cJSON *item = cJSON_GetObjectItem(o, fieldname);
+ if (!item) {
+ printf("Illegal JSON, no such fieldname %s\n", fieldname);
+ return 0;
+ }
+
+ char *p = cJSON_GetStringValue(item);
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "0x%U", unformat_hex_string, &s))
+ return 0;
+ return s;
+}
+
+int
+u8string_fromjson2(cJSON *o, char *fieldname, u8 *data)
+{
+ u8 *s = u8string_fromjson(o, fieldname);
+ if (!s)
+ return -1;
+ memcpy(data, s, vec_len(s));
+ vec_free(s);
+ return 0;
+}
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ unsigned a[4];
+
+ if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3]))
+ return 0;
+
+ if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256)
+ return 0;
+
+ result[0] = a[0];
+ result[1] = a[1];
+ result[2] = a[2];
+ result[3] = a[3];
+
+ return 1;
+}
+
+/* Parse an IP6 address. */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ for (i = 0; i < n_zero; i++)
+ {
+ ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads));
+ hex_quads[double_colon_index + i] = 0;
+ }
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
+
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 max_zero_run = 0, this_zero_run = 0;
+ int max_zero_run_index = -1, this_zero_run_index = 0;
+ int in_zero_run = 0, i;
+ int last_double_colon = 0;
+
+ /* Ugh, this is a pain. Scan forward looking for runs of 0's */
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (a->as_u16[i] == 0)
+ {
+ if (in_zero_run)
+ this_zero_run++;
+ else
+ {
+ in_zero_run = 1;
+ this_zero_run = 1;
+ this_zero_run_index = i;
+ }
+ }
+ else
+ {
+ if (in_zero_run)
+ {
+ /* offer to compress the biggest run of > 1 zero */
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+ in_zero_run = 0;
+ this_zero_run = 0;
+ }
+ }
+
+ if (in_zero_run)
+ {
+ if (this_zero_run > max_zero_run && this_zero_run > 1)
+ {
+ max_zero_run_index = this_zero_run_index;
+ max_zero_run = this_zero_run;
+ }
+ }
+
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == max_zero_run_index)
+ {
+ s = format (s, "::");
+ i += max_zero_run - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x",
+ (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+int
+vl_api_ip4_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_address_t *a)
+{
+ unformat_input_t input;
+ char *p = cJSON_GetStringValue(o);
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "%U", unformat_ip4_address, a))
+ return -1;
+ return 0;
+}
+
+int
+vl_api_ip4_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a)
+{
+ unformat_input_t input;
+ char *p = cJSON_GetStringValue(o);
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "%U/%d", unformat_ip4_address, &a->address,
+ &a->len))
+ return -1;
+ return 0;
+}
+
+int
+vl_api_ip4_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a)
+{
+ return vl_api_ip4_prefix_t_fromjson(mp, len, o, a);
+}
+int
+vl_api_ip6_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_address_t *a)
+{
+ unformat_input_t input;
+ char *p = cJSON_GetStringValue(o);
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "%U", unformat_ip6_address, a))
+ return -1;
+ return 0;
+}
+
+int
+vl_api_ip6_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a)
+{
+ unformat_input_t input;
+ char *p = cJSON_GetStringValue(o);
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "%U/%d", unformat_ip6_address, &a->address, &a->len))
+ return -1;
+ return 0;
+}
+
+int
+vl_api_ip6_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a)
+{
+ return vl_api_ip6_prefix_t_fromjson(mp, len, o, a);
+}
+
+int
+vl_api_address_t_fromjson (void **mp, int *len, cJSON *o, vl_api_address_t *a)
+{
+ unformat_input_t input;
+
+ char *p = cJSON_GetStringValue(o);
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ if (unformat (&input, "%U", unformat_ip4_address, &a->un.ip4))
+ a->af = ADDRESS_IP4;
+ else if (unformat (&input, "%U", unformat_ip6_address, &a->un.ip6))
+ a->af = ADDRESS_IP6;
+ else
+ return -1;
+ return 0;
+}
+
+int
+vl_api_prefix_t_fromjson (void **mp, int *len, cJSON *o, vl_api_prefix_t *a)
+{
+ unformat_input_t input;
+
+ char *p = cJSON_GetStringValue(o);
+
+ if (!p)
+ return -1;
+ unformat_init_string (&input, p, strlen(p));
+ int plen;
+ if (unformat (&input, "%U/%d", unformat_ip4_address, &a->address.un.ip4, &plen))
+ a->address.af = ADDRESS_IP4;
+ else if (unformat (&input, "%U/%d", unformat_ip6_address, &a->address.un.ip6, &plen))
+ a->address.af = ADDRESS_IP6;
+ else
+ return -1;
+ a->len = plen;
+ return 0;
+}
+
+int
+vl_api_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_prefix_t *a)
+{
+ return vl_api_prefix_t_fromjson(mp, len, o, a);
+}
+
+uword
+unformat_mac_address (unformat_input_t * input, va_list * args)
+{
+ mac_address_t *mac = va_arg (*args, mac_address_t *);
+ u32 i, a[3];
+
+ if (unformat (input, "%_%X:%X:%X:%X:%X:%X%_",
+ 1, &mac->bytes[0], 1, &mac->bytes[1], 1, &mac->bytes[2],
+ 1, &mac->bytes[3], 1, &mac->bytes[4], 1, &mac->bytes[5]))
+ return (1);
+ else if (unformat (input, "%_%x.%x.%x%_", &a[0], &a[1], &a[2]))
+ {
+ for (i = 0; i < ARRAY_LEN (a); i++)
+ if (a[i] >= (1 << 16))
+ return 0;
+
+ mac->bytes[0] = (a[0] >> 8) & 0xff;
+ mac->bytes[1] = (a[0] >> 0) & 0xff;
+ mac->bytes[2] = (a[1] >> 8) & 0xff;
+ mac->bytes[3] = (a[1] >> 0) & 0xff;
+ mac->bytes[4] = (a[2] >> 8) & 0xff;
+ mac->bytes[5] = (a[2] >> 0) & 0xff;
+
+ return (1);
+ }
+ return (0);
+}
+
+int
+vl_api_mac_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_mac_address_t *a)
+{
+ unformat_input_t input;
+
+ char *p = cJSON_GetStringValue(o);
+ unformat_init_string (&input, p, strlen(p));
+ if (!unformat (&input, "%U", unformat_mac_address, a))
+ return -1;
+ return 0;
+}
+
+/* Format an IP4 address. */
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+void
+vl_api_string_cJSON_AddToObject(cJSON * const object, const char * const name, vl_api_string_t *astr)
+{
+
+ if (astr == 0) return;
+ u32 length = clib_net_to_host_u32 (astr->length);
+
+ char *cstr = malloc(length + 1);
+ memcpy(cstr, astr->buf, length);
+ cstr[length] = '\0';
+ cJSON_AddStringToObject(object, name, cstr);
+ free(cstr);
+}
+
+u8 *
+format_vl_api_timestamp_t(u8 * s, va_list * args)
+{
+ f64 timestamp = va_arg (*args, f64);
+ struct tm *tm;
+ word msec;
+
+ time_t t = timestamp;
+ tm = gmtime (&t);
+ msec = 1e6 * (timestamp - t);
+ return format (s, "%4d-%02d-%02dT%02d:%02d:%02d.%06dZ", 1900 + tm->tm_year,
+ 1 + tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min,
+ tm->tm_sec, msec);
+}
+
+u8 *
+format_vl_api_timedelta_t(u8 * s, va_list * args)
+{
+ return format_vl_api_timestamp_t(s, args);
+}
+
+uword
+unformat_vl_api_timedelta_t(unformat_input_t * input, va_list * args)
+{
+ return 0;
+}
+
+uword
+unformat_vl_api_timestamp_t(unformat_input_t * input, va_list * args)
+{
+ return 0;
+}
+uword unformat_vl_api_gbp_scope_t(unformat_input_t * input, va_list * args)
+{
+ return 0;
+}
+
+cJSON *
+vl_api_ip4_address_with_prefix_t_tojson (vl_api_ip4_prefix_t *a) {
+ return vl_api_ip4_prefix_t_tojson (a);
+}
+cJSON *
+vl_api_ip6_address_with_prefix_t_tojson (vl_api_ip6_prefix_t *a) {
+ return vl_api_ip6_prefix_t_tojson (a);
+}
+cJSON *
+vl_api_address_with_prefix_t_tojson (vl_api_prefix_t *a) {
+ return vl_api_prefix_t_tojson (a);
+}
+u8 *
+format_vl_api_mac_address_t (u8 * s, va_list * args)
+{
+ const mac_address_t *mac = va_arg (*args, mac_address_t *);
+
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ mac->bytes[0], mac->bytes[1], mac->bytes[2],
+ mac->bytes[3], mac->bytes[4], mac->bytes[5]);
+}
+#define _(T) \
+ cJSON *vl_api_##T##_t_tojson (vl_api_##T##_t *a) \
+ { \
+ char *s = format_c_string (0, "%U", format_vl_api_##T##_t, a, 0); \
+ cJSON *o = cJSON_CreateString (s); \
+ vec_free (s); \
+ return o; \
+ }
+foreach_type_tojson
+#undef _
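
A small usage sketch for the converters added above; the mp/len parameters are
unused by these string-based converters, so NULL is passed here, and error
handling is elided:

cJSON *o = cJSON_Parse ("\"192.168.1.0/24\"");
vl_api_prefix_t p;

if (vl_api_prefix_t_fromjson (0, 0, o, &p) == 0)
  {
    /* p.address.af == ADDRESS_IP4, p.len == 24 */
    cJSON *back = vl_api_prefix_t_tojson (&p); /* "192.168.1.0/24" */
    cJSON_Delete (back);
  }
cJSON_Delete (o);
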
diff --git a/src/vppinfra/jsonformat.h b/src/vppinfra/jsonformat.h
new file mode 100644
index 00000000000..062e4e188ef
--- /dev/null
+++ b/src/vppinfra/jsonformat.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_json_convert_h
+#define included_json_convert_h
+
+#include <stdbool.h>
+#include <vppinfra/cJSON.h>
+#include <vnet/ethernet/mac_address.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_types.api_types.h>
+#include <vnet/ethernet/ethernet_types.api_types.h>
+
+#define foreach_type_fromjson \
+ _ (i8) \
+ _ (u8) \
+ _ (i16) \
+ _ (u16) \
+ _ (i32) \
+ _ (u32) \
+ _ (u64) \
+ _ (f64)
+
+#define _(T) CJSON_PUBLIC (int) vl_api_##T##_fromjson (cJSON *o, T *d);
+foreach_type_fromjson
+#undef _
+
+/* Prototypes */
+CJSON_PUBLIC (int) vl_api_bool_fromjson (cJSON *o, bool *d);
+CJSON_PUBLIC (int)
+vl_api_ip4_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip4_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip4_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_address_t_fromjson (void **mp, int *len, cJSON *o, vl_api_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_prefix_t_fromjson (void **mp, int *len, cJSON *o, vl_api_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_mac_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_mac_address_t *a);
+
+CJSON_PUBLIC (uword)
+unformat_ip4_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_ip6_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (u8 *) format_ip6_address (u8 *s, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_mac_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (u8 *) format_ip4_address (u8 *s, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_timedelta_t (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_timestamp_t (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_gbp_scope_t (unformat_input_t *input, va_list *args);
+
+CJSON_PUBLIC (void)
+vl_api_string_cJSON_AddToObject (cJSON *const object, const char *const name,
+ vl_api_string_t *astr);
+
+CJSON_PUBLIC (u8 *) u8string_fromjson (cJSON *o, char *fieldname);
+CJSON_PUBLIC (int) u8string_fromjson2 (cJSON *o, char *fieldname, u8 *data);
+CJSON_PUBLIC (int) vl_api_u8_string_fromjson (cJSON *o, u8 *s, int len);
+
+#define foreach_type_tojson \
+ _ (ip4_address) \
+ _ (ip4_prefix) \
+ _ (ip6_address) \
+ _ (ip6_prefix) \
+ _ (address) \
+ _ (prefix) \
+ _ (mac_address)
+
+#define _(T) CJSON_PUBLIC (cJSON *) vl_api_##T##_t_tojson (vl_api_##T##_t *);
+foreach_type_tojson
+#undef _
+
+CJSON_PUBLIC (cJSON *)
+ vl_api_ip4_address_with_prefix_t_tojson (vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (cJSON *)
+vl_api_ip6_address_with_prefix_t_tojson (vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (cJSON *)
+vl_api_address_with_prefix_t_tojson (vl_api_prefix_t *a);
+
+#endif
diff --git a/src/vppinfra/lb_hash_hash.h b/src/vppinfra/lb_hash_hash.h
index fb251591eeb..f355515bce4 100644
--- a/src/vppinfra/lb_hash_hash.h
+++ b/src/vppinfra/lb_hash_hash.h
@@ -24,11 +24,11 @@ static_always_inline u32
lb_hash_hash (u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
{
u64 val = 0;
- val = crc32_u64 (val, k0);
- val = crc32_u64 (val, k1);
- val = crc32_u64 (val, k2);
- val = crc32_u64 (val, k3);
- val = crc32_u64 (val, k4);
+ val = clib_crc32c_u64 (val, k0);
+ val = clib_crc32c_u64 (val, k1);
+ val = clib_crc32c_u64 (val, k2);
+ val = clib_crc32c_u64 (val, k3);
+ val = clib_crc32c_u64 (val, k4);
return (u32) val;
}
@@ -37,8 +37,8 @@ static_always_inline u32
lb_hash_hash_2_tuples (u64 k0, u32 k1)
{
u64 val = 0;
- val = crc32_u64 (val, k0);
- val = crc32_u32 (val, k1);
+ val = clib_crc32c_u64 (val, k0);
+ val = clib_crc32c_u32 (val, k1);
return (u32) val;
}
#else
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
index 036890f9c8d..17b4412e6c9 100644
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -28,9 +28,9 @@
#include <vppinfra/mem.h>
#include <vppinfra/lock.h>
#include <vppinfra/time.h>
+#include <vppinfra/bitmap.h>
#include <vppinfra/format.h>
#include <vppinfra/clib_error.h>
-#include <vppinfra/linux/sysfs.h>
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
@@ -75,40 +75,6 @@ map_unlock ()
clib_atomic_release (&clib_mem_main.map_lock);
}
-__clib_export uword
-clib_mem_get_default_hugepage_size (void)
-{
- unformat_input_t input;
- static u32 size = 0;
- int fd;
-
- if (size)
- goto done;
-
- /*
- * If the kernel doesn't support hugepages, /proc/meminfo won't
- * say anything about it. Use the regular page size as a default.
- */
- size = clib_mem_get_page_size () / 1024;
-
- if ((fd = open ("/proc/meminfo", 0)) == -1)
- return 0;
-
- unformat_init_clib_file (&input, fd);
-
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (&input, "Hugepagesize:%_%u kB", &size))
- ;
- else
- unformat_skip_line (&input);
- }
- unformat_free (&input);
- close (fd);
-done:
- return 1024ULL * size;
-}
-
static clib_mem_page_sz_t
legacy_get_log2_default_hugepage_size (void)
{
@@ -133,18 +99,26 @@ legacy_get_log2_default_hugepage_size (void)
}
void
-clib_mem_main_init ()
+clib_mem_main_init (void)
{
+ unsigned long nodemask = 0, maxnode = CLIB_MAX_NUMAS;
+ unsigned long flags = MPOL_F_MEMS_ALLOWED;
clib_mem_main_t *mm = &clib_mem_main;
+ long sysconf_page_size;
uword page_size;
- void *va;
- int fd;
+ void *va = 0;
+ int fd, mode;
if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
return;
/* system page size */
- page_size = sysconf (_SC_PAGESIZE);
+ sysconf_page_size = sysconf (_SC_PAGESIZE);
+ if (sysconf_page_size < 0)
+ {
+ clib_panic ("Could not determine the page size");
+ }
+ page_size = sysconf_page_size;
mm->log2_page_sz = min_log2 (page_size);
  /* default system hugepage size */
@@ -156,24 +130,11 @@ clib_mem_main_init ()
else /* likely kernel older than 4.14 */
mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size ();
- /* numa nodes */
- va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
- MAP_ANONYMOUS, -1, 0);
- if (va == MAP_FAILED)
- return;
+ mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
- if (mlock (va, page_size))
- goto done;
-
- for (int i = 0; i < CLIB_MAX_NUMAS; i++)
- {
- int status;
- if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
- mm->numa_node_bitmap |= 1ULL << i;
- }
-
-done:
- munmap (va, page_size);
+ /* numa nodes */
+ if (syscall (__NR_get_mempolicy, &mode, &nodemask, maxnode, va, flags) == 0)
+ mm->numa_node_bitmap = nodemask;
}
__clib_export u64
@@ -270,7 +231,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
if (log2_page_size == mm->log2_page_sz)
log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
- else if (log2_page_size == mm->log2_default_hugepage_sz)
+ else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
switch (log2_page_size)
@@ -293,7 +254,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
/* memfd_create maximum string size is 249 chars without trailing zero */
if (vec_len (s) > 249)
- _vec_len (s) = 249;
+ vec_set_len (s, 249);
vec_add1 (s, 0);
/* memfd_create introduced in kernel 3.17, we don't support older kernels */
@@ -487,14 +448,12 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
else
mm->first_map = hdr;
- CLIB_MEM_UNPOISON (hdr, sys_page_sz);
+ clib_mem_unpoison (hdr, sys_page_sz);
hdr->next = 0;
hdr->prev = mm->last_map;
snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
mm->last_map = hdr;
- map_unlock ();
-
hdr->base_addr = (uword) base;
hdr->log2_page_sz = log2_page_sz;
hdr->num_pages = size >> log2_page_sz;
@@ -502,7 +461,9 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
mprotect (hdr, sys_page_sz, PROT_NONE);
- CLIB_MEM_UNPOISON (base, size);
+ map_unlock ();
+
+ clib_mem_unpoison (base, size);
return base;
}
@@ -556,6 +517,7 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
{
int i, *status = 0;
void **ptr = 0;
+ unsigned char incore;
log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
@@ -577,6 +539,19 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
for (i = 0; i < n_pages; i++)
{
+      /* move_pages() returns -ENOENT in status for huge pages on 5.19+ kernel.
+ * Retry with get_mempolicy() to obtain NUMA node info only if the pages
+ * are allocated and in memory, which is checked by mincore(). */
+ if (status[i] == -ENOENT &&
+ syscall (__NR_mincore, ptr[i], 1, &incore) == 0 && (incore & 1) != 0)
+ {
+ if (syscall (__NR_get_mempolicy, &status[i], 0, 0, ptr[i],
+ MPOL_F_NODE | MPOL_F_ADDR) != 0)
+ {
+ /* if get_mempolicy fails, keep the original value in status */
+	      status[i] = -ENOENT;
+ }
+ }
if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS)
{
stats->mapped++;
@@ -640,8 +615,8 @@ __clib_export int
clib_mem_set_numa_affinity (u8 numa_node, int force)
{
clib_mem_main_t *mm = &clib_mem_main;
- long unsigned int mask[16] = { 0 };
- int mask_len = sizeof (mask) * 8 + 1;
+ clib_bitmap_t *bmp = 0;
+ int rv;
/* no numa support */
if (mm->numa_node_bitmap == 0)
@@ -657,19 +632,21 @@ clib_mem_set_numa_affinity (u8 numa_node, int force)
return 0;
}
- mask[0] = 1 << numa_node;
+ bmp = clib_bitmap_set (bmp, numa_node, 1);
- if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask,
- mask_len))
- goto error;
+ rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp,
+ vec_len (bmp) * sizeof (bmp[0]) * 8 + 1);
+ clib_bitmap_free (bmp);
vec_reset_length (mm->error);
- return 0;
-error:
- vec_reset_length (mm->error);
- mm->error = clib_error_return_unix (mm->error, (char *) __func__);
- return CLIB_MEM_ERROR;
+ if (rv)
+ {
+ mm->error = clib_error_return_unix (mm->error, (char *) __func__);
+ return CLIB_MEM_ERROR;
+ }
+
+ return 0;
}
__clib_export int
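
On the clib_mem_main_init() change above: instead of mmap+mlock'ing a probe
page and calling move_pages() once per candidate node, the code now asks the
kernel directly for the set of allowed NUMA nodes. A standalone sketch of the
same query; link with -lnuma for the get_mempolicy wrapper, or issue
syscall (__NR_get_mempolicy, ...) as the diff does:

#include <numaif.h>
#include <stdio.h>

int
main (void)
{
  unsigned long nodemask = 0;
  int mode;

  /* MPOL_F_MEMS_ALLOWED fills nodemask with the nodes this thread
     may allocate from; one syscall, no probe page needed */
  if (get_mempolicy (&mode, &nodemask, 8 * sizeof (nodemask), 0,
                     MPOL_F_MEMS_ALLOWED) == 0)
    printf ("numa node bitmap: 0x%lx\n", nodemask);

  return 0;
}
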
diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c
index 758eaa1a86c..61ee6378c8c 100644
--- a/src/vppinfra/linux/sysfs.c
+++ b/src/vppinfra/linux/sysfs.c
@@ -70,7 +70,7 @@ clib_sysfs_read (char *file_name, char *fmt, ...)
return clib_error_return_unix (0, "read `%s'", file_name);
}
- _vec_len (s) = sz;
+ vec_set_len (s, sz);
unformat_init_vector (&input, s);
va_list va;
@@ -87,32 +87,6 @@ clib_sysfs_read (char *file_name, char *fmt, ...)
return 0;
}
-__clib_export u8 *
-clib_sysfs_link_to_name (char *link)
-{
- char *p, buffer[64];
- unformat_input_t in;
- u8 *s = 0;
- int r;
-
- r = readlink (link, buffer, sizeof (buffer) - 1);
-
- if (r < 0)
- return 0;
-
- buffer[r] = 0;
- p = strrchr (buffer, '/');
-
- if (!p)
- return 0;
-
- unformat_init_string (&in, p + 1, strlen (p + 1));
- if (unformat (&in, "%s", &s) != 1)
- clib_unix_warning ("no string?");
- unformat_free (&in);
-
- return s;
-}
clib_error_t *
clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr)
@@ -154,7 +128,7 @@ clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr)
goto done;
}
- _vec_len (p) -= 1;
+ vec_dec_len (p, 1);
p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0);
clib_sysfs_write ((char *) p, "%d", nr);
@@ -207,7 +181,7 @@ clib_sysfs_get_xxx_hugepages (char *type, int numa_node,
goto done;
}
- _vec_len (p) -= 1;
+ vec_dec_len (p, 1);
p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size,
type, 0);
error = clib_sysfs_read ((char *) p, "%d", val);
@@ -263,13 +237,21 @@ clib_sysfs_prealloc_hugepages (int numa_node, int log2_page_size, int nr)
return clib_sysfs_set_nr_hugepages (numa_node, log2_page_size, n + needed);
}
-__clib_export uword *
-clib_sysfs_list_to_bitmap (char *filename)
+__clib_export clib_bitmap_t *
+clib_sysfs_read_bitmap (char *fmt, ...)
{
FILE *fp;
uword *r = 0;
+ va_list va;
+ u8 *filename;
+
+ va_start (va, fmt);
+ filename = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (filename, 0);
- fp = fopen (filename, "r");
+ fp = fopen ((char *) filename, "r");
+ vec_free (filename);
if (fp != NULL)
{
diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h
index 9cbc34823dd..f2f822d9741 100644
--- a/src/vppinfra/linux/sysfs.h
+++ b/src/vppinfra/linux/sysfs.h
@@ -17,13 +17,12 @@
#define included_linux_sysfs_h
#include <vppinfra/error.h>
+#include <vppinfra/bitmap.h>
clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...);
clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...);
-u8 *clib_sysfs_link_to_name (char *link);
-
clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node,
int log2_page_size, int nr);
clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node,
@@ -35,7 +34,7 @@ clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node,
clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node,
int log2_page_size, int nr);
-uword *clib_sysfs_list_to_bitmap (char *filename);
+uword *clib_sysfs_read_bitmap (char *fmt, ...);
#endif /* included_linux_sysfs_h */
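
The rename from clib_sysfs_list_to_bitmap() to clib_sysfs_read_bitmap() also
turns the filename into a format string, so callers no longer need to
pre-format the path. A usage sketch, assuming a sysfs cpulist such as "0-3,8":

clib_bitmap_t *cpus =
  clib_sysfs_read_bitmap ("/sys/devices/system/node/node%u/cpulist", 0);
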
diff --git a/src/vppinfra/longjmp.S b/src/vppinfra/longjmp.S
index a3435ccb969..c5090877fd7 100644
--- a/src/vppinfra/longjmp.S
+++ b/src/vppinfra/longjmp.S
@@ -816,6 +816,56 @@ cdecl(clib_calljmp):
mov sp, x3
ret
.size cdecl(clib_calljmp), .-cdecl(clib_calljmp)
+#elif defined(__riscv)
+#define foreach_0_to_11 _(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8) _(9) _(10) _(11)
+ .global cdecl(clib_setjmp)
+ .align 1
+ .type cdecl(clib_setjmp), @function
+cdecl(clib_setjmp):
+ sd ra, 0*8(a0)
+ sd sp, 1*8(a0)
+#define _(x) sd s##x, (x + 2)*8(a0);
+ foreach_0_to_11
+#undef _
+#define _(x) fsd fs##x, (x + 14)*8(a0);
+ foreach_0_to_11
+#undef _
+ mv a0,a1
+ ret
+ .size cdecl(clib_setjmp), .-cdecl(clib_setjmp)
+
+ .global cdecl(clib_longjmp)
+ .align 1
+ .type cdecl(clib_longjmp), @function
+cdecl(clib_longjmp):
+ ld ra, 0*8(a0)
+ ld sp, 1*8(a0)
+#define _(x) ld s##x, (x + 2)*8(a0);
+ foreach_0_to_11
+#undef _
+#define _(x) fld fs##x, (x + 14)*8(a0);
+ foreach_0_to_11
+#undef _
+ mv a0,a1
+ ret
+ .size cdecl(clib_longjmp), .-cdecl(clib_longjmp)
+
+ .global cdecl(clib_calljmp)
+ .align 1
+ .type cdecl(clib_calljmp), @function
+cdecl(clib_calljmp):
+ andi a2,a2, -16 /* Make sure stack is 16-byte aligned. */
+ addi a2, a2, -16 /* allocate space on the new stack */
+ sd ra, 8(a2) /* store return address */
+ sd sp, 0(a2) /* store existing stack pointer */
+ mv sp, a2 /* change stack */
+	mv a2, a0	/* function pointer to a2 */
+ mv a0, a1 /* 2nd argument becomes 1st one */
+ jalr a2 /* function call */
+ ld ra, 8(sp) /* restore old return address */
+ ld sp, 0(sp) /* restore old stack pointer */
+ ret
+ .size cdecl(clib_calljmp), .-cdecl(clib_calljmp)
#else
#error "unknown machine"
#endif
diff --git a/src/vppinfra/longjmp.h b/src/vppinfra/longjmp.h
index 67c650a6174..62daaad59bd 100644
--- a/src/vppinfra/longjmp.h
+++ b/src/vppinfra/longjmp.h
@@ -95,6 +95,9 @@
#define CLIB_ARCH_LONGJMP_REGS (22)
#elif defined(_mips) && __mips == 64
#define CLIB_ARCH_LONGJMP_REGS (12)
+#elif defined(__riscv)
+/* ra, sp, s0-s11, fs0-fs11 */
+#define CLIB_ARCH_LONGJMP_REGS (26)
#else
#error "unknown machine"
#endif
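
A quick consistency check on the new RISC-V constant: the save/restore
sequences added to longjmp.S touch ra, sp, s0-s11 and fs0-fs11, i.e.
2 + 12 + 12 = 26 machine words, which is what the header now declares. A
sanity-check sketch:

#include <vppinfra/longjmp.h>

/* ra + sp + 12 callee-saved x-regs + 12 callee-saved f-regs */
_Static_assert (CLIB_ARCH_LONGJMP_REGS == 2 + 12 + 12,
                "RISC-V jump buffer register count");
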
diff --git a/src/vppinfra/macros.c b/src/vppinfra/macros.c
index b8a8e1744aa..b8644b2738e 100644
--- a/src/vppinfra/macros.c
+++ b/src/vppinfra/macros.c
@@ -175,7 +175,7 @@ clib_macro_eval (clib_macro_main_t * mm, i8 * s, i32 complain, u16 level,
/* add results to answer */
vec_append (rv, ts);
/* Remove NULL termination or the results are sad */
- _vec_len (rv) = vec_len (rv) - 1;
+ vec_set_len (rv, vec_len (rv) - 1);
vec_free (ts);
}
else
@@ -183,8 +183,7 @@ clib_macro_eval (clib_macro_main_t * mm, i8 * s, i32 complain, u16 level,
if (complain)
clib_warning ("Undefined Variable Reference: %s\n", varname);
vec_append (rv, format (0, "UNSET "));
- _vec_len (rv) = vec_len (rv) - 1;
-
+ vec_set_len (rv, vec_len (rv) - 1);
}
vec_free (varname);
}
@@ -252,13 +251,11 @@ clib_macro_free (clib_macro_main_t * mm)
hash_free (mm->the_builtin_eval_hash);
- /* *INDENT-OFF* */
hash_foreach_pair (p, mm->the_value_table_hash,
({
vec_add1 (strings_to_free, (u8 *) (p->key));
vec_add1 (strings_to_free, (u8 *) (p->value[0]));
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (strings_to_free); i++)
vec_free (strings_to_free[i]);
@@ -291,14 +288,12 @@ format_clib_macro_main (u8 * s, va_list * args)
name_sort_t *nses = 0, *ns;
int i;
- /* *INDENT-OFF* */
hash_foreach_pair (p, mm->the_value_table_hash,
({
vec_add2 (nses, ns, 1);
ns->name = (u8 *)(p->key);
ns->value = (u8 *)(p->value[0]);
}));
- /* *INDENT-ON* */
if (vec_len (nses) == 0)
return s;
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
index 1cab0ae7252..ab9c5da30ec 100644
--- a/src/vppinfra/mem.h
+++ b/src/vppinfra/mem.h
@@ -47,12 +47,16 @@
#include <vppinfra/os.h>
#include <vppinfra/string.h> /* memcpy, clib_memset */
-#include <vppinfra/sanitizer.h>
+#ifdef CLIB_SANITIZE_ADDR
+#include <sanitizer/asan_interface.h>
+#endif
#define CLIB_MAX_MHEAPS 256
#define CLIB_MAX_NUMAS 16
#define CLIB_MEM_VM_MAP_FAILED ((void *) ~0)
#define CLIB_MEM_ERROR (-1)
+#define CLIB_MEM_LOG2_MIN_ALIGN (3)
+#define CLIB_MEM_MIN_ALIGN (1 << CLIB_MEM_LOG2_MIN_ALIGN)
typedef enum
{
@@ -93,9 +97,10 @@ typedef struct _clib_mem_vm_map_hdr
struct _clib_mem_vm_map_hdr *prev, *next;
} clib_mem_vm_map_hdr_t;
-#define foreach_clib_mem_heap_flag \
- _(0, LOCKED, "locked") \
- _(1, UNMAP_ON_DESTROY, "unmap-on-destroy")
+#define foreach_clib_mem_heap_flag \
+ _ (0, LOCKED, "locked") \
+ _ (1, UNMAP_ON_DESTROY, "unmap-on-destroy") \
+ _ (2, TRACED, "traced")
typedef enum
{
@@ -130,9 +135,12 @@ typedef struct
/* log2 system page size */
clib_mem_page_sz_t log2_page_sz;
- /* log2 system default hugepage size */
+ /* log2 default hugepage size */
clib_mem_page_sz_t log2_default_hugepage_sz;
+ /* log2 system default hugepage size */
+ clib_mem_page_sz_t log2_sys_default_hugepage_sz;
+
/* bitmap of available numa nodes */
u32 numa_node_bitmap;
@@ -157,6 +165,22 @@ extern clib_mem_main_t clib_mem_main;
/* Unspecified NUMA socket */
#define VEC_NUMA_UNSPECIFIED (0xFF)
+static_always_inline void
+clib_mem_poison (const void volatile *p, uword s)
+{
+#ifdef CLIB_SANITIZE_ADDR
+ ASAN_POISON_MEMORY_REGION (p, s);
+#endif
+}
+
+static_always_inline void
+clib_mem_unpoison (const void volatile *p, uword s)
+{
+#ifdef CLIB_SANITIZE_ADDR
+ ASAN_UNPOISON_MEMORY_REGION (p, s);
+#endif
+}
+
always_inline clib_mem_heap_t *
clib_mem_get_per_cpu_heap (void)
{
@@ -210,77 +234,29 @@ clib_mem_set_thread_index (void)
ASSERT (__os_thread_index > 0);
}
-always_inline uword
-clib_mem_size_nocheck (void *p)
-{
- size_t mspace_usable_size_with_delta (const void *p);
- return mspace_usable_size_with_delta (p);
-}
-
-/* Memory allocator which may call os_out_of_memory() if it fails */
-always_inline void *
-clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset,
- int os_out_of_memory_on_failure)
-{
- void *mspace_get_aligned (void *msp, unsigned long n_user_data_bytes,
- unsigned long align, unsigned long align_offset);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
- void *p;
-
- if (align_offset > align)
- {
- if (align > 0)
- align_offset %= align;
- else
- align_offset = align;
- }
-
- p = mspace_get_aligned (h->mspace, size, align, align_offset);
-
- if (PREDICT_FALSE (0 == p))
- {
- if (os_out_of_memory_on_failure)
- os_out_of_memory ();
- return 0;
- }
-
- CLIB_MEM_UNPOISON (p, size);
- return p;
-}
-
-/* Memory allocator which calls os_out_of_memory() when it fails */
-always_inline void *
-clib_mem_alloc (uword size)
-{
- return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
- /* align_offset */ 0,
- /* os_out_of_memory */ 1);
-}
-
-always_inline void *
-clib_mem_alloc_aligned (uword size, uword align)
-{
- return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
- /* os_out_of_memory */ 1);
-}
-
/* Memory allocator which calls os_out_of_memory() when it fails */
-always_inline void *
-clib_mem_alloc_or_null (uword size)
-{
- return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
- /* align_offset */ 0,
- /* os_out_of_memory */ 0);
-}
-
-always_inline void *
-clib_mem_alloc_aligned_or_null (uword size, uword align)
-{
- return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
- /* os_out_of_memory */ 0);
-}
-
-
+void *clib_mem_alloc (uword size);
+void *clib_mem_alloc_aligned (uword size, uword align);
+void *clib_mem_alloc_or_null (uword size);
+void *clib_mem_alloc_aligned_or_null (uword size, uword align);
+void *clib_mem_realloc (void *p, uword new_size);
+void *clib_mem_realloc_aligned (void *p, uword new_size, uword align);
+uword clib_mem_is_heap_object (void *p);
+void clib_mem_free (void *p);
+
+void *clib_mem_heap_alloc (void *heap, uword size);
+void *clib_mem_heap_alloc_aligned (void *heap, uword size, uword align);
+void *clib_mem_heap_alloc_or_null (void *heap, uword size);
+void *clib_mem_heap_alloc_aligned_or_null (void *heap, uword size,
+ uword align);
+void *clib_mem_heap_realloc (void *heap, void *p, uword new_size);
+void *clib_mem_heap_realloc_aligned (void *heap, void *p, uword new_size,
+ uword align);
+uword clib_mem_heap_is_heap_object (void *heap, void *p);
+void clib_mem_heap_free (void *heap, void *p);
+
+uword clib_mem_size (void *p);
+void clib_mem_free_s (void *p);
/* Memory allocator which panics when it fails.
Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */
@@ -299,62 +275,6 @@ clib_mem_alloc_aligned_or_null (uword size, uword align)
/* Alias to stack allocator for naming consistency. */
#define clib_mem_alloc_stack(bytes) __builtin_alloca(bytes)
-always_inline uword
-clib_mem_is_heap_object (void *p)
-{
- int mspace_is_heap_object (void *msp, void *p);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
- return mspace_is_heap_object (h->mspace, p);
-}
-
-always_inline void
-clib_mem_free (void *p)
-{
- void mspace_put (void *msp, void *p_arg);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
-
- /* Make sure object is in the correct heap. */
- ASSERT (clib_mem_is_heap_object (p));
-
- CLIB_MEM_POISON (p, clib_mem_size_nocheck (p));
-
- mspace_put (h->mspace, p);
-}
-
-always_inline void *
-clib_mem_realloc (void *p, uword new_size, uword old_size)
-{
- /* By default use alloc, copy and free to emulate realloc. */
- void *q = clib_mem_alloc (new_size);
- if (q)
- {
- uword copy_size;
- if (old_size < new_size)
- copy_size = old_size;
- else
- copy_size = new_size;
- clib_memcpy_fast (q, p, copy_size);
- clib_mem_free (p);
- }
- return q;
-}
-
-always_inline uword
-clib_mem_size (void *p)
-{
- ASSERT (clib_mem_is_heap_object (p));
- return clib_mem_size_nocheck (p);
-}
-
-always_inline void
-clib_mem_free_s (void *p)
-{
- uword size = clib_mem_size (p);
- CLIB_MEM_UNPOISON (p, size);
- memset_s_inline (p, size, 0, size);
- clib_mem_free (p);
-}
-
always_inline clib_mem_heap_t *
clib_mem_get_heap (void)
{
@@ -379,10 +299,27 @@ void *clib_mem_init_thread_safe (void *memory, uword memory_size);
void clib_mem_exit (void);
+typedef struct
+{
+ /* Address of callers: outer first, inner last. */
+ uword callers[12];
+
+ /* Count of allocations with this traceback. */
+ u32 n_allocations;
+
+ /* Count of bytes allocated with this traceback. */
+ u32 n_bytes;
+
+ /* Offset of this item */
+ uword offset;
+} mheap_trace_t;
+
void clib_mem_trace (int enable);
int clib_mem_is_traced (void);
+mheap_trace_t *clib_mem_trace_dup (clib_mem_heap_t *heap);
+
typedef struct
{
/* Total number of objects allocated. */
@@ -434,7 +371,7 @@ clib_mem_vm_alloc (uword size)
if (mmap_addr == (void *) -1)
mmap_addr = 0;
else
- CLIB_MEM_UNPOISON (mmap_addr, size);
+ clib_mem_unpoison (mmap_addr, size);
return mmap_addr;
}
@@ -470,15 +407,26 @@ clib_mem_get_page_size (void)
return 1ULL << clib_mem_main.log2_page_sz;
}
+static_always_inline void
+clib_mem_set_log2_default_hugepage_size (clib_mem_page_sz_t log2_page_sz)
+{
+ clib_mem_main.log2_default_hugepage_sz = log2_page_sz;
+}
+
static_always_inline clib_mem_page_sz_t
clib_mem_get_log2_default_hugepage_size ()
{
return clib_mem_main.log2_default_hugepage_sz;
}
+static_always_inline uword
+clib_mem_get_default_hugepage_size (void)
+{
+ return 1ULL << clib_mem_main.log2_default_hugepage_sz;
+}
+
int clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...);
uword clib_mem_get_fd_page_size (int fd);
-uword clib_mem_get_default_hugepage_size (void);
clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd);
uword clib_mem_vm_reserve (uword start, uword size,
clib_mem_page_sz_t log2_page_sz);
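
The new clib_mem_poison() / clib_mem_unpoison() wrappers compile to
no-ops unless CLIB_SANITIZE_ADDR is defined, in which case they forward
to the ASan manual-poisoning interface included above. A usage sketch
for a custom allocator; the my_pool_* names and slab layout are
hypothetical:

static void *
my_pool_get (u8 *slab, uword off, uword size) /* hypothetical layout */
{
  void *obj = slab + off;
  clib_mem_unpoison (obj, size); /* object becomes legally accessible */
  return obj;
}

static void
my_pool_put (void *obj, uword size)
{
  /* any later load/store through obj now trips ASan */
  clib_mem_poison (obj, size);
}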
diff --git a/src/vppinfra/mem_bulk.c b/src/vppinfra/mem_bulk.c
index ba8b2e94909..4dd6a168217 100644
--- a/src/vppinfra/mem_bulk.c
+++ b/src/vppinfra/mem_bulk.c
@@ -66,7 +66,7 @@ clib_mem_bulk_init (u32 elt_sz, u32 align, u32 min_elts_per_chunk)
if (min_elts_per_chunk == 0)
min_elts_per_chunk = CLIB_MEM_BULK_DEFAULT_MIN_ELTS_PER_CHUNK;
- CLIB_MEM_UNPOISON (b, sizeof (clib_mem_bulk_t));
+ clib_mem_unpoison (b, sizeof (clib_mem_bulk_t));
clib_memset (b, 0, sizeof (clib_mem_bulk_t));
b->mspace = heap->mspace;
b->align = align;
@@ -92,7 +92,7 @@ again:
while (c)
{
next = c->next;
- CLIB_MEM_POISON (c, bulk_chunk_size (b));
+ clib_mem_poison (c, bulk_chunk_size (b));
mspace_free (ms, c);
c = next;
}
@@ -104,7 +104,7 @@ again:
goto again;
}
- CLIB_MEM_POISON (b, sizeof (clib_mem_bulk_t));
+ clib_mem_poison (b, sizeof (clib_mem_bulk_t));
mspace_free (ms, b);
}
@@ -148,7 +148,7 @@ clib_mem_bulk_alloc (clib_mem_bulk_handle_t h)
{
u32 i, sz = bulk_chunk_size (b);
c = mspace_memalign (b->mspace, b->chunk_align, sz);
- CLIB_MEM_UNPOISON (c, sz);
+ clib_mem_unpoison (c, sz);
clib_memset (c, 0, sizeof (clib_mem_bulk_chunk_hdr_t));
b->avail_chunks = c;
c->n_free = b->elts_per_chunk;
@@ -192,7 +192,7 @@ clib_mem_bulk_free (clib_mem_bulk_handle_t h, void *p)
{
/* chunk is empty - give it back */
remove_from_chunk_list (&b->avail_chunks, c);
- CLIB_MEM_POISON (c, bulk_chunk_size (b));
+ clib_mem_poison (c, bulk_chunk_size (b));
mspace_free (b->mspace, c);
return;
}
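
For reference, the bulk allocator touched above hands out fixed-size
elements from chunked backing store, poisoning whole chunks whenever
they go back to the mspace. A usage sketch, assuming the
clib_mem_bulk_destroy() counterpart implied by the "again:" hunk and a
hypothetical element type my_elt_t:

clib_mem_bulk_handle_t h;
void *e1, *e2;

h = clib_mem_bulk_init (sizeof (my_elt_t), CLIB_CACHE_LINE_BYTES,
                        0 /* default elts per chunk */);
e1 = clib_mem_bulk_alloc (h);
e2 = clib_mem_bulk_alloc (h);
clib_mem_bulk_free (h, e1);
clib_mem_bulk_free (h, e2);
clib_mem_bulk_destroy (h);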
diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c
index e2a0f71e084..d5ff21e58c0 100644
--- a/src/vppinfra/mem_dlmalloc.c
+++ b/src/vppinfra/mem_dlmalloc.c
@@ -19,27 +19,11 @@
#include <vppinfra/lock.h>
#include <vppinfra/hash.h>
#include <vppinfra/elf_clib.h>
-#include <vppinfra/sanitizer.h>
-
-typedef struct
-{
- /* Address of callers: outer first, inner last. */
- uword callers[12];
-
- /* Count of allocations with this traceback. */
- u32 n_allocations;
-
- /* Count of bytes allocated with this traceback. */
- u32 n_bytes;
-
- /* Offset of this item */
- uword offset;
-} mheap_trace_t;
+#include <vppinfra/stack.h>
typedef struct
{
clib_spinlock_t lock;
- uword enabled;
mheap_trace_t *traces;
@@ -53,36 +37,49 @@ typedef struct
uword *trace_index_by_offset;
/* So we can easily shut off current segment trace, if any */
- void *current_traced_mheap;
+ const clib_mem_heap_t *current_traced_mheap;
} mheap_trace_main_t;
mheap_trace_main_t mheap_trace_main;
-void
-mheap_get_trace (uword offset, uword size)
+static __thread int mheap_trace_thread_disable;
+
+static void
+mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset,
+ uword size)
{
mheap_trace_main_t *tm = &mheap_trace_main;
mheap_trace_t *t;
- uword i, n_callers, trace_index, *p;
- mheap_trace_t trace;
- uword save_enabled;
+ uword i, trace_index, *p;
+ mheap_trace_t trace = {};
+ int index;
- if (tm->enabled == 0 || (clib_mem_get_heap () != tm->current_traced_mheap))
+ if (heap != tm->current_traced_mheap || mheap_trace_thread_disable)
return;
- /* Spurious Coverity warnings be gone. */
- clib_memset (&trace, 0, sizeof (trace));
-
clib_spinlock_lock (&tm->lock);
- /* Turn off tracing to avoid embarrassment... */
- save_enabled = tm->enabled;
- tm->enabled = 0;
+ /* heap could have changed while we were waiting on the lock */
+ if (heap != tm->current_traced_mheap)
+ goto out;
+
+ /* Turn off tracing for this thread to avoid embarrassment... */
+ mheap_trace_thread_disable = 1;
+
+ index = -2; /* skip first 2 stack frames */
+ foreach_clib_stack_frame (sf)
+ {
+ if (index >= 0)
+ {
+ if (index == ARRAY_LEN (trace.callers))
+ break;
+ trace.callers[index] = sf->ip;
+ }
+ index++;
+ }
- /* Skip our frame and mspace_get_aligned's frame */
- n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers), 2);
- if (n_callers == 0)
+ if (index < 1)
goto out;
if (!tm->trace_by_callers)
@@ -101,7 +98,7 @@ mheap_get_trace (uword offset, uword size)
if (i > 0)
{
trace_index = tm->trace_free_list[i - 1];
- _vec_len (tm->trace_free_list) = i - 1;
+ vec_set_len (tm->trace_free_list, i - 1);
}
else
{
@@ -114,14 +111,12 @@ mheap_get_trace (uword offset, uword size)
{
hash_pair_t *p;
mheap_trace_t *q;
- /* *INDENT-OFF* */
hash_foreach_pair (p, tm->trace_by_callers,
({
q = uword_to_pointer (p->key, mheap_trace_t *);
ASSERT (q >= old_start && q < old_end);
p->key = pointer_to_uword (tm->traces + (q - old_start));
}));
- /* *INDENT-ON* */
}
trace_index = t - tm->traces;
}
@@ -139,34 +134,33 @@ mheap_get_trace (uword offset, uword size)
hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
out:
- tm->enabled = save_enabled;
+ mheap_trace_thread_disable = 0;
clib_spinlock_unlock (&tm->lock);
}
-void
-mheap_put_trace (uword offset, uword size)
+static void
+mheap_put_trace_internal (const clib_mem_heap_t *heap, uword offset,
+ uword size)
{
mheap_trace_t *t;
uword trace_index, *p;
mheap_trace_main_t *tm = &mheap_trace_main;
- uword save_enabled;
- if (tm->enabled == 0)
+ if (heap != tm->current_traced_mheap || mheap_trace_thread_disable)
return;
clib_spinlock_lock (&tm->lock);
- /* Turn off tracing for a moment */
- save_enabled = tm->enabled;
- tm->enabled = 0;
+ /* heap could have changed while we were waiting on the lock */
+ if (heap != tm->current_traced_mheap)
+ goto out;
+
+ /* Turn off tracing for this thread for a moment */
+ mheap_trace_thread_disable = 1;
p = hash_get (tm->trace_index_by_offset, offset);
if (!p)
- {
- tm->enabled = save_enabled;
- clib_spinlock_unlock (&tm->lock);
- return;
- }
+ goto out;
trace_index = p[0];
hash_unset (tm->trace_index_by_offset, offset);
@@ -183,17 +177,34 @@ mheap_put_trace (uword offset, uword size)
vec_add1 (tm->trace_free_list, trace_index);
clib_memset (t, 0, sizeof (t[0]));
}
- tm->enabled = save_enabled;
+
+out:
+ mheap_trace_thread_disable = 0;
clib_spinlock_unlock (&tm->lock);
}
+void
+mheap_get_trace (uword offset, uword size)
+{
+ mheap_get_trace_internal (clib_mem_get_heap (), offset, size);
+}
+
+void
+mheap_put_trace (uword offset, uword size)
+{
+ mheap_put_trace_internal (clib_mem_get_heap (), offset, size);
+}
+
always_inline void
mheap_trace_main_free (mheap_trace_main_t * tm)
{
+ CLIB_SPINLOCK_ASSERT_LOCKED (&tm->lock);
+ tm->current_traced_mheap = 0;
vec_free (tm->traces);
vec_free (tm->trace_free_list);
hash_free (tm->trace_by_callers);
hash_free (tm->trace_index_by_offset);
+ mheap_trace_thread_disable = 0;
}
static clib_mem_heap_t *
@@ -235,7 +246,7 @@ clib_mem_create_heap_internal (void *base, uword size,
mspace_disable_expand (h->mspace);
- CLIB_MEM_POISON (mspace_least_addr (h->mspace),
+ clib_mem_poison (mspace_least_addr (h->mspace),
mspace_footprint (h->mspace));
return h;
@@ -257,7 +268,14 @@ clib_mem_init_internal (void *base, uword size,
clib_mem_set_heap (h);
if (mheap_trace_main.lock == 0)
- clib_spinlock_init (&mheap_trace_main.lock);
+ {
+ /* clib_spinlock_init() dynamically allocates the spinlock in the
+ * current per-cpu heap, but this lock protects traces across all
+ * heaps, so it cannot live in a per-cpu heap that may be destroyed
+ * later */
+ static struct clib_spinlock_s mheap_trace_main_lock = {};
+ mheap_trace_main.lock = &mheap_trace_main_lock;
+ }
return h;
}
@@ -288,13 +306,12 @@ clib_mem_destroy (void)
{
mheap_trace_main_t *tm = &mheap_trace_main;
clib_mem_heap_t *heap = clib_mem_get_heap ();
- void *base = mspace_least_addr (heap->mspace);
- if (tm->enabled && heap->mspace == tm->current_traced_mheap)
- tm->enabled = 0;
+ if (heap->mspace == tm->current_traced_mheap)
+ mheap_trace (heap, 0);
destroy_mspace (heap->mspace);
- clib_mem_vm_unmap (base);
+ clib_mem_vm_unmap (heap);
}
__clib_export u8 *
@@ -357,6 +374,7 @@ format_mheap_trace (u8 * s, va_list * va)
int verbose = va_arg (*va, int);
int have_traces = 0;
int i;
+ int n = 0;
clib_spinlock_lock (&tm->lock);
if (vec_len (tm->traces) > 0 &&
@@ -383,9 +401,10 @@ format_mheap_trace (u8 * s, va_list * va)
total_objects_traced += t->n_allocations;
- /* When not verbose only report allocations of more than 1k. */
- if (!verbose && t->n_bytes < 1024)
+ /* When not verbose, only report the 50 biggest allocations */
+ if (!verbose && n >= 50)
continue;
+ n++;
if (t == traces_copy)
s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
@@ -464,13 +483,13 @@ format_clib_mem_heap (u8 * s, va_list * va)
format_white_space, indent + 2, format_msize, mi.usmblks);
}
- if (mspace_is_traced (heap->mspace))
+ if (heap->flags & CLIB_MEM_HEAP_F_TRACED)
s = format (s, "\n%U", format_mheap_trace, tm, verbose);
return s;
}
-__clib_export void
-clib_mem_get_heap_usage (clib_mem_heap_t * heap, clib_mem_usage_t * usage)
+__clib_export __clib_flatten void
+clib_mem_get_heap_usage (clib_mem_heap_t *heap, clib_mem_usage_t *usage)
{
struct dlmallinfo mi = mspace_mallinfo (heap->mspace);
@@ -493,43 +512,68 @@ uword clib_mem_validate_serial = 0;
__clib_export void
mheap_trace (clib_mem_heap_t * h, int enable)
{
- (void) mspace_enable_disable_trace (h->mspace, enable);
+ mheap_trace_main_t *tm = &mheap_trace_main;
- if (enable == 0)
- mheap_trace_main_free (&mheap_trace_main);
+ clib_spinlock_lock (&tm->lock);
+
+ if (tm->current_traced_mheap != 0 && tm->current_traced_mheap != h)
+ {
+ clib_warning ("tracing already enabled for another heap, ignoring");
+ goto out;
+ }
+
+ if (enable)
+ {
+ h->flags |= CLIB_MEM_HEAP_F_TRACED;
+ tm->current_traced_mheap = h;
+ }
+ else
+ {
+ h->flags &= ~CLIB_MEM_HEAP_F_TRACED;
+ mheap_trace_main_free (&mheap_trace_main);
+ }
+
+out:
+ clib_spinlock_unlock (&tm->lock);
}
__clib_export void
clib_mem_trace (int enable)
{
- mheap_trace_main_t *tm = &mheap_trace_main;
void *current_heap = clib_mem_get_heap ();
-
- tm->enabled = enable;
mheap_trace (current_heap, enable);
-
- if (enable)
- tm->current_traced_mheap = current_heap;
- else
- tm->current_traced_mheap = 0;
}
int
clib_mem_is_traced (void)
{
clib_mem_heap_t *h = clib_mem_get_heap ();
- return mspace_is_traced (h->mspace);
+ return (h->flags & CLIB_MEM_HEAP_F_TRACED) != 0;
}
__clib_export uword
clib_mem_trace_enable_disable (uword enable)
{
- uword rv;
+ uword rv = !mheap_trace_thread_disable;
+ mheap_trace_thread_disable = !enable;
+ return rv;
+}
+
+__clib_export mheap_trace_t *
+clib_mem_trace_dup (clib_mem_heap_t *heap)
+{
mheap_trace_main_t *tm = &mheap_trace_main;
+ mheap_trace_t *traces_copy = 0;
- rv = tm->enabled;
- tm->enabled = enable;
- return rv;
+ clib_spinlock_lock (&tm->lock);
+ if (vec_len (tm->traces) > 0 && heap == tm->current_traced_mheap)
+ {
+ traces_copy = vec_dup (tm->traces);
+ qsort (traces_copy, vec_len (traces_copy), sizeof (traces_copy[0]),
+ mheap_trace_sort);
+ }
+ clib_spinlock_unlock (&tm->lock);
+ return traces_copy;
}
__clib_export clib_mem_heap_t *
@@ -567,37 +611,224 @@ clib_mem_destroy_heap (clib_mem_heap_t * h)
{
mheap_trace_main_t *tm = &mheap_trace_main;
- if (tm->enabled && h->mspace == tm->current_traced_mheap)
- tm->enabled = 0;
+ if (h->mspace == tm->current_traced_mheap)
+ mheap_trace (h, 0);
destroy_mspace (h->mspace);
if (h->flags & CLIB_MEM_HEAP_F_UNMAP_ON_DESTROY)
clib_mem_vm_unmap (h->base);
}
-__clib_export uword
-clib_mem_get_heap_free_space (clib_mem_heap_t * h)
+__clib_export __clib_flatten uword
+clib_mem_get_heap_free_space (clib_mem_heap_t *h)
{
struct dlmallinfo dlminfo = mspace_mallinfo (h->mspace);
return dlminfo.fordblks;
}
-__clib_export void *
-clib_mem_get_heap_base (clib_mem_heap_t * h)
+__clib_export __clib_flatten void *
+clib_mem_get_heap_base (clib_mem_heap_t *h)
{
return h->base;
}
-__clib_export uword
-clib_mem_get_heap_size (clib_mem_heap_t * heap)
+__clib_export __clib_flatten uword
+clib_mem_get_heap_size (clib_mem_heap_t *heap)
{
return heap->size;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+/* Memory allocator which may call os_out_of_memory() if it fails */
+static inline void *
+clib_mem_heap_alloc_inline (void *heap, uword size, uword align,
+ int os_out_of_memory_on_failure)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ void *p;
+
+ align = clib_max (CLIB_MEM_MIN_ALIGN, align);
+
+ p = mspace_memalign (h->mspace, align, size);
+
+ if (PREDICT_FALSE (0 == p))
+ {
+ if (os_out_of_memory_on_failure)
+ os_out_of_memory ();
+ return 0;
+ }
+
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ mheap_get_trace_internal (h, pointer_to_uword (p), clib_mem_size (p));
+
+ clib_mem_unpoison (p, size);
+ return p;
+}
+
+/* Memory allocator which calls os_out_of_memory() when it fails */
+__clib_export __clib_flatten void *
+clib_mem_alloc (uword size)
+{
+ return clib_mem_heap_alloc_inline (0, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_alloc_aligned (uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (0, size, align,
+ /* os_out_of_memory */ 1);
+}
+
+/* Memory allocator which returns null when it fails */
+__clib_export __clib_flatten void *
+clib_mem_alloc_or_null (uword size)
+{
+ return clib_mem_heap_alloc_inline (0, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_alloc_aligned_or_null (uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (0, size, align,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc (void *heap, uword size)
+{
+ return clib_mem_heap_alloc_inline (heap, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_aligned (void *heap, uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (heap, size, align,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_or_null (void *heap, uword size)
+{
+ return clib_mem_heap_alloc_inline (heap, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_aligned_or_null (void *heap, uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (heap, size, align,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_realloc_aligned (void *heap, void *p, uword new_size,
+ uword align)
+{
+ uword old_alloc_size;
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ void *new;
+
+ ASSERT (count_set_bits (align) == 1);
+
+ old_alloc_size = p ? mspace_usable_size (p) : 0;
+
+ if (new_size == old_alloc_size)
+ return p;
+
+ if (p && pointer_is_aligned (p, align) &&
+ mspace_realloc_in_place (h->mspace, p, new_size))
+ {
+ clib_mem_unpoison (p, new_size);
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ {
+ mheap_put_trace_internal (h, pointer_to_uword (p), old_alloc_size);
+ mheap_get_trace_internal (h, pointer_to_uword (p),
+ clib_mem_size (p));
+ }
+ }
+ else
+ {
+ new = clib_mem_heap_alloc_inline (h, new_size, align, 1);
+
+ clib_mem_unpoison (new, new_size);
+ if (old_alloc_size)
+ {
+ clib_mem_unpoison (p, old_alloc_size);
+ clib_memcpy_fast (new, p, clib_min (new_size, old_alloc_size));
+ clib_mem_heap_free (h, p);
+ }
+ p = new;
+ }
+
+ return p;
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_realloc (void *heap, void *p, uword new_size)
+{
+ return clib_mem_heap_realloc_aligned (heap, p, new_size, CLIB_MEM_MIN_ALIGN);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_realloc_aligned (void *p, uword new_size, uword align)
+{
+ return clib_mem_heap_realloc_aligned (0, p, new_size, align);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_realloc (void *p, uword new_size)
+{
+ return clib_mem_heap_realloc_aligned (0, p, new_size, CLIB_MEM_MIN_ALIGN);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_heap_is_heap_object (void *heap, void *p)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ return mspace_is_heap_object (h->mspace, p);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_is_heap_object (void *p)
+{
+ return clib_mem_heap_is_heap_object (0, p);
+}
+
+__clib_export __clib_flatten void
+clib_mem_heap_free (void *heap, void *p)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ uword size = clib_mem_size (p);
+
+ /* Make sure object is in the correct heap. */
+ ASSERT (clib_mem_heap_is_heap_object (h, p));
+
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ mheap_put_trace_internal (h, pointer_to_uword (p), size);
+ clib_mem_poison (p, clib_mem_size (p));
+
+ mspace_free (h->mspace, p);
+}
+
+__clib_export __clib_flatten void
+clib_mem_free (void *p)
+{
+ clib_mem_heap_free (0, p);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_size (void *p)
+{
+ return mspace_usable_size (p);
+}
+
+__clib_export void
+clib_mem_free_s (void *p)
+{
+ uword size = clib_mem_size (p);
+ clib_mem_unpoison (p, size);
+ memset_s_inline (p, size, 0, size);
+ clib_mem_free (p);
+}
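
Taken together, the tracing rework keys everything off the new
CLIB_MEM_HEAP_F_TRACED flag and the single current_traced_mheap, and
adds clib_mem_trace_dup() for callers that want a sorted snapshot
rather than formatted output. A sketch of the intended flow; the field
names come from the mheap_trace_t definition moved into mem.h above:

clib_mem_trace (1); /* trace allocations on the current heap */
/* ... run the workload under test ... */
mheap_trace_t *t = clib_mem_trace_dup (clib_mem_get_heap ());
for (int i = 0; i < vec_len (t); i++)
  fformat (stdout, "%u bytes in %u allocations\n", t[i].n_bytes,
           t[i].n_allocations);
vec_free (t);
clib_mem_trace (0); /* also frees the trace state */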
diff --git a/src/vppinfra/memcpy.h b/src/vppinfra/memcpy.h
index f3adc78d53d..e895cbf7485 100644
--- a/src/vppinfra/memcpy.h
+++ b/src/vppinfra/memcpy.h
@@ -6,6 +6,49 @@
#ifndef included_memcpy_h
#define included_memcpy_h
+static_always_inline void
+clib_memcpy_may_overrun (void *dst, void *src, u32 n_bytes)
+{
+ word n_left = n_bytes;
+#if defined(CLIB_HAVE_VEC512)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64u *dv = (u8x64u *) dst;
+#elif defined(CLIB_HAVE_VEC256)
+ u8x32u *sv = (u8x32u *) src;
+ u8x32u *dv = (u8x32u *) dst;
+#elif defined(CLIB_HAVE_VEC128)
+ u8x16u *sv = (u8x16u *) src;
+ u8x16u *dv = (u8x16u *) dst;
+#else
+ u64u *sv = (u64u *) src;
+ u64u *dv = (u64u *) dst;
+#endif
+
+ while (n_left >= 4 * sizeof (sv[0]))
+ {
+ __typeof__ (*sv) v0, v1, v2, v3;
+ v0 = sv[0];
+ v1 = sv[1];
+ v2 = sv[2];
+ v3 = sv[3];
+ sv += 4;
+ n_left -= 4 * sizeof (sv[0]);
+ dv[0] = v0;
+ dv[1] = v1;
+ dv[2] = v2;
+ dv[3] = v3;
+ dv += 4;
+ }
+
+ while (n_left > 0)
+ {
+ dv[0] = sv[0];
+ sv += 1;
+ dv += 1;
+ n_left -= sizeof (sv[0]);
+ }
+}
+
#ifndef __COVERITY__
static_always_inline void
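
The helper's name is the contract: clib_memcpy_may_overrun() rounds the
copy up to whole vector registers, so both buffers must stay readable
and writable past n_bytes, up to one vector width (64 bytes on an
AVX-512 build) beyond the requested size. A safe-call sketch:

/* tail padding makes the possible overrun harmless */
u8 src[128] = {}, dst[128];
/* copies 72 bytes but may load/store up to byte 127 on AVX-512,
   i.e. round_pow2 (72, 64) bytes in total */
clib_memcpy_may_overrun (dst, src, 72);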
diff --git a/src/vppinfra/memcpy_avx2.h b/src/vppinfra/memcpy_avx2.h
deleted file mode 100644
index f7a36f0e5cb..00000000000
--- a/src/vppinfra/memcpy_avx2.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_avx2_h
-#define included_clib_memcpy_avx2_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- __m256i ymm0;
-
- ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
- _mm256_storeu_si256 ((__m256i *) dst, ymm0);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
- clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
- clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
-}
-
-static inline void
-clib_mov128blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m256i ymm0, ymm1, ymm2, ymm3;
-
- while (n >= 128)
- {
- ymm0 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 0 * 32));
- n -= 128;
- ymm1 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 1 * 32));
- ymm2 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 2 * 32));
- ymm3 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 3 * 32));
- src = (const u8 *) src + 128;
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 0 * 32), ymm0);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 1 * 32), ymm1);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 2 * 32), ymm2);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 3 * 32), ymm3);
- dst = (u8 *) dst + 128;
- }
-}
-
-static inline void *
-clib_memcpy_fast_avx2 (void *dst, const void *src, size_t n)
-{
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t bits;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- {
- *(u64 *) dstu = *(const u64 *) srcu;
- }
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 48)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst + 16, (const u8 *) src + 16);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n <= 256)
- {
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK31:
- if (n >= 64)
- {
- n -= 64;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 64;
- dst = (u8 *) dst + 64;
- }
- if (n > 32)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n > 0)
- {
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- }
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 256 bytes
- */
- dstofss = (uword) dst & 0x1F;
- if (dstofss > 0)
- {
- dstofss = 32 - dstofss;
- n -= dstofss;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
-
- /**
- * Copy 128-byte blocks.
- */
- clib_mov128blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 127;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_128_BACK31;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#endif /* included_clib_memcpy_avx2_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_avx512.h b/src/vppinfra/memcpy_avx512.h
deleted file mode 100644
index 98dac75beac..00000000000
--- a/src/vppinfra/memcpy_avx512.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_avx512_h
-#define included_clib_memcpy_avx512_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- __m256i ymm0;
-
- ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
- _mm256_storeu_si256 ((__m256i *) dst, ymm0);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- __m512i zmm0;
-
- zmm0 = _mm512_loadu_si512 ((const void *) src);
- _mm512_storeu_si512 ((void *) dst, zmm0);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 (dst + 0 * 64, src + 0 * 64);
- clib_mov64 (dst + 1 * 64, src + 1 * 64);
-}
-
-static inline void
-clib_mov256 (u8 * dst, const u8 * src)
-{
- clib_mov128 (dst + 0 * 128, src + 0 * 128);
- clib_mov128 (dst + 1 * 128, src + 1 * 128);
-}
-
-static inline void
-clib_mov128blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m512i zmm0, zmm1;
-
- while (n >= 128)
- {
- zmm0 = _mm512_loadu_si512 ((const void *) (src + 0 * 64));
- n -= 128;
- zmm1 = _mm512_loadu_si512 ((const void *) (src + 1 * 64));
- src = src + 128;
- _mm512_storeu_si512 ((void *) (dst + 0 * 64), zmm0);
- _mm512_storeu_si512 ((void *) (dst + 1 * 64), zmm1);
- dst = dst + 128;
- }
-}
-
-static inline void
-clib_mov512blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
-
- while (n >= 512)
- {
- zmm0 = _mm512_loadu_si512 ((const void *) (src + 0 * 64));
- n -= 512;
- zmm1 = _mm512_loadu_si512 ((const void *) (src + 1 * 64));
- zmm2 = _mm512_loadu_si512 ((const void *) (src + 2 * 64));
- zmm3 = _mm512_loadu_si512 ((const void *) (src + 3 * 64));
- zmm4 = _mm512_loadu_si512 ((const void *) (src + 4 * 64));
- zmm5 = _mm512_loadu_si512 ((const void *) (src + 5 * 64));
- zmm6 = _mm512_loadu_si512 ((const void *) (src + 6 * 64));
- zmm7 = _mm512_loadu_si512 ((const void *) (src + 7 * 64));
- src = src + 512;
- _mm512_storeu_si512 ((void *) (dst + 0 * 64), zmm0);
- _mm512_storeu_si512 ((void *) (dst + 1 * 64), zmm1);
- _mm512_storeu_si512 ((void *) (dst + 2 * 64), zmm2);
- _mm512_storeu_si512 ((void *) (dst + 3 * 64), zmm3);
- _mm512_storeu_si512 ((void *) (dst + 4 * 64), zmm4);
- _mm512_storeu_si512 ((void *) (dst + 5 * 64), zmm5);
- _mm512_storeu_si512 ((void *) (dst + 6 * 64), zmm6);
- _mm512_storeu_si512 ((void *) (dst + 7 * 64), zmm7);
- dst = dst + 512;
- }
-}
-
-static inline void *
-clib_memcpy_fast_avx512 (void *dst, const void *src, size_t n)
-{
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t bits;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- *(u64 *) dstu = *(const u64 *) srcu;
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n <= 512)
- {
- if (n >= 256)
- {
- n -= 256;
- clib_mov256 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 256;
- dst = (u8 *) dst + 256;
- }
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK63:
- if (n > 64)
- {
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- clib_mov64 ((u8 *) dst - 64 + n, (const u8 *) src - 64 + n);
- return ret;
- }
- if (n > 0)
- clib_mov64 ((u8 *) dst - 64 + n, (const u8 *) src - 64 + n);
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 512 bytes
- */
- dstofss = (uword) dst & 0x3F;
- if (dstofss > 0)
- {
- dstofss = 64 - dstofss;
- n -= dstofss;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
-
- /**
- * Copy 512-byte blocks.
- * Use copy block function for better instruction order control,
- * which is important when load is unaligned.
- */
- clib_mov512blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 511;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
-
- /**
- * Copy 128-byte blocks.
- * Use copy block function for better instruction order control,
- * which is important when load is unaligned.
- */
- if (n >= 128)
- {
- clib_mov128blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 127;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
- }
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_128_BACK63;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#endif /* included_clib_memcpy_avx512_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_sse3.h b/src/vppinfra/memcpy_sse3.h
deleted file mode 100644
index aea2005d95a..00000000000
--- a/src/vppinfra/memcpy_sse3.h
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_sse3_h
-#define included_clib_memcpy_sse3_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- clib_mov16 ((u8 *) dst + 0 * 16, (const u8 *) src + 0 * 16);
- clib_mov16 ((u8 *) dst + 1 * 16, (const u8 *) src + 1 * 16);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
- clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
- clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
-}
-
-static inline void
-clib_mov256 (u8 * dst, const u8 * src)
-{
- clib_mov128 ((u8 *) dst + 0 * 128, (const u8 *) src + 0 * 128);
- clib_mov128 ((u8 *) dst + 1 * 128, (const u8 *) src + 1 * 128);
-}
-
-/**
- * Macro for copying unaligned block from one location to another with constant load offset,
- * 47 bytes leftover maximum,
- * locations should not overlap.
- * Requirements:
- * - Store is aligned
- * - Load offset is <offset>, which must be immediate value within [1, 15]
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
- * - <dst>, <src>, <len> must be variables
- * - __m128i <xmm0> ~ <xmm8> must be pre-defined
- */
-#define CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, offset) \
-({ \
- int tmp; \
- while (len >= 128 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
- len -= 128; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
- xmm3 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 3 * 16)); \
- xmm4 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 4 * 16)); \
- xmm5 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 5 * 16)); \
- xmm6 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 6 * 16)); \
- xmm7 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 7 * 16)); \
- xmm8 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 8 * 16)); \
- src = (const u8 *)src + 128; \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
- dst = (u8 *)dst + 128; \
- } \
- tmp = len; \
- len = ((len - 16 + offset) & 127) + 16 - offset; \
- tmp -= len; \
- src = (const u8 *)src + tmp; \
- dst = (u8 *)dst + tmp; \
- if (len >= 32 + 16 - offset) { \
- while (len >= 32 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
- len -= 32; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
- src = (const u8 *)src + 32; \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
- dst = (u8 *)dst + 32; \
- } \
- tmp = len; \
- len = ((len - 16 + offset) & 31) + 16 - offset; \
- tmp -= len; \
- src = (const u8 *)src + tmp; \
- dst = (u8 *)dst + tmp; \
- } \
-})
-
-/**
- * Macro for copying unaligned block from one location to another,
- * 47 bytes leftover maximum,
- * locations should not overlap.
- * Use switch here because the aligning instruction requires immediate value for shift count.
- * Requirements:
- * - Store is aligned
- * - Load offset is <offset>, which must be within [1, 15]
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
- * - <dst>, <src>, <len> must be variables
- * - __m128i <xmm0> ~ <xmm8> used in CLIB_MVUNALIGN_LEFT47_IMM must be pre-defined
- */
-#define CLIB_MVUNALIGN_LEFT47(dst, src, len, offset) \
-({ \
- switch (offset) { \
- case 0x01: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x01); break; \
- case 0x02: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x02); break; \
- case 0x03: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x03); break; \
- case 0x04: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x04); break; \
- case 0x05: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x05); break; \
- case 0x06: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x06); break; \
- case 0x07: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x07); break; \
- case 0x08: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x08); break; \
- case 0x09: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x09); break; \
- case 0x0A: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0A); break; \
- case 0x0B: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0B); break; \
- case 0x0C: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0C); break; \
- case 0x0D: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0D); break; \
- case 0x0E: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0E); break; \
- case 0x0F: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0F); break; \
- default:; \
- } \
-})
-
-static inline void *
-clib_memcpy_fast_sse3 (void *dst, const void *src, size_t n)
-{
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t srcofs;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- {
- *(u64 *) dstu = *(const u64 *) srcu;
- }
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 48)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst + 32, (const u8 *) src + 32);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 128)
- {
- goto COPY_BLOCK_128_BACK15;
- }
- if (n <= 512)
- {
- if (n >= 256)
- {
- n -= 256;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- clib_mov128 ((u8 *) dst + 128, (const u8 *) src + 128);
- src = (const u8 *) src + 256;
- dst = (u8 *) dst + 256;
- }
- COPY_BLOCK_255_BACK15:
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK15:
- if (n >= 64)
- {
- n -= 64;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 64;
- dst = (u8 *) dst + 64;
- }
- COPY_BLOCK_64_BACK15:
- if (n >= 32)
- {
- n -= 32;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 32;
- dst = (u8 *) dst + 32;
- }
- if (n > 16)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n > 0)
- {
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- }
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 512 bytes,
- * and make sure the first 15 bytes are copied, because
- * unaligned copy functions require up to 15 bytes
- * backwards access.
- */
- dstofss = (uword) dst & 0x0F;
- if (dstofss > 0)
- {
- dstofss = 16 - dstofss + 16;
- n -= dstofss;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
- srcofs = ((uword) src & 0x0F);
-
- /**
- * For aligned copy
- */
- if (srcofs == 0)
- {
- /**
- * Copy 256-byte blocks
- */
- for (; n >= 256; n -= 256)
- {
- clib_mov256 ((u8 *) dst, (const u8 *) src);
- dst = (u8 *) dst + 256;
- src = (const u8 *) src + 256;
- }
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_255_BACK15;
- }
-
- /**
- * For copy with unaligned load
- */
- CLIB_MVUNALIGN_LEFT47 (dst, src, n, srcofs);
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_64_BACK15;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#undef CLIB_MVUNALIGN_LEFT47_IMM
-#undef CLIB_MVUNALIGN_LEFT47
-
-#endif /* included_clib_memcpy_sse3_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_x86_64.h b/src/vppinfra/memcpy_x86_64.h
new file mode 100644
index 00000000000..39258f19748
--- /dev/null
+++ b/src/vppinfra/memcpy_x86_64.h
@@ -0,0 +1,613 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Damjan Marion
+ */
+
+#ifndef included_clib_memcpy_x86_64_h
+#define included_clib_memcpy_x86_64_h
+#ifdef __x86_64__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/warnings.h>
+#include <stdio.h>
+
+/* clang-format off */
+WARN_OFF (stringop-overflow)
+/* clang-format on */
+
+static_always_inline void
+clib_memcpy1 (void *d, void *s)
+{
+ *(u8 *) d = *(u8 *) s;
+}
+
+static_always_inline void
+clib_memcpy2 (void *d, void *s)
+{
+ *(u16u *) d = *(u16u *) s;
+}
+
+static_always_inline void
+clib_memcpy4 (void *d, void *s)
+{
+ *(u32u *) d = *(u32u *) s;
+}
+
+static_always_inline void
+clib_memcpy8 (void *d, void *s)
+{
+ *(u64u *) d = *(u64u *) s;
+}
+
+static_always_inline void
+clib_memcpy16 (void *d, void *s)
+{
+#ifdef CLIB_HAVE_VEC128
+ *(u8x16u *) d = *(u8x16u *) s;
+#else
+ clib_memcpy8 (d, s);
+ clib_memcpy8 (d + 8, s + 8);
+#endif
+}
+
+#ifdef CLIB_HAVE_VEC256
+static_always_inline void
+clib_memcpy32 (void *d, void *s)
+{
+ *(u8x32u *) d = *(u8x32u *) s;
+}
+#endif
+
+#ifdef CLIB_HAVE_VEC512
+static_always_inline void
+clib_memcpy64 (void *d, void *s)
+{
+ *(u8x64u *) d = *(u8x64u *) s;
+}
+#endif
+
+static_always_inline void
+clib_memcpy_const_le32 (u8 *dst, u8 *src, size_t n)
+{
+ switch (n)
+ {
+ case 1:
+ clib_memcpy1 (dst, src);
+ break;
+ case 2:
+ clib_memcpy2 (dst, src);
+ break;
+ case 3:
+ clib_memcpy2 (dst, src);
+ clib_memcpy1 (dst + 2, src + 2);
+ break;
+ case 4:
+ clib_memcpy4 (dst, src);
+ break;
+ case 5:
+ clib_memcpy4 (dst, src);
+ clib_memcpy1 (dst + 4, src + 4);
+ break;
+ case 6:
+ clib_memcpy4 (dst, src);
+ clib_memcpy2 (dst + 4, src + 4);
+ break;
+ case 7:
+ clib_memcpy4 (dst, src);
+ clib_memcpy4 (dst + 3, src + 3);
+ break;
+ case 8:
+ clib_memcpy8 (dst, src);
+ break;
+ case 9:
+ clib_memcpy8 (dst, src);
+ clib_memcpy1 (dst + 8, src + 8);
+ break;
+ case 10:
+ clib_memcpy8 (dst, src);
+ clib_memcpy2 (dst + 8, src + 8);
+ break;
+ case 11:
+ case 12:
+ clib_memcpy8 (dst, src);
+ clib_memcpy4 (dst + n - 4, src + n - 4);
+ break;
+ case 13:
+ case 14:
+ case 15:
+ clib_memcpy8 (dst, src);
+ clib_memcpy8 (dst + n - 8, src + n - 8);
+ break;
+ case 16:
+ clib_memcpy16 (dst, src);
+ break;
+ case 17:
+ clib_memcpy16 (dst, src);
+ clib_memcpy1 (dst + 16, src + 16);
+ break;
+ case 18:
+ clib_memcpy16 (dst, src);
+ clib_memcpy2 (dst + 16, src + 16);
+ break;
+ case 20:
+ clib_memcpy16 (dst, src);
+ clib_memcpy4 (dst + 16, src + 16);
+ break;
+ case 24:
+ clib_memcpy16 (dst, src);
+ clib_memcpy8 (dst + 16, src + 16);
+ break;
+ default:
+ clib_memcpy16 (dst, src);
+ clib_memcpy16 (dst + n - 16, src + n - 16);
+ break;
+ }
+}
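+
+/* note: the default arm above exploits overlapping copies: two 16-byte
+ * loads/stores, one at dst and one at dst + n - 16, cover every remaining
+ * length up to 32 without per-length cases; the smaller odd sizes use the
+ * same first/last trick at narrower widths */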
+
+static_always_inline void
+clib_memcpy_const_le64 (u8 *dst, u8 *src, size_t n)
+{
+ if (n < 32)
+ {
+ clib_memcpy_const_le32 (dst, src, n);
+ return;
+ }
+
+#if defined(CLIB_HAVE_VEC256)
+ switch (n)
+ {
+ case 32:
+ clib_memcpy32 (dst, src);
+ break;
+ case 33:
+ clib_memcpy32 (dst, src);
+ clib_memcpy1 (dst + 32, src + 32);
+ break;
+ case 34:
+ clib_memcpy32 (dst, src);
+ clib_memcpy2 (dst + 32, src + 32);
+ break;
+ case 36:
+ clib_memcpy32 (dst, src);
+ clib_memcpy4 (dst + 32, src + 32);
+ break;
+ case 40:
+ clib_memcpy32 (dst, src);
+ clib_memcpy8 (dst + 32, src + 32);
+ break;
+ case 48:
+ clib_memcpy32 (dst, src);
+ clib_memcpy16 (dst + 32, src + 32);
+ break;
+ default:
+ clib_memcpy32 (dst, src);
+ clib_memcpy32 (dst + n - 32, src + n - 32);
+ break;
+ }
+#else
+ while (n > 31)
+ {
+ clib_memcpy16 (dst, src);
+ clib_memcpy16 (dst + 16, src + 16);
+ dst += 32;
+ src += 32;
+ n -= 32;
+ }
+ clib_memcpy_const_le32 (dst, src, n);
+#endif
+}
+
+static_always_inline void
+clib_memcpy_x86_64_const (u8 *dst, u8 *src, size_t n)
+{
+#if defined(CLIB_HAVE_VEC512)
+ while (n > 128)
+ {
+ clib_memcpy64 (dst, src);
+ dst += 64;
+ src += 64;
+ n -= 64;
+ }
+
+ if (n < 64)
+ {
+ clib_memcpy_const_le64 (dst, src, n);
+ return;
+ }
+
+ switch (n)
+ {
+ case 64:
+ clib_memcpy64 (dst, src);
+ break;
+ case 65:
+ clib_memcpy64 (dst, src);
+ clib_memcpy1 (dst + 64, src + 64);
+ break;
+ case 66:
+ clib_memcpy64 (dst, src);
+ clib_memcpy2 (dst + 64, src + 64);
+ break;
+ case 68:
+ clib_memcpy64 (dst, src);
+ clib_memcpy4 (dst + 64, src + 64);
+ break;
+ case 72:
+ clib_memcpy64 (dst, src);
+ clib_memcpy8 (dst + 64, src + 64);
+ break;
+ case 80:
+ clib_memcpy64 (dst, src);
+ clib_memcpy16 (dst + 64, src + 64);
+ break;
+ case 96:
+ clib_memcpy64 (dst, src);
+ clib_memcpy32 (dst + 64, src + 64);
+ break;
+ default:
+ clib_memcpy64 (dst, src);
+ clib_memcpy64 (dst + n - 64, src + n - 64);
+ break;
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ while (n > 64)
+ {
+ clib_memcpy32 (dst, src);
+ dst += 32;
+ src += 32;
+ n -= 32;
+ }
+ clib_memcpy_const_le64 (dst, src, n);
+#else
+ while (n > 32)
+ {
+ clib_memcpy16 (dst, src);
+ dst += 16;
+ src += 16;
+ n -= 16;
+ }
+ clib_memcpy_const_le32 (dst, src, n);
+#endif
+}
+
+static_always_inline void *
+clib_memcpy_x86_64 (void *restrict dst, const void *restrict src, size_t n)
+{
+ u8 *d = (u8 *) dst, *s = (u8 *) src;
+
+ if (n == 0)
+ return dst;
+
+ if (COMPILE_TIME_CONST (n))
+ {
+ if (n)
+ clib_memcpy_x86_64_const (d, s, n);
+ return dst;
+ }
+
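+ /* n is known only at run time from here on: copies of 32 bytes or
+ * less use a masked store where available, or overlapping head/tail
+ * copies; larger copies below run in unrolled vector blocks, aligned
+ * to the destination for the biggest sizes */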
+ if (n <= 32)
+ {
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32 mask = pow2_mask (n);
+ u8x32_mask_store (u8x32_mask_load_zero (s, mask), d, mask);
+#else
+ if (PREDICT_TRUE (n >= 16))
+ {
+ clib_memcpy16 (d, s);
+ clib_memcpy16 (d + n - 16, s + n - 16);
+ }
+ else if (PREDICT_TRUE (n >= 8))
+ {
+ clib_memcpy8 (d, s);
+ clib_memcpy8 (d + n - 8, s + n - 8);
+ }
+ else if (PREDICT_TRUE (n >= 4))
+ {
+ clib_memcpy4 (d, s);
+ clib_memcpy4 (d + n - 4, s + n - 4);
+ }
+ else if (PREDICT_TRUE (n > 1))
+ {
+ clib_memcpy2 (d, s);
+ clib_memcpy2 (d + n - 2, s + n - 2);
+ }
+ else
+ clib_memcpy1 (d, s);
+#endif
+ }
+#ifdef CLIB_HAVE_VEC512
+ else
+ {
+ u8x64 v0, v1, v2, v3;
+ u64 final_off, nr, off = 64;
+
+ if (n <= 64)
+ {
+ n -= 32;
+ u8x32_store_unaligned (u8x32_load_unaligned (s), d);
+ u8x32_store_unaligned (u8x32_load_unaligned (s + n), d + n);
+ return dst;
+ }
+
+ u8x64_store_unaligned (u8x64_load_unaligned (s), d);
+
+ if (n <= 128)
+ goto done2;
+
+ if (n <= 192)
+ goto one;
+
+ if (n <= 512 + 64)
+ {
+ nr = round_pow2 (n - 128, 64);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x3f;
+ nr = round_pow2 (n - off - 64, 64);
+ final_off = (nr & ~(u64) 0x1ff) + off;
+
+ more:
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ v2 = u8x64_load_unaligned (s + off + 0x080);
+ v3 = u8x64_load_unaligned (s + off + 0x0c0);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ u8x64_store_unaligned (v2, d + off + 0x080);
+ u8x64_store_unaligned (v3, d + off + 0x0c0);
+ v0 = u8x64_load_unaligned (s + off + 0x100);
+ v1 = u8x64_load_unaligned (s + off + 0x140);
+ v2 = u8x64_load_unaligned (s + off + 0x180);
+ v3 = u8x64_load_unaligned (s + off + 0x1c0);
+ u8x64_store_unaligned (v0, d + off + 0x100);
+ u8x64_store_unaligned (v1, d + off + 0x140);
+ u8x64_store_unaligned (v2, d + off + 0x180);
+ u8x64_store_unaligned (v3, d + off + 0x1c0);
+ off += 512;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0x1ff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 256))
+ {
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ v2 = u8x64_load_unaligned (s + off + 0x080);
+ v3 = u8x64_load_unaligned (s + off + 0x0c0);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ u8x64_store_unaligned (v2, d + off + 0x080);
+ u8x64_store_unaligned (v3, d + off + 0x0c0);
+ off += 256;
+ }
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ one:
+ u8x64_store_unaligned (u8x64_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x64_store_unaligned (u8x64_load_unaligned (s + n - 64), d + n - 64);
+ }
+ return dst;
+#elif defined(CLIB_HAVE_VEC256)
+ else
+ {
+ u8x32 v0, v1, v2, v3;
+ u64 final_off, nr, off = 32;
+
+ u8x32_store_unaligned (u8x32_load_unaligned (s), d);
+
+ if (n <= 64)
+ goto done2;
+
+ if (n <= 96)
+ goto one;
+
+ if (n <= 256 + 32)
+ {
+ nr = round_pow2 (n - 64, 32);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x1f;
+ nr = round_pow2 (n - off - 32, 32);
+ final_off = (nr & ~(u64) 0xff) + off;
+
+ more:
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ v2 = u8x32_load_unaligned (s + off + 0x40);
+ v3 = u8x32_load_unaligned (s + off + 0x60);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ u8x32_store_unaligned (v2, d + off + 0x40);
+ u8x32_store_unaligned (v3, d + off + 0x60);
+ v0 = u8x32_load_unaligned (s + off + 0x80);
+ v1 = u8x32_load_unaligned (s + off + 0xa0);
+ v2 = u8x32_load_unaligned (s + off + 0xc0);
+ v3 = u8x32_load_unaligned (s + off + 0xe0);
+ u8x32_store_unaligned (v0, d + off + 0x80);
+ u8x32_store_unaligned (v1, d + off + 0xa0);
+ u8x32_store_unaligned (v2, d + off + 0xc0);
+ u8x32_store_unaligned (v3, d + off + 0xe0);
+ off += 256;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0xff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ v2 = u8x32_load_unaligned (s + off + 0x40);
+ v3 = u8x32_load_unaligned (s + off + 0x60);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ u8x32_store_unaligned (v2, d + off + 0x40);
+ u8x32_store_unaligned (v3, d + off + 0x60);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ off += 64;
+ }
+ if (PREDICT_TRUE (nr & 32))
+ {
+ one:
+ u8x32_store_unaligned (u8x32_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x32_store_unaligned (u8x32_load_unaligned (s + n - 32), d + n - 32);
+ }
+ return dst;
+#elif defined(CLIB_HAVE_VEC128)
+ else
+ {
+ u8x16 v0, v1, v2, v3;
+ u64 final_off, nr, off = 32;
+
+ if (0 && n > 389)
+ {
+ __builtin_memcpy (d, s, n);
+ return dst;
+ }
+
+ u8x16_store_unaligned (u8x16_load_unaligned (s), d);
+ u8x16_store_unaligned (u8x16_load_unaligned (s + 16), d + 16);
+
+ if (n <= 48)
+ goto done2;
+
+ if (n <= 64)
+ goto one;
+
+ if (n <= 256 + 32)
+ {
+ nr = round_pow2 (n - 48, 16);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x0f;
+ nr = round_pow2 (n - off - 16, 16);
+ final_off = (nr & ~(u64) 0xff) + off;
+
+ more:
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ v0 = u8x16_load_unaligned (s + off + 0x40);
+ v1 = u8x16_load_unaligned (s + off + 0x50);
+ v2 = u8x16_load_unaligned (s + off + 0x60);
+ v3 = u8x16_load_unaligned (s + off + 0x70);
+ u8x16_store_unaligned (v0, d + off + 0x40);
+ u8x16_store_unaligned (v1, d + off + 0x50);
+ u8x16_store_unaligned (v2, d + off + 0x60);
+ u8x16_store_unaligned (v3, d + off + 0x70);
+ v0 = u8x16_load_unaligned (s + off + 0x80);
+ v1 = u8x16_load_unaligned (s + off + 0x90);
+ v2 = u8x16_load_unaligned (s + off + 0xa0);
+ v3 = u8x16_load_unaligned (s + off + 0xb0);
+ u8x16_store_unaligned (v0, d + off + 0x80);
+ u8x16_store_unaligned (v1, d + off + 0x90);
+ u8x16_store_unaligned (v2, d + off + 0xa0);
+ u8x16_store_unaligned (v3, d + off + 0xb0);
+ v0 = u8x16_load_unaligned (s + off + 0xc0);
+ v1 = u8x16_load_unaligned (s + off + 0xd0);
+ v2 = u8x16_load_unaligned (s + off + 0xe0);
+ v3 = u8x16_load_unaligned (s + off + 0xf0);
+ u8x16_store_unaligned (v0, d + off + 0xc0);
+ u8x16_store_unaligned (v1, d + off + 0xd0);
+ u8x16_store_unaligned (v2, d + off + 0xe0);
+ u8x16_store_unaligned (v3, d + off + 0xf0);
+ off += 256;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0xff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ v0 = u8x16_load_unaligned (s + off + 0x40);
+ v1 = u8x16_load_unaligned (s + off + 0x50);
+ v2 = u8x16_load_unaligned (s + off + 0x60);
+ v3 = u8x16_load_unaligned (s + off + 0x70);
+ u8x16_store_unaligned (v0, d + off + 0x40);
+ u8x16_store_unaligned (v1, d + off + 0x50);
+ u8x16_store_unaligned (v2, d + off + 0x60);
+ u8x16_store_unaligned (v3, d + off + 0x70);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ off += 64;
+ }
+ if (PREDICT_TRUE (nr & 32))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ off += 32;
+ }
+ if (PREDICT_TRUE (nr & 16))
+ {
+ one:
+ u8x16_store_unaligned (u8x16_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x16_store_unaligned (u8x16_load_unaligned (s + n - 16), d + n - 16);
+ }
+ return dst;
+#else
+ __builtin_memcpy (dst, src, n);
+ return dst;
+#endif
+}
+
+/* clang-format off */
+WARN_ON (stringop-overflow)
+/* clang-format on */
+
+#endif
+#endif
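
A minimal sketch of how the constant-size fast path above is meant to be
exercised (assuming the surrounding vppinfra environment; the struct and
function names are illustrative only):

#include <vppinfra/clib.h>
#include <vppinfra/memcpy_x86_64.h>

typedef struct { u8 hdr[14]; u8 payload[48]; } pkt_t;

static_always_inline void
copy_header (pkt_t *to, pkt_t *from)
{
  /* sizeof (from->hdr) == 14 is a compile-time constant, so
     COMPILE_TIME_CONST (n) holds and the call reduces to two overlapping
     8-byte copies (clib_memcpy8 at dst and at dst + n - 8), with no
     runtime length dispatch. */
  clib_memcpy_x86_64 (to->hdr, from->hdr, sizeof (from->hdr));
}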
diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c
index c556312e64f..babaaeec726 100644
--- a/src/vppinfra/mhash.c
+++ b/src/vppinfra/mhash.c
@@ -164,6 +164,8 @@ mhash_sanitize_hash_user (mhash_t * mh)
h->user = pointer_to_uword (mh);
}
+static u8 *mhash_format_pair_default (u8 *s, va_list *args);
+
__clib_export void
mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes)
{
@@ -208,12 +210,12 @@ mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes)
vec_validate (h->key_tmps, os_get_nthreads () - 1);
ASSERT (n_key_bytes < ARRAY_LEN (t));
- h->hash = hash_create2 ( /* elts */ 0,
+ h->hash = hash_create2 (/* elts */ 0,
/* user */ pointer_to_uword (h),
/* value_bytes */ n_value_bytes,
t[n_key_bytes].key_sum, t[n_key_bytes].key_equal,
/* format pair/arg */
- 0, 0);
+ mhash_format_pair_default, 0);
}
static uword
@@ -295,7 +297,7 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value)
{
i = h->key_vector_free_indices[l - 1];
k = vec_elt_at_index (h->key_vector_or_heap, i);
- _vec_len (h->key_vector_free_indices) = l - 1;
+ vec_set_len (h->key_vector_free_indices, l - 1);
}
else
{
@@ -331,11 +333,11 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value)
{
if (key_alloc_from_free_list)
{
- h->key_vector_free_indices[l] = i;
- _vec_len (h->key_vector_free_indices) = l + 1;
+ vec_set_len (h->key_vector_free_indices, l);
+ h->key_vector_free_indices[l - 1] = i;
}
else
- _vec_len (h->key_vector_or_heap) -= h->n_key_bytes;
+ vec_dec_len (h->key_vector_or_heap, h->n_key_bytes);
}
}
@@ -371,8 +373,8 @@ mhash_unset (mhash_t * h, void *key, uword * old_value)
return 1;
}
-u8 *
-format_mhash_key (u8 * s, va_list * va)
+__clib_export u8 *
+format_mhash_key (u8 *s, va_list *va)
{
mhash_t *h = va_arg (*va, mhash_t *);
u32 ki = va_arg (*va, u32);
@@ -387,7 +389,43 @@ format_mhash_key (u8 * s, va_list * va)
else if (h->format_key)
s = format (s, "%U", h->format_key, k);
else
- s = format (s, "%U", format_hex_bytes, k, h->n_key_bytes);
+ s = format (s, "0x%U", format_hex_bytes, k, h->n_key_bytes);
+
+ return s;
+}
+
+static u8 *
+mhash_format_pair_default (u8 *s, va_list *args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+ mhash_t *mh = uword_to_pointer (h->user, mhash_t *);
+
+ s = format (s, "%U", format_mhash_key, mh, (u32) p->key);
+ if (hash_value_bytes (h) > 0)
+ s = format (s, " -> 0x%8U", format_hex_bytes, &p->value[0],
+ hash_value_bytes (h));
+ return s;
+}
+
+__clib_export u8 *
+format_mhash (u8 *s, va_list *va)
+{
+ mhash_t *h = va_arg (*va, mhash_t *);
+ int verbose = va_arg (*va, int);
+
+ s = format (s, "mhash %p, %wd elts, \n", h, mhash_elts (h));
+ if (mhash_key_vector_is_heap (h))
+ s = format (s, " %U", format_heap, h->key_vector_or_heap, verbose);
+ else
+ s = format (s, " keys %wd elts, %wd size, %wd free, %wd bytes used\n",
+ vec_len (h->key_vector_or_heap) / h->n_key_bytes,
+ h->n_key_bytes, vec_len (h->key_vector_free_indices),
+ vec_bytes (h->key_vector_or_heap) +
+ vec_bytes (h->key_vector_free_indices));
+ s = format (s, " %U", format_hash, h->hash, verbose);
return s;
}
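
A short sketch of the new formatter in use (assuming the usual vppinfra
environment; mhash_set is the existing convenience wrapper around
mhash_set_mem):

#include <vppinfra/mhash.h>

static u8 *
dump_example_mhash (void)
{
  mhash_t h;
  u32 key = 42;

  mhash_init (&h, sizeof (uword) /* value bytes */, sizeof (u32) /* key */);
  mhash_set (&h, &key, /* value */ 1, /* old_value */ 0);

  /* verbose = 0: summary line, key storage stats, then the inner hash,
     whose pairs now print via mhash_format_pair_default */
  return format (0, "%U", format_mhash, &h, 0 /* verbose */);
}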
diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h
index 7eb1918384e..62aee365fa3 100644
--- a/src/vppinfra/mhash.h
+++ b/src/vppinfra/mhash.h
@@ -166,8 +166,13 @@ do { \
})); \
} while (0)
+u8 *format_mhash (u8 *s, va_list *va);
+
format_function_t format_mhash_key;
+/* Main test routine. */
+int test_mhash_main (unformat_input_t *input);
+
#endif /* included_clib_mhash_h */
/*
diff --git a/src/vppinfra/mpcap.c b/src/vppinfra/mpcap.c
index 8389a7fd2d7..d8e36c29fbd 100644
--- a/src/vppinfra/mpcap.c
+++ b/src/vppinfra/mpcap.c
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <vppinfra/mpcap.h>
/*
diff --git a/src/vppinfra/pcap.c b/src/vppinfra/pcap.c
index 4f8b6bb429c..bdaa861db3f 100644
--- a/src/vppinfra/pcap.c
+++ b/src/vppinfra/pcap.c
@@ -37,7 +37,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <vppinfra/pcap.h>
/**
@@ -137,7 +137,7 @@ pcap_write (pcap_main_t * pm)
while (vec_len (pm->pcap_data) > pm->n_pcap_data_written)
{
- int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;
+ i64 n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;
n = write (pm->file_descriptor,
vec_elt_at_index (pm->pcap_data, pm->n_pcap_data_written),
diff --git a/src/vppinfra/pcg.h b/src/vppinfra/pcg.h
new file mode 100644
index 00000000000..a7cc9201b8f
--- /dev/null
+++ b/src/vppinfra/pcg.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * PCG Random Number Generation for C.
+ *
+ * Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+ * and the PCG Project contributors.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ * Licensed under the Apache License, Version 2.0 (provided in
+ * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+ * or under the MIT license (provided in LICENSE-MIT.txt and at
+ * http://opensource.org/licenses/MIT), at your option. This file may not
+ * be copied, modified, or distributed except according to those terms.
+ *
+ * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied. See your chosen license for details.
+ *
+ * For additional information about the PCG random number generation scheme,
+ * visit http://www.pcg-random.org/.
+ */
+
+/* This implements the pcg64i_random_t PCG specialized generator:
+ * https://www.pcg-random.org/using-pcg-c.html#specialized-generators
+ * This generator produces each 64-bit output exactly once, which makes it
+ * perfectly suited to generating non-repeating IVs. However, because of
+ * this property the entire internal state is revealed with each output.
+ * It has a 2^64 period and supports 2^63 non-overlapping streams */
+
+#define clib_pcg64i_random_r clib_pcg_setseq_64_rxs_m_xs_64_random_r
+#define clib_pcg64i_srandom_r clib_pcg_setseq_64_srandom_r
+
+typedef struct
+{
+ u64 state;
+ u64 inc;
+} clib_pcg_state_setseq_64_t;
+
+typedef clib_pcg_state_setseq_64_t clib_pcg64i_random_t;
+
+static_always_inline void
+clib_pcg_setseq_64_step_r (clib_pcg_state_setseq_64_t *rng)
+{
+ rng->state = rng->state * 6364136223846793005ULL + rng->inc;
+}
+
+static_always_inline u64
+clib_pcg_output_rxs_m_xs_64_64 (u64 state)
+{
+ u64 word =
+ ((state >> ((state >> 59u) + 5u)) ^ state) * 12605985483714917081ull;
+ return (word >> 43u) ^ word;
+}
+
+static_always_inline u64
+clib_pcg_setseq_64_rxs_m_xs_64_random_r (clib_pcg_state_setseq_64_t *rng)
+{
+ u64 oldstate = rng->state;
+ clib_pcg_setseq_64_step_r (rng);
+ return clib_pcg_output_rxs_m_xs_64_64 (oldstate);
+}
+
+static_always_inline void
+clib_pcg_setseq_64_srandom_r (clib_pcg_state_setseq_64_t *rng, u64 initstate,
+ u64 initseq)
+{
+ rng->state = 0U;
+ rng->inc = (initseq << 1u) | 1u;
+ clib_pcg_setseq_64_step_r (rng);
+ rng->state += initstate;
+ clib_pcg_setseq_64_step_r (rng);
+}
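
A minimal sketch of seeding and drawing from this generator (initseq
selects one of the 2^63 streams; within a stream every u64 value is
produced exactly once per 2^64 period):

#include <vppinfra/clib.h>
#include <vppinfra/pcg.h>

static void
fill_ivs (u64 *ivs, u32 n)
{
  clib_pcg64i_random_t rng;

  /* initstate picks the starting point, initseq selects the stream */
  clib_pcg64i_srandom_r (&rng, /* initstate */ 0x1234, /* initseq */ 7);

  for (u32 i = 0; i < n; i++)
    ivs[i] = clib_pcg64i_random_r (&rng); /* guaranteed non-repeating */
}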
diff --git a/src/vppinfra/perfmon/bundle_core_power.c b/src/vppinfra/perfmon/bundle_core_power.c
new file mode 100644
index 00000000000..6a30cdfdde4
--- /dev/null
+++ b/src/vppinfra/perfmon/bundle_core_power.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifdef __x86_64__
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+
+static u8 *
+format_perfmon_bundle_core_power (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t __clib_unused *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c = va_arg (*args, clib_perfmon_capture_t *);
+ u32 col = va_arg (*args, int);
+ u64 *d = c->data;
+
+ switch (col)
+ {
+ case 0:
+ return format (s, "%7.1f %%", (f64) 100 * d[1] / d[0]);
+ case 1:
+ return format (s, "%7.1f %%", (f64) 100 * d[2] / d[0]);
+ case 2:
+ return format (s, "%7.1f %%", (f64) 100 * d[3] / d[0]);
+ default:
+ return s;
+ }
+}
+
+#define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
+
+CLIB_PERFMON_BUNDLE (core_power) = {
+ .name = "core-power",
+ .desc =
+ "Core cycles where the core was running under a specific turbo schedule.",
+ .type = PERF_TYPE_RAW,
+ .config[0] = PERF_INTEL_CODE (0x3c, 0x00),
+ .config[1] = PERF_INTEL_CODE (0x28, 0x07),
+ .config[2] = PERF_INTEL_CODE (0x28, 0x18),
+ .config[3] = PERF_INTEL_CODE (0x28, 0x20),
+ .n_events = 4,
+ .format_fn = format_perfmon_bundle_core_power,
+ .column_headers = CLIB_STRING_ARRAY ("Level 0", "Level 1", "Level 2"),
+};
+
+#endif
diff --git a/src/vppinfra/perfmon/bundle_default.c b/src/vppinfra/perfmon/bundle_default.c
new file mode 100644
index 00000000000..c2118aed974
--- /dev/null
+++ b/src/vppinfra/perfmon/bundle_default.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+
+static u8 *
+format_perfmon_bundle_default (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c = va_arg (*args, clib_perfmon_capture_t *);
+ u32 col = va_arg (*args, int);
+ u64 *d = c->data;
+
+ switch (col)
+ {
+ case 0:
+ if (ctx->ref_clock > 0)
+ return format (s, "%8.1f", (f64) d[0] / d[1] * (ctx->ref_clock / 1e9));
+ else
+ return s;
+ case 1:
+ return format (s, "%5.2f", (f64) d[2] / d[0]);
+ case 2:
+ return format (s, "%8u", d[0]);
+ case 3:
+ return format (s, "%8.2f", (f64) d[0] / c->n_ops);
+ case 4:
+ return format (s, "%8u", d[2]);
+ case 5:
+ return format (s, "%8.2f", (f64) d[2] / c->n_ops);
+ case 6:
+ return format (s, "%9u", d[3]);
+ case 7:
+ return format (s, "%9.2f", (f64) d[3] / c->n_ops);
+ case 8:
+ return format (s, "%10u", d[4]);
+ case 9:
+ return format (s, "%10.2f", (f64) d[4] / c->n_ops);
+ default:
+ return s;
+ }
+}
+
+CLIB_PERFMON_BUNDLE (default) = {
+ .name = "default",
+ .desc = "IPC, Clocks/Operatiom, Instr/Operation, Branch Total & Miss",
+ .type = PERF_TYPE_HARDWARE,
+ .config[0] = PERF_COUNT_HW_CPU_CYCLES,
+ .config[1] = PERF_COUNT_HW_REF_CPU_CYCLES,
+ .config[2] = PERF_COUNT_HW_INSTRUCTIONS,
+ .config[3] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ .config[4] = PERF_COUNT_HW_BRANCH_MISSES,
+ .n_events = 5,
+ .format_fn = format_perfmon_bundle_default,
+ .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks", "Clks/Op",
+ "Inst", "Inst/Op", "Brnch", "Brnch/Op",
+ "BrMiss", "BrMiss/Op"),
+};
diff --git a/src/vppinfra/perfmon/perfmon.c b/src/vppinfra/perfmon/perfmon.c
new file mode 100644
index 00000000000..f44f225a86c
--- /dev/null
+++ b/src/vppinfra/perfmon/perfmon.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+#include <vppinfra/format_table.h>
+
+clib_perfmon_main_t clib_perfmon_main;
+
+__clib_export clib_error_t *
+clib_perfmon_init_by_bundle_name (clib_perfmon_ctx_t *ctx, char *fmt, ...)
+{
+ clib_perfmon_main_t *pm = &clib_perfmon_main;
+ clib_perfmon_bundle_t *b = 0;
+ int group_fd = -1;
+ clib_error_t *err = 0;
+ va_list va;
+ char *bundle_name;
+
+ struct perf_event_attr pe = {
+ .size = sizeof (struct perf_event_attr),
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .pinned = 1,
+ .exclusive = 1,
+ .read_format = (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING),
+ };
+
+ va_start (va, fmt);
+ bundle_name = (char *) va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (bundle_name, 0);
+
+ for (clib_perfmon_bundle_reg_t *r = pm->bundle_regs; r; r = r->next)
+ {
+ if (strncmp (r->bundle->name, bundle_name, vec_len (bundle_name) - 1))
+ continue;
+ b = r->bundle;
+ break;
+ }
+
+ if (b == 0)
+ {
+ err = clib_error_return (0, "Unknown bundle '%s'", bundle_name);
+ goto done;
+ }
+
+ clib_memset_u8 (ctx, 0, sizeof (clib_perfmon_ctx_t));
+ vec_validate_init_empty (ctx->fds, b->n_events - 1, -1);
+ ctx->bundle = b;
+
+ for (int i = 0; i < b->n_events; i++)
+ {
+ pe.config = b->config[i];
+ pe.type = b->type;
+ int fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, /* cpu */ -1,
+ /* group_fd */ group_fd, /* flags */ 0);
+ if (fd < 0)
+ {
+ err = clib_error_return_unix (0, "perf_event_open[%u]", i);
+ goto done;
+ }
+
+ if (ctx->debug)
+ fformat (stderr, "perf event %u open, fd %d\n", i, fd);
+
+ if (group_fd == -1)
+ {
+ group_fd = fd;
+ pe.pinned = 0;
+ pe.exclusive = 0;
+ }
+
+ ctx->fds[i] = fd;
+ }
+
+ ctx->group_fd = group_fd;
+ ctx->data = vec_new (u64, 3 + b->n_events);
+ ctx->ref_clock = os_cpu_clock_frequency ();
+ vec_validate (ctx->capture_groups, 0);
+
+done:
+ if (err)
+ clib_perfmon_free (ctx);
+
+ vec_free (bundle_name);
+ return err;
+}
+
+__clib_export void
+clib_perfmon_free (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_clear (ctx);
+ vec_free (ctx->captures);
+ vec_free (ctx->capture_groups);
+
+ for (int i = 0; i < vec_len (ctx->fds); i++)
+ if (ctx->fds[i] > -1)
+ close (ctx->fds[i]);
+ vec_free (ctx->fds);
+ vec_free (ctx->data);
+}
+
+__clib_export void
+clib_perfmon_clear (clib_perfmon_ctx_t *ctx)
+{
+ for (int i = 0; i < vec_len (ctx->captures); i++)
+ vec_free (ctx->captures[i].desc);
+ vec_reset_length (ctx->captures);
+ for (int i = 0; i < vec_len (ctx->capture_groups); i++)
+ vec_free (ctx->capture_groups[i].name);
+ vec_reset_length (ctx->capture_groups);
+}
+
+__clib_export u64 *
+clib_perfmon_capture (clib_perfmon_ctx_t *ctx, u32 n_ops, char *fmt, ...)
+{
+ u32 read_size = (ctx->bundle->n_events + 3) * sizeof (u64);
+ clib_perfmon_capture_t *c;
+ u64 d[CLIB_PERFMON_MAX_EVENTS + 3];
+ va_list va;
+
+ if ((read (ctx->group_fd, d, read_size) != read_size))
+ {
+ if (ctx->debug)
+ fformat (stderr, "reading of %u bytes failed, %s (%d)\n", read_size,
+ strerror (errno), errno);
+ return 0;
+ }
+
+ if (ctx->debug)
+ {
+ fformat (stderr, "read events: %lu enabled: %lu running: %lu ", d[0],
+ d[1], d[2]);
+ fformat (stderr, "data: [%lu", d[3]);
+ for (int i = 1; i < ctx->bundle->n_events; i++)
+ fformat (stderr, ", %lu", d[i + 3]);
+ fformat (stderr, "]\n");
+ }
+
+ vec_add2 (ctx->captures, c, 1);
+
+ va_start (va, fmt);
+ c->desc = va_format (0, fmt, &va);
+ va_end (va);
+
+ c->n_ops = n_ops;
+ c->group = vec_len (ctx->capture_groups) - 1;
+ c->time_enabled = d[1];
+ c->time_running = d[2];
+ for (int i = 0; i < CLIB_PERFMON_MAX_EVENTS; i++)
+ c->data[i] = d[i + 3];
+
+ return ctx->data + vec_len (ctx->data) - ctx->bundle->n_events;
+}
+
+__clib_export void
+clib_perfmon_capture_group (clib_perfmon_ctx_t *ctx, char *fmt, ...)
+{
+ clib_perfmon_capture_group_t *cg;
+ va_list va;
+
+ cg = vec_end (ctx->capture_groups) - 1;
+
+ if (cg->name != 0)
+ vec_add2 (ctx->capture_groups, cg, 1);
+
+ va_start (va, fmt);
+ cg->name = va_format (0, fmt, &va);
+ va_end (va);
+ ASSERT (cg->name);
+}
+
+__clib_export void
+clib_perfmon_warmup (clib_perfmon_ctx_t *ctx)
+{
+ for (u64 i = 0; i < (u64) ctx->ref_clock; i++)
+ asm volatile("" : : "r"(i * i) : "memory");
+}
+
+__clib_export u8 *
+format_perfmon_bundle (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c;
+ clib_perfmon_capture_group_t *cg = 0;
+ char **hdr = ctx->bundle->column_headers;
+ table_t _t = {}, *t = &_t;
+ u32 n_row = 0, col = 0;
+
+ table_add_header_row (t, 0);
+
+ for (char **h = ctx->bundle->column_headers; h[0]; h++)
+ n_row++;
+
+ vec_foreach (c, ctx->captures)
+ {
+ if (cg != ctx->capture_groups + c->group)
+ {
+ cg = ctx->capture_groups + c->group;
+ table_format_cell (t, col, -1, "%v", cg->name);
+ table_set_cell_align (t, col, -1, TTAA_LEFT);
+ table_set_cell_fg_color (t, col, -1, TTAC_BRIGHT_RED);
+
+ table_format_cell (t, col, 0, "Ops");
+ table_set_cell_fg_color (t, col, 0, TTAC_BRIGHT_YELLOW);
+
+ for (int i = 0; i < n_row; i++)
+ {
+ table_format_cell (t, col, i + 1, "%s", hdr[i]);
+ table_set_cell_fg_color (t, col, i + 1, TTAC_BRIGHT_YELLOW);
+ }
+ col++;
+ }
+ table_format_cell (t, col, -1, "%v", c->desc);
+ table_format_cell (t, col, 0, "%7u", c->n_ops);
+ for (int i = 0; i < n_row; i++)
+ table_format_cell (t, col, i + 1, "%U", ctx->bundle->format_fn, ctx, c,
+ i);
+ col++;
+ }
+
+ s = format (s, "%U", format_table, t);
+ table_free (t);
+ return s;
+}
diff --git a/src/vppinfra/perfmon/perfmon.h b/src/vppinfra/perfmon/perfmon.h
new file mode 100644
index 00000000000..5b904a632e3
--- /dev/null
+++ b/src/vppinfra/perfmon/perfmon.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_perfmon_perfmon_h
+#define included_perfmon_perfmon_h
+
+#include <vppinfra/cpu.h>
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#endif
+
+#define CLIB_PERFMON_MAX_EVENTS 7
+typedef struct
+{
+ char *name;
+ char *desc;
+ u64 config[CLIB_PERFMON_MAX_EVENTS];
+ u32 type;
+ u8 n_events;
+ format_function_t *format_fn;
+ char **column_headers;
+} clib_perfmon_bundle_t;
+
+typedef struct
+{
+ u64 time_enabled;
+ u64 time_running;
+ u64 data[CLIB_PERFMON_MAX_EVENTS];
+ u8 *desc;
+ u32 n_ops;
+ u32 group;
+} clib_perfmon_capture_t;
+
+typedef struct
+{
+ u8 *name;
+ u32 start;
+} clib_perfmon_capture_group_t;
+
+typedef struct
+{
+ int group_fd;
+ int *fds;
+ clib_perfmon_bundle_t *bundle;
+ u64 *data;
+ u8 debug : 1;
+ u32 n_captures;
+ clib_perfmon_capture_t *captures;
+ clib_perfmon_capture_group_t *capture_groups;
+ f64 ref_clock;
+} clib_perfmon_ctx_t;
+
+typedef struct clib_perfmon_bundle_reg
+{
+ clib_perfmon_bundle_t *bundle;
+ struct clib_perfmon_bundle_reg *next;
+} clib_perfmon_bundle_reg_t;
+
+typedef struct
+{
+ clib_perfmon_bundle_reg_t *bundle_regs;
+} clib_perfmon_main_t;
+
+extern clib_perfmon_main_t clib_perfmon_main;
+
+static_always_inline void
+clib_perfmon_ioctl (int fd, u32 req)
+{
+#ifdef __linux__
+#ifdef __x86_64__
+ asm volatile("syscall"
+ :
+ : "D"(fd), "S"(req), "a"(__NR_ioctl), "d"(PERF_IOC_FLAG_GROUP)
+ : "rcx", "r11" /* registers modified by kernel */);
+#else
+ ioctl (fd, req, PERF_IOC_FLAG_GROUP);
+#endif
+#endif /* linux */
+}
+
+clib_error_t *clib_perfmon_init_by_bundle_name (clib_perfmon_ctx_t *ctx,
+ char *fmt, ...);
+void clib_perfmon_free (clib_perfmon_ctx_t *ctx);
+void clib_perfmon_warmup (clib_perfmon_ctx_t *ctx);
+void clib_perfmon_clear (clib_perfmon_ctx_t *ctx);
+u64 *clib_perfmon_capture (clib_perfmon_ctx_t *ctx, u32 n_ops, char *fmt, ...);
+void clib_perfmon_capture_group (clib_perfmon_ctx_t *ctx, char *fmt, ...);
+format_function_t format_perfmon_bundle;
+
+#ifdef __linux__
+static_always_inline void
+clib_perfmon_reset (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_RESET);
+}
+static_always_inline void
+clib_perfmon_enable (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_ENABLE);
+}
+static_always_inline void
+clib_perfmon_disable (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_DISABLE);
+}
+#elif __FreeBSD__
+static_always_inline void
+clib_perfmon_reset (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+clib_perfmon_enable (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+clib_perfmon_disable (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+#endif /* linux */
+
+#define CLIB_PERFMON_BUNDLE(x) \
+ static clib_perfmon_bundle_reg_t clib_perfmon_bundle_reg_##x; \
+ static clib_perfmon_bundle_t clib_perfmon_bundle_##x; \
+ static void __clib_constructor clib_perfmon_bundle_reg_fn_##x (void) \
+ { \
+ clib_perfmon_bundle_reg_##x.bundle = &clib_perfmon_bundle_##x; \
+ clib_perfmon_bundle_reg_##x.next = clib_perfmon_main.bundle_regs; \
+ clib_perfmon_main.bundle_regs = &clib_perfmon_bundle_reg_##x; \
+ } \
+ static clib_perfmon_bundle_t clib_perfmon_bundle_##x
+
+#endif
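
A hedged usage sketch for the bundle API above (Linux only; my_workload ()
stands in for whatever code is being measured):

#include <vppinfra/perfmon/perfmon.h>

static clib_error_t *
measure_my_workload (u32 n_ops)
{
  clib_perfmon_ctx_t ctx;
  clib_error_t *err;

  if ((err = clib_perfmon_init_by_bundle_name (&ctx, "default")))
    return err;

  clib_perfmon_warmup (&ctx);
  clib_perfmon_capture_group (&ctx, "my workload");

  clib_perfmon_reset (&ctx);
  clib_perfmon_enable (&ctx);
  my_workload (n_ops); /* hypothetical function under test */
  clib_perfmon_disable (&ctx);
  clib_perfmon_capture (&ctx, n_ops, "%u ops", n_ops);

  fformat (stdout, "%U\n", format_perfmon_bundle, &ctx);
  clib_perfmon_free (&ctx);
  return 0;
}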
diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c
index a0b1d1f1104..85b9db9d56c 100644
--- a/src/vppinfra/pmalloc.c
+++ b/src/vppinfra/pmalloc.c
@@ -17,12 +17,17 @@
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
+#ifdef __FreeBSD__
+#include <sys/memrange.h>
+#endif /* __FreeBSD__ */
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <vppinfra/format.h>
+#ifdef __linux__
#include <vppinfra/linux/sysfs.h>
+#endif
#include <vppinfra/mem.h>
#include <vppinfra/hash.h>
#include <vppinfra/pmalloc.h>
@@ -182,8 +187,9 @@ next_chunk:
}
static void
-pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
+pmalloc_update_lookup_table (clib_pmalloc_main_t *pm, u32 first, u32 count)
{
+#ifdef __linux__
uword seek, va, pa, p;
int fd;
u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
@@ -221,6 +227,45 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
if (fd != -1)
close (fd);
+#elif defined(__FreeBSD__)
+ struct mem_extract meme;
+ uword p;
+ int fd;
+ u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
+
+ vec_validate_aligned (pm->lookup_table,
+ vec_len (pm->pages) * elts_per_page - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ p = (uword) first * elts_per_page;
+ if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP)
+ {
+ while (p < (uword) elts_per_page * count)
+ {
+ pm->lookup_table[p] =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ p++;
+ }
+ return;
+ }
+
+ fd = open ((char *) "/dev/mem", O_RDONLY);
+ if (fd == -1)
+ return;
+
+ while (p < (uword) elts_per_page * count)
+ {
+ meme.me_vaddr =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1)
+ continue;
+ pm->lookup_table[p] = meme.me_vaddr - meme.me_paddr;
+ p++;
+ }
+ return;
+#else
+#error "Unsupported OS"
+#endif
}
static inline clib_pmalloc_page_t *
@@ -241,6 +286,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
return 0;
}
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
@@ -249,6 +295,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
if (pm->error)
return 0;
}
+#endif /* __linux__ */
rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1);
if (rv == CLIB_MEM_ERROR && numa_node != 0)
@@ -271,8 +318,10 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
}
else
{
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
mmap_flags |= MAP_HUGETLB;
+#endif /* __linux__ */
mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
a->fd = -1;
@@ -476,8 +525,8 @@ clib_pmalloc_alloc_aligned_on_numa (clib_pmalloc_main_t * pm, uword size,
return clib_pmalloc_alloc_inline (pm, 0, size, align, numa_node);
}
-void *
-clib_pmalloc_alloc_aligned (clib_pmalloc_main_t * pm, uword size, uword align)
+__clib_export void *
+clib_pmalloc_alloc_aligned (clib_pmalloc_main_t *pm, uword size, uword align)
{
return clib_pmalloc_alloc_inline (pm, 0, size, align,
CLIB_PMALLOC_NUMA_LOCAL);
@@ -627,7 +676,6 @@ format_pmalloc (u8 * s, va_list * va)
format_clib_error, pm->error);
- /* *INDENT-OFF* */
pool_foreach (a, pm->arenas)
{
u32 *page_index;
@@ -645,7 +693,6 @@ format_pmalloc (u8 * s, va_list * va)
format_pmalloc_page, pp, verbose);
}
}
- /* *INDENT-ON* */
return s;
}
diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c
index 78361b5457e..1f3b96f0f0a 100644
--- a/src/vppinfra/pool.c
+++ b/src/vppinfra/pool.c
@@ -38,93 +38,39 @@
#include <vppinfra/pool.h>
__clib_export void
-_pool_init_fixed (void **pool_ptr, u32 elt_size, u32 max_elts)
+_pool_init_fixed (void **pool_ptr, uword elt_size, uword max_elts, uword align)
{
- u8 *mmap_base;
- u64 vector_size;
- u64 free_index_size;
- u64 total_size;
- u64 page_size;
- pool_header_t *fh;
- vec_header_t *vh;
+ uword *b;
+ pool_header_t *ph;
u8 *v;
- u32 *fi;
u32 i;
- u32 set_bits;
+ vec_attr_t va = { .elt_sz = elt_size,
+ .align = align,
+ .hdr_sz = sizeof (pool_header_t) };
ASSERT (elt_size);
ASSERT (max_elts);
- vector_size = pool_aligned_header_bytes + (u64) elt_size *max_elts;
- free_index_size = vec_header_bytes (0) + sizeof (u32) * max_elts;
+ v = _vec_alloc_internal (max_elts, &va);
- /* Round up to a cache line boundary */
- vector_size = (vector_size + CLIB_CACHE_LINE_BYTES - 1)
- & ~(CLIB_CACHE_LINE_BYTES - 1);
-
- free_index_size = (free_index_size + CLIB_CACHE_LINE_BYTES - 1)
- & ~(CLIB_CACHE_LINE_BYTES - 1);
-
- total_size = vector_size + free_index_size;
-
- /* Round up to an even number of pages */
- page_size = clib_mem_get_page_size ();
- total_size = (total_size + page_size - 1) & ~(page_size - 1);
-
- /* mmap demand zero memory */
-
- mmap_base = mmap (0, total_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
- if (mmap_base == MAP_FAILED)
- {
- clib_unix_warning ("mmap");
- *pool_ptr = 0;
- }
-
- /* First comes the pool header */
- fh = (pool_header_t *) mmap_base;
- /* Find the user vector pointer */
- v = (u8 *) (mmap_base + pool_aligned_header_bytes);
- /* Finally, the vector header */
- vh = _vec_find (v);
-
- fh->free_bitmap = 0; /* No free elts (yet) */
- fh->max_elts = max_elts;
- fh->mmap_base = mmap_base;
- fh->mmap_size = total_size;
-
- vh->len = max_elts;
+ ph = pool_header (v);
+ ph->max_elts = max_elts;
/* Build the free-index vector */
- vh = (vec_header_t *) (v + vector_size);
- vh->len = max_elts;
- fi = (u32 *) (vh + 1);
-
- fh->free_indices = fi;
+ vec_validate_aligned (ph->free_indices, max_elts - 1, CLIB_CACHE_LINE_BYTES);
+ for (i = 0; i < max_elts; i++)
+ ph->free_indices[i] = (max_elts - 1) - i;
/* Set the entire free bitmap */
- clib_bitmap_alloc (fh->free_bitmap, max_elts);
- clib_memset (fh->free_bitmap, 0xff,
- vec_len (fh->free_bitmap) * sizeof (uword));
-
- /* Clear any extraneous set bits */
- set_bits = vec_len (fh->free_bitmap) * BITS (uword);
+ clib_bitmap_alloc (ph->free_bitmap, max_elts);
- for (i = max_elts; i < set_bits; i++)
- fh->free_bitmap = clib_bitmap_set (fh->free_bitmap, i, 0);
+ for (b = ph->free_bitmap, i = max_elts; i >= uword_bits;
+ i -= uword_bits, b++)
+ b[0] = ~0ULL;
- /* Create the initial free vector */
- for (i = 0; i < max_elts; i++)
- fi[i] = (max_elts - 1) - i;
+ if (i)
+ b[0] = pow2_mask (i);
*pool_ptr = v;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
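
A small sketch of the reworked fixed-size pool initializer (alignment and
element size are now derived from the pool pointer by the macro):

#include <vppinfra/pool.h>

typedef struct { u64 a, b; } my_elt_t;

static my_elt_t *
make_fixed_pool (void)
{
  my_elt_t *pool = 0;

  /* preallocates 1024 elements; all start on the free list and the pool
     can never grow past max_elts */
  pool_init_fixed (pool, 1024);
  return pool;
}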
diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h
index 6f16e617cd2..07c9269c6d8 100644
--- a/src/vppinfra/pool.h
+++ b/src/vppinfra/pool.h
@@ -61,31 +61,21 @@ typedef struct
/** Maximum size of the pool, in elements */
u32 max_elts;
- /** mmap segment info: base + length */
- u8 *mmap_base;
- u64 mmap_size;
-
} pool_header_t;
-/** Align pool header so that pointers are naturally aligned. */
-#define pool_aligned_header_bytes \
- vec_aligned_header_bytes (sizeof (pool_header_t), sizeof (void *))
-
/** Get pool header from user pool pointer */
always_inline pool_header_t *
pool_header (void *v)
{
- return vec_aligned_header (v, sizeof (pool_header_t), sizeof (void *));
+ return vec_header (v);
}
-extern void _pool_init_fixed (void **, u32, u32);
-extern void fpool_free (void *);
+void _pool_init_fixed (void **pool_ptr, uword elt_sz, uword max_elts,
+ uword align);
/** initialize a fixed-size, preallocated pool */
-#define pool_init_fixed(pool,max_elts) \
-{ \
- _pool_init_fixed((void **)&(pool),sizeof(pool[0]),max_elts); \
-}
+#define pool_init_fixed(P, E) \
+ _pool_init_fixed ((void **) &(P), _vec_elt_sz (P), E, _vec_align (P, 0));
/** Validate a pool */
always_inline void
@@ -103,23 +93,6 @@ pool_validate (void *v)
ASSERT (clib_bitmap_get (p->free_bitmap, p->free_indices[i]) == 1);
}
-always_inline void
-pool_header_validate_index (void *v, uword index)
-{
- pool_header_t *p = pool_header (v);
-
- if (v)
- vec_validate (p->free_bitmap, index / BITS (uword));
-}
-
-#define pool_validate_index(v,i) \
-do { \
- uword __pool_validate_index = (i); \
- vec_validate_ha ((v), __pool_validate_index, \
- pool_aligned_header_bytes, /* align */ 0); \
- pool_header_validate_index ((v), __pool_validate_index); \
-} while (0)
-
/** Number of active elements in a pool.
* @return Number of active elements in a pool
*/
@@ -162,88 +135,87 @@ pool_header_bytes (void *v)
/** Local variable naming macro. */
#define _pool_var(v) _pool_##v
-/** Queries whether pool has at least N_FREE free elements. */
-always_inline uword
-pool_free_elts (void *v)
+/** Number of elements that can fit into pool with current allocation */
+#define pool_max_len(P) vec_max_len (P)
+
+/** Number of free elements in pool */
+static_always_inline uword
+_pool_free_elts (void *p, uword elt_sz)
{
- pool_header_t *p = pool_header (v);
- uword n_free = 0;
+ pool_header_t *ph;
+ uword n_free;
- if (v)
- {
- n_free += vec_len (p->free_indices);
-
- /*
- * Space left at end of vector?
- * Fixed-size pools have max_elts set non-zero,
- */
- if (p->max_elts == 0)
- n_free += vec_capacity (v, sizeof (p[0])) - vec_len (v);
- }
+ if (p == 0)
+ return 0;
+
+ ph = pool_header (p);
+
+ n_free = vec_len (ph->free_indices);
+
+ /* Fixed-size pools have max_elts set non-zero */
+ if (ph->max_elts == 0)
+ n_free += _vec_max_len (p, elt_sz) - vec_len (p);
return n_free;
}
+#define pool_free_elts(P) _pool_free_elts ((void *) (P), _vec_elt_sz (P))
+
/** Allocate an object E from a pool P (general version).
First search free list. If nothing is free extend vector of objects.
*/
-#define _pool_get_aligned_internal_numa(P,E,A,Z,N) \
-do { \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (l); \
- \
- STATIC_ASSERT(A==0 || ((A % sizeof(P[0]))==0) \
- || ((sizeof(P[0]) % A) == 0), \
- "Pool aligned alloc of incorrectly sized object"); \
- _pool_var (l) = 0; \
- if (P) \
- _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
- \
- if (_pool_var (l) > 0) \
- { \
- /* Return free element from free list. */ \
- uword _pool_var (i) = \
- _pool_var (p)->free_indices[_pool_var (l) - 1]; \
- (E) = (P) + _pool_var (i); \
- _pool_var (p)->free_bitmap = \
- clib_bitmap_andnoti_notrim (_pool_var (p)->free_bitmap, \
- _pool_var (i)); \
- _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \
- CLIB_MEM_UNPOISON((E), sizeof((E)[0])); \
- } \
- else \
- { \
- /* fixed-size, preallocated pools cannot expand */ \
- if ((P) && _pool_var(p)->max_elts) \
- { \
- clib_warning ("can't expand fixed-size pool"); \
- os_out_of_memory(); \
- } \
- /* Nothing on free list, make a new element and return it. */ \
- P = _vec_resize_numa (P, \
- /* length_increment */ 1, \
- /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- /* align */ (A), \
- /* numa */ (N)); \
- E = vec_end (P) - 1; \
- } \
- if (Z) \
- memset(E, 0, sizeof(*E)); \
-} while (0)
-#define pool_get_aligned_zero_numa(P,E,A,Z,S) \
- _pool_get_aligned_internal_numa(P,E,A,Z,S)
+static_always_inline void
+_pool_get (void **pp, void **ep, uword align, int zero, uword elt_sz)
+{
+ uword len = 0;
+ void *p = pp[0];
+ void *e;
+ vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align };
+
+ if (p)
+ {
+ pool_header_t *ph = pool_header (p);
+ uword n_free = vec_len (ph->free_indices);
+
+ if (n_free)
+ {
+ uword index = ph->free_indices[n_free - 1];
+ e = p + index * elt_sz;
+ ph->free_bitmap =
+ clib_bitmap_andnoti_notrim (ph->free_bitmap, index);
+ vec_set_len (ph->free_indices, n_free - 1);
+ clib_mem_unpoison (e, elt_sz);
+ goto done;
+ }
+
+ if (ph->max_elts)
+ {
+ clib_warning ("can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+ }
-#define pool_get_aligned_numa(P,E,A,S) \
- _pool_get_aligned_internal_numa(P,E,A,0/*zero*/,S)
+ len = vec_len (p);
-#define pool_get_numa(P,E,S) \
- _pool_get_aligned_internal_numa(P,E,0/*align*/,0/*zero*/,S)
+ /* Nothing on free list, make a new element and return it. */
+ p = _vec_realloc_internal (p, len + 1, &va);
+ e = p + len * elt_sz;
-#define _pool_get_aligned_internal(P,E,A,Z) \
- _pool_get_aligned_internal_numa(P,E,A,Z,VEC_NUMA_UNSPECIFIED)
+ _vec_update_pointer (pp, p);
+
+done:
+ ep[0] = e;
+ if (zero)
+ clib_memset_u8 (e, 0, elt_sz);
+}
+
+#define _pool_get_aligned_internal(P, E, A, Z) \
+ _pool_get ((void **) &(P), (void **) &(E), _vec_align (P, A), Z, \
+ _vec_elt_sz (P))
/** Allocate an object E from a pool P with alignment A */
#define pool_get_aligned(P,E,A) _pool_get_aligned_internal(P,E,A,0)
@@ -257,114 +229,162 @@ do { \
/** Allocate an object E from a pool P and zero it */
#define pool_get_zero(P,E) pool_get_aligned_zero(P,E,0)
-/** See if pool_get will expand the pool or not */
-#define pool_get_aligned_will_expand(P,YESNO,A) \
-do { \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (l); \
- \
- _pool_var (l) = 0; \
- if (P) \
- { \
- if (_pool_var (p)->max_elts) \
- _pool_var (l) = _pool_var (p)->max_elts; \
- else \
- _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
- } \
- \
- /* Free elements, certainly won't expand */ \
- if (_pool_var (l) > 0) \
- YESNO=0; \
- else \
- { \
- /* Nothing on free list, make a new element and return it. */ \
- YESNO = _vec_resize_will_expand \
- (P, \
- /* length_increment */ 1, \
- /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- /* align */ (A)); \
- } \
-} while (0)
+always_inline int
+_pool_get_will_expand (void *p, uword elt_sz)
+{
+ pool_header_t *ph;
+ uword len;
+
+ if (p == 0)
+ return 1;
+
+ ph = pool_header (p);
+
+ if (ph->max_elts)
+ len = ph->max_elts;
+ else
+ len = vec_len (ph->free_indices);
+
+ /* Free elements, certainly won't expand */
+ if (len > 0)
+ return 0;
+
+ return _vec_resize_will_expand (p, 1, elt_sz);
+}
+
+#define pool_get_will_expand(P) _pool_get_will_expand (P, sizeof ((P)[0]))
+
+always_inline int
+_pool_put_will_expand (void *p, uword index, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (p);
+
+ if (clib_bitmap_will_expand (ph->free_bitmap, index))
+ return 1;
-/** Tell the caller if pool get will expand the pool */
-#define pool_get_will_expand(P,YESNO) pool_get_aligned_will_expand(P,YESNO,0)
+ if (vec_resize_will_expand (ph->free_indices, 1))
+ return 1;
+
+ return 0;
+}
+
+#define pool_put_will_expand(P, E) \
+ _pool_put_will_expand (P, (E) - (P), sizeof ((P)[0]))
/** Use free bitmap to query whether given element is free. */
-#define pool_is_free(P,E) \
-({ \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (i) = (E) - (P); \
- (_pool_var (i) < vec_len (P)) ? clib_bitmap_get (_pool_var (p)->free_bitmap, _pool_i) : 1; \
-})
+static_always_inline int
+pool_is_free_index (void *p, uword index)
+{
+ pool_header_t *ph = pool_header (p);
+ return index < vec_len (p) ? clib_bitmap_get (ph->free_bitmap, index) : 1;
+}
-/** Use free bitmap to query whether given index is free */
-#define pool_is_free_index(P,I) pool_is_free((P),(P)+(I))
+#define pool_is_free(P, E) pool_is_free_index ((void *) (P), (E) - (P))
/** Free an object E in pool P. */
-#define pool_put(P, E) \
- do \
- { \
- typeof (P) _pool_var (p__) = (P); \
- typeof (E) _pool_var (e__) = (E); \
- pool_header_t *_pool_var (p) = pool_header (_pool_var (p__)); \
- uword _pool_var (l) = _pool_var (e__) - _pool_var (p__); \
- if (_pool_var (p)->max_elts == 0) \
- ASSERT (vec_is_member (_pool_var (p__), _pool_var (e__))); \
- ASSERT (!pool_is_free (_pool_var (p__), _pool_var (e__))); \
- \
- /* Add element to free bitmap and to free list. */ \
- _pool_var (p)->free_bitmap = \
- clib_bitmap_ori_notrim (_pool_var (p)->free_bitmap, _pool_var (l)); \
- \
- /* Preallocated pool? */ \
- if (_pool_var (p)->max_elts) \
- { \
- ASSERT (_pool_var (l) < _pool_var (p)->max_elts); \
- _pool_var (p) \
- ->free_indices[_vec_len (_pool_var (p)->free_indices)] = \
- _pool_var (l); \
- _vec_len (_pool_var (p)->free_indices) += 1; \
- } \
- else \
- vec_add1 (_pool_var (p)->free_indices, _pool_var (l)); \
- \
- CLIB_MEM_POISON (_pool_var (e__), sizeof (_pool_var (e__)[0])); \
- } \
- while (0)
-
-/** Free pool element with given index. */
-#define pool_put_index(p,i) \
-do { \
- typeof (p) _e = (p) + (i); \
- pool_put (p, _e); \
-} while (0)
+static_always_inline void
+_pool_put_index (void *p, uword index, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (p);
+
+ ASSERT (index < (ph->max_elts ? ph->max_elts : vec_len (p)));
+ ASSERT (!pool_is_free_index (p, index));
+
+ /* Add element to free bitmap and to free list. */
+ ph->free_bitmap = clib_bitmap_ori_notrim (ph->free_bitmap, index);
+
+ /* Preallocated pool? */
+ if (ph->max_elts)
+ {
+ u32 len = _vec_len (ph->free_indices);
+ vec_set_len (ph->free_indices, len + 1);
+ ph->free_indices[len] = index;
+ }
+ else
+ vec_add1 (ph->free_indices, index);
+
+ clib_mem_poison (p + index * elt_sz, elt_sz);
+}
+
+#define pool_put_index(P, I) _pool_put_index ((void *) (P), I, _vec_elt_sz (P))
+#define pool_put(P, E) pool_put_index (P, (E) - (P))
/** Allocate N more free elements to pool (general version). */
-#define pool_alloc_aligned(P,N,A) \
-do { \
- pool_header_t * _p; \
- \
- if ((P)) \
- { \
- _p = pool_header (P); \
- if (_p->max_elts) \
- { \
- clib_warning ("Can't expand fixed-size pool"); \
- os_out_of_memory(); \
- } \
- } \
- \
- (P) = _vec_resize ((P), 0, (vec_len (P) + (N)) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- (A)); \
- _p = pool_header (P); \
- vec_resize (_p->free_indices, (N)); \
- _vec_len (_p->free_indices) -= (N); \
-} while (0)
-/** Allocate N more free elements to pool (unspecified alignment). */
-#define pool_alloc(P,N) pool_alloc_aligned(P,N,0)
+static_always_inline void
+_pool_alloc (void **pp, uword n_elts, uword align, void *heap, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (pp[0]);
+ uword len = vec_len (pp[0]);
+ const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align,
+ .heap = heap };
+
+ if (ph && ph->max_elts)
+ {
+ clib_warning ("Can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+
+ pp[0] = _vec_resize_internal (pp[0], len + n_elts, &va);
+ _vec_set_len (pp[0], len, elt_sz);
+ clib_mem_poison (pp[0] + len * elt_sz, n_elts * elt_sz);
+
+ ph = pool_header (pp[0]);
+ vec_resize (ph->free_indices, n_elts);
+ vec_dec_len (ph->free_indices, n_elts);
+ clib_bitmap_validate (ph->free_bitmap, (len + n_elts) ?: 1);
+}
+
+#define pool_alloc_aligned_heap(P, N, A, H) \
+ _pool_alloc ((void **) &(P), N, _vec_align (P, A), H, _vec_elt_sz (P))
+
+#define pool_alloc_heap(P, N, H) pool_alloc_aligned_heap (P, N, 0, H)
+#define pool_alloc_aligned(P, N, A) pool_alloc_aligned_heap (P, N, A, 0)
+#define pool_alloc(P, N) pool_alloc_aligned_heap (P, N, 0, 0)
+
+static_always_inline void *
+_pool_dup (void *p, uword align, uword elt_sz)
+{
+ pool_header_t *nph, *ph = pool_header (p);
+ uword len = vec_len (p);
+ const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align };
+ void *n;
+
+ if (ph && ph->max_elts)
+ {
+ clib_warning ("Can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+
+ n = _vec_alloc_internal (len, &va);
+ nph = pool_header (n);
+ clib_memset_u8 (nph, 0, sizeof (vec_header_t));
+
+ if (len)
+ {
+ u32 *fi;
+ vec_foreach (fi, ph->free_indices)
+ clib_mem_unpoison (p + elt_sz * fi[0], elt_sz);
+
+ clib_memcpy_fast (n, p, len * elt_sz);
+
+ nph->free_bitmap = clib_bitmap_dup (ph->free_bitmap);
+ nph->free_indices = vec_dup (ph->free_indices);
+
+ vec_foreach (fi, ph->free_indices)
+ {
+ uword offset = elt_sz * fi[0];
+ clib_mem_poison (p + offset, elt_sz);
+ clib_mem_poison (n + offset, elt_sz);
+ }
+ }
+
+ return n;
+}
/**
* Return copy of pool with alignment
@@ -373,28 +393,9 @@ do { \
* @param A alignment (may be zero)
* @return copy of pool
*/
-#define pool_dup_aligned(P,A) \
-({ \
- typeof (P) _pool_var (new) = 0; \
- pool_header_t * _pool_var (ph), * _pool_var (new_ph); \
- u32 _pool_var (n) = pool_len (P); \
- if ((P)) \
- { \
- _pool_var (new) = _vec_resize (_pool_var (new), _pool_var (n), \
- _pool_var (n) * sizeof ((P)[0]), \
- pool_aligned_header_bytes, (A)); \
- clib_memcpy_fast (_pool_var (new), (P), \
- _pool_var (n) * sizeof ((P)[0])); \
- _pool_var (ph) = pool_header (P); \
- _pool_var (new_ph) = pool_header (_pool_var (new)); \
- _pool_var (new_ph)->free_bitmap = \
- clib_bitmap_dup (_pool_var (ph)->free_bitmap); \
- _pool_var (new_ph)->free_indices = \
- vec_dup (_pool_var (ph)->free_indices); \
- _pool_var (new_ph)->max_elts = _pool_var (ph)->max_elts; \
- } \
- _pool_var (new); \
-})
+
+#define pool_dup_aligned(P, A) \
+ _pool_dup (P, _vec_align (P, A), _vec_elt_sz (P))
/**
* Return copy of pool without alignment
@@ -405,29 +406,19 @@ do { \
#define pool_dup(P) pool_dup_aligned(P,0)
/** Low-level free pool operator (do not call directly). */
-always_inline void *
-_pool_free (void *v)
+always_inline void
+_pool_free (void **v)
{
- pool_header_t *p = pool_header (v);
- if (!v)
- return v;
- clib_bitmap_free (p->free_bitmap);
+ pool_header_t *p = pool_header (v[0]);
+ if (!p)
+ return;
- if (p->max_elts)
- {
- int rv;
+ clib_bitmap_free (p->free_bitmap);
- rv = munmap (p->mmap_base, p->mmap_size);
- if (rv)
- clib_unix_warning ("munmap");
- }
- else
- {
- vec_free (p->free_indices);
- vec_free_h (v, pool_aligned_header_bytes);
- }
- return 0;
+ vec_free (p->free_indices);
+ _vec_free (v);
}
+#define pool_free(p) _pool_free ((void **) &(p))
static_always_inline uword
pool_get_first_index (void *pool)
@@ -443,9 +434,6 @@ pool_get_next_index (void *pool, uword last)
return clib_bitmap_next_clear (h->free_bitmap, last + 1);
}
-/** Free a pool. */
-#define pool_free(p) (p) = _pool_free(p)
-
/** Optimized iteration through pool.
@param LO pointer to first element in chunk
@@ -569,11 +557,25 @@ do { \
_pool_var(rv); \
})
-#define pool_foreach_index(i,v) \
- if (v) \
- for (i = pool_get_first_index (v); \
- i < vec_len (v); \
- i = pool_get_next_index (v, i)) \
+#define pool_foreach_index(i, v) \
+ if (v) \
+ for (i = pool_get_first_index (v); i < vec_len (v); \
+ i = pool_get_next_index (v, i))
+
+/* Iterate pool by index from s to e */
+#define pool_foreach_stepping_index(i, s, e, v) \
+ for ((i) = \
+ (pool_is_free_index ((v), (s)) ? pool_get_next_index ((v), (s)) : \
+ (s)); \
+ (i) < (e); (i) = pool_get_next_index ((v), (i)))
+
+/* works only for pools of pointers; e is declared inside the macro */
+#define pool_foreach_pointer(e, p) \
+ if (p) \
+ for (typeof ((p)[0]) *_t = (p) + pool_get_first_index (p), (e) = *_t, \
+ *_end = vec_end (p); \
+ _t < _end; _t = (p) + pool_get_next_index (p, _t - (p)), \
+ (e) = _t < _end ? *_t : (e))
/**
* @brief Remove all elements from a pool in a safe way
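
A sketch of the new iteration helpers (pool_foreach_pointer only works on
pools of pointers and declares e itself; obj_t is illustrative):

#include <vppinfra/pool.h>

typedef struct { u32 id; } obj_t;

static void
walk_pools (obj_t *op, obj_t **pp)
{
  u32 i;

  pool_foreach_index (i, op)
    op[i].id = i; /* visits only occupied indices */

  pool_foreach_pointer (e, pp)
    e->id += 1; /* e is each stored obj_t * in turn */
}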
diff --git a/src/vppinfra/random_buffer.h b/src/vppinfra/random_buffer.h
index 320394d1862..12343c10535 100644
--- a/src/vppinfra/random_buffer.h
+++ b/src/vppinfra/random_buffer.h
@@ -42,9 +42,7 @@
#include <vppinfra/random_isaac.h>
#include <vppinfra/warnings.h>
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
typedef struct
{
@@ -54,6 +52,9 @@ typedef struct
/* Random buffer. */
uword *buffer;
+ /* An actual length to be applied before using the buffer. */
+ uword next_read_len;
+
/* Cache up to 1 word worth of bytes for random data
less than one word at a time. */
uword n_cached_bytes;
@@ -84,11 +85,16 @@ clib_random_buffer_get_data (clib_random_buffer_t * b, uword n_bytes)
{
uword n_words, i, l;
+ if (b->buffer)
+ vec_set_len (b->buffer, b->next_read_len);
+ else
+ ASSERT (b->next_read_len == 0);
+
l = b->n_cached_bytes;
if (n_bytes <= l)
{
b->n_cached_bytes = l - n_bytes;
- return &b->cached_bytes[l];
+ return &b->cached_bytes[l - n_bytes];
}
n_words = n_bytes / sizeof (uword);
@@ -100,21 +106,19 @@ clib_random_buffer_get_data (clib_random_buffer_t * b, uword n_bytes)
clib_random_buffer_fill (b, n_words);
i = vec_len (b->buffer) - n_words;
- _vec_len (b->buffer) = i;
+ b->next_read_len = i;
if (n_bytes < sizeof (uword))
{
b->cached_word = b->buffer[i];
b->n_cached_bytes = sizeof (uword) - n_bytes;
- return b->cached_bytes;
+ return &b->cached_bytes[sizeof (uword) - n_bytes];
}
else
return b->buffer + i;
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
#endif /* included_clib_random_buffer_h */
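
A brief sketch of the corrected read path (consecutive sub-word reads now
return distinct slices of the cached word, and the buffer is trimmed
lazily via next_read_len on the following call):

#include <vppinfra/random_buffer.h>

static void
random_bytes_example (void)
{
  clib_random_buffer_t b;
  u8 *p;

  clib_random_buffer_init (&b, /* seed */ 0);
  p = clib_random_buffer_get_data (&b, 3); /* 3 fresh random bytes */
  p = clib_random_buffer_get_data (&b, 3); /* next 3 cached bytes, no overlap */
  (void) p;
}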
diff --git a/src/vppinfra/ring.h b/src/vppinfra/ring.h
index be61dc44978..ae25e40b5c0 100644
--- a/src/vppinfra/ring.h
+++ b/src/vppinfra/ring.h
@@ -29,24 +29,28 @@ typedef struct
always_inline clib_ring_header_t *
clib_ring_header (void *v)
{
- return vec_aligned_header (v, sizeof (clib_ring_header_t), sizeof (void *));
+ return vec_header (v);
+}
+
+always_inline void
+clib_ring_reset (void *v)
+{
+ clib_ring_header_t *h = clib_ring_header (v);
+ h->next = 0;
+ h->n_enq = 0;
}
always_inline void
clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align)
{
void *v;
- clib_ring_header_t *h;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (clib_ring_header_t),
+ .align = align };
- v = _vec_resize ((void *) 0,
- /* length increment */ size,
- /* data bytes */ elt_bytes * size,
- /* header bytes */ sizeof (h[0]),
- /* data align */ align);
+ v = _vec_alloc_internal (size, &va);
- h = clib_ring_header (v);
- h->next = 0;
- h->n_enq = 0;
+ clib_ring_reset (v);
p[0] = v;
}
@@ -56,7 +60,7 @@ clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align)
#define clib_ring_new(ring, size) \
{ clib_ring_new_inline ((void **)&(ring), sizeof(ring[0]), size, 0);}
-#define clib_ring_free(f) vec_free_h((f), sizeof(clib_ring_header_t))
+#define clib_ring_free(f) vec_free ((f))
always_inline u32
clib_ring_n_enq (void *v)
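
A minimal sketch of the ring lifecycle with the new reset helper (only
calls shown in this patch are used):

#include <vppinfra/ring.h>

static void
ring_lifecycle (void)
{
  u32 *ring;

  clib_ring_new (ring, 64); /* 64 u32 slots, header lives in the vec header */
  /* ... enqueue/dequeue as usual ... */
  clib_ring_reset (ring);   /* next and n_enq back to zero */
  clib_ring_free (ring);    /* now just vec_free under the hood */
}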
diff --git a/src/vppinfra/sanitizer.c b/src/vppinfra/sanitizer.c
deleted file mode 100644
index fab1cdca136..00000000000
--- a/src/vppinfra/sanitizer.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifdef CLIB_SANITIZE_ADDR
-
-#include <vppinfra/sanitizer.h>
-
-__clib_export clib_sanitizer_main_t sanitizer_main = { .shadow_scale = ~0 };
-
-#endif /* CLIB_SANITIZE_ADDR */
diff --git a/src/vppinfra/sanitizer.h b/src/vppinfra/sanitizer.h
deleted file mode 100644
index 658d8281230..00000000000
--- a/src/vppinfra/sanitizer.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef _included_clib_sanitizer_h
-#define _included_clib_sanitizer_h
-
-#ifdef CLIB_SANITIZE_ADDR
-
-#include <sanitizer/asan_interface.h>
-#include <vppinfra/clib.h>
-#include <vppinfra/error_bootstrap.h>
-
-typedef struct
-{
- size_t shadow_scale;
- size_t shadow_offset;
-} clib_sanitizer_main_t;
-
-extern clib_sanitizer_main_t sanitizer_main;
-
-#define CLIB_NOSANITIZE_ADDR __attribute__((no_sanitize_address))
-#define CLIB_MEM_POISON(a, s) ASAN_POISON_MEMORY_REGION((a), (s))
-#define CLIB_MEM_UNPOISON(a, s) ASAN_UNPOISON_MEMORY_REGION((a), (s))
-
-#define CLIB_MEM_OVERFLOW_MAX 64
-
-static_always_inline void
-sanitizer_unpoison__ (u64 *restrict *shadow_ptr, size_t *shadow_len,
- const void *ptr, size_t len)
-{
- size_t scale, off;
-
- if (PREDICT_FALSE (~0 == sanitizer_main.shadow_scale))
- __asan_get_shadow_mapping (&sanitizer_main.shadow_scale,
- &sanitizer_main.shadow_offset);
-
- scale = sanitizer_main.shadow_scale;
- off = sanitizer_main.shadow_offset;
-
- /* compute the shadow address and length */
- *shadow_len = len >> scale;
- ASSERT (*shadow_len <= CLIB_MEM_OVERFLOW_MAX);
- *shadow_ptr = (void *) (((clib_address_t) ptr >> scale) + off);
-}
-
-static_always_inline CLIB_NOSANITIZE_ADDR void
-sanitizer_unpoison_push__ (u64 *restrict shadow, const void *ptr, size_t len)
-{
- u64 *restrict shadow_ptr;
- size_t shadow_len;
- int i;
-
- sanitizer_unpoison__ (&shadow_ptr, &shadow_len, ptr, len);
-
- /* save the shadow area */
- for (i = 0; i < shadow_len; i++)
- shadow[i] = shadow_ptr[i];
-
- /* unpoison */
- for (i = 0; i < shadow_len; i++)
- shadow_ptr[i] = 0;
-}
-
-static_always_inline CLIB_NOSANITIZE_ADDR void
-sanitizer_unpoison_pop__ (const u64 *restrict shadow, const void *ptr,
- size_t len)
-{
- u64 *restrict shadow_ptr;
- size_t shadow_len;
- int i;
-
- sanitizer_unpoison__ (&shadow_ptr, &shadow_len, ptr, len);
-
- /* restore the shadow area */
- for (i = 0; i < shadow_len; i++)
- {
- ASSERT (0 == shadow_ptr[i]);
- shadow_ptr[i] = shadow[i];
- }
-}
-
-#define CLIB_MEM_OVERFLOW(f, src, n) \
- ({ \
- typeof (f) clib_mem_overflow_ret__; \
- const void *clib_mem_overflow_src__ = (src); \
- size_t clib_mem_overflow_n__ = (n); \
- u64 clib_mem_overflow_shadow__[CLIB_MEM_OVERFLOW_MAX]; \
- sanitizer_unpoison_push__ (clib_mem_overflow_shadow__, \
- clib_mem_overflow_src__, \
- clib_mem_overflow_n__); \
- clib_mem_overflow_ret__ = f; \
- sanitizer_unpoison_pop__ (clib_mem_overflow_shadow__, \
- clib_mem_overflow_src__, \
- clib_mem_overflow_n__); \
- clib_mem_overflow_ret__; \
- })
-
-#define CLIB_MEM_OVERFLOW_LOAD(f, src) \
- ({ \
- typeof(src) clib_mem_overflow_load_src__ = (src); \
- CLIB_MEM_OVERFLOW(f(clib_mem_overflow_load_src__), clib_mem_overflow_load_src__, sizeof(typeof(f(clib_mem_overflow_load_src__)))); \
- })
-
-static_always_inline void
-CLIB_MEM_POISON_LEN (void *src, size_t oldlen, size_t newlen)
-{
- if (oldlen > newlen)
- CLIB_MEM_POISON (src + newlen, oldlen - newlen);
- else if (newlen > oldlen)
- CLIB_MEM_UNPOISON (src + oldlen, newlen - oldlen);
-}
-
-#else /* CLIB_SANITIZE_ADDR */
-
-#define CLIB_NOSANITIZE_ADDR
-#define CLIB_MEM_POISON(a, s) (void)(a)
-#define CLIB_MEM_UNPOISON(a, s) (void)(a)
-#define CLIB_MEM_OVERFLOW(a, b, c) a
-#define CLIB_MEM_OVERFLOW_LOAD(f, src) f(src)
-#define CLIB_MEM_POISON_LEN(a, b, c)
-
-#endif /* CLIB_SANITIZE_ADDR */
-
-/*
- * clang tends to force alignment of all sections when compiling for address
- * sanitizer. This confuse VPP plugin infra, prevent clang to do that
- * On the contrary, GCC does not support this kind of attribute on sections
- * sigh.
- */
-#ifdef __clang__
-#define CLIB_NOSANITIZE_PLUGIN_REG_SECTION CLIB_NOSANITIZE_ADDR
-#else
-#define CLIB_NOSANITIZE_PLUGIN_REG_SECTION
-#endif
-
-#endif /* _included_clib_sanitizer_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c
index 64509254b5d..ceda617f872 100644
--- a/src/vppinfra/serialize.c
+++ b/src/vppinfra/serialize.c
@@ -308,13 +308,16 @@ unserialize_vector_ha (serialize_main_t * m,
{
void *v, *p;
u32 l;
+ vec_attr_t va = { .align = align,
+ .elt_sz = elt_bytes,
+ .hdr_sz = header_bytes };
unserialize_integer (m, &l, sizeof (l));
if (l > max_length)
serialize_error (&m->header,
clib_error_create ("bad vector length %d", l));
- p = v = _vec_resize ((void *) 0, l, (uword) l * elt_bytes, header_bytes,
- /* align */ align);
+
+ p = v = _vec_alloc_internal (l, &va);
while (l != 0)
{
@@ -437,6 +440,9 @@ unserialize_pool_helper (serialize_main_t * m,
void *v;
u32 i, l, lo, hi;
pool_header_t *p;
+ vec_attr_t va = { .align = align,
+ .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (pool_header_t) };
unserialize_integer (m, &l, sizeof (l));
if (l == 0)
@@ -444,8 +450,7 @@ unserialize_pool_helper (serialize_main_t * m,
return 0;
}
- v = _vec_resize ((void *) 0, l, (uword) l * elt_bytes, sizeof (p[0]),
- align);
+ v = _vec_alloc_internal (l, &va);
p = pool_header (v);
vec_unserialize (m, &p->free_indices, unserialize_vec_32);
@@ -715,7 +720,7 @@ serialize_write_not_inline (serialize_main_header_t * m,
n_left_b -= n;
n_left_o -= n;
if (n_left_o == 0)
- _vec_len (s->overflow_buffer) = 0;
+ vec_set_len (s->overflow_buffer, 0);
else
vec_delete (s->overflow_buffer, n, 0);
}
@@ -736,6 +741,7 @@ serialize_write_not_inline (serialize_main_header_t * m,
if (n_left_o > 0 || n_left_b < n_bytes_to_write)
{
u8 *r;
+ s->current_buffer_index = cur_bi;
vec_add2 (s->overflow_buffer, r, n_bytes_to_write);
return r;
}
@@ -772,7 +778,7 @@ serialize_read_not_inline (serialize_main_header_t * m,
if (n_left_o == 0 && s->overflow_buffer)
{
s->current_overflow_index = 0;
- _vec_len (s->overflow_buffer) = 0;
+ vec_set_len (s->overflow_buffer, 0);
}
n_left_to_read = n_bytes_to_read;
@@ -924,7 +930,7 @@ serialize_close_vector (serialize_main_t * m)
serialize_close (m); /* frees overflow buffer */
if (s->buffer)
- _vec_len (s->buffer) = s->current_buffer_index;
+ vec_set_len (s->buffer, s->current_buffer_index);
result = s->buffer;
clib_memset (m, 0, sizeof (m[0]));
return result;
@@ -1162,7 +1168,7 @@ clib_file_write (serialize_main_header_t * m, serialize_stream_t * s)
serialize_error (m, clib_error_return_unix (0, "write"));
}
if (n == s->current_buffer_index)
- _vec_len (s->buffer) = 0;
+ vec_set_len (s->buffer, 0);
else
vec_delete (s->buffer, n, 0);
s->current_buffer_index = vec_len (s->buffer);
@@ -1198,7 +1204,7 @@ serialize_open_clib_file_descriptor_helper (serialize_main_t * m, int fd,
if (!is_read)
{
m->stream.n_buffer_bytes = vec_len (m->stream.buffer);
- _vec_len (m->stream.buffer) = 0;
+ vec_set_len (m->stream.buffer, 0);
}
m->header.data_function = is_read ? clib_file_read : clib_file_write;
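The recurring change in this file replaces the old lvalue idiom
_vec_len (v) = n with the vec_set_len accessor. A minimal sketch of the new
idiom (helper name hypothetical):

    #include <vppinfra/vec.h>

    static void
    drop_contents (u8 *v)
    {
      /* old: _vec_len (v) = 0;  -- wrote the header field directly */
      /* new: asserted against the allocation in debug builds, keeps
       * the memory, drops the contents */
      vec_set_len (v, 0);
    }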
diff --git a/src/vppinfra/sha2.h b/src/vppinfra/sha2.h
deleted file mode 100644
index b6caf59ac7f..00000000000
--- a/src/vppinfra/sha2.h
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_sha2_h
-#define included_sha2_h
-
-#include <vppinfra/clib.h>
-
-#define SHA224_DIGEST_SIZE 28
-#define SHA224_BLOCK_SIZE 64
-
-#define SHA256_DIGEST_SIZE 32
-#define SHA256_BLOCK_SIZE 64
-#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y)))
-#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA256_CSIGMA0(x) (SHA256_ROTR(x, 2) ^ \
- SHA256_ROTR(x, 13) ^ \
- SHA256_ROTR(x, 22));
-#define SHA256_CSIGMA1(x) (SHA256_ROTR(x, 6) ^ \
- SHA256_ROTR(x, 11) ^ \
- SHA256_ROTR(x, 25));
-#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ \
- SHA256_ROTR (x, 18) ^ \
- (x >> 3))
-#define SHA256_SSIGMA1(x) (SHA256_ROTR (x, 17) ^ \
- SHA256_ROTR (x, 19) ^ \
- (x >> 10))
-
-#define SHA256_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA256_SSIGMA0 (w[j - 15]); \
- w[j] += SHA256_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA256_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA256_CSIGMA1 (s[4]); \
- t1 += SHA256_CH (s[4], s[5], s[6]); \
- t2 = SHA256_CSIGMA0 (s[0]); \
- t2 += SHA256_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-#define SHA512_224_DIGEST_SIZE 28
-#define SHA512_224_BLOCK_SIZE 128
-
-#define SHA512_256_DIGEST_SIZE 32
-#define SHA512_256_BLOCK_SIZE 128
-
-#define SHA384_DIGEST_SIZE 48
-#define SHA384_BLOCK_SIZE 128
-
-#define SHA512_DIGEST_SIZE 64
-#define SHA512_BLOCK_SIZE 128
-#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y)))
-#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA512_CSIGMA0(x) (SHA512_ROTR (x, 28) ^ \
- SHA512_ROTR (x, 34) ^ \
- SHA512_ROTR (x, 39))
-#define SHA512_CSIGMA1(x) (SHA512_ROTR (x, 14) ^ \
- SHA512_ROTR (x, 18) ^ \
- SHA512_ROTR (x, 41))
-#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ \
- SHA512_ROTR (x, 8) ^ \
- (x >> 7))
-#define SHA512_SSIGMA1(x) (SHA512_ROTR (x, 19) ^ \
- SHA512_ROTR (x, 61) ^ \
- (x >> 6))
-
-#define SHA512_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA512_SSIGMA0 (w[j - 15]); \
- w[j] += SHA512_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA512_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA512_CSIGMA1 (s[4]); \
- t1 += SHA512_CH (s[4], s[5], s[6]); \
- t2 = SHA512_CSIGMA0 (s[0]); \
- t2 += SHA512_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-static const u32 sha224_h[8] = {
- 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
- 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
-};
-
-static const u32 sha256_h[8] = {
- 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
- 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-static const u32 sha256_k[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-static const u64 sha384_h[8] = {
- 0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
- 0x9159015a3070dd17, 0x152fecd8f70e5939,
- 0x67332667ffc00b31, 0x8eb44a8768581511,
- 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
-};
-
-static const u64 sha512_h[8] = {
- 0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
- 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
- 0x510e527fade682d1, 0x9b05688c2b3e6c1f,
- 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
-};
-
-static const u64 sha512_224_h[8] = {
- 0x8c3d37c819544da2, 0x73e1996689dcd4d6,
- 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
- 0x0f6d2b697bd44da8, 0x77e36f7304c48942,
- 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1
-};
-
-static const u64 sha512_256_h[8] = {
- 0x22312194fc2bf72c, 0x9f555fa3c84c64c2,
- 0x2393b86b6f53b151, 0x963877195940eabd,
- 0x96283ee2a88effe3, 0xbe5e1e2553863992,
- 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2
-};
-
-static const u64 sha512_k[80] = {
- 0x428a2f98d728ae22, 0x7137449123ef65cd,
- 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
- 0x3956c25bf348b538, 0x59f111f1b605d019,
- 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
- 0xd807aa98a3030242, 0x12835b0145706fbe,
- 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
- 0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
- 0x9bdc06a725c71235, 0xc19bf174cf692694,
- 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
- 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
- 0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
- 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
- 0x983e5152ee66dfab, 0xa831c66d2db43210,
- 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
- 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
- 0x06ca6351e003826f, 0x142929670a0e6e70,
- 0x27b70a8546d22ffc, 0x2e1b21385c26c926,
- 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
- 0x650a73548baf63de, 0x766a0abb3c77b2a8,
- 0x81c2c92e47edaee6, 0x92722c851482353b,
- 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
- 0xc24b8b70d0f89791, 0xc76c51a30654be30,
- 0xd192e819d6ef5218, 0xd69906245565a910,
- 0xf40e35855771202a, 0x106aa07032bbd1b8,
- 0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
- 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
- 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
- 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
- 0x748f82ee5defb2fc, 0x78a5636f43172f60,
- 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
- 0x90befffa23631e28, 0xa4506cebde82bde9,
- 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
- 0xca273eceea26619c, 0xd186b8c721c0c207,
- 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
- 0x06f067aa72176fba, 0x0a637dc5a2c898a6,
- 0x113f9804bef90dae, 0x1b710b35131c471b,
- 0x28db77f523047d84, 0x32caab7b40c72493,
- 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
- 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
- 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-};
-
-typedef enum
-{
- CLIB_SHA2_224,
- CLIB_SHA2_256,
- CLIB_SHA2_384,
- CLIB_SHA2_512,
- CLIB_SHA2_512_224,
- CLIB_SHA2_512_256,
-} clib_sha2_type_t;
-
-#define SHA2_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
-#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
-
-typedef struct
-{
- u64 total_bytes;
- u16 n_pending;
- u8 block_size;
- u8 digest_size;
- union
- {
- u32 h32[8];
- u64 h64[8];
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h32x4[2];
-#endif
- };
- union
- {
- u8 as_u8[SHA2_MAX_BLOCK_SIZE];
- u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)];
- uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- }
- pending;
-}
-clib_sha2_ctx_t;
-
-static_always_inline void
-clib_sha2_init (clib_sha2_ctx_t * ctx, clib_sha2_type_t type)
-{
- const u32 *h32 = 0;
- const u64 *h64 = 0;
-
- ctx->total_bytes = 0;
- ctx->n_pending = 0;
-
- switch (type)
- {
- case CLIB_SHA2_224:
- h32 = sha224_h;
- ctx->block_size = SHA224_BLOCK_SIZE;
- ctx->digest_size = SHA224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_256:
- h32 = sha256_h;
- ctx->block_size = SHA256_BLOCK_SIZE;
- ctx->digest_size = SHA256_DIGEST_SIZE;
- break;
- case CLIB_SHA2_384:
- h64 = sha384_h;
- ctx->block_size = SHA384_BLOCK_SIZE;
- ctx->digest_size = SHA384_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512:
- h64 = sha512_h;
- ctx->block_size = SHA512_BLOCK_SIZE;
- ctx->digest_size = SHA512_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_224:
- h64 = sha512_224_h;
- ctx->block_size = SHA512_224_BLOCK_SIZE;
- ctx->digest_size = SHA512_224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_256:
- h64 = sha512_256_h;
- ctx->block_size = SHA512_256_BLOCK_SIZE;
- ctx->digest_size = SHA512_256_DIGEST_SIZE;
- break;
- }
- if (h32)
- for (int i = 0; i < 8; i++)
- ctx->h32[i] = h32[i];
-
- if (h64)
- for (int i = 0; i < 8; i++)
- ctx->h64[i] = h64[i];
-}
-
-#if defined(__SHA__) && defined (__x86_64__)
-static inline void
-shani_sha256_cycle_w (u32x4 cw[], u8 a, u8 b, u8 c, u8 d)
-{
- cw[a] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) cw[a], (__m128i) cw[b]);
- cw[a] += (u32x4) _mm_alignr_epi8 ((__m128i) cw[d], (__m128i) cw[c], 4);
- cw[a] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) cw[a], (__m128i) cw[d]);
-}
-
-static inline void
-shani_sha256_4_rounds (u32x4 cw, u8 n, u32x4 s[])
-{
- u32x4 r = *(u32x4 *) (sha256_k + 4 * n) + cw;
- s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1],
- (__m128i) r);
- r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r);
- s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0],
- (__m128i) r);
-}
-
-static inline void
-shani_sha256_shuffle (u32x4 d[2], u32x4 s[2])
-{
- /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */
- d[0] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0xbb);
- d[1] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0x11);
-}
-#endif
-
-void
-clib_sha256_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks)
-{
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h[2], s[2], w[4];
-
- shani_sha256_shuffle (h, ctx->h32x4);
-
- while (n_blocks)
- {
- w[0] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 0));
- w[1] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 16));
- w[2] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 32));
- w[3] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 48));
-
- s[0] = h[0];
- s[1] = h[1];
-
- shani_sha256_4_rounds (w[0], 0, s);
- shani_sha256_4_rounds (w[1], 1, s);
- shani_sha256_4_rounds (w[2], 2, s);
- shani_sha256_4_rounds (w[3], 3, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 4, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 5, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 6, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 7, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 8, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 9, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 10, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 11, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 12, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 13, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 14, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 15, s);
-
- h[0] += s[0];
- h[1] += s[1];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-
- shani_sha256_shuffle (ctx->h32x4, h);
-#else
- u32 w[64], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h32[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u32 (*((u32 *) msg + i));
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 16; i < 64; i++)
- {
- SHA256_MSG_SCHED (w, i);
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h32[i] += s[i];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-#endif
-}
-
-static_always_inline void
-clib_sha512_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks)
-{
- u64 w[80], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h64[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u64 (*((u64 *) msg + i));
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 16; i < 80; i++)
- {
- SHA512_MSG_SCHED (w, i);
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h64[i] += s[i];
-
- /* next */
- msg += SHA512_BLOCK_SIZE;
- n_blocks--;
- }
-}
-
-static_always_inline void
-clib_sha2_update (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_bytes)
-{
- uword n_blocks;
- if (ctx->n_pending)
- {
- uword n_left = ctx->block_size - ctx->n_pending;
- if (n_bytes < n_left)
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg,
- n_bytes);
- ctx->n_pending += n_bytes;
- return;
- }
- else
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg, n_left);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->n_pending = 0;
- ctx->total_bytes += ctx->block_size;
- n_bytes -= n_left;
- msg += n_left;
- }
- }
-
- if ((n_blocks = n_bytes / ctx->block_size))
- {
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, msg, n_blocks);
- else
- clib_sha256_block (ctx, msg, n_blocks);
- n_bytes -= n_blocks * ctx->block_size;
- msg += n_blocks * ctx->block_size;
- ctx->total_bytes += n_blocks * ctx->block_size;
- }
-
- if (n_bytes)
- {
- clib_memset_u8 (ctx->pending.as_u8, 0, ctx->block_size);
- clib_memcpy_fast (ctx->pending.as_u8, msg, n_bytes);
- ctx->n_pending = n_bytes;
- }
- else
- ctx->n_pending = 0;
-}
-
-static_always_inline void
-clib_sha2_final (clib_sha2_ctx_t * ctx, u8 * digest)
-{
- int i;
-
- ctx->total_bytes += ctx->n_pending;
- if (ctx->n_pending == 0)
- {
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- ctx->pending.as_u8[0] = 0x80;
- }
- else if (ctx->n_pending + sizeof (u64) + sizeof (u8) > ctx->block_size)
- {
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- }
- else
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
-
- ctx->pending.as_u64[ctx->block_size / 8 - 1] =
- clib_net_to_host_u64 (ctx->total_bytes * 8);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
-
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- {
- for (i = 0; i < ctx->digest_size / sizeof (u64); i++)
- *((u64 *) digest + i) = clib_net_to_host_u64 (ctx->h64[i]);
-
- /* sha512-224 case - write half of u64 */
- if (i * sizeof (u64) < ctx->digest_size)
- *((u32 *) digest + 2 * i) = clib_net_to_host_u32 (ctx->h64[i] >> 32);
- }
- else
- for (i = 0; i < ctx->digest_size / sizeof (u32); i++)
- *((u32 *) digest + i) = clib_net_to_host_u32 (ctx->h32[i]);
-}
-
-static_always_inline void
-clib_sha2 (clib_sha2_type_t type, const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t ctx;
- clib_sha2_init (&ctx, type);
- clib_sha2_update (&ctx, msg, len);
- clib_sha2_final (&ctx, digest);
-}
-
-#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-static_always_inline void
-clib_hmac_sha2 (clib_sha2_type_t type, const u8 * key, uword key_len,
- const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t _ctx, *ctx = &_ctx;
- uword key_data[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- u8 i_digest[SHA2_MAX_DIGEST_SIZE];
- int i, n_words;
-
- clib_sha2_init (ctx, type);
- n_words = ctx->block_size / sizeof (uword);
-
- /* key */
- if (key_len > ctx->block_size)
- {
- /* key is longer than block, calculate hash of key */
- clib_sha2_update (ctx, key, key_len);
- for (i = (ctx->digest_size / sizeof (uword)) / 2; i < n_words; i++)
- key_data[i] = 0;
- clib_sha2_final (ctx, (u8 *) key_data);
- clib_sha2_init (ctx, type);
- }
- else
- {
- for (i = 0; i < n_words; i++)
- key_data[i] = 0;
- clib_memcpy_fast (key_data, key, key_len);
- }
-
- /* ipad */
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x3636363636363636;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* message */
- clib_sha2_update (ctx, msg, len);
- clib_sha2_final (ctx, i_digest);
-
- /* opad */
- clib_sha2_init (ctx, type);
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x5c5c5c5c5c5c5c5c;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* digest */
- clib_sha2_update (ctx, i_digest, ctx->digest_size);
- clib_sha2_final (ctx, digest);
-}
-
-#define clib_hmac_sha224(...) clib_hmac_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_hmac_sha256(...) clib_hmac_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_hmac_sha384(...) clib_hmac_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_hmac_sha512(...) clib_hmac_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_hmac_sha512_224(...) clib_hmac_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_hmac_sha512_256(...) clib_hmac_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-#endif /* included_sha2_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
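The header is deleted here because the SHA-2 code moves under crypto/ (this
change's diffstat lists src/vppinfra/crypto/sha2.h). Assuming the one-shot
helpers keep the shape shown in the removed code, callers only change the
include path:

    #include <vppinfra/crypto/sha2.h> /* new location of the SHA-2 code */

    static void
    sha256_sketch (const u8 *msg, uword len)
    {
      u8 digest[SHA256_DIGEST_SIZE];

      /* init + update + final in one call, as in the removed header */
      clib_sha256 (msg, len, digest);
    }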
diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c
index 26427d98fa1..2abf2b244cd 100644
--- a/src/vppinfra/socket.c
+++ b/src/vppinfra/socket.c
@@ -93,108 +93,6 @@ find_free_port (word sock)
return port < 1 << 16 ? port : -1;
}
-/* Convert a config string to a struct sockaddr and length for use
- with bind or connect. */
-static clib_error_t *
-socket_config (char *config,
- void *addr, socklen_t * addr_len, u32 ip4_default_address)
-{
- clib_error_t *error = 0;
-
- if (!config)
- config = "";
-
- /* Anything that begins with a / is a local PF_LOCAL socket. */
- if (config[0] == '/')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
- *addr_len = sizeof (su[0]);
- }
-
- /* Treat everything that starts with @ as an abstract socket. */
- else if (config[0] == '@')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
-
- *addr_len = sizeof (su->sun_family) + strlen (config);
- su->sun_path[0] = '\0';
- }
-
- /* Hostname or hostname:port or port. */
- else
- {
- char *host_name;
- int port = -1;
- struct sockaddr_in *sa = addr;
-
- host_name = 0;
- port = -1;
- if (config[0] != 0)
- {
- unformat_input_t i;
-
- unformat_init_string (&i, config, strlen (config));
- if (unformat (&i, "%s:%d", &host_name, &port)
- || unformat (&i, "%s:0x%x", &host_name, &port))
- ;
- else if (unformat (&i, "%s", &host_name))
- ;
- else
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, &i);
- unformat_free (&i);
-
- if (error)
- goto done;
- }
-
- sa->sin_family = PF_INET;
- *addr_len = sizeof (sa[0]);
- if (port != -1)
- sa->sin_port = htons (port);
- else
- sa->sin_port = 0;
-
- if (host_name)
- {
- struct in_addr host_addr;
-
- /* Recognize localhost to avoid host lookup in most common cast. */
- if (!strcmp (host_name, "localhost"))
- sa->sin_addr.s_addr = htonl (INADDR_LOOPBACK);
-
- else if (inet_aton (host_name, &host_addr))
- sa->sin_addr = host_addr;
-
- else if (host_name && strlen (host_name) > 0)
- {
- struct hostent *host = gethostbyname (host_name);
- if (!host)
- error = clib_error_return (0, "unknown host `%s'", config);
- else
- clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0],
- host->h_length);
- }
-
- else
- sa->sin_addr.s_addr = htonl (ip4_default_address);
-
- vec_free (host_name);
- if (error)
- goto done;
- }
- }
-
-done:
- return error;
-}
-
static clib_error_t *
default_socket_write (clib_socket_t * s)
{
@@ -230,7 +128,7 @@ default_socket_write (clib_socket_t * s)
else if (written > 0)
{
if (written == tx_len)
- _vec_len (s->tx_buffer) = 0;
+ vec_set_len (s->tx_buffer, 0);
else
vec_delete (s->tx_buffer, written, 0);
}
@@ -253,7 +151,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
u8 *buf;
/* RX side of socket is down once end of file is reached. */
- if (sock->flags & CLIB_SOCKET_F_RX_END_OF_FILE)
+ if (sock->rx_end_of_file)
return 0;
fd = sock->fd;
@@ -275,10 +173,10 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
/* Other side closed the socket. */
if (n_read == 0)
- sock->flags |= CLIB_SOCKET_F_RX_END_OF_FILE;
+ sock->rx_end_of_file = 1;
non_fatal:
- _vec_len (sock->rx_buffer) += n_read - n_bytes;
+ vec_inc_len (sock->rx_buffer, n_read - n_bytes);
return 0;
}
@@ -328,7 +226,7 @@ static clib_error_t *
default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
int fds[], int num_fds)
{
-#ifdef __linux__
+#if CLIB_LINUX
char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
CMSG_SPACE (sizeof (struct ucred))];
struct ucred *cr = 0;
@@ -363,7 +261,7 @@ default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
{
if (cmsg->cmsg_level == SOL_SOCKET)
{
-#ifdef __linux__
+#if CLIB_LINUX
if (cmsg->cmsg_type == SCM_CREDENTIALS)
{
cr = (struct ucred *) CMSG_DATA (cmsg);
@@ -399,190 +297,452 @@ socket_init_funcs (clib_socket_t * s)
s->recvmsg_func = default_socket_recvmsg;
}
+static const struct
+{
+ char *prefix;
+ sa_family_t family;
+ clib_socket_type_t type;
+ u16 skip_prefix : 1;
+ u16 is_local : 1;
+} clib_socket_type_data[] = {
+ { .prefix = "unix:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 1,
+ .is_local = 1 },
+ { .prefix = "tcp:",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 1 },
+#if CLIB_LINUX
+ { .prefix = "abstract:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
+ .skip_prefix = 1,
+ .is_local = 1 },
+#endif /* CLIB_LINUX */
+ { .prefix = "/",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+ { .prefix = "",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 0,
+ .is_local = 0 },
+ { .prefix = "",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+};
+
+static u8 *
+_clib_socket_get_string (char **p, int is_hostname)
+{
+ u8 *s = 0;
+ while (**p)
+ {
+ switch (**p)
+ {
+ case '_':
+ if (is_hostname)
+ return s;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '/':
+ case '-':
+ case '.':
+ vec_add1 (s, **p);
+ (*p)++;
+ break;
+ default:
+ return s;
+ }
+ }
+ return s;
+}
+
+__clib_export int
+clib_socket_prefix_is_valid (char *s)
+{
+ for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
+ d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
+ if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
+ return 1;
+ return 0;
+}
+
+__clib_export int
+clib_socket_prefix_get_type (char *s)
+{
+ for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
+ d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
+ if (strncmp (s, d->prefix, strlen (d->prefix)) == 0)
+ return d->type;
+ return 0;
+}
+
__clib_export clib_error_t *
-clib_socket_init (clib_socket_t * s)
+clib_socket_init (clib_socket_t *s)
{
- union
- {
- struct sockaddr sa;
- struct sockaddr_un su;
- } addr;
+ struct sockaddr_un su = { .sun_family = AF_UNIX };
+ struct sockaddr_in si = { .sin_family = AF_INET };
+ struct sockaddr *sa = 0;
+ typeof (clib_socket_type_data[0]) *data = 0;
socklen_t addr_len = 0;
- int socket_type, rv;
- clib_error_t *error = 0;
- word port;
+ int rv;
+ char *p;
+ clib_error_t *err = 0;
+ u8 *name = 0;
+ u16 port = 0;
+#if CLIB_LINUX
+ int netns_fd = -1;
+#endif
- error = socket_config (s->config, &addr.sa, &addr_len,
- (s->flags & CLIB_SOCKET_F_IS_SERVER
- ? INADDR_LOOPBACK : INADDR_ANY));
- if (error)
- goto done;
+ s->fd = -1;
- socket_init_funcs (s);
+ if (!s->config)
+ s->config = "";
+
+ for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
+ {
+ typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
+
+ if (d->is_local == 0 && s->local_only)
+ continue;
+
+ if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
+ {
+ data = d;
+ break;
+ }
+ }
+
+ if (data == 0)
+ return clib_error_return (0, "unsupported socket config '%s'", s->config);
+
+ s->type = data->type;
+ p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
+
+ name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
+ vec_add1 (name, 0);
+
+ /* parse port type for INET sockets */
+ if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
+ {
+ char *old_p = p + 1;
+ long long ll = strtoll (old_p, &p, 0);
- socket_type = s->flags & CLIB_SOCKET_F_SEQPACKET ?
- SOCK_SEQPACKET : SOCK_STREAM;
+ if (p == old_p)
+ {
+ err = clib_error_return (0, "invalid port");
+ goto done;
+ }
+
+ if (ll > CLIB_U16_MAX || ll < 1)
+ {
+ err = clib_error_return (0, "port out of range");
+ goto done;
+ }
+ port = ll;
+ }
+
+ while (p[0] == ',')
+ {
+ p++;
+ if (0)
+ ;
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_name=", 11) == 0)
+ {
+ p += 11;
+ u8 *str = _clib_socket_get_string (&p, 0);
+ u8 *pathname = 0;
+ if (str[0] == '/')
+ pathname = format (0, "%v%c", str, 0);
+ else
+ pathname = format (0, "/var/run/netns/%v%c", str, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (str);
+ vec_free (pathname);
+ if (err)
+ goto done;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_pid=", 10) == 0)
+ {
+ char *old_p = p = p + 10;
+ u32 pid = (u32) strtol (old_p, &p, 0);
+
+ if (p == old_p)
+ err = clib_error_return (0, "invalid pid");
+ else
+ {
+ u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (pathname);
+ }
+ if (err)
+ goto done;
+ }
+#endif
+ else
+ break;
+ }
- s->fd = socket (addr.sa.sa_family, socket_type, 0);
- if (s->fd < 0)
+ if (p[0] != 0)
{
- error = clib_error_return_unix (0, "socket (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "unknown input `%s'", p);
goto done;
}
- port = 0;
- if (addr.sa.sa_family == PF_INET)
- port = ((struct sockaddr_in *) &addr)->sin_port;
+#if CLIB_LINUX
+ /* change netns if requested */
+ if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
+ {
+ int fd = open ("/proc/self/ns/net", O_RDONLY);
- if (s->flags & CLIB_SOCKET_F_IS_SERVER)
+ if (setns (netns_fd, CLONE_NEWNET) < 0)
+ {
+ close (fd);
+ err = clib_error_return_unix (0, "setns(%d)", netns_fd);
+ goto done;
+ }
+ netns_fd = fd;
+ }
+#endif
+
+ if (s->type == CLIB_SOCKET_TYPE_INET)
{
- uword need_bind = 1;
+ addr_len = sizeof (si);
+ si.sin_port = htons (port);
- if (addr.sa.sa_family == PF_INET)
+ if (name)
{
- if (port == 0)
+ struct in_addr host_addr;
+ vec_add1 (name, 0);
+
+ /* Recognize localhost to avoid host lookup in most common cast. */
+ if (!strcmp ((char *) name, "localhost"))
+ si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+
+ else if (inet_aton ((char *) name, &host_addr))
+ si.sin_addr = host_addr;
+
+ else if (strlen ((char *) name) > 0)
{
- port = find_free_port (s->fd);
- if (port < 0)
+ struct hostent *host = gethostbyname ((char *) name);
+ if (!host)
+ err = clib_error_return (0, "unknown host `%s'", name);
+ else
+ clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
+ host->h_length);
+ }
+
+ else
+ si.sin_addr.s_addr =
+ htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
+
+ if (err)
+ goto done;
+ }
+ sa = (struct sockaddr *) &si;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_UNIX)
+ {
+ struct stat st = { 0 };
+ char *path = (char *) &su.sun_path;
+
+ if (vec_len (name) > sizeof (su.sun_path) - 1)
+ {
+ err = clib_error_return (0, "File path '%v' too long", name);
+ goto done;
+ }
+
+ clib_memcpy (path, name, vec_len (name));
+ addr_len = sizeof (su);
+ sa = (struct sockaddr *) &su;
+
+ rv = stat (path, &st);
+ if (!s->is_server && rv < 0)
+ {
+ err = clib_error_return_unix (0, "stat ('%s')", path);
+ goto done;
+ }
+
+ if (s->is_server && rv == 0)
+ {
+ if (S_ISSOCK (st.st_mode))
+ {
+ int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ int ret = connect (client_fd, (const struct sockaddr *) &su,
+ sizeof (su));
+ typeof (errno) connect_errno = errno;
+ close (client_fd);
+
+ if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
{
- error = clib_error_return (0, "no free port (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "Active listener on '%s'", path);
+ goto done;
+ }
+
+ if (unlink (path) < 0)
+ {
+ err = clib_error_return_unix (0, "unlink ('%s')", path);
goto done;
}
- need_bind = 0;
}
- }
- if (addr.sa.sa_family == PF_LOCAL &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
- unlink (((struct sockaddr_un *) &addr)->sun_path);
-
- /* Make address available for multiple users. */
- {
- int v = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof (v)) < 0)
- clib_unix_warning ("setsockopt SO_REUSEADDR fails");
- }
-
-#if __linux__
- if (addr.sa.sa_family == PF_LOCAL && s->flags & CLIB_SOCKET_F_PASSCRED)
- {
- int x = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &x, sizeof (x)) < 0)
+ else
{
- error = clib_error_return_unix (0, "setsockopt (SO_PASSCRED, "
- "fd %d, '%s')", s->fd,
- s->config);
+ err = clib_error_return (0, "File '%s' already exists", path);
goto done;
}
}
-#endif
-
- if (need_bind && bind (s->fd, &addr.sa, addr_len) < 0)
+ }
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
+ {
+ if (vec_len (name) > sizeof (su.sun_path) - 2)
{
- error = clib_error_return_unix (0, "bind (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "Socket name '%v' too long", name);
goto done;
}
- if (listen (s->fd, 5) < 0)
+ clib_memcpy (&su.sun_path[1], name, vec_len (name));
+ addr_len = sizeof (su.sun_family) + vec_len (name);
+ sa = (struct sockaddr *) &su;
+ s->allow_group_write = 0;
+ }
+#endif
+ else
+ {
+ err = clib_error_return_unix (0, "unknown socket family");
+ goto done;
+ }
+
+ socket_init_funcs (s);
+
+ if ((s->fd = socket (sa->sa_family,
+ s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
+ {
+ err =
+ clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
+ goto done;
+ }
+
+ if (s->is_server)
+ {
+ uword need_bind = 1;
+
+ if (sa->sa_family == AF_INET && si.sin_port == 0)
{
- error = clib_error_return_unix (0, "listen (fd %d, '%s')",
- s->fd, s->config);
- goto done;
+ word port = find_free_port (s->fd);
+ if (port < 0)
+ {
+ err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
+ si.sin_port = port;
+ need_bind = 0;
}
- if (addr.sa.sa_family == PF_LOCAL &&
- s->flags & CLIB_SOCKET_F_ALLOW_GROUP_WRITE &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
+
+ if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
+ sizeof (int)) < 0)
+ clib_unix_warning ("setsockopt SO_REUSEADDR fails");
+
+#if CLIB_LINUX
+ if (sa->sa_family == AF_UNIX && s->passcred)
{
- struct stat st = { 0 };
- if (stat (((struct sockaddr_un *) &addr)->sun_path, &st) < 0)
+ if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
+ sizeof (int)) < 0)
{
- error = clib_error_return_unix (0, "stat (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0,
+ "setsockopt (SO_PASSCRED, "
+ "fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
- st.st_mode |= S_IWGRP;
- if (chmod (((struct sockaddr_un *) &addr)->sun_path, st.st_mode) <
- 0)
+ }
+#endif
+
+ if (need_bind)
+ {
+ int bind_ret;
+ if (sa->sa_family == AF_UNIX && s->allow_group_write)
+ {
+ mode_t def_restrictions = umask (S_IWOTH);
+ bind_ret = bind (s->fd, sa, addr_len);
+ umask (def_restrictions);
+ }
+ else
+ bind_ret = bind (s->fd, sa, addr_len);
+
+ if (bind_ret < 0)
{
- error =
- clib_error_return_unix (0, "chmod (fd %d, '%s', mode %o)",
- s->fd, s->config, st.st_mode);
+ err = clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
}
+
+ if (listen (s->fd, 5) < 0)
+ {
+ err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
}
else
{
- if ((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT)
- && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
+ if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
- while ((rv = connect (s->fd, &addr.sa, addr_len)) < 0
- && errno == EAGAIN)
+ while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
;
- if (rv < 0 && !((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- errno == EINPROGRESS))
+ if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
{
- error = clib_error_return_unix (0, "connect (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
/* Connect was blocking so set fd to non-blocking now unless
* blocking mode explicitly requested. */
- if (!(s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- !(s->flags & CLIB_SOCKET_F_BLOCKING) &&
+ if (!s->non_blocking_connect && !s->is_blocking &&
fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
}
- return error;
-
done:
- if (s->fd > 0)
- close (s->fd);
- return error;
-}
-
-__clib_export clib_error_t *
-clib_socket_init_netns (clib_socket_t *s, u8 *namespace)
-{
- if (namespace == NULL || namespace[0] == 0)
- return clib_socket_init (s);
-
- clib_error_t *error;
- int old_netns_fd, nfd;
-
- old_netns_fd = clib_netns_open (NULL /* self */);
- if ((nfd = clib_netns_open (namespace)) == -1)
+ if (err && s->fd > -1)
{
- error = clib_error_return_unix (0, "clib_netns_open '%s'", namespace);
- goto done;
+ close (s->fd);
+ s->fd = -1;
}
-
- if (clib_setns (nfd) == -1)
+#if CLIB_LINUX
+ if (netns_fd != -1)
{
- error = clib_error_return_unix (0, "setns '%s'", namespace);
- goto done;
+ setns (netns_fd, CLONE_NEWNET);
+ close (netns_fd);
}
-
- error = clib_socket_init (s);
-
-done:
- if (clib_setns (old_netns_fd) == -1)
- clib_warning ("Cannot set old ns");
- close (old_netns_fd);
-
- return error;
+#endif
+ vec_free (name);
+ return err;
}
__clib_export clib_error_t *
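A sketch of the config strings accepted by the rewritten clib_socket_init;
the prefixes and the netns_name option come from the type table and parser
above, while the socket names themselves are illustrative:

    #include <vppinfra/socket.h>

    static clib_error_t *
    open_sockets_sketch (clib_socket_t *a, clib_socket_t *b, clib_socket_t *c)
    {
      clib_error_t *err;

      a->config = "/run/example.sock"; /* AF_UNIX, plain path form */
      a->is_server = 1;
      if ((err = clib_socket_init (a)))
        return err;

      b->config = "tcp:localhost:5002"; /* AF_INET, explicit prefix + port */
      if ((err = clib_socket_init (b)))
        return err;

      /* Linux only: abstract socket bound inside a named netns */
      c->config = "abstract:example,netns_name=blue";
      c->is_server = 1;
      return clib_socket_init (c);
    }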
diff --git a/src/vppinfra/socket.h b/src/vppinfra/socket.h
index fa5ef1efced..c4f0b87e3e1 100644
--- a/src/vppinfra/socket.h
+++ b/src/vppinfra/socket.h
@@ -41,11 +41,25 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
+#ifdef __FreeBSD__
+#include <errno.h>
+#define EBADFD EBADF
+#endif /* __FreeBSD__ */
#include <vppinfra/clib.h>
#include <vppinfra/error.h>
#include <vppinfra/format.h>
+typedef enum
+{
+ CLIB_SOCKET_TYPE_UNKNOWN = 0,
+ CLIB_SOCKET_TYPE_INET,
+ CLIB_SOCKET_TYPE_UNIX,
+#if CLIB_LINUX
+ CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
+#endif
+} clib_socket_type_t;
+
typedef struct _socket_t
{
/* File descriptor. */
@@ -54,15 +68,21 @@ typedef struct _socket_t
/* Config string for socket HOST:PORT or just HOST. */
char *config;
- u32 flags;
-#define CLIB_SOCKET_F_IS_SERVER (1 << 0)
-#define CLIB_SOCKET_F_IS_CLIENT (0 << 0)
-#define CLIB_SOCKET_F_RX_END_OF_FILE (1 << 2)
-#define CLIB_SOCKET_F_NON_BLOCKING_CONNECT (1 << 3)
-#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE (1 << 4)
-#define CLIB_SOCKET_F_SEQPACKET (1 << 5)
-#define CLIB_SOCKET_F_PASSCRED (1 << 6)
-#define CLIB_SOCKET_F_BLOCKING (1 << 7)
+ union
+ {
+ struct
+ {
+ u32 is_server : 1;
+ u32 rx_end_of_file : 1;
+ u32 non_blocking_connect : 1;
+ u32 allow_group_write : 1;
+ u32 is_seqpacket : 1;
+ u32 passcred : 1;
+ u32 is_blocking : 1;
+ u32 local_only : 1;
+ };
+ u32 flags;
+ };
/* Transmit buffer. Holds data waiting to be written. */
u8 *tx_buffer;
@@ -85,23 +105,33 @@ typedef struct _socket_t
int fds[], int num_fds);
clib_error_t *(*sendmsg_func) (struct _socket_t * s, void *msg, int msglen,
int fds[], int num_fds);
+ clib_socket_type_t type;
uword private_data;
} clib_socket_t;
+#define CLIB_SOCKET_FLAG(f) (((clib_socket_t){ .f = 1 }).flags)
+#define CLIB_SOCKET_F_IS_CLIENT 0
+#define CLIB_SOCKET_F_IS_SERVER CLIB_SOCKET_FLAG (is_server)
+#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE CLIB_SOCKET_FLAG (allow_group_write)
+#define CLIB_SOCKET_F_SEQPACKET CLIB_SOCKET_FLAG (is_seqpacket)
+#define CLIB_SOCKET_F_PASSCRED CLIB_SOCKET_FLAG (passcred)
+#define CLIB_SOCKET_F_BLOCKING CLIB_SOCKET_FLAG (is_blocking)
+
/* socket config format is host:port.
Unspecified port causes a free one to be chosen starting
from IPPORT_USERRESERVED (5000). */
clib_error_t *clib_socket_init (clib_socket_t * socket);
-clib_error_t *clib_socket_init_netns (clib_socket_t *socket, u8 *namespace);
-
clib_error_t *clib_socket_accept (clib_socket_t * server,
clib_socket_t * client);
+int clib_socket_prefix_is_valid (char *s);
+int clib_socket_prefix_get_type (char *s);
+
always_inline uword
clib_socket_is_server (clib_socket_t * sock)
{
- return (sock->flags & CLIB_SOCKET_F_IS_SERVER) != 0;
+ return sock->is_server;
}
always_inline uword
@@ -120,7 +150,7 @@ clib_socket_is_connected (clib_socket_t * sock)
always_inline int
clib_socket_rx_end_of_file (clib_socket_t * s)
{
- return s->flags & CLIB_SOCKET_F_RX_END_OF_FILE;
+ return s->rx_end_of_file;
}
always_inline void *
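The old flag constants survive as compound-literal masks over the new
bitfields, so both spellings below touch the same storage (a sketch):

    #include <vppinfra/socket.h>

    static void
    flags_sketch (void)
    {
      clib_socket_t s = { .config = "/run/example.sock" };

      s.is_server = 1;                    /* bitfield form */
      s.flags |= CLIB_SOCKET_F_SEQPACKET; /* mask form, same u32 */
      (void) s;
    }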
diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h
index 54a92ce7a84..3bd440d5dbd 100644
--- a/src/vppinfra/sparse_vec.h
+++ b/src/vppinfra/sparse_vec.h
@@ -38,8 +38,8 @@
#ifndef included_sparse_vec_h
#define included_sparse_vec_h
+#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
-#include <vppinfra/bitops.h>
/* Sparsely indexed vectors. Basic idea taken from Hacker's delight.
Eliot added ranges. */
@@ -59,7 +59,7 @@ typedef struct
always_inline sparse_vec_header_t *
sparse_vec_header (void *v)
{
- return vec_header (v, sizeof (sparse_vec_header_t));
+ return vec_header (v);
}
/* Index 0 is always used to mark indices that are not valid in
@@ -73,17 +73,14 @@ sparse_vec_new (uword elt_bytes, uword sparse_index_bits)
void *v;
sparse_vec_header_t *h;
word n;
+ vec_attr_t va = { .elt_sz = elt_bytes, .hdr_sz = sizeof (h[0]) };
ASSERT (sparse_index_bits <= 16);
- v = _vec_resize ((void *) 0,
- /* length increment */ 8,
- /* data bytes */ 8 * elt_bytes,
- /* header bytes */ sizeof (h[0]),
- /* data align */ 0);
+ v = _vec_alloc_internal (/* data bytes */ 8, &va);
/* Make space for invalid entry (entry 0). */
- _vec_len (v) = 1;
+ _vec_set_len (v, 1, elt_bytes);
h = sparse_vec_header (v);
@@ -223,7 +220,19 @@ sparse_vec_index2 (void *v,
*i1_return = is_member1 + d1;
}
-#define sparse_vec_free(v) vec_free(v)
+#define sparse_vec_free(V) \
+ do \
+ { \
+ if (V) \
+ { \
+ sparse_vec_header_t *_h = sparse_vec_header (V); \
+ vec_free (_h->is_member_bitmap); \
+ vec_free (_h->member_counts); \
+ clib_mem_free (_h); \
+ V = 0; \
+ } \
+ } \
+ while (0)
#define sparse_vec_elt_at_index(v,i) \
vec_elt_at_index ((v), sparse_vec_index ((v), (i)))
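sparse_vec_free stops being a plain vec_free alias because the header owns
two sub-vectors that would otherwise leak; a usage sketch:

    #include <vppinfra/sparse_vec.h>

    static void
    sparse_vec_sketch (void)
    {
      u16 *sv = sparse_vec_new (sizeof (u16), /* sparse index bits */ 10);

      /* ... fill via sparse_vec_validate, look up via sparse_vec_index ... */

      /* frees is_member_bitmap and member_counts, then the header itself */
      sparse_vec_free (sv);
    }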
diff --git a/src/vppinfra/stack.c b/src/vppinfra/stack.c
new file mode 100644
index 00000000000..190e880c228
--- /dev/null
+++ b/src/vppinfra/stack.c
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#define _GNU_SOURCE
+#include <dlfcn.h>
+
+#include <vppinfra/clib.h>
+#include <vppinfra/stack.h>
+#include <vppinfra/error.h>
+
+#if HAVE_LIBUNWIND == 1
+
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+
+static __thread unw_cursor_t cursor;
+static __thread unw_context_t context;
+
+#endif
+
+__clib_export clib_stack_frame_t *
+clib_stack_frame_get (clib_stack_frame_t *sf)
+{
+#if HAVE_LIBUNWIND == 1
+ Dl_info info = {};
+
+ if (sf->index == 0)
+ {
+ if (unw_getcontext (&context) < 0)
+ {
+ clib_warning ("libunwind: cannot get local machine state\n");
+ return 0;
+ }
+ if (unw_init_local (&cursor, &context) < 0)
+ {
+ clib_warning (
+ "libunwind: cannot initialize cursor for local unwinding\n");
+ return 0;
+ }
+ if (unw_step (&cursor) < 1)
+ return 0;
+ }
+ else if (unw_step (&cursor) < 1)
+ return 0;
+
+ if (unw_get_reg (&cursor, UNW_REG_IP, &sf->ip))
+ {
+ clib_warning ("libunwind: cannot read IP\n");
+ return 0;
+ }
+
+ if (unw_get_reg (&cursor, UNW_REG_SP, &sf->sp))
+ {
+ clib_warning ("libunwind: cannot read SP\n");
+ return 0;
+ }
+
+ if (unw_get_proc_name (&cursor, sf->name, sizeof (sf->name), &sf->offset) <
+ 0)
+ sf->name[0] = sf->offset = 0;
+
+ sf->is_signal_frame = unw_is_signal_frame (&cursor) ? 1 : 0;
+
+ if (dladdr ((void *) sf->ip, &info))
+ sf->file_name = info.dli_fname;
+ else
+ sf->file_name = 0;
+
+ sf->index++;
+ return sf;
+#else
+ return 0;
+#endif
+}
diff --git a/src/vppinfra/stack.h b/src/vppinfra/stack.h
new file mode 100644
index 00000000000..98a621d4176
--- /dev/null
+++ b/src/vppinfra/stack.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef __STACK_H__
+#define __STACK_H__
+
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ uword ip, sp;
+ uword offset;
+ char name[64];
+ const char *file_name;
+ u32 index;
+ u8 is_signal_frame;
+} clib_stack_frame_t;
+
+clib_stack_frame_t *clib_stack_frame_get (clib_stack_frame_t *);
+
+#define foreach_clib_stack_frame(sf) \
+ for (clib_stack_frame_t _sf = {}, *sf = clib_stack_frame_get (&_sf); sf; \
+ sf = clib_stack_frame_get (sf))
+
+#endif /* __STACK_H__ */
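A sketch of the new iterator in use; when HAVE_LIBUNWIND is not set,
clib_stack_frame_get returns 0 and the loop body never runs:

    #include <vppinfra/stack.h>
    #include <vppinfra/format.h>

    static void
    print_backtrace (void)
    {
      foreach_clib_stack_frame (sf)
        fformat (stderr, "#%u %s + 0x%lx (%s)\n", sf->index, sf->name,
                 sf->offset, sf->file_name ? sf->file_name : "?");
    }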
diff --git a/src/vppinfra/std-formats.c b/src/vppinfra/std-formats.c
index 1616001f9c5..cb2872ad24b 100644
--- a/src/vppinfra/std-formats.c
+++ b/src/vppinfra/std-formats.c
@@ -135,6 +135,52 @@ format_white_space (u8 * s, va_list * va)
}
u8 *
+format_duration (u8 *s, va_list *args)
+{
+ f64 t = va_arg (*args, f64);
+ s = format (s, "");
+
+ const f64 seconds_per_minute = 60;
+ const f64 seconds_per_hour = 60 * seconds_per_minute;
+ const f64 seconds_per_day = 24 * seconds_per_hour;
+ uword days, hours, minutes, secs, msecs, usecs;
+
+ days = t / seconds_per_day;
+ t -= days * seconds_per_day;
+
+ hours = t / seconds_per_hour;
+ t -= hours * seconds_per_hour;
+
+ minutes = t / seconds_per_minute;
+ t -= minutes * seconds_per_minute;
+
+ secs = t;
+ t -= secs;
+
+ msecs = 1e3 * t;
+
+ usecs = 1e6 * t;
+ usecs = usecs % 1000;
+
+ if (t == 0.)
+ s = format (s, "0");
+ if (days)
+ s = format (s, "%ddays ", days);
+ if (hours)
+ s = format (s, "%dh ", hours);
+ if (minutes)
+ s = format (s, "%dmin ", minutes);
+ if (secs)
+ s = format (s, "%ds ", secs);
+ if (msecs)
+ s = format (s, "%dms ", msecs);
+ if (usecs)
+ s = format (s, "%dus", usecs);
+
+ return (s);
+}
+
+u8 *
format_time_interval (u8 * s, va_list * args)
{
u8 *fmt = va_arg (*args, u8 *);
@@ -204,6 +250,24 @@ format_time_interval (u8 * s, va_list * args)
return s;
}
+/* Format base 10 e.g. 100, 100K, 100M, 100G */
+__clib_export u8 *
+format_base10 (u8 *s, va_list *va)
+{
+ u64 size = va_arg (*va, u64);
+
+ if (size < 1000)
+ s = format (s, "%d", size);
+ else if (size < 1000000)
+ s = format (s, "%.2fK", (f64) size / 1000.);
+ else if (size < 1000000000)
+ s = format (s, "%.2fM", (f64) size / 1000000.);
+ else
+ s = format (s, "%.2fG", (f64) size / 1000000000.);
+
+ return s;
+}
+
/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
__clib_export u8 *
format_memory_size (u8 * s, va_list * va)
@@ -332,8 +396,6 @@ format_c_identifier (u8 * s, va_list * va)
uword i, l;
l = ~0;
- if (clib_mem_is_vec (id))
- l = vec_len (id);
if (id)
for (i = 0; i < l && id[i] != 0; i++)
@@ -352,7 +414,7 @@ __clib_export u8 *
format_hexdump (u8 * s, va_list * args)
{
u8 *data = va_arg (*args, u8 *);
- uword len = va_arg (*args, uword);
+ u32 len = va_arg (*args, u32);
int i, index = 0;
const int line_len = 16;
u8 *line_hex = 0;
@@ -393,6 +455,104 @@ format_hexdump (u8 * s, va_list * args)
return s;
}
+__clib_export u8 *
+format_hexdump_u16 (u8 *s, va_list *args)
+{
+ u16 *data = va_arg (*args, u16 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 8 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 2);
+ }
+ s = format (s, " %04lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_hexdump_u32 (u8 *s, va_list *args)
+{
+ u32 *data = va_arg (*args, u32 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 4 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 4);
+ }
+ s = format (s, " %08lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_hexdump_u64 (u8 *s, va_list *args)
+{
+ u64 *data = va_arg (*args, u64 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 2 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 8);
+ }
+ s = format (s, " %016lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_uword_bitmap (u8 *s, va_list *args)
+{
+ uword *bitmap = va_arg (*args, uword *);
+ int n_uword = va_arg (*args, int);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%6s", "");
+
+ for (int i = uword_bits - 4; i >= 0; i -= 4)
+ s = format (s, "%5d", i);
+
+ vec_add1 (s, '\n');
+
+ for (int j = n_uword - 1; j >= 0; j--)
+ {
+ s = format (s, "%U0x%04x ", format_white_space, indent,
+ j * uword_bits / 8);
+ for (int i = uword_bits - 1; i >= 0; i--)
+ {
+ vec_add1 (s, (1ULL << i) & bitmap[j] ? '1' : '.');
+ if (i % 4 == 0)
+ vec_add1 (s, ' ');
+ }
+ s = format (s, uword_bits == 64 ? "0x%016lx" : "0x%08lx", bitmap[j]);
+ if (j)
+ vec_add1 (s, '\n');
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
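The new formatters compose through the usual %U convention; a sketch with
approximate output (the local prototype stands in for the real one, since
format_duration is not tagged __clib_export above):

    #include <vppinfra/format.h>

    u8 *format_duration (u8 *s, va_list *args); /* assumed visible */

    static u8 *
    stats_line (u8 *s, f64 uptime, u64 pkts)
    {
      /* uptime = 62.25 gives roughly "1min 2s 250ms",
       * pkts = 1500000 gives "1.50M" */
      return format (s, "up %U, %U pkts", format_duration, uptime,
                     format_base10, pkts);
    }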
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 0d2c0655c50..b1ef0e4809b 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -47,6 +47,9 @@
#include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>
#include <vppinfra/error_bootstrap.h>
+#ifdef __SSE4_2__
+#include <vppinfra/memcpy_x86_64.h>
+#endif
#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
@@ -67,26 +70,6 @@
/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);
-/*
- * the vector unit memcpy variants confuse coverity
- * so don't let it anywhere near them.
- */
-#ifndef __COVERITY__
-#if __AVX512BITALG__
-#include <vppinfra/memcpy_avx512.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx512 (a, b, c)
-#elif __AVX2__
-#include <vppinfra/memcpy_avx2.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx2 (a, b, c)
-#elif __SSSE3__
-#include <vppinfra/memcpy_sse3.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_sse3 (a, b, c)
-#endif /* __AVX512BITALG__ */
-#endif /* __COVERITY__ */
-
-#ifndef clib_memcpy_fast_arch
-#define clib_memcpy_fast_arch(a, b, c) memcpy (a, b, c)
-#endif /* clib_memcpy_fast_arch */
static_always_inline void *
clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
@@ -94,10 +77,34 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
ASSERT (dst && src &&
"memcpy(src, dst, n) with src == NULL or dst == NULL is undefined "
"behaviour");
- return clib_memcpy_fast_arch (dst, src, n);
+#if defined(__COVERITY__)
+ return memcpy (dst, src, n);
+#elif defined(__SSE4_2__)
+ clib_memcpy_x86_64 (dst, src, n);
+ return dst;
+#else
+ return memcpy (dst, src, n);
+#endif
}
-#undef clib_memcpy_fast_arch
+static_always_inline void *
+clib_memmove (void *dst, const void *src, size_t n)
+{
+ u8 *d = (u8 *) dst;
+ u8 *s = (u8 *) src;
+
+ if (s == d)
+ return d;
+
+ if (d > s)
+ for (uword i = n - 1; (i + 1) > 0; i--)
+ d[i] = s[i];
+ else
+ for (uword i = 0; i < n; i++)
+ d[i] = s[i];
+
+ return d;
+}
#include <vppinfra/memcpy.h>
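Unlike clib_memcpy_fast, the new clib_memmove picks its copy direction from
the pointer order, so overlapping moves are well defined; a sketch:

    #include <vppinfra/string.h>

    static void
    memmove_sketch (void)
    {
      u8 buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

      /* dst > src: the backward loop runs, so the overlap is safe */
      clib_memmove (buf + 1, buf, 7); /* buf becomes 0 0 1 2 3 4 5 6 */
    }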
@@ -246,14 +253,14 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
d0 = u8x32_load_unaligned (dst);
d1 = u8x32_load_unaligned (dst + 32);
- d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask));
+ d0 = u8x32_blend (d0, s0, lv > mask);
u8x32_store_unaligned (d0, dst);
if (max_len <= 32)
return;
mask += add;
- d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask));
+ d1 = u8x32_blend (d1, s1, lv > mask);
u8x32_store_unaligned (d1, dst + 32);
#elif defined (CLIB_HAVE_VEC128)
@@ -271,25 +278,25 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
d2 = u8x16_load_unaligned (dst + 32);
d3 = u8x16_load_unaligned (dst + 48);
- d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, mask));
+ d0 = u8x16_blend (d0, s0, lv > mask);
u8x16_store_unaligned (d0, dst);
if (max_len <= 16)
return;
mask += add;
- d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask));
+ d1 = u8x16_blend (d1, s1, lv > mask);
u8x16_store_unaligned (d1, dst + 16);
if (max_len <= 32)
return;
mask += add;
- d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask));
+ d2 = u8x16_blend (d2, s2, lv > mask);
u8x16_store_unaligned (d2, dst + 32);
mask += add;
- d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask));
+ d3 = u8x16_blend (d3, s3, lv > mask);
u8x16_store_unaligned (d3, dst + 48);
#else
memmove (dst, src, len);
@@ -334,9 +341,17 @@ clib_memset_u64 (void *p, u64 val, uword count)
if (count == 0)
return;
#else
+#if defined(CLIB_HAVE_VEC128)
+ u64x2 v = u64x2_splat (val);
+#endif
while (count >= 4)
{
+#if defined(CLIB_HAVE_VEC128)
+ u64x2_store_unaligned (v, ptr);
+ u64x2_store_unaligned (v, ptr + 2);
+#else
ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+#endif
ptr += 4;
count -= 4;
}
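On builds with CLIB_HAVE_VEC128, the added path issues two unaligned
128-bit splat stores per loop turn in place of four scalar writes; callers
are unchanged, e.g.:

    #include <vppinfra/string.h>

    static void
    memset_u64_sketch (void)
    {
      u64 slots[11];

      /* 8 values via u64x2 stores (two loop turns), 3 via the scalar tail */
      clib_memset_u64 (slots, ~0ULL, 11);
    }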
@@ -483,239 +498,6 @@ clib_memset_u8 (void *p, u8 val, uword count)
ptr++[0] = val;
}
-static_always_inline uword
-clib_count_equal_u64 (u64 * data, uword max_count)
-{
- uword count;
- u64 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u64x4 splat = u64x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 8;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u32 (u32 * data, uword max_count)
-{
- uword count;
- u32 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u32x8 splat = u32x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u32x4 splat = u32x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u16 (u16 * data, uword max_count)
-{
- uword count;
- u16 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u16x16 splat = u16x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u16x8 splat = u16x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u8 (u8 * data, uword max_count)
-{
- uword count;
- u8 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u8x32 splat = u8x32_splat (first);
- while (count + 31 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- return max_count;
-
- data += 32;
- count += 32;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u8x16 splat = u8x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp);
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
/*
* This macro is to provide smooth mapping from memcmp to memcmp_s.
@@ -926,14 +708,6 @@ strncmp_s_inline (const char *s1, rsize_t s1max, const char *s2, rsize_t n,
return EOK;
}
-/*
- * This macro is provided for smooth migration from strcpy. It is not perfect
- * because we don't know the size of the destination buffer to pass to strcpy_s.
- * We improvise dmax with CLIB_STRING_MACRO_MAX.
- * Applications are encouraged to move to the C11 strcpy_s API.
- */
-#define clib_strcpy(d,s) strcpy_s_inline(d,CLIB_STRING_MACRO_MAX,s)
-
errno_t strcpy_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src);
@@ -1060,16 +834,6 @@ strncpy_s_inline (char *__restrict__ dest, rsize_t dmax,
return status;
}
-/*
- * This macro is to provide smooth migration from strcat to strcat_s.
- * Because there is no dmax in strcat, we improvise it with
- * CLIB_STRING_MACRO_MAX. Please note there may be a chance to overwrite dest
- * with too many bytes from src.
- * Applications are encouraged to use C11 API to provide the actual dmax
- * for proper checking and protection.
- */
-#define clib_strcat(d,s) strcat_s_inline(d,CLIB_STRING_MACRO_MAX,s)
-
errno_t strcat_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src);
@@ -1121,16 +885,6 @@ strcat_s_inline (char *__restrict__ dest, rsize_t dmax,
return EOK;
}
-/*
- * This macro is to provide smooth migration from strncat to strncat_s.
- * The unsafe strncat does not have s1max. We improvise it with
- * CLIB_STRING_MACRO_MAX. Please note there may be a chance to overwrite
- * dest with too many bytes from src.
- * Applications are encouraged to move to C11 strncat_s which requires dmax
- * from the caller and provides checking to safeguard the memory corruption.
- */
-#define clib_strncat(d,s,n) strncat_s_inline(d,CLIB_STRING_MACRO_MAX,s,n)
-
errno_t strncat_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src, rsize_t n);
@@ -1350,23 +1104,6 @@ strtok_s_inline (char *__restrict__ s1, rsize_t * __restrict__ s1max,
return (ptoken);
}
-/*
- * This macro is to provide smooth mapping from strstr to strstr_s.
- * strstr_s requires s1max and s2max which the unsafe API does not have. So
- * we have to improvise them with CLIB_STRING_MACRO_MAX which may cause us
- * to access memory beyond it is intended if s1 or s2 is unterminated.
- * For the record, strstr crashes if s1 or s2 is unterminated. But this macro
- * does not.
- * Applications are encouraged to use the cool C11 strstr_s API to avoid
- * this problem.
- */
-#define clib_strstr(s1,s2) \
- ({ char * __substring = 0; \
- strstr_s_inline (s1, CLIB_STRING_MACRO_MAX, s2, CLIB_STRING_MACRO_MAX, \
- &__substring); \
- __substring; \
- })
-
errno_t strstr_s (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
char **substring);
@@ -1395,7 +1132,7 @@ strstr_s_inline (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
clib_c11_violation ("substring NULL");
if (s1 && s1max && (s1[clib_strnlen (s1, s1max)] != '\0'))
clib_c11_violation ("s1 unterminated");
- if (s2 && s2max && (s2[clib_strnlen (s2, s1max)] != '\0'))
+ if (s2 && s2max && (s2[clib_strnlen (s2, s2max)] != '\0'))
clib_c11_violation ("s2 unterminated");
return EINVAL;
}
@@ -1424,6 +1161,13 @@ strstr_s_inline (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
return EOK;
}
+static_always_inline const char *
+clib_string_skip_prefix (const char *s, const char *prefix)
+{
+ uword len = __builtin_strlen (prefix);
+ return s + (__builtin_strncmp (s, prefix, len) ? 0 : len);
+}
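/* clib_string_skip_prefix returns the suffix after prefix when s starts
 * with it, and s unchanged otherwise: __builtin_strncmp yields 0 on a
 * match, so the added offset is len on a match and 0 on a mismatch.
 *
 *   clib_string_skip_prefix ("GigabitEthernet0/8/0", "GigabitEthernet");
 *   // returns "0/8/0"
 *   clib_string_skip_prefix ("local0", "GigabitEthernet");
 *   // returns "local0" unchanged
 */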
+
#endif /* included_clib_string_h */
/*
diff --git a/src/vppinfra/test/aes_cbc.c b/src/vppinfra/test/aes_cbc.c
new file mode 100644
index 00000000000..be5f8fb176a
--- /dev/null
+++ b/src/vppinfra/test/aes_cbc.c
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#if defined(__AES__) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_cbc.h>
+
+static const u8 iv[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+};
+
+static const u8 plaintext[] = {
+ 0x6B, 0xC1, 0xBE, 0xE2, 0x2E, 0x40, 0x9F, 0x96, 0xE9, 0x3D, 0x7E, 0x11, 0x73,
+ 0x93, 0x17, 0x2A, 0xAE, 0x2D, 0x8A, 0x57, 0x1E, 0x03, 0xAC, 0x9C, 0x9E, 0xB7,
+ 0x6F, 0xAC, 0x45, 0xAF, 0x8E, 0x51, 0x30, 0xC8, 0x1C, 0x46, 0xA3, 0x5C, 0xE4,
+ 0x11, 0xE5, 0xFB, 0xC1, 0x19, 0x1A, 0x0A, 0x52, 0xEF, 0xF6, 0x9F, 0x24, 0x45,
+ 0xDF, 0x4F, 0x9B, 0x17, 0xAD, 0x2B, 0x41, 0x7B, 0xE6, 0x6C, 0x37, 0x10,
+};
+
+static const u8 key128[] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6,
+ 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C };
+
+static const u8 key192[24] = {
+ 0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52, 0xC8, 0x10, 0xF3, 0x2B,
+ 0x80, 0x90, 0x79, 0xE5, 0x62, 0xF8, 0xEA, 0xD2, 0x52, 0x2C, 0x6B, 0x7B,
+};
+
+static const u8 ciphertext128[] = {
+ 0x76, 0x49, 0xAB, 0xAC, 0x81, 0x19, 0xB2, 0x46, 0xCE, 0xE9, 0x8E, 0x9B, 0x12,
+ 0xE9, 0x19, 0x7D, 0x50, 0x86, 0xCB, 0x9B, 0x50, 0x72, 0x19, 0xEE, 0x95, 0xDB,
+ 0x11, 0x3A, 0x91, 0x76, 0x78, 0xB2, 0x73, 0xBE, 0xD6, 0xB8, 0xE3, 0xC1, 0x74,
+ 0x3B, 0x71, 0x16, 0xE6, 0x9E, 0x22, 0x22, 0x95, 0x16, 0x3F, 0xF1, 0xCA, 0xA1,
+ 0x68, 0x1F, 0xAC, 0x09, 0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7,
+};
+
+static const u8 ciphertext192[64] = {
+ 0x4F, 0x02, 0x1D, 0xB2, 0x43, 0xBC, 0x63, 0x3D, 0x71, 0x78, 0x18, 0x3A, 0x9F,
+ 0xA0, 0x71, 0xE8, 0xB4, 0xD9, 0xAD, 0xA9, 0xAD, 0x7D, 0xED, 0xF4, 0xE5, 0xE7,
+ 0x38, 0x76, 0x3F, 0x69, 0x14, 0x5A, 0x57, 0x1B, 0x24, 0x20, 0x12, 0xFB, 0x7A,
+ 0xE0, 0x7F, 0xA9, 0xBA, 0xAC, 0x3D, 0xF1, 0x02, 0xE0, 0x08, 0xB0, 0xE2, 0x79,
+ 0x88, 0x59, 0x88, 0x81, 0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD,
+};
+
+static const u8 key256[32] = {
+ 0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE, 0x2B, 0x73, 0xAE,
+ 0xF0, 0x85, 0x7D, 0x77, 0x81, 0x1F, 0x35, 0x2C, 0x07, 0x3B, 0x61,
+ 0x08, 0xD7, 0x2D, 0x98, 0x10, 0xA3, 0x09, 0x14, 0xDF, 0xF4,
+};
+
+static const u8 ciphertext256[64] = {
+ 0xF5, 0x8C, 0x4C, 0x04, 0xD6, 0xE5, 0xF1, 0xBA, 0x77, 0x9E, 0xAB, 0xFB, 0x5F,
+ 0x7B, 0xFB, 0xD6, 0x9C, 0xFC, 0x4E, 0x96, 0x7E, 0xDB, 0x80, 0x8D, 0x67, 0x9F,
+ 0x77, 0x7B, 0xC6, 0x70, 0x2C, 0x7D, 0x39, 0xF2, 0x33, 0x69, 0xA9, 0xD9, 0xBA,
+ 0xCF, 0xA5, 0x30, 0xE2, 0x63, 0x04, 0x23, 0x14, 0x61, 0xB2, 0xEB, 0x05, 0xE2,
+ 0xC3, 0x9B, 0xE9, 0xFC, 0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B,
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_encrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_encrypt (&k, plaintext, sizeof (plaintext), iv, data); \
+ if (memcmp (ciphertext##b, data, sizeof (ciphertext##b)) != 0) \
+ err = \
+	clib_error_return (err, "encrypted data doesn't match ciphertext");  \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_encrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+_ (128)
+_ (192)
+_ (256)
+#undef _
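/* The _ (bits) macro stamps out one known-answer test and one perf test per
 * key size. The functional test expands a key, encrypts the fixed plaintext
 * and compares against the expected ciphertext; the perf variant times a
 * single bulk CBC encrypt of n_ops bytes so the harness can report per-byte
 * cost. The n + 16 buffer sizes presumably leave headroom for whole-block
 * stores past the requested length. */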
+
+REGISTER_TEST (clib_aes128_cbc_encrypt) = {
+ .name = "clib_aes128_cbc_encrypt",
+ .fn = test_clib_aes128_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_encrypt) = {
+ .name = "clib_aes192_cbc_encrypt",
+ .fn = test_clib_aes192_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_encrypt) = {
+ .name = "clib_aes256_cbc_encrypt",
+ .fn = test_clib_aes256_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_decrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_decrypt (&k, ciphertext##b, sizeof (ciphertext##b), iv, \
+ data); \
+ if (memcmp (plaintext, data, sizeof (plaintext)) != 0) \
+ err = \
+ clib_error_return (err, "decrypted data doesn't match plaintext"); \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_decrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+_ (128)
+_ (192)
+_ (256)
+#undef _
+
+REGISTER_TEST (clib_aes128_cbc_decrypt) = {
+ .name = "clib_aes128_cbc_decrypt",
+ .fn = test_clib_aes128_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_decrypt) = {
+ .name = "clib_aes192_cbc_decrypt",
+ .fn = test_clib_aes192_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_decrypt) = {
+ .name = "clib_aes256_cbc_decrypt",
+ .fn = test_clib_aes256_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+#endif
diff --git a/src/vppinfra/test/aes_ctr.c b/src/vppinfra/test/aes_ctr.c
new file mode 100644
index 00000000000..2892700fb27
--- /dev/null
+++ b/src/vppinfra/test/aes_ctr.c
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#if defined(__AES__) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_ctr.h>
+
+static const struct
+{
+ char *name;
+ const u8 *pt, *key, *ct, *iv;
+ u32 data_len;
+} test_cases128[] = {
+  /* RFC 3686 AES-128-CTR known-answer test cases */
+ { .name = "RFC3686 Test Vector #1",
+ .key = (const u8[16]){ 0xae, 0x68, 0x52, 0xf8, 0x12, 0x10, 0x67, 0xcc,
+ 0x4b, 0xf7, 0xa5, 0x76, 0x55, 0x77, 0xf3, 0x9e },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
+
+ .pt = (const u8 *) "Single block msg",
+ .ct = (const u8[16]){ 0xe4, 0x09, 0x5d, 0x4f, 0xb7, 0xa7, 0xb3, 0x79, 0x2d,
+ 0x61, 0x75, 0xa3, 0x26, 0x13, 0x11, 0xb8 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #2",
+ .key = (const u8[16]){ 0x7e, 0x24, 0x06, 0x78, 0x17, 0xfa, 0xe0, 0xd7,
+ 0x43, 0xd6, 0xce, 0x1f, 0x32, 0x53, 0x91, 0x63 },
+ .iv = (const u8[16]){ 0x00, 0x6c, 0xb6, 0xdb, 0xc0, 0x54, 0x3b, 0x59, 0xda,
+ 0x48, 0xd9, 0x0b, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0x51, 0x04, 0xa1, 0x06, 0x16, 0x8a, 0x72, 0xd9,
+ 0x79, 0x0d, 0x41, 0xee, 0x8e, 0xda, 0xd3, 0x88,
+ 0xeb, 0x2e, 0x1e, 0xfc, 0x46, 0xda, 0x57, 0xc8,
+ 0xfc, 0xe6, 0x30, 0xdf, 0x91, 0x41, 0xbe, 0x28 },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #3",
+ .key = (const u8[16]){ 0x76, 0x91, 0xbe, 0x03, 0x5e, 0x50, 0x20, 0xa8,
+ 0xac, 0x6e, 0x61, 0x85, 0x29, 0xf9, 0xa0, 0xdc },
+ .iv = (const u8[16]){ 0x00, 0xe0, 0x01, 0x7b, 0x27, 0x77, 0x7f, 0x3f, 0x4a,
+ 0x17, 0x86, 0xf0, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+ 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0xc1, 0xcf, 0x48, 0xa8, 0x9f, 0x2f, 0xfd, 0xd9, 0xcf,
+ 0x46, 0x52, 0xe9, 0xef, 0xdb, 0x72, 0xd7, 0x45, 0x40,
+ 0xa4, 0x2b, 0xde, 0x6d, 0x78, 0x36, 0xd5, 0x9a, 0x5c,
+ 0xea, 0xae, 0xf3, 0x10, 0x53, 0x25, 0xb2, 0x07, 0x2f },
+ .data_len = 36 },
+}, test_cases192[] = {
+ { .name = "RFC3686 Test Vector #4",
+ .key = (const u8[24]){ 0x16, 0xaf, 0x5b, 0x14, 0x5f, 0xc9, 0xf5, 0x79,
+ 0xc1, 0x75, 0xf9, 0x3e, 0x3b, 0xfb, 0x0e, 0xed,
+ 0x86, 0x3d, 0x06, 0xcc, 0xfd, 0xb7, 0x85, 0x15 },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x48, 0x36, 0x73, 0x3c, 0x14, 0x7d,
+ 0x6d, 0x93, 0xcb, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[16]){ 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, 0x20, 0x62, 0x6c,
+ 0x6f, 0x63, 0x6b, 0x20, 0x6d, 0x73, 0x67 },
+ .ct = (const u8[16]){ 0x4b, 0x55, 0x38, 0x4f, 0xe2, 0x59, 0xc9, 0xc8, 0x4e,
+ 0x79, 0x35, 0xa0, 0x03, 0xcb, 0xe9, 0x28 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #5",
+ .key = (const u8[24]){ 0x7c, 0x5c, 0xb2, 0x40, 0x1b, 0x3d, 0xc3, 0x3c,
+ 0x19, 0xe7, 0x34, 0x08, 0x19, 0xe0, 0xf6, 0x9c,
+ 0x67, 0x8c, 0x3d, 0xb8, 0xe6, 0xf6, 0xa9, 0x1a },
+ .iv = (const u8[16]){ 0x00, 0x96, 0xb0, 0x3b, 0x02, 0x0c, 0x6e, 0xad, 0xc2,
+ 0xcb, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0x45, 0x32, 0x43, 0xfc, 0x60, 0x9b, 0x23, 0x32,
+ 0x7e, 0xdf, 0xaa, 0xfa, 0x71, 0x31, 0xcd, 0x9f,
+ 0x84, 0x90, 0x70, 0x1c, 0x5a, 0xd4, 0xa7, 0x9c,
+ 0xfc, 0x1f, 0xe0, 0xff, 0x42, 0xf4, 0xfb, 0x00 },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #6",
+ .key = (const u8[24]){ 0x02, 0xBF, 0x39, 0x1E, 0xE8, 0xEC, 0xB1, 0x59,
+ 0xB9, 0x59, 0x61, 0x7B, 0x09, 0x65, 0x27, 0x9B,
+ 0xF5, 0x9B, 0x60, 0xA7, 0x86, 0xD3, 0xE0, 0xFE },
+ .iv = (const u8[16]){ 0x00, 0x07, 0xBD, 0xFD, 0x5C, 0xBD, 0x60, 0x27, 0x8D,
+ 0xCC, 0x09, 0x12, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A,
+ 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0x96, 0x89, 0x3F, 0xC5, 0x5E, 0x5C, 0x72, 0x2F, 0x54,
+ 0x0B, 0x7D, 0xD1, 0xDD, 0xF7, 0xE7, 0x58, 0xD2, 0x88,
+ 0xBC, 0x95, 0xC6, 0x91, 0x65, 0x88, 0x45, 0x36, 0xC8,
+ 0x11, 0x66, 0x2F, 0x21, 0x88, 0xAB, 0xEE, 0x09, 0x35 },
+ .data_len = 36 },
+
+}, test_cases256[] = {
+ { .name = "RFC3686 Test Vector #7",
+ .key = (const u8[32]){ 0x77, 0x6b, 0xef, 0xf2, 0x85, 0x1d, 0xb0, 0x6f,
+ 0x4c, 0x8a, 0x05, 0x42, 0xc8, 0x69, 0x6f, 0x6c,
+ 0x6a, 0x81, 0xaf, 0x1e, 0xec, 0x96, 0xb4, 0xd3,
+ 0x7f, 0xc1, 0xd6, 0x89, 0xe6, 0xc1, 0xc1, 0x04 },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x60, 0xdb, 0x56, 0x72, 0xc9, 0x7a,
+ 0xa8, 0xf0, 0xb2, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8 *) "Single block msg",
+ .ct = (const u8[16]){ 0x14, 0x5a, 0xd0, 0x1d, 0xbf, 0x82, 0x4e, 0xc7, 0x56,
+ 0x08, 0x63, 0xdc, 0x71, 0xe3, 0xe0, 0xc0 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #8",
+ .key = (const u8[32]){ 0xf6, 0xd6, 0x6d, 0x6b, 0xd5, 0x2d, 0x59, 0xbb,
+ 0x07, 0x96, 0x36, 0x58, 0x79, 0xef, 0xf8, 0x86,
+ 0xc6, 0x6d, 0xd5, 0x1a, 0x5b, 0x6a, 0x99, 0x74,
+ 0x4b, 0x50, 0x59, 0x0c, 0x87, 0xa2, 0x38, 0x84 },
+ .iv = (const u8[16]){ 0x00, 0xfa, 0xac, 0x24, 0xc1, 0x58, 0x5e, 0xf1, 0x5a,
+ 0x43, 0xd8, 0x75, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0xf0, 0x5e, 0x23, 0x1b, 0x38, 0x94, 0x61, 0x2c,
+ 0x49, 0xee, 0x00, 0x0b, 0x80, 0x4e, 0xb2, 0xa9,
+ 0xb8, 0x30, 0x6b, 0x50, 0x8f, 0x83, 0x9d, 0x6a,
+ 0x55, 0x30, 0x83, 0x1d, 0x93, 0x44, 0xaf, 0x1c },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #9",
+ .key = (const u8[32]){ 0xff, 0x7a, 0x61, 0x7c, 0xe6, 0x91, 0x48, 0xe4,
+ 0xf1, 0x72, 0x6e, 0x2f, 0x43, 0x58, 0x1d, 0xe2,
+ 0xaa, 0x62, 0xd9, 0xf8, 0x05, 0x53, 0x2e, 0xdf,
+ 0xf1, 0xee, 0xd6, 0x87, 0xfb, 0x54, 0x15, 0x3d },
+ .iv = (const u8[16]){ 0x00, 0x1c, 0xc5, 0xb7, 0x51, 0xa5, 0x1d, 0x70, 0xa1,
+ 0xc1, 0x11, 0x48, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+ 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0xeb, 0x6c, 0x52, 0x82, 0x1d, 0x0b, 0xbb, 0xf7, 0xce,
+ 0x75, 0x94, 0x46, 0x2a, 0xca, 0x4f, 0xaa, 0xb4, 0x07,
+ 0xdf, 0x86, 0x65, 0x69, 0xfd, 0x07, 0xf4, 0x8c, 0xc0,
+ 0xb5, 0x83, 0xd6, 0x07, 0x1f, 0x1e, 0xc0, 0xe6, 0xb8 },
+ .data_len = 36 }
+};
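/* The nine cases above are the AES-CTR known-answer vectors from RFC 3686,
 * three per key size. Each .iv packs the complete initial counter block:
 * 4-byte nonce, 8-byte IV and a 32-bit block counter starting at 1, which
 * is why every .iv ends in 00 00 00 01. */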
+
+#define MAX_TEST_DATA_LEN 256
+
+#define INC_TEST_BYTES (256 * 16 + 1)
+
+static u8 inc_key128[] = {
+ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+ 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
+};
+
+static u8 inc_iv[] = {
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+};
+
+static u64 inc_ct128[] = {
+ 0xb77a659c70dd8dec, 0xebaf93e67e1cdbfa, 0x744766732f6e3a26,
+ 0xb16d4de0cc6db900, 0x6811ac5c5be10d4a, 0x6b42973b30e29d96,
+ 0xf1aec4c4ac0badd8, 0xc1955129e00b33ec, 0x49d7cf50bb054cf0,
+ 0x4deb06dcdc7a21b8, 0xa257b4190916c808, 0x44b7d421c38b934b,
+ 0x9e4dbb2d1aceb85b, 0x2d1c952f53c6000d, 0x7e25b633f3bceb0d,
+ 0xcee9f88cd3c2236d, 0x10ce6bc4a53b1d37, 0xb4783ea69ebc261d,
+ 0x7f732c19e5fdd3ea, 0xb253d0ebd5522c84, 0x7925888c44ef010d,
+ 0xba213ea62e7ec7f0, 0x239e0466520393fd, 0x8cde31681d451842,
+ 0x20b8270d3c5c1bc5, 0x3e56c37a1d573ebe, 0xc4fdb0bb491cf04e,
+ 0x29c9a4f92d7b12da, 0x50c8a51f05b6f704, 0x3cf0f4071c2098fa,
+ 0xb0842470bd8c6fdd, 0x86dd40fdc9640190, 0xe4a6184230ee4f6c,
+ 0x0e2a69261819535e, 0xbdb62571c80aaa39, 0x24a0dc5eafd33f3a,
+ 0x830599f37869c6ac, 0xf7049ae1b8e5c0dd, 0x7c9dd8d4405d2050,
+ 0x0e91382b1dace623, 0xf2b62e26f4133673, 0xa9216257693afdab,
+ 0x2a26df863fb6e980, 0x85e600421c395c83, 0xd5a521016a175cb3,
+ 0x5ef31ae51f7f2f7b, 0xc6ff491d0d6f74d4, 0x16b0e60ac13156d3,
+ 0xd49e0025d5ec1e4b, 0x987c4eff196cd64e, 0xa163915e80892b07,
+ 0x69ab0084052d574a, 0x8017caa649d22bdb, 0xf5eb130f0df2c49a,
+ 0xe2ced8f88537e9ea, 0xdaaff5e845cff681, 0xbd22ac46dd219c7a,
+ 0x1b963af4641e7cf7, 0xe70e7d5b76f88573, 0x39703f5e2db84937,
+ 0x8a1514af42bf3c96, 0x7f51d78b7d3971a6, 0x437a651ef9f08c08,
+ 0x69fd3712ccdfd843, 0xd8204939e67dad48, 0x71035fc942194251,
+ 0x703d964c7525bb2a, 0xe2166e50e1892d94, 0xbe8034b11f6a5a9f,
+ 0x954e4d74c3a9e105, 0x19e077bf00e5186a, 0x7aee46c4b5d4cbf1,
+ 0xfd7dedd15a3e7d35, 0x4ba1c4b76cb93f57, 0xb2e94cffbb82f098,
+ 0x078b04fcebc1fafc, 0x923edcc8600018b2, 0xc018169aba42ff11,
+ 0x0e4f91e46db01bf8, 0x7b5d2b322371e9fa, 0x8e94284632dd300b,
+ 0x80a3d93ce61c2f13, 0x445d2fb83ecfef73, 0xe1279d639bcd26c9,
+ 0xbd1865ba653ce114, 0x0316cfe3227bfb2a, 0xddc80c63d53878db,
+ 0xc91a2f5fedf4a51a, 0xce408a5275b0271f, 0x59a0abc34619018e,
+ 0xa215c590ad1afb21, 0xe3b096d42fc03598, 0x7cada064ab4f4997,
+ 0x699be0e57d76e47f, 0x235151411eee9cbd, 0xbbc688f0eaf896cd,
+ 0x4e78715341f9299d, 0x9f85d76bf99ef2a4, 0x15110ceff4a6040b,
+ 0x9feed36ff4566060, 0x4833ea7d66a0c572, 0x94c7edbdf2169d59,
+ 0xb413d116c6e771f1, 0x9a4b6e78167f4c66, 0x42d3f993c8aaee27,
+ 0xd16783a8c4e57558, 0xb1d7a074dd67339e, 0x97a164444f97adc2,
+ 0xc15a08d61628e5f3, 0x8767e41e04eb96a2, 0xbb28953ed0eae183,
+ 0xc0bab4e80ed8cc6e, 0x1ac34b5a5c4010f8, 0x0bc3b7d9db1775b7,
+ 0x565dead595b98969, 0x0fc03a3cfb656014, 0xdb9098b924a92926,
+ 0xe2786bc431c1f39a, 0xf8a0bf4fffb78d10, 0xd76161fe1ae71851,
+ 0xced33ea693cedbb4, 0xef13034da5529a1b, 0xd71081cadbbff0ac,
+ 0x1873eb643e857392, 0xf6f7c30284ffecb0, 0x93ded259d35eb6fe,
+ 0xf872980774f6e5ef, 0xd457c8ed22d5bc3f, 0x75d907e2a6bcced2,
+ 0xcfd3dceb8d7a79ba, 0xaeed2ff2fc0872bb, 0xb5fc72005d2eb168,
+ 0x850e0e0757274665, 0xab7e5da576c706ec, 0xf1df1ba9a972a4ca,
+ 0xe81d430b4f54adf9, 0x788f3d8655ba79bb, 0xf5990db3557bbf8c,
+ 0x1cacafc47729252c, 0x7581b4d6f3b83d9b, 0x94185dcdb0b0c4cd,
+ 0x3596e687f4b9f4ed, 0xb9462442134b804d, 0xdab188808726fec6,
+ 0xfe10831e8824d4c5, 0x000c641ed4c93be7, 0x2525ee781608b1ea,
+ 0x2b32469d51104097, 0x73a09c6ea117aea9, 0x8506dcdec8ade0be,
+ 0xf9f9fa553cac7285, 0x34b24f100086b974, 0xd42fa88547ade8e7,
+ 0xfd0bb8ce9a5f8e14, 0x15df9966c6a3e433, 0xf6696aafaae89cd6,
+ 0x3d521a9d1a0087e1, 0xe18ca6b8e94701f0, 0x8a4660e26a77965e,
+ 0xc74fcdf41bf4aa20, 0x292a356d0b670157, 0x36ff3344a9eee4ea,
+ 0xd76b051d6251a14b, 0xa9e09f1bacd1e30f, 0xae47cb95f95a8831,
+ 0x58b85ac7c98537ec, 0x9e30f1be05719bd2, 0x94772e6b56fc1380,
+ 0xbe94026a4a89b783, 0x7a7ffb61daa5ac60, 0x2f7beafcc5e9ac8a,
+ 0xfa33f37edc57e94c, 0x230c3582fb412093, 0xdeec806ecc4fa3c4,
+ 0xc7ff8876a31edd76, 0x6d0500f4ccd1bb20, 0xf1d0bef759b81b6c,
+ 0x138b1d39533379b7, 0xece52f84d9f20455, 0x3ed05e391352b9dd,
+ 0x95600f558d4dea51, 0x1d6b997966e35392, 0x0eeae16905b94e37,
+ 0x7db2acc242a56ab0, 0xaf347e5598687f51, 0xbf25013db6bddc18,
+ 0x6d4f106c35f9ee28, 0xc8e90bbe4283ab8c, 0x188cf978f1477dee,
+ 0x66376bfa3a6d8131, 0xe0ebd6632eb89b24, 0xb9e49d81e9d37f69,
+ 0xa5cfa3812d530e04, 0x717353523542a27f, 0x0d6669c916ab4d34,
+ 0x79e741ad592a7bb1, 0x63a7f35584bd3ea5, 0xc0494db2930cbc32,
+ 0x442bd29d7edd0e49, 0x52ec0bce733f61a0, 0x8bd199bf55bc2b4b,
+ 0x727ede5583bb859c, 0x9d07eda6e8220df1, 0xebdd7467d7259f15,
+ 0x8f6035a5dc5f53b1, 0x063a0935630b5f6f, 0xc6e983ec1f08ebe6,
+ 0xeedc82de2b28e651, 0xe28760013e13ae23, 0x37c078d66ad376a3,
+ 0xd54a72e88e80926b, 0x5822405e1d688eec, 0xa001e0b0d4a7447f,
+ 0xfd41f41419d8fd4d, 0x1391d37127a75095, 0x4795d7fb7ad67f17,
+ 0xa47c05c9b8400a0c, 0x28519cd5e98bba0c, 0x84a72dce8a27d050,
+ 0xcbee7b3c83d68c5f, 0xab2227b8f5203d3d, 0x3335a393d47ef9ec,
+ 0xd00b21a2a5dde597, 0xb13d50489ca79216, 0xde1cc721425dda94,
+ 0x1ddc9863b5b0b8e8, 0xb125481a01dfe1b5, 0x5b331c746c4148db,
+ 0x8d6729fe30d56f1d, 0xdc413723540aca6f, 0xf08fe55711f8f09b,
+ 0x98bcde7c09126688, 0xa38c02a0c19d08b0, 0xde8df0683372e31e,
+ 0x08b4727054d766a0, 0xc13b77c325ae45ed, 0x6e7fe05de6b28d5a,
+ 0x1794a4f149586b9a, 0x23f5881c699f81b8, 0x355c9d899c0dcfe3,
+ 0x4319acb92ca33a29, 0x4f3211554c2ecf79, 0x64741347e08aaa2f,
+ 0x32f89bf1084e0723, 0xb0d5d830b9ae58a6, 0x235170babbd5686f,
+ 0xaa711d0aff2e9830, 0x4f73229995f82ca2, 0x46565f056bb352ea,
+ 0x55283776fd729f29, 0xb027c5b67be58718, 0xfa58d8c215d52ef8,
+ 0xfa1a78f7c7db4b2f, 0x7b2badd9a5a7e810, 0x6c362d97ece0f08a,
+ 0xff8ad11e7ce377b1, 0xdf5a423e843cbfa0, 0xfa9e70edc9c12d2b,
+ 0xad745d9146b0b3d9, 0xfc2a590f1ce32b8c, 0x599b34c583449c39,
+ 0xbcab9517d2bd4eae, 0xa5a7f54890e38bc7, 0xb9700fcb336a049a,
+ 0xfcfcc2d65956af5f, 0x3887b5f3e5d238d6, 0x0b9bc00a60dd37c6,
+ 0x09f8d5b6a128fe23, 0x4b33ac26a2a59b5c, 0xfc6e3f30b4b4e108,
+ 0x1e53d6aa6266bee7, 0x9adf6b4cb3369643, 0xda38dfd6df234f48,
+ 0x845e61ddc98d3d16, 0x4a0b90d7d115d701, 0x64e1c9619aa777c3,
+ 0x9dd4b1df006c81f9, 0x71b2b88aea6c679e, 0xb39da7819be759ff,
+ 0xfdad221790b269bb, 0x741f7955b56d786c, 0x5d724fcce9250a73,
+ 0x3812aa144730905b, 0xb74986be047e24c4, 0xeebb8aa5ebdcc8a0,
+ 0x26a0ea4272d5a371, 0x2ff3733c39e92f82, 0x17880beb7b808b30,
+ 0xe298cf8aa284e39c, 0xd481ff1948d0eef0, 0xed53786d517a1f10,
+ 0x853ccfe7f1cba481, 0x9ba1707467deb6dc, 0xf1aae1c3190806b3,
+ 0xb017539bb50b55c4, 0x8809bcc37ac46808, 0x0ae0a3e6e9a6bba5,
+ 0xf7a5276c2a6df772, 0xaf095d1ceb24d931, 0xaa0f62c5eb44d3a6,
+ 0x5e9915d18cd09844, 0xcfff6a2edf6cd35f, 0x893ebc1038af747e,
+ 0xe4360da910f3853a, 0x2097129be26812d5, 0x09d1e31bd3fef181,
+ 0x37a585c49cff87c5, 0xd94d2b3b1cd97311, 0xa3a2d50de285388a,
+ 0xf627d8b7298602a0, 0x567f848218395a28, 0x9b4b416995765491,
+ 0x24388b443fd8730a, 0x5b3a3cc87e225bdb, 0x53a9881d098d520b,
+ 0xadbc31258140299f, 0x37345aad0c678a3f, 0xc0e24ea3958ef6d8,
+ 0x18ceff669a144d20, 0x3ce920ab86ab70c7, 0x430c240b5307c1cb,
+ 0x7240a314d5f7fa9c, 0x4dfaf972d1856f15, 0x76ca74db2ad10515,
+ 0x607ec82965c620f7, 0xc75f531d7eae4145, 0xe91c86c49c8d84a2,
+ 0x8becf71fe1e371a7, 0x055bb0206808c289, 0x36dbcec66eabc566,
+ 0x476f4f1b52c4c856, 0x78bdf9114304e28f, 0x206e8342087ca6e2,
+ 0xda66f574514e8795, 0x903bcf41830a763f, 0x3a8c03f8bfe8c1ae,
+ 0xc386671f05740107, 0xda3abc3b566c70ab, 0xe1072ad4ebd4a028,
+ 0xfe9a6d4c0e8a80ce, 0xeb99eb25a084c442, 0xd34f23f8f279e9f3,
+ 0xccb189048479b94d, 0xfc6f6d863f74a049, 0xa437f340bfdfed0e,
+ 0xc84ef9a7139af764, 0xbeb88737819b7d55, 0x5f06fb8f06d6372b,
+ 0x7ec01ec2f978b4a2, 0x1ad4f2fb9963b46f, 0xae4cdeee5c419652,
+ 0x51ee340ba106d1dc, 0x93544a6e274cf180, 0x0de0b1abf6e9773a,
+ 0xb55514c7be768e6a, 0x70a3ee12298c0688, 0x58943a332454b1ee,
+ 0xe9de88a863b83b29, 0xb99dbf02fc35d6c9, 0x285a09f5583ac480,
+ 0xd0bf2b79a453c915, 0xb6e140e86dcb97d5, 0x8de0ab74f93a8de1,
+ 0x70f9bb989ce46c09, 0xd7ea17d64158d923, 0x308e3f8a527d0ff7,
+ 0xa0fffd413b3a872f, 0xcd35b4b30dfb6587, 0x7ef3ab8b9bd5fbcf,
+ 0x6149f604d9f355f7, 0x130d9020814780cd, 0x45cb969837f9a147,
+ 0x88dc31c106a2345e, 0x690da693a3472e6d, 0xe1dc49aaab6d8504,
+ 0x7749dc54f0a8f838, 0x358a1197921ed6e3, 0x50ae914d7b26c811,
+ 0x6e0f79b3af64d1ad, 0xec45b7e54c408577, 0x94809242f830a52f,
+ 0x88e8c0701fd8cd25, 0x21f562f903b85ca7, 0x3f8f1d2cfd57d394,
+ 0x1f0db9fb1767b393, 0x0504a2b6a6b967d3, 0xf18209ff9dee356b,
+ 0x4e74343f94f09cff, 0x53107e4bd79b52c1, 0x9c4ab4cdba0f0c2f,
+ 0xfd085f652a3c3f14, 0xcbd20129e019e573, 0x92d2e7681d64d41b,
+ 0xfa6c6c50db35a8fd, 0x7dc5177e0cc57261, 0xae3586379eed9e9d,
+ 0x4ba340964a014d54, 0x57147f7d60a4a5ee, 0x423255e50fec612e,
+ 0x1c1158e2a2afbace, 0x5e0dd39d591b341f, 0x4e0fff62124939a6,
+ 0x12e0413146fa5c8d, 0x3a6e0c37d48699a0, 0x9774260521aa490f,
+ 0xbd0f8ecc2b447c99, 0x556d41deab48dad8, 0x08bd36a5be98bc97,
+ 0x8bf0c22eb1cb99a0, 0x959954221670e572, 0x05143412beae5a0c,
+ 0x37246cbdf96ede32, 0xeb05ce52c11ab210, 0xd4e9c130ccd17048,
+ 0x42cc9b6177b7547b, 0x96d603334e7a85c7, 0x850365d5d2f5adcb,
+ 0xcfa11346e834516c, 0xfb9e30870be0c7bb, 0xc4d137ab85224e7a,
+ 0xc7f20e98475c4ab3, 0xaf464d45151fec79, 0xe4ad336a38569bcd,
+ 0xabd20fbf84b809bd, 0xb3643ed21050862a, 0xfb29924632f30a27,
+ 0x3f4fd0809492521f, 0xcc9635ff080ba76d, 0xeb679199764753a7,
+ 0x9df2de103f532b81, 0x83784f41703f0a31, 0x70ba6c249783efba,
+ 0x93cf542badd6d441, 0x8290f3e7b7fcc9a6, 0xb55485e8fadf4677,
+ 0xf29c554f7e99c1de, 0x277a3a2d674f10e9, 0xe9a5460c4d87bd2a,
+ 0x0d8489866023402a, 0x6bd7d212c07df415, 0x8d6194cb592bebc3,
+ 0xa9747f53b4cd4192, 0x56bd4c4c6373dcb9, 0x3385c9e222966cb2,
+ 0x234bda6863a4f7fd, 0xebc79b310f06f538, 0x3b7556403468fc38,
+ 0x9ac05c55de908490, 0x381dba9f8e05fd0e, 0x5e92d1853484e36a,
+ 0x030782801735585f, 0xd8c76845c71a4482, 0xea03ea2ec2406c9b,
+ 0xe2498a52f95cd21e, 0xd4ffe046d9393212, 0x93565efec984c6c9,
+ 0x154c50d8c6e11dc9, 0x3cd889f3188c18cc, 0xb5a46a6cba1287ca,
+ 0xbc203b6c8f21bb66, 0xfedf97cba4c35dea, 0x0c82b3d9520de017,
+ 0xdb2674b14ddb4d95, 0x44c8e1ca851db784, 0x5596d3e27d211d55,
+ 0x9dbe804695d2270d, 0xbd54af74b050b82a, 0xe4ea34515f120cea,
+ 0xaa2564472972ab58, 0xf97af0d678dfd0cb, 0xdebdbc18d6c71bd1,
+ 0x78423e11438fcb21, 0xf6f749d4f30510d4, 0x68de10085ea4c2ea,
+ 0x6b3ff4773ccb4ec1, 0x33206eb82742f50e, 0x3046468ab04a0778,
+ 0xd7168cc59b78654c, 0xcb5800e03e2f90d9, 0x4f8fdaa4a3b0b5ff,
+ 0xe0eeff2c2ff94e64, 0x7f2578708dafae2e, 0x6feab0ef729b4300,
+ 0xf1de49e2796cfdf5, 0x90711a9f7886a0d0, 0xf4b39401ae61d28a,
+ 0x3f26008ddcbc47e9, 0xfab0a15c25a8511d, 0x2664fc987e7fdd17,
+ 0x51125228da560a04, 0x93a545c6207a3d67, 0x7c8e4446a408cc25,
+ 0xf9b10a00083f429e, 0x48704b0fc020d66c, 0x1e1a8c7a3d66eae0,
+ 0x9bde8e4692e41915, 0x7144aad3cf672129, 0xbab5e713e8f5b335,
+ 0x2d2c0b70c55d7d11, 0xed928a6e1b388ab0, 0xf121a4a71653448f,
+ 0x0dd175d00c20e9ed, 0xe68066507fb5dcb1, 0x92384f914830a50e,
+ 0xb4d4c84f220aed3d, 0xa13e4d6ea70cc5f0, 0xfdbe2223195bfa82,
+ 0xe97bb465c3ca2099, 0x0078ec86e8daa6c0, 0x634c3a1311b805c4,
+ 0xac04a89119ae79a7, 0x690e7049d8e8762f, 0x0000000000000086,
+ 0x0000000000000000,
+};
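/* inc_ct128 is the expected one-shot AES-128-CTR output for a buffer of
 * INC_TEST_BYTES (256 * 16 + 1 = 4097) bytes holding the repeating pattern
 * 0x00..0xff, encrypted under inc_key128/inc_iv. The length is presumably
 * chosen to sweep a full 256-block run of counter increments and still end
 * on a partial trailing block. */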
+
+#define perftest_aesXXX_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_ctr_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ \
+ clib_aes_ctr_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##a##_ctr (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
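/* Each perf variant times exactly one bulk call between
 * test_perf_event_enable/disable, so the measured window covers n_ops bytes
 * of CTR processing and the registered "per byte" tests can be normalized
 * to cycles per byte. The key and IV are arbitrary incrementing fill; only
 * the data length matters for the measurement. */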
+
+static clib_error_t *
+test_clib_aes128_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ aes_ctr_ctx_t ctx;
+ u8 pt[INC_TEST_BYTES];
+ u8 ct[INC_TEST_BYTES];
+
+ FOREACH_ARRAY_ELT (tc, test_cases128)
+ {
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_128);
+ clib_aes128_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_ctr_key_expand (&kd, inc_key128, AES_KEY_128);
+ clib_aes128_ctr (&kd, pt, INC_TEST_BYTES, inc_iv, ct);
+ for (int i = 0; i < sizeof (pt); i++)
+ if (((u8 *) inc_ct128)[i] != ct[i])
+ return clib_error_return (err, "incremental test failed (byte %u)", i);
+
+ clib_aes_ctr_init (&ctx, &kd, inc_iv, AES_KEY_128);
+ for (u32 off = 0, chunk_size = 1; off < INC_TEST_BYTES;
+ off += chunk_size, chunk_size = clib_min (((chunk_size + 1) * 2 - 1),
+ INC_TEST_BYTES - off))
+ clib_aes_ctr_transform (&ctx, pt + off, ct + off, chunk_size, AES_KEY_128);
+
+ for (int i = 0; i < sizeof (pt); i++)
+ if (((u8 *) inc_ct128)[i] != ct[i])
+ return clib_error_return (
+ err, "incremental multiseg test failed (byte %u)", i);
+
+ return err;
+}
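/* The multiseg loop above deliberately feeds clib_aes_ctr_transform odd
 * chunk sizes: starting at 1, each next chunk is (c + 1) * 2 - 1, i.e.
 * 1, 3, 7, 15, 31, ... (2^k - 1), clamped to the bytes remaining. Since no
 * chunk is block-aligned, every partial-block carry path in the incremental
 * CTR state gets checked against the one-shot clib_aes128_ctr output. */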
+
+perftest_aesXXX_var_sz (128);
+REGISTER_TEST (clib_aes128_ctr) = {
+ .name = "clib_aes128_ctr",
+ .fn = test_clib_aes128_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes192_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ u8 ct[MAX_TEST_DATA_LEN];
+
+ FOREACH_ARRAY_ELT (tc, test_cases192)
+ {
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_192);
+ clib_aes192_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_var_sz (192);
+REGISTER_TEST (clib_aes192_ctr) = {
+ .name = "clib_aes192_ctr",
+ .fn = test_clib_aes192_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes192_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ u8 ct[MAX_TEST_DATA_LEN];
+
+ FOREACH_ARRAY_ELT (tc, test_cases256)
+ {
+ aes_ctr_ctx_t ctx;
+ u32 sz = tc->data_len / 3;
+
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_256);
+ clib_aes256_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ clib_memset (ct, 0, tc->data_len);
+
+ clib_aes_ctr_init (&ctx, &kd, tc->iv, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt, ct, sz, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt + sz, ct + sz, sz, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt + 2 * sz, ct + 2 * sz,
+ tc->data_len - 2 * sz, AES_KEY_256);
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext (multiseg)",
+ tc->name);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_var_sz (256);
+REGISTER_TEST (clib_aes256_ctr) = {
+ .name = "clib_aes256_ctr",
+ .fn = test_clib_aes256_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_var_sz }),
+};
+
+#endif
diff --git a/src/vppinfra/test/aes_gcm.c b/src/vppinfra/test/aes_gcm.c
new file mode 100644
index 00000000000..caa36b0f710
--- /dev/null
+++ b/src/vppinfra/test/aes_gcm.c
@@ -0,0 +1,1177 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#if (defined(__AES__) && defined(__PCLMUL__)) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_gcm.h>
+
+static const u8 tc1_key128[16] = {
+ 0,
+};
+
+static const u8 tc1_iv[12] = {
+ 0,
+};
+
+static const u8 tc1_tag128[] = { 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e,
+ 0x30, 0x61, 0x36, 0x7f, 0x1d, 0x57,
+ 0xa4, 0xe7, 0x45, 0x5a };
+static const u8 tc1_key256[32] = {
+ 0,
+};
+
+static const u8 tc1_tag256[] = {
+ 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
+ 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b,
+};
+
+static const u8 tc2_ciphertext256[] = { 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60,
+ 0x6b, 0x6e, 0x07, 0x4e, 0xc5, 0xd3,
+ 0xba, 0xf3, 0x9d, 0x18 };
+
+static const u8 tc2_tag256[] = { 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99,
+ 0x6b, 0xf0, 0x26, 0x5b, 0x98, 0xb5,
+ 0xd4, 0x8a, 0xb9, 0x19 };
+
+static const u8 tc2_plaintext[16] = {
+ 0,
+};
+
+static const u8 tc2_tag128[] = { 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec,
+ 0x13, 0xbd, 0xf5, 0x3a, 0x67, 0xb2,
+ 0x12, 0x57, 0xbd, 0xdf };
+
+static const u8 tc2_ciphertext128[] = { 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6,
+ 0xa3, 0x92, 0xf3, 0x28, 0xc2, 0xb9,
+ 0x71, 0xb2, 0xfe, 0x78 };
+
+static const u8 tc3_key128[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65,
+ 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_iv[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce,
+ 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88 };
+
+static const u8 tc3_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5, 0xaf,
+ 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, 0x2e, 0x4c,
+ 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09,
+ 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, 0xb1, 0x6a, 0xed, 0xf5,
+ 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
+};
+
+static const u8 tc3_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7, 0x84,
+ 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, 0x35, 0xc1,
+ 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93,
+ 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, 0x1b, 0xa3, 0x0b, 0x39,
+ 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
+};
+
+static const u8 tc3_tag128[] = { 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd,
+ 0x64, 0xa6, 0x2c, 0xf3, 0x5a, 0xbd,
+ 0x2b, 0xa6, 0xfa, 0xb4 };
+
+static const u8 tc3_key256[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73,
+ 0x1c, 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30,
+ 0x83, 0x08, 0xfe, 0xff, 0xe9, 0x92, 0x86,
+ 0x65, 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3, 0x2a,
+ 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, 0x75, 0x98,
+ 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb,
+ 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, 0xc5, 0xf6, 0x1e, 0x63,
+ 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
+};
+
+static const u8 tc3_tag256[] = { 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34,
+ 0x71, 0xbd, 0xec, 0x1a, 0x50, 0x22,
+ 0x70, 0xe3, 0xcc, 0x6c };
+
+static const u8 tc4_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5,
+ 0xaf, 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
+ 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95,
+ 0x95, 0x68, 0x09, 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
+ 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39,
+};
+
+static const u8 tc4_aad[] = { 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe,
+ 0xef, 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad,
+ 0xbe, 0xef, 0xab, 0xad, 0xda, 0xd2 };
+
+static const u8 tc4_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7,
+ 0x84, 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
+ 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2,
+ 0x54, 0x66, 0x93, 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
+ 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91
+};
+
+static const u8 tc4_tag128[] = { 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21,
+ 0xa5, 0xdb, 0x94, 0xfa, 0xe9, 0x5a,
+ 0xe7, 0x12, 0x1a, 0x47 };
+
+static const u8 tc4_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3,
+ 0x2a, 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
+ 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48,
+ 0x59, 0x0d, 0xbb, 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
+ 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62
+};
+
+static const u8 tc4_tag256[] = { 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e,
+ 0x17, 0x68, 0xcd, 0xdf, 0x88, 0x53,
+ 0xbb, 0x2d, 0x55, 0x1b };
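/* tc1-tc4 follow the widely published AES-GCM reference vectors: tc1/tc2
 * cover the all-zero key and IV cases (tag-only, then one zero block of
 * plaintext), while tc3/tc4 use the classic feffe992... key with the
 * cafebabe... IV, tc4 adding 20 bytes of AAD and a 60-byte plaintext. Each
 * case checks ciphertext and tag for both 128- and 256-bit keys. */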
+
+static const u8 inc_key[] = { 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b,
+ 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b };
+static const u8 inc_iv[] = { 0xe2, 0xe4, 0x3f, 0x29, 0xfe, 0xd4,
+ 0xbc, 0x31, 0x56, 0xa7, 0x97, 0xf5 };
+
+static const struct
+{
+ const u16 n_bytes;
+ const u64 tag_gcm_128[2];
+ const u64 tag_gcm_256[2];
+ const u64 tag_gmac_128[2];
+ const u64 tag_gmac_256[2];
+ const u8 tag256[16];
+} inc_test_cases[] = {
+ {
+ .n_bytes = 0,
+ .tag_gcm_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gcm_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ .tag_gmac_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gmac_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ },
+ {
+ .n_bytes = 1,
+ .tag_gcm_128 = { 0xe89aa5be94fa1db4, 0x70d82ed02542a560 },
+ .tag_gcm_256 = { 0xcb0659b38e60d3a7, 0x9758b874959187ff },
+ .tag_gmac_128 = { 0xf9be1e7db073c565, 0x3b8a0ecc7a91f09d },
+ .tag_gmac_256 = { 0x1e302e97ab394130, 0xef29621c33bdb710 },
+ },
+ {
+ .n_bytes = 7,
+ .tag_gcm_128 = { 0xf4af7cbe57bd2078, 0x063dd60abbe51049 },
+ .tag_gcm_256 = { 0x7d231388fe8a19be, 0x59be3e7205269abd },
+ .tag_gmac_128 = { 0x27d0a47980eed1c6, 0xe6163485e73d02b3 },
+ .tag_gmac_256 = { 0x61ce281b47729f6c, 0x128a6bc0880e5d84 },
+ },
+ {
+ .n_bytes = 8,
+ .tag_gcm_128 = { 0xf45b40961422abc4, 0x0a932b98c4999694 },
+ .tag_gcm_256 = { 0xf7f945beed586ee2, 0x67239433a7bd3f23 },
+ .tag_gmac_128 = { 0x3a25d38572abe3b1, 0x220798aca96d594a },
+ .tag_gmac_256 = { 0x2e0e6d58d1ab41ca, 0x09bbc83e3b7b5e11 },
+ },
+ {
+ .n_bytes = 9,
+ .tag_gcm_128 = { 0x791b0a879d236364, 0xde9553e3ed1b763f },
+ .tag_gcm_256 = { 0x24c13ed7b46813cd, 0xe646ce24ea4b281e },
+ .tag_gmac_128 = { 0x0e521672b23a4fc7, 0x16f129224dec5fd8 },
+ .tag_gmac_256 = { 0x8b9c603789c34043, 0x0a8b626928c9fb6f },
+ },
+ {
+ .n_bytes = 15,
+ .tag_gcm_128 = { 0xb277ef05e2be1cc0, 0x2922fba5e321c81e },
+ .tag_gcm_256 = { 0xc3ca9f633fa803dc, 0x96e60b0c3347d744 },
+ .tag_gmac_128 = { 0xab99e6327c8e1493, 0x09a9a153045ba43f },
+ .tag_gmac_256 = { 0xfc9ec2d6a1ad492b, 0xf0b0ba877663732d },
+ },
+ {
+ .n_bytes = 16,
+ .tag_gcm_128 = { 0x3e3438e8f932ebe3, 0x958e270d56ae588e },
+ .tag_gcm_256 = { 0x6ac53524effc8171, 0xccab3a16a0b5813c },
+ .tag_gmac_128 = { 0x0eb4a09c6c7db16b, 0x1cdb5573a27a2e4a },
+ .tag_gmac_256 = { 0x71752018b31eae33, 0xdc4bd36d44b9fd5d },
+ },
+ {
+ .n_bytes = 31,
+ .tag_gcm_128 = { 0x1f4d4a7a056e4bca, 0x97ac76121dccb4e0 },
+ .tag_gcm_256 = { 0x609aea9aec919ab6, 0x1eba3c4998e7abb9 },
+ .tag_gmac_128 = { 0x289280f9e8879c68, 0xe6b0e36afc0d2ae1 },
+ .tag_gmac_256 = { 0x0b3f61762ba4ed43, 0x293f596a76d63b37 },
+ },
+ {
+ .n_bytes = 32,
+ .tag_gcm_128 = { 0xc4b64505d045818f, 0x72bfd499f0f983b4 },
+ .tag_gcm_256 = { 0x3f003fb179b2c480, 0x883876d4904700c2 },
+ .tag_gmac_128 = { 0x3dd10ab954d807f0, 0x5ae32ee41675051e },
+ .tag_gmac_256 = { 0x1a80ab830fc736c0, 0x51db27630adae337 },
+ },
+ {
+ .n_bytes = 47,
+ .tag_gcm_128 = { 0x3aedb0c6c14f2ea1, 0xe4626626bae641cd },
+ .tag_gcm_256 = { 0x9c91b87dfd302880, 0x05bb594dde5abb9c },
+ .tag_gmac_128 = { 0xe0fe54f2bdadeba8, 0x6f8f40edb569701f },
+ .tag_gmac_256 = { 0x26c5632c7abbdb3f, 0xc18ccc24df8bb239 },
+ },
+ {
+ .n_bytes = 48,
+ .tag_gcm_128 = { 0xdbceb2aed0dbbe27, 0xfef0013e8ebe6ef1 },
+ .tag_gcm_256 = { 0x98ad025f30b58ffa, 0xabc8a99857034e42 },
+ .tag_gmac_128 = { 0x269518e8584b7f6c, 0x1c9f41410a81799c },
+ .tag_gmac_256 = { 0x144807ce7aa8eb61, 0x611a8355b4377dc6 },
+ },
+ {
+ .n_bytes = 63,
+ .tag_gcm_128 = { 0x1769ccf523a2046e, 0x7328e18749a559b4 },
+ .tag_gcm_256 = { 0xcdf2f28efa9689ce, 0x636676f6aedea9de },
+ .tag_gmac_128 = { 0x4d47537060defce8, 0x0d4819c20ba8e889 },
+ .tag_gmac_256 = { 0x7b60615e7bfc9a7a, 0x610633296eb30b94 },
+ },
+ {
+ .n_bytes = 64,
+ .tag_gcm_128 = { 0xa5602f73865b6a77, 0x78317e461ff9b560 },
+ .tag_gcm_256 = { 0x5c17a6dcd1f23b65, 0x25331c378256a93e },
+ .tag_gmac_128 = { 0x39d941ed85d81ab0, 0xe358a61078628d63 },
+ .tag_gmac_256 = { 0x5276fbdd333f380d, 0xb0dc63e68f137e74 },
+ },
+ {
+ .n_bytes = 79,
+ .tag_gcm_128 = { 0x5d32cd75f2e82d84, 0xbc15801c1fe285bd },
+ .tag_gcm_256 = { 0xb2b2855f4b1ecf70, 0xa524adc1609c757b },
+ .tag_gmac_128 = { 0xa147493f08a8738e, 0xbf07da9f4a88944f },
+ .tag_gmac_256 = { 0xfee15e0d4b936bc7, 0x1dc88398c6b168bc },
+ },
+ {
+ .n_bytes = 80,
+ .tag_gcm_128 = { 0xa303b7247b9b00df, 0xe72d6d7063d48b72 },
+ .tag_gcm_256 = { 0x7abfffc9ecfa00ec, 0x9c5ffcd753ee4568 },
+ .tag_gmac_128 = { 0xc3e61bf9f370b40e, 0x66b1c4a6df3b19d7 },
+ .tag_gmac_256 = { 0x0cc7b09a7d602352, 0x29e8a64447a764d2 },
+ },
+ {
+ .n_bytes = 95,
+ .tag_gcm_128 = { 0xf0fb35c36eac3025, 0xa13463307fc48907 },
+ .tag_gcm_256 = { 0x283a73a05bd0e3c2, 0x794a181dd07a0fb7 },
+ .tag_gmac_128 = { 0x26f3546060d9f958, 0xc1367fca8869ab40 },
+ .tag_gmac_256 = { 0xa046e1705100c711, 0xbcf9d6a06f360260 },
+ },
+ {
+ .n_bytes = 96,
+ .tag_gcm_128 = { 0x974bb3c1c258bfb5, 0xcf057344bccb0596 },
+ .tag_gcm_256 = { 0x18920d75fcfb702e, 0x18e5f14ba429b7be },
+ .tag_gmac_128 = { 0xf43cca4837ad00b8, 0xb1a1585d51838352 },
+ .tag_gmac_256 = { 0xce3427dc5123b31f, 0xdcc6e49fa0f6587e },
+ },
+ {
+ .n_bytes = 111,
+ .tag_gcm_128 = { 0x5d73baa8eef0ced3, 0x79339e31d5d813de },
+ .tag_gcm_256 = { 0x4cefa311c9c39a86, 0xe809ee78930ef736 },
+ .tag_gmac_128 = { 0x452003e6d535a523, 0x723f08581012c62e },
+ .tag_gmac_256 = { 0x6ce2e1661db942ca, 0xccd700c9c6d03cfd },
+ },
+ {
+ .n_bytes = 112,
+ .tag_gcm_128 = { 0x189aa61ce15a0d11, 0xc907e6bccbdbb8f9 },
+ .tag_gcm_256 = { 0xa41c96c843b791b4, 0x0f9f60953f03e5fc },
+ .tag_gmac_128 = { 0x44c75b94dbf8539f, 0xcdebe3ed9c68c840 },
+ .tag_gmac_256 = { 0x21a289dd39eadd19, 0x749a038e1ea0711c },
+ },
+ {
+ .n_bytes = 127,
+ .tag_gcm_128 = { 0xc6ea87bfe82d73f6, 0x9d85dbf8072bb051 },
+ .tag_gcm_256 = { 0xd5e436b2ddfac9fa, 0x54d7d13fa214703a },
+ .tag_gmac_128 = { 0xdc5374b7d7d221c4, 0xa8cf4e11958b9dff },
+ .tag_gmac_256 = { 0xc7ad0bba9de54f6a, 0x38ed037fe0924dee },
+ },
+ {
+ .n_bytes = 128,
+ .tag_gcm_128 = { 0x357d4954b7c2b440, 0xb3b07ce0cd143149 },
+ .tag_gcm_256 = { 0x5555d09cb247322d, 0xeb9d1cea38b68951 },
+ .tag_gmac_128 = { 0x6a77579181663dde, 0xe359157bd4246d3f },
+ .tag_gmac_256 = { 0x9fe930d50d661e37, 0xba4a0f3c3a6b63cf },
+ },
+ {
+ .n_bytes = 143,
+ .tag_gcm_128 = { 0x358f897d4783966f, 0x6fa44993a9ed54c4 },
+ .tag_gcm_256 = { 0x60e91f959f2ccdbe, 0x116c56fdaa107deb },
+ .tag_gmac_128 = { 0x121d26aba8aaee0d, 0xc37cda9c43f51008 },
+ .tag_gmac_256 = { 0x06918b1cd20e0abc, 0x42938b1d8e745dcd },
+ },
+ {
+ .n_bytes = 144,
+ .tag_gcm_128 = { 0x8a9efe3df387e069, 0xc0a3f2f7547c704b },
+ .tag_gcm_256 = { 0x217d59f53bfbc314, 0x2d8f088d05532b0d },
+ .tag_gmac_128 = { 0x382949d56e0e8f05, 0x4e87fb8f83f095a7 },
+ .tag_gmac_256 = { 0x75e07060883db37d, 0x5fde7b9bda37d680 },
+ },
+ {
+ .n_bytes = 159,
+ .tag_gcm_128 = { 0x297252081cc8db1e, 0x6357143fa7f756c8 },
+ .tag_gcm_256 = { 0x7e8fca9d1b17e003, 0x7bf7dad063b9a5c9 },
+ .tag_gmac_128 = { 0x5d0524b130e97547, 0xd6befd8591072437 },
+ .tag_gmac_256 = { 0xf5f631d391b635fc, 0xe8f7b6808544f312 },
+ },
+ {
+ .n_bytes = 160,
+ .tag_gcm_128 = { 0x90e034ee0f08a871, 0x002f483eefa24ec9 },
+ .tag_gcm_256 = { 0xed24df02e455d6d3, 0x7a7d318ed132cb7f },
+ .tag_gmac_128 = { 0xc75f87215ae12a2f, 0xf264e5381d5b0412 },
+ .tag_gmac_256 = { 0x1ad3e294fd55b0a6, 0xa1a551e59fd12e2f },
+ },
+ {
+ .n_bytes = 175,
+ .tag_gcm_128 = { 0x8f663955c8e4249e, 0xd9d8d8d7352b18d9 },
+ .tag_gcm_256 = { 0xd9af34eae74a35e1, 0xc22e74b34267e5df },
+ .tag_gmac_128 = { 0xb54a2e8b186a55db, 0x980f586c6da8afce },
+ .tag_gmac_256 = { 0x9cceb31baad18ff1, 0xce97588909ece8af },
+ },
+ {
+ .n_bytes = 176,
+ .tag_gcm_128 = { 0x258ec0df82f003bd, 0x571496e92c966695 },
+ .tag_gcm_256 = { 0xa1925cda1fa1dd2c, 0x914038618faecf99 },
+ .tag_gmac_128 = { 0xfc384b412bdb05ef, 0x73968cf3b464a997 },
+ .tag_gmac_256 = { 0x50d9ce4be242e176, 0x5fb78e9404c9226d },
+ },
+ {
+ .n_bytes = 191,
+ .tag_gcm_128 = { 0x796a90a3edaab614, 0x4bf34c2c6333c736 },
+ .tag_gcm_256 = { 0x4ffd3a84b346c6d5, 0x9d4c84c7ac5a191c },
+ .tag_gmac_128 = { 0x16c11c6bfad5973e, 0xa0825b9c827137c8 },
+ .tag_gmac_256 = { 0x82c144c209c22758, 0x7428b4ac38a65c56 },
+ },
+ {
+ .n_bytes = 192,
+ .tag_gcm_128 = { 0x2a44492af2e06a75, 0xbe4eab62aacfc2d3 },
+ .tag_gcm_256 = { 0xb7d4971a8061092d, 0x94da543669369e41 },
+ .tag_gmac_128 = { 0xed462726c984b596, 0xd61b317d979f5df8 },
+ .tag_gmac_256 = { 0x554dc7f30981dbf6, 0x94447d0fbf9f2c8b },
+ },
+ {
+ .n_bytes = 207,
+ .tag_gcm_128 = { 0xcfac9f67252713c8, 0xd638cf6b74c6acf6 },
+ .tag_gcm_256 = { 0x57a4a9d299663925, 0xa802f8453e8bcc5b },
+ .tag_gmac_128 = { 0xef03f3cdcb0ea819, 0xeea8f0f7f805c306 },
+ .tag_gmac_256 = { 0x3d8cd7d92cf0a212, 0x12c1ddddab7e752c },
+ },
+ {
+ .n_bytes = 208,
+ .tag_gcm_128 = { 0x5467633795b92cf5, 0x6b45fb93e19f9341 },
+ .tag_gcm_256 = { 0xaeced4090d4d20bc, 0xd20161cd2617613e },
+ .tag_gmac_128 = { 0x02bb88dbe681ab69, 0xaf973bfd0b924144 },
+ .tag_gmac_256 = { 0x313020fc5283b45e, 0x1757616d4cf17c7f },
+ },
+ {
+ .n_bytes = 223,
+ .tag_gcm_128 = { 0x2f9c725903c07adf, 0xe01712c7d6d5055d },
+ .tag_gcm_256 = { 0xeae53a9b0d03a4f9, 0x42b2375d569d384e },
+ .tag_gmac_128 = { 0x6ea092dd400ec00d, 0x23237fa0bd0c1977 },
+ .tag_gmac_256 = { 0xa02e0f41f12f0053, 0xfba53430aa616219 },
+ },
+ {
+ .n_bytes = 224,
+ .tag_gcm_128 = { 0x73e40772334901a9, 0xddf6075b357cb307 },
+ .tag_gcm_256 = { 0x2eb3450f9462c968, 0xa9fb95f281c117e9 },
+ .tag_gmac_128 = { 0x33762525c12dfd1d, 0xcb3d8d0402c23ebf },
+ .tag_gmac_256 = { 0x30c6d05fb98c2a84, 0xaa2c9f6303383d3a },
+ },
+ {
+ .n_bytes = 239,
+ .tag_gcm_128 = { 0x184d15fd2e2c63a6, 0x3dfe238b88dd2924 },
+ .tag_gcm_256 = { 0x18deafee39975b36, 0xc07761cf4fc16c06 },
+ .tag_gmac_128 = { 0x10a48f2bc4e64f87, 0x85eec49ae83d4256 },
+ .tag_gmac_256 = { 0x5ac87f47f32770eb, 0x31706ca888dd6d44 },
+ },
+ {
+ .n_bytes = 240,
+ .tag_gcm_128 = { 0x153134f11cfa06ec, 0xd987642cc3688a34 },
+ .tag_gcm_256 = { 0x3eb66b6dc0bba824, 0x274c4648d515c844 },
+ .tag_gmac_128 = { 0x9e5afe891c7c7dcb, 0xa2b3fa1c026343e2 },
+ .tag_gmac_256 = { 0xe9120e4e9ff4b1e1, 0xb88bf68336342598 },
+ },
+ {
+ .n_bytes = 255,
+ .tag_gcm_128 = { 0x2b5e78936d1ace73, 0x15b766bfee18d348 },
+ .tag_gcm_256 = { 0xeb3741a345395c97, 0x02e11e0478e4cc5a },
+ .tag_gmac_128 = { 0xf7daf525751192df, 0x1b1641c3362905ac },
+ .tag_gmac_256 = { 0x0b16a2bb842caaca, 0x996732fedaa6b829 },
+ },
+ {
+ .n_bytes = 256,
+ .tag_gcm_128 = { 0x6d4507e0c354e80a, 0x2345eccddd0bd71e },
+ .tag_gcm_256 = { 0xa582b8122d699b63, 0xb16db944f6b073f3 },
+ .tag_gmac_128 = { 0xc58bb57544c07b40, 0x1a8dd3d8124cdf39 },
+ .tag_gmac_256 = { 0xb0f6db0da52e1dc2, 0xbd3a86a577ed208a },
+ },
+ {
+ .n_bytes = 319,
+ .tag_gcm_128 = { 0x2cd41fdf6f659a6b, 0x2486849d7666d76e },
+ .tag_gcm_256 = { 0xb7e416c8a716cb4d, 0xc7abe0d755b48845 },
+ .tag_gmac_128 = { 0xad83725394d4a36b, 0x5fdd42e941cad49b },
+ .tag_gmac_256 = { 0xbb0b73609b90f7eb, 0xe4d382b8b9b7d43e },
+ },
+ {
+ .n_bytes = 320,
+ .tag_gcm_128 = { 0x064cfe34b7d9f89c, 0xb6c7263f66c89b47 },
+ .tag_gcm_256 = { 0x1254c9ae84d8ff50, 0x9faeab423099dc9a },
+ .tag_gmac_128 = { 0xd91d60ce71d24533, 0xb1cdfd3b3200b171 },
+ .tag_gmac_256 = { 0x921de9e3d353559c, 0x3509d2775817a1de },
+ },
+ {
+ .n_bytes = 383,
+ .tag_gcm_128 = { 0x14788c7531d682e1, 0x8af79effe807a4dc },
+ .tag_gcm_256 = { 0x947754a0844b4a4d, 0x9eb3849d93d5048e },
+ .tag_gmac_128 = { 0xfa84d3a18ea6f895, 0x9a45c729797a8ac4 },
+ .tag_gmac_256 = { 0xe8e61e134e40359a, 0xe8e404d4b523607c },
+ },
+ {
+ .n_bytes = 384,
+ .tag_gcm_128 = { 0xfba3fcfd9022e9a7, 0x257ba59f12055d70 },
+ .tag_gcm_256 = { 0x7c6ca4e7fba2bc35, 0x1c590be09b3d549b },
+ .tag_gmac_128 = { 0x4ca0f087d812e48f, 0xd1d39c4338d57a04 },
+ .tag_gmac_256 = { 0xb0a2257cdec364c7, 0x6a4308976fda4e5d },
+ },
+ {
+ .n_bytes = 447,
+ .tag_gcm_128 = { 0x8fde1490c60f09bf, 0xd2932f04c202c5e4 },
+ .tag_gcm_256 = { 0x1845a80cbdcf2e62, 0xc7c49c9864bca732 },
+ .tag_gmac_128 = { 0x35aa90d2deb41b9c, 0x516ab85a3f17b71e },
+ .tag_gmac_256 = { 0x1db78f8b7b34d9e7, 0xd168177351e601fe },
+ },
+ {
+ .n_bytes = 448,
+ .tag_gcm_128 = { 0xd0a7b75f734a1a7c, 0xc7689b7c571a09bf },
+ .tag_gcm_256 = { 0xef3a9118c347118d, 0x282a7736060d7bb5 },
+ .tag_gmac_128 = { 0xce2dab9fede53934, 0x27f3d2bb2af9dd2e },
+ .tag_gmac_256 = { 0xca3b0cba7b772549, 0x3104ded0d6df7123 },
+ },
+ {
+ .n_bytes = 511,
+ .tag_gcm_128 = { 0x6fb5d366fa97b2d2, 0xed2d955fcc78e556 },
+ .tag_gcm_256 = { 0xc2bc52eca9348b7c, 0x0ec18a2eb637446f },
+ .tag_gmac_128 = { 0xe3012a4897edd5b5, 0xfe18c3ec617a7e88 },
+ .tag_gmac_256 = { 0x00e050eecf184591, 0xba24484f84867f4f },
+ },
+ {
+ .n_bytes = 512,
+ .tag_gcm_128 = { 0x25138f7fe88b54bd, 0xcc078b619c0e83a2 },
+ .tag_gcm_256 = { 0x63313c5ebe68fa92, 0xccc78784896cdcc3 },
+ .tag_gmac_128 = { 0xc688fe54c5595ec0, 0x5b8a687343c3ef03 },
+ .tag_gmac_256 = { 0x807c9f8e1c198242, 0xb1e0befc0b9b8193 },
+ },
+ {
+ .n_bytes = 575,
+ .tag_gcm_128 = { 0x0ce8e0b7332a7076, 0xe4aa7ab60dd0946a },
+ .tag_gcm_256 = { 0x585cff3cf78504d4, 0x45f3a9532ea40e8b },
+ .tag_gmac_128 = { 0xc06ca34dbad542b4, 0x840508722ff031dc },
+ .tag_gmac_256 = { 0xa46e22748f195488, 0x43817a5d4d17408a },
+ },
+ {
+ .n_bytes = 576,
+ .tag_gcm_128 = { 0x45360be81e8323bd, 0x10892d9804b75bb5 },
+ .tag_gcm_256 = { 0x66208ae5d809036e, 0x603d0af49475de88 },
+ .tag_gmac_128 = { 0xb4f2b1d05fd3a4ec, 0x6a15b7a05c3a5436 },
+ .tag_gmac_256 = { 0x8d78b8f7c7daf6ff, 0x925b2a92acb7356a },
+ },
+ {
+ .n_bytes = 577,
+ .tag_gcm_128 = { 0xc7e5cd17251fd138, 0xecfb0e05110303df },
+ .tag_gcm_256 = { 0x2939d12c85ea8cf8, 0xea063fba37c92eb5 },
+ .tag_gmac_128 = { 0x1fa02b370bec64a0, 0x8c759ca95a8cea85 },
+ .tag_gmac_256 = { 0x6a602c2b1fff6617, 0x17e06d829bd24a8d },
+ },
+ {
+ .n_bytes = 639,
+ .tag_gcm_128 = { 0xc679ef7a01e8f14c, 0x281e3b9a9f715cb9 },
+ .tag_gcm_256 = { 0x13abd2d67e162f98, 0xf637d467046af949 },
+ .tag_gmac_128 = { 0x05037392550b7ae2, 0x5095b4629ba46d40 },
+ .tag_gmac_256 = { 0xd8e8045772299aa7, 0x564d72fb58ea9808 },
+ },
+ {
+ .n_bytes = 640,
+ .tag_gcm_128 = { 0xff1a2c922cdd1336, 0xcaa02eab8691bf51 },
+ .tag_gcm_256 = { 0xd57e16f169d79da5, 0x3e2b47264f8efe9c },
+ .tag_gmac_128 = { 0xb32750b403bf66f8, 0x1b03ef08da0b9d80 },
+ .tag_gmac_256 = { 0x80ac3f38e2aacbfa, 0xd4ea7eb88213b629 },
+ },
+ {
+ .n_bytes = 703,
+ .tag_gcm_128 = { 0xefd0804f0155b8f1, 0xb1849ed867269569 },
+ .tag_gcm_256 = { 0xf66c5ecbd1a06fa4, 0x55ef36f3fdbe763a },
+ .tag_gmac_128 = { 0x725813463d977e5b, 0xd52aaabb923cfabb },
+ .tag_gmac_256 = { 0x4add8f86736adc52, 0xf6dabb4596975fd7 },
+ },
+ {
+ .n_bytes = 704,
+ .tag_gcm_128 = { 0x583b29260ea8e49f, 0xfaa93b0db98f9274 },
+ .tag_gcm_256 = { 0x0b777f2cd9e2f0ef, 0x01510fc85a99382e },
+ .tag_gmac_128 = { 0x89df280b0ec65cf3, 0xa3b3c05a87d2908b },
+ .tag_gmac_256 = { 0x9d510cb7732920fc, 0x16b672e611ae2f0a },
+ },
+ {
+ .n_bytes = 767,
+ .tag_gcm_128 = { 0x671ec58ab6d4a210, 0x0845fbe603169eff },
+ .tag_gcm_256 = { 0xb3913f7eb9bbdbbb, 0x4cb17aa290f6ab11 },
+ .tag_gmac_128 = { 0x3036046580a81443, 0xe18d34bb706e632b },
+ .tag_gmac_256 = { 0x4e82bc959349466c, 0x01210641d62bbdda },
+ },
+ {
+ .n_bytes = 768,
+ .tag_gcm_128 = { 0x66993b5de915fc6e, 0x4aaf0b8441040267 },
+ .tag_gcm_256 = { 0x958ed0a6c1bf11e0, 0xc29d9f4a8ce8bdc6 },
+ .tag_gmac_128 = { 0x02674435b179fddc, 0xe016a6a0540bb9be },
+ .tag_gmac_256 = { 0xf562c523b24bf164, 0x257cb21a7b602579 },
+ },
+ {
+ .n_bytes = 831,
+ .tag_gcm_128 = { 0x4914f7980699f93c, 0xc2e44fdba6a839e7 },
+ .tag_gcm_256 = { 0xa8fab43ecd572a25, 0x3cd465e491195b81 },
+ .tag_gmac_128 = { 0xa6d725516e956d5d, 0x630768e80ac3de3d },
+ .tag_gmac_256 = { 0xb4746cdde367c9e2, 0x3ea53280901a0375 },
+ },
+ {
+ .n_bytes = 832,
+ .tag_gcm_128 = { 0xac9a519f06fb8c70, 0xdc1a6544ed2cfcf7 },
+ .tag_gcm_256 = { 0x54877a7ccd02c592, 0x1a09a4474d903b56 },
+ .tag_gmac_128 = { 0xd24937cc8b938b05, 0x8d17d73a7909bbd7 },
+ .tag_gmac_256 = { 0x9d62f65eaba46b95, 0xef7f624f71ba7695 },
+ },
+ {
+ .n_bytes = 895,
+ .tag_gcm_128 = { 0x3d365bf4d44c1071, 0x07ac3129079f2013 },
+ .tag_gcm_256 = { 0x608543d4fe6526a1, 0xc78a987b87c8d96c },
+ .tag_gmac_128 = { 0xc71cf903f7a557c5, 0x06788583ad2122a5 },
+ .tag_gmac_256 = { 0x7cdaa511565b289a, 0xf818a4c85a8bd575 },
+ },
+ {
+ .n_bytes = 896,
+ .tag_gcm_128 = { 0x97000fafd1359a0b, 0xfc226d534866b495 },
+ .tag_gcm_256 = { 0x1850ee7af3133326, 0xf198d539eee4b1f5 },
+ .tag_gmac_128 = { 0x7138da25a1114bdf, 0x4deedee9ec8ed265 },
+ .tag_gmac_256 = { 0x249e9e7ec6d879c7, 0x7abfa88b8072fb54 },
+ },
+ {
+ .n_bytes = 959,
+ .tag_gcm_128 = { 0x17200025564902f2, 0x3f2c3b711ba4086d },
+ .tag_gcm_256 = { 0x3d0bf3e8b24e296d, 0x42fe0f54e33deb6d },
+ .tag_gmac_128 = { 0x8baae9b6f3bd797a, 0x177e0b6c577f2436 },
+ .tag_gmac_256 = { 0x853f961c965f472c, 0x8adc4113b3cf933a },
+ },
+ {
+ .n_bytes = 960,
+ .tag_gcm_128 = { 0x2a30ca7325e7a81b, 0xacbc71832bdceb63 },
+ .tag_gcm_256 = { 0x037786319dc22ed7, 0x6730acf359ec3b6e },
+ .tag_gmac_128 = { 0x702dd2fbc0ec5bd2, 0x61e7618d42914e06 },
+ .tag_gmac_256 = { 0x52b3152d961cbb82, 0x6ab088b034f6e3e7 },
+ },
+ {
+ .n_bytes = 1023,
+ .tag_gcm_128 = { 0x8e8789e6c4c90855, 0x4ec5503d7f953df6 },
+ .tag_gcm_256 = { 0xdb0afebe6c085f53, 0x4eb6f07b63b8a020 },
+ .tag_gmac_128 = { 0x6e9b48e5ad508180, 0xdc86430db2bad514 },
+ .tag_gmac_256 = { 0xbb52b4fbf236b741, 0x47ae63bc836dfba3 },
+ },
+ {
+ .n_bytes = 1024,
+ .tag_gcm_128 = { 0x94e1ccbea0f24089, 0xf51b53b600363bd2 },
+ .tag_gcm_256 = { 0x70f3eb3d562f0b34, 0xffd09e1a25d5bef3 },
+ .tag_gmac_128 = { 0x65a2b560392ecee3, 0x30079a9a9dbbd3a3 },
+ .tag_gmac_256 = { 0x4d361736c43090e6, 0x135810df49dcc981 },
+ },
+ {
+ .n_bytes = 1025,
+ .tag_gcm_128 = { 0x830a99737df5a71a, 0xd9ea6e87c63d3aae },
+ .tag_gcm_256 = { 0xa3fc30e0254a5ee2, 0x52e59adc9a75be40 },
+ .tag_gmac_128 = { 0xb217556427fc09ab, 0xc32fd72ec886730d },
+ .tag_gmac_256 = { 0xeab5a9a02cb0869e, 0xd59e51684bc2839c },
+ },
+ {
+ .n_bytes = 1039,
+ .tag_gcm_128 = { 0x238f229130e92934, 0x52752fc860bca067 },
+ .tag_gcm_256 = { 0xae2754bcaed68191, 0xe0770d1e9a7a67f3 },
+ .tag_gmac_128 = { 0xe030ad2beb01d85d, 0xf10c78b1b64c27af },
+ .tag_gmac_256 = { 0x081b45e126248e85, 0xca0789f30e1c47a1 },
+ },
+ {
+ .n_bytes = 1040,
+ .tag_gcm_128 = { 0x4eebcf7391d66c6f, 0x107d8bef4a93d9c6 },
+ .tag_gcm_256 = { 0xbeb02ae5466964f3, 0x8eb90364c5f9e4cb },
+ .tag_gmac_128 = { 0x451deb85fbf27da5, 0xe47e8c91106dadda },
+ .tag_gmac_256 = { 0x85f0a72f3497699d, 0xe6fce0193cc6c9d1 },
+ },
+ {
+ .n_bytes = 1041,
+ .tag_gcm_128 = { 0xbbddfb0304411d71, 0xe573f63553d7ede4 },
+ .tag_gcm_256 = { 0x68e42d2959af0b24, 0x35ac8e73c749e7f4 },
+ .tag_gmac_128 = { 0x98d022b9896b68f8, 0x98dfde2a17b2869b },
+ .tag_gmac_256 = { 0xb8dac6add35d0d9b, 0x1c55973c6dd769af },
+ },
+ {
+ .n_bytes = 1536,
+ .tag_gcm_128 = { 0x7d8933fd922418bd, 0xc88c2f289c5d3d83 },
+ .tag_gcm_256 = { 0x966c103eb6ee69f2, 0x2f6b070b5c0fc66f },
+ .tag_gmac_128 = { 0x3b70f6154246e758, 0xd485c0edf236b6e2 },
+ .tag_gmac_256 = { 0xfefe1832387b9768, 0xc876712098256ca3 },
+ },
+ {
+ .n_bytes = 2047,
+ .tag_gcm_128 = { 0x15c6bbcb0d835fd4, 0xc33afd1328c1deb1 },
+ .tag_gcm_256 = { 0xcde3edeea228ada6, 0x8276721a8662e708 },
+ .tag_gmac_128 = { 0xb556b0e42419759e, 0x23b0365cf956a3ad },
+ .tag_gmac_256 = { 0x8df762cbbe4b2a04, 0x6841bc61e5702419 },
+ },
+ {
+ .n_bytes = 2048,
+ .tag_gcm_128 = { 0xc5ddbeb8765e3aac, 0x1bad7349fd9f2b50 },
+ .tag_gcm_256 = { 0xa2a623dde251a98d, 0xaf905fbd16f6a7d9 },
+ .tag_gmac_128 = { 0xe20f1e533df2b3d0, 0x5d170bdbcc278a63 },
+ .tag_gmac_256 = { 0x9663185c4342cd4a, 0x82d3c5a3a4998fc6 },
+ },
+ {
+ .n_bytes = 2064,
+ .tag_gcm_128 = { 0x12b76ea0a6ee9cbc, 0xdaecfae7c815aa58 },
+ .tag_gcm_256 = { 0xb5bb2f76028713dd, 0xc8f3a1448b3bd050 },
+ .tag_gmac_128 = { 0x019445c168c42f9b, 0xdf33e251bd9a27fe },
+ .tag_gmac_256 = { 0xbbabd0cefc4d6a42, 0xb138675ca66ba54f },
+ },
+ {
+ .n_bytes = 2065,
+ .tag_gcm_128 = { 0x8758c5168ffc3fd7, 0x554f1df7cfa3b976 },
+ .tag_gcm_256 = { 0xc9808cf0fd21aede, 0xe26921f3fd308006 },
+ .tag_gmac_128 = { 0x44a57e7a32031596, 0x75476d5542faa57b },
+ .tag_gmac_256 = { 0xea0e81807fa79a4a, 0x889cca80746fb8d5 },
+ },
+ {
+ .n_bytes = 4095,
+ .tag_gcm_128 = { 0x06db87757f541dc9, 0x823c619c6b88ef80 },
+ .tag_gcm_256 = { 0xdf0861a56a7fe7b0, 0xe077a5c735cc21b2 },
+ .tag_gmac_128 = { 0x43cb482bea0449e9, 0x70d668af983c9a6c },
+ .tag_gmac_256 = { 0x5fc304ad7be1d19a, 0x81bf2f4111de0b06 },
+ },
+ {
+ .n_bytes = 4096,
+ .tag_gcm_128 = { 0xe4afdad642876152, 0xf78cfcfcb92520b6 },
+ .tag_gcm_256 = { 0x7552cda8d91bdab1, 0x4bf57b7567d59e89 },
+ .tag_gmac_128 = { 0xac5240f8e9c49cfc, 0x2a3c9d0999aded50 },
+ .tag_gmac_256 = { 0x9fb6cd8f10f7b6c5, 0x16e442c147869222 },
+ },
+ {
+ .n_bytes = 4112,
+ .tag_gcm_128 = { 0x2a34db8f06bcf0ee, 0x7a4a2456fa340c33 },
+ .tag_gcm_256 = { 0x4b6c0c5b5c943f5e, 0x6d1669e849ce061a },
+ .tag_gmac_128 = { 0x143bfc9ab07d9bb5, 0xf0aa7510a9039349 },
+ .tag_gmac_256 = { 0x8a97bdd033775ba0, 0x5901a5160739be25 },
+ },
+ {
+ .n_bytes = 4113,
+ .tag_gcm_128 = { 0x296acfcbcbf529af, 0xe3e2cfb1bc5855c8 },
+ .tag_gcm_256 = { 0x181f6f9068ea477e, 0x1e05bfd01ee3e173 },
+ .tag_gmac_128 = { 0x0d81fcb0829e3c8b, 0x68016225b5fa7745 },
+ .tag_gmac_256 = { 0xa2421ac50d65c6b5, 0x84bd16fa55486af8 },
+ },
+ {
+ .n_bytes = 16382,
+ .tag_gcm_128 = { 0xd39fd367e00a103d, 0xf873a278b32d207f },
+ .tag_gcm_256 = { 0xa8da09a851ae6c88, 0x2ef17f0da7f191f1 },
+ .tag_gmac_128 = { 0xd4a22896f44c1c14, 0x69a5d02715c90ea4 },
+ .tag_gmac_256 = { 0x64788ca5e11722b6, 0x63d74a4b24538762 },
+ },
+ {
+ .n_bytes = 16383,
+ .tag_gcm_128 = { 0x2162b91aad49eebc, 0x28c7efe93e639c75 },
+ .tag_gcm_256 = { 0xc5baee5e40004087, 0xf6b26211facc66a5 },
+ .tag_gmac_128 = { 0x3ec003d690d3d846, 0x204baef851d8ad7d },
+ .tag_gmac_256 = { 0xdb51d6f5dddf16bb, 0x529f3825cf78dbd5 },
+ },
+ {
+ .n_bytes = 16384,
+ .tag_gcm_128 = { 0x2272e778c4c5c9ef, 0x84c50021e75ddbab },
+ .tag_gcm_256 = { 0x6c32f1c5666b1f4c, 0x91142a86ae5241b2 },
+ .tag_gmac_128 = { 0x43dadd5ecee9674b, 0xa30fea9ae8091c6c },
+ .tag_gmac_256 = { 0xc360b76ac1887181, 0xcb732f29ea86edeb },
+ },
+ {
+ .n_bytes = 16385,
+ .tag_gcm_128 = { 0xe2a47837578b4056, 0xf96e7233cbeb1ce1 },
+ .tag_gcm_256 = { 0xfa3aa4ebe36fb390, 0x6a2cf1671f4f1a01 },
+ .tag_gmac_128 = { 0xfd0b7312c4975687, 0xdd3096b1c850e80a },
+ .tag_gmac_256 = { 0xaf2cae4642a5536a, 0xb27aff5cc8bd354c },
+ },
+ {
+ .n_bytes = 16386,
+ .tag_gcm_128 = { 0xe1b4c0e5825304ae, 0x48c5dd82aa114320 },
+ .tag_gcm_256 = { 0x76c3612118f47fa8, 0xdd0a47b132ecad3a },
+ .tag_gmac_128 = { 0x346bc841a7f5b642, 0x6fb1b96391c66b40 },
+ .tag_gmac_256 = { 0x2f1a1b6a000e18b2, 0xf7cba25e02551d43 },
+ },
+};
+
+#define MAX_TEST_DATA_LEN 32768
+
+static const struct
+{
+ char *name;
+ const u8 *pt, *key128, *key256, *ct128, *ct256, *tag128, *tag256, *aad, *iv;
+ u32 data_len, tag128_len, tag256_len, aad_len;
+} test_cases[] = {
+ /* test cases */
+ {
+ .name = "GCM Spec. TC1",
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .tag128 = tc1_tag128,
+ .tag128_len = sizeof (tc1_tag128),
+ .tag256 = tc1_tag256,
+ .tag256_len = sizeof (tc1_tag256),
+ },
+ {
+ .name = "GCM Spec. TC2",
+ .pt = tc2_plaintext,
+ .data_len = sizeof (tc2_plaintext),
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .ct128 = tc2_ciphertext128,
+ .ct256 = tc2_ciphertext256,
+ .tag128 = tc2_tag128,
+ .tag128_len = sizeof (tc2_tag128),
+ .tag256 = tc2_tag256,
+ .tag256_len = sizeof (tc2_tag256),
+ },
+ {
+ .name = "GCM Spec. TC3",
+ .pt = tc3_plaintext,
+ .data_len = sizeof (tc3_plaintext),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc3_ciphertext128,
+ .ct256 = tc3_ciphertext256,
+ .tag128 = tc3_tag128,
+ .tag128_len = sizeof (tc3_tag128),
+ .tag256 = tc3_tag256,
+ .tag256_len = sizeof (tc3_tag256),
+ },
+ {
+ .name = "GCM Spec. TC4",
+ .pt = tc4_plaintext,
+ .data_len = sizeof (tc4_plaintext),
+ .aad = tc4_aad,
+ .aad_len = sizeof (tc4_aad),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc4_ciphertext128,
+ .ct256 = tc4_ciphertext256,
+ .tag128 = tc4_tag128,
+ .tag128_len = sizeof (tc4_tag128),
+ .tag256 = tc4_tag256,
+ .tag256_len = sizeof (tc4_tag256),
+ }
+};
+
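+/* generates one perf-test function per AES key size; tp->n_ops is
+ * interpreted as the message length in bytes, so a single variable-size
+ * operation is measured and results can be reported per byte; the
+ * decrypt variant below follows the same pattern */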
+#define perftest_aesXXX_enc_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##a##_gcm_enc (kd, src, n, 0, 0, iv, 16, dst, tag); \
+ test_perf_event_disable (tp); \
+ }
+
+#define perftest_aesXXX_dec_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ int *rv = test_mem_alloc (16); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ rv[0] = clib_aes##a##_gcm_dec (kd, src, n, 0, 0, iv, tag, 16, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+static clib_error_t *
+test_clib_aes128_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag128_len, ct, tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct128, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_enc) = {
+ .name = "clib_aes128_gcm_enc",
+ .fn = test_clib_aes128_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag256_len, ct, tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct256, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_enc) = {
+ .name = "clib_aes256_gcm_enc",
+ .fn = test_clib_aes256_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes128_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ rv = clib_aes128_gcm_dec (&kd, tc->ct128, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag128,
+ tc->tag128_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+      if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+	return clib_error_return (err, "%s: invalid plaintext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
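+  /* GCM runs AES in CTR mode, so the ciphertext of an n-byte message is a
+   * prefix of the ciphertext of a longer message under the same key and
+   * IV; encrypting the whole buffer once therefore covers all sizes */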
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+ (u8 *) tc->tag_gcm_128, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_dec) = {
+ .name = "clib_aes128_gcm_dec",
+ .fn = test_clib_aes128_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ rv = clib_aes256_gcm_dec (&kd, tc->ct256, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag256,
+ tc->tag256_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+      if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+	return clib_error_return (err, "%s: invalid plaintext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
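+  /* same ciphertext prefix property as in the 128-bit test above */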
+  clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+  clib_aes256_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+  FOREACH_ARRAY_ELT (tc, inc_test_cases)
+    {
+      if (!clib_aes256_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+				 (u8 *) tc->tag_gcm_256, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_dec) = {
+ .name = "clib_aes256_gcm_dec",
+ .fn = test_clib_aes256_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+static const u8 gmac1_key[] = {
+ 0x77, 0xbe, 0x63, 0x70, 0x89, 0x71, 0xc4, 0xe2,
+ 0x40, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x7f, 0xeb
+};
+static const u8 gmac1_iv[] = { 0xe0, 0xe0, 0x0f, 0x19, 0xfe, 0xd7,
+ 0xba, 0x01, 0x36, 0xa7, 0x97, 0xf3 };
+static const u8 gmac1_aad[] = {
+ 0x7a, 0x43, 0xec, 0x1d, 0x9c, 0x0a, 0x5a, 0x78,
+ 0xa0, 0xb1, 0x65, 0x33, 0xa6, 0x21, 0x3c, 0xab
+};
+static const u8 gmac1_tag[] = {
+ 0x20, 0x9f, 0xcc, 0x8d, 0x36, 0x75, 0xed, 0x93,
+ 0x8e, 0x9c, 0x71, 0x66, 0x70, 0x9d, 0xd9, 0x46
+};
+
+static const u8 gmac2_key[] = {
+ 0x20, 0xb5, 0xb6, 0xb8, 0x54, 0xe1, 0x87, 0xb0,
+ 0x58, 0xa8, 0x4d, 0x57, 0xbc, 0x15, 0x38, 0xb6
+};
+
+static const u8 gmac2_iv[] = { 0x94, 0xc1, 0x93, 0x5a, 0xfc, 0x06,
+ 0x1c, 0xbf, 0x25, 0x4b, 0x93, 0x6f };
+
+static const u8 gmac2_aad[] = {
+ 0xca, 0x41, 0x8e, 0x71, 0xdb, 0xf8, 0x10, 0x03, 0x81, 0x74, 0xea, 0xa3, 0x71,
+ 0x9b, 0x3f, 0xcb, 0x80, 0x53, 0x1c, 0x71, 0x10, 0xad, 0x91, 0x92, 0xd1, 0x05,
+ 0xee, 0xaa, 0xfa, 0x15, 0xb8, 0x19, 0xac, 0x00, 0x56, 0x68, 0x75, 0x2b, 0x34,
+ 0x4e, 0xd1, 0xb2, 0x2f, 0xaf, 0x77, 0x04, 0x8b, 0xaf, 0x03, 0xdb, 0xdd, 0xb3,
+ 0xb4, 0x7d, 0x6b, 0x00, 0xe9, 0x5c, 0x4f, 0x00, 0x5e, 0x0c, 0xc9, 0xb7, 0x62,
+ 0x7c, 0xca, 0xfd, 0x3f, 0x21, 0xb3, 0x31, 0x2a, 0xa8, 0xd9, 0x1d, 0x3f, 0xa0,
+ 0x89, 0x3f, 0xe5, 0xbf, 0xf7, 0xd4, 0x4c, 0xa4, 0x6f, 0x23, 0xaf, 0xe0
+};
+
+static const u8 gmac2_tag[] = {
+ 0xb3, 0x72, 0x86, 0xeb, 0xaf, 0x4a, 0x54, 0xe0,
+ 0xff, 0xc2, 0xa1, 0xde, 0xaf, 0xc9, 0xf6, 0xdb
+};
+
+static const struct
+{
+ char *name;
+ const u8 *key128, *key256, *tag128, *tag256, *aad, *iv;
+ u32 tag128_len, tag256_len, aad_len;
+} gmac_test_cases[] = {
+ /* test cases */
+ {
+ .name = "GMAC1",
+ .iv = gmac1_iv,
+ .key128 = gmac1_key,
+ .tag128 = gmac1_tag,
+ .tag128_len = sizeof (gmac1_tag),
+ .aad = gmac1_aad,
+ .aad_len = sizeof (gmac1_aad),
+ },
+ {
+ .name = "GMAC2",
+ .iv = gmac2_iv,
+ .key128 = gmac2_key,
+ .tag128 = gmac2_tag,
+ .tag128_len = sizeof (gmac2_tag),
+ .aad = gmac2_aad,
+ .aad_len = sizeof (gmac2_aad),
+ },
+};
+
+static clib_error_t *
+test_clib_aes128_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag128_len,
+ tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
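+/* authenticates the same 512-byte block n_ops times, each operation using
+ * its own IV and tag slot */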
+void __test_perf_fn
+perftest_gmac256_fixed_512byte (test_perf_t *tp)
+{
+ uword n = tp->n_ops;
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (aes_gcm_key_data_t));
+ u8 *ivs = test_mem_alloc_and_fill_inc_u8 (n * 12, 0, 0);
+ u8 *tags = test_mem_alloc_and_fill_inc_u8 (8 + n * 16, 0, 0);
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (512, 0, 0);
+
+ test_perf_event_enable (tp);
+ clib_aes_gcm_key_expand (kd, inc_key, AES_KEY_128);
+
+ for (int i = 0; i < n; i++)
+    clib_aes128_gmac (kd, data, 512, ivs + i * 12, 16, tags + i * 16);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_aes128_gmac) = {
+ .name = "clib_aes128_gmac",
+ .fn = test_clib_aes128_gmac,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (512 byte)",
+ .n_ops = 256,
+ .fn = perftest_gmac256_fixed_512byte }),
+};
+
+static clib_error_t *
+test_clib_aes256_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
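+  /* disabled: no 256-bit keys or tags are defined for the GMAC
+   * known-answer vectors above */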
+#if 0
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag256_len,
+ tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+#endif
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+REGISTER_TEST (clib_aes256_gmac) = {
+ .name = "clib_aes256_gmac",
+ .fn = test_clib_aes256_gmac,
+};
+#endif
diff --git a/src/vppinfra/vector/test/array_mask.c b/src/vppinfra/test/array_mask.c
index 703c70abbe9..4d8fc7c59e2 100644
--- a/src/vppinfra/vector/test/array_mask.c
+++ b/src/vppinfra/test/array_mask.c
@@ -3,10 +3,10 @@
*/
#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
+#include <vppinfra/test/test.h>
#include <vppinfra/vector/array_mask.h>
-__clib_test_fn void
+__test_funct_fn void
clib_array_mask_u32_wrapper (u32 *src, u32 mask, u32 n_elts)
{
clib_array_mask_u32 (src, mask, n_elts);
@@ -76,13 +76,15 @@ static array_mask_test_t tests[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
+  /* mask value 0x1: with src starting at 1, expected output is 1, 0, 1, 0, .. */
+ { .mask = 1, .expected = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 } },
};
static clib_error_t *
test_clib_array_mask_u32 (clib_error_t *err)
{
- u32 i, j;
- for (i = 0; i < ARRAY_LEN (tests); i++)
+ u32 i, j, len;
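+  /* the last test case provides only 10 expected values, so it is
+   * exercised separately with a 10-element array below */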
+ for (i = 0; i < ARRAY_LEN (tests) - 1; i++)
{
u32 src[256];
for (j = 0; j < ARRAY_LEN (src); j++)
@@ -99,6 +101,41 @@ test_clib_array_mask_u32 (clib_error_t *err)
i, j, src[j], t->expected[j]);
}
}
+
+ for (i = 0; i < ARRAY_LEN (tests) - 1; i++)
+ {
+ for (len = 1; len <= 256; len++)
+ {
+ u32 src[len];
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ src[j] = j;
+
+ array_mask_test_t *t = tests + i;
+ clib_array_mask_u32_wrapper (src, t->mask, ARRAY_LEN (src));
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ {
+ if (src[j] != t->expected[j])
+ return clib_error_return (err,
+ "testcase %u failed at "
+ "(src[%u] = 0x%x, expected 0x%x)",
+ i, j, src[j], t->expected[j]);
+ }
+ }
+ }
+
+ u32 src[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+ array_mask_test_t *t = tests + i;
+
+ clib_array_mask_u32_wrapper (src, t->mask, ARRAY_LEN (src));
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ {
+ if (src[j] != t->expected[j])
+ return clib_error_return (err,
+ "testcase %u failed at "
+ "(src[%u] = 0x%x, expected 0x%x)",
+ i, j, src[j], t->expected[j]);
+ }
+
return err;
}
diff --git a/src/vppinfra/test/compress.c b/src/vppinfra/test/compress.c
new file mode 100644
index 00000000000..083065f9bda
--- /dev/null
+++ b/src/vppinfra/test/compress.c
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/compress.h>
+
+__test_funct_fn u32
+clib_compress_u64_wrapper (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u64 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u32_wrapper (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u32 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u16_wrapper (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u16 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u8_wrapper (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u8 (dst, src, mask, n_elts);
+}
+
+typedef struct
+{
+ u64 mask[10];
+ u32 n_elts;
+} compress_test_t;
+
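+/* each mask word selects from 64 consecutive source elements; n_elts
+ * larger than 64 exercises the multi-word path */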
+static compress_test_t tests[] = {
+ { .mask = { 1 }, .n_elts = 1 },
+ { .mask = { 2 }, .n_elts = 2 },
+ { .mask = { 3 }, .n_elts = 2 },
+ { .mask = { 0, 1 }, .n_elts = 66 },
+ { .mask = { 0, 2 }, .n_elts = 69 },
+ { .mask = { 0, 3 }, .n_elts = 66 },
+ { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 62 },
+ { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 255 },
+ { .mask = { ~0ULL, 1, 1, ~0ULL }, .n_elts = 256 },
+};
+
+static clib_error_t *
+test_clib_compress_u64 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u64 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u64 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u64 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u64 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5a5a5a5a5a5a5;
+
+ r = clib_compress_u64_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%lx, src[%u] = 0x%lx)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5a5a5a5a5a5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u32 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u32 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u32 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u32 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u32 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5a5a5;
+
+ r = clib_compress_u32_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5a5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u16 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u16 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u16 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u16 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u16 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5;
+
+ r = clib_compress_u16_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u8 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u8 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u8 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u8 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u8 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5;
+
+ r = clib_compress_u8_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+REGISTER_TEST (clib_compress_u64) = {
+ .name = "clib_compress_u64",
+ .fn = test_clib_compress_u64,
+};
+
+REGISTER_TEST (clib_compress_u32) = {
+ .name = "clib_compress_u32",
+ .fn = test_clib_compress_u32,
+};
+
+REGISTER_TEST (clib_compress_u16) = {
+ .name = "clib_compress_u16",
+ .fn = test_clib_compress_u16,
+};
+
+REGISTER_TEST (clib_compress_u8) = {
+ .name = "clib_compress_u8",
+ .fn = test_clib_compress_u8,
+};
diff --git a/src/vppinfra/test/count_equal.c b/src/vppinfra/test/count_equal.c
new file mode 100644
index 00000000000..942c2203d3d
--- /dev/null
+++ b/src/vppinfra/test/count_equal.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/count_equal.h>
+
+#define foreach_clib_count_equal(type) \
+ typedef uword (wrapper_fn_##type) (type * a, uword maxcount); \
+ \
+ __test_funct_fn uword clib_count_equal_##type##_wrapper (type *a, \
+ uword maxcount) \
+ { \
+ return clib_count_equal_##type (a, maxcount); \
+ } \
+ \
+ static wrapper_fn_##type *wfn_##type = &clib_count_equal_##type##_wrapper; \
+ static clib_error_t *test_clib_count_equal_##type (clib_error_t *err) \
+ { \
+ u32 ps = clib_mem_get_log2_page_size (); \
+ void *map; \
+ \
+ u16 lengths[] = { \
+ 1, 2, 3, 5, 7, 9, 15, 16, 17, 31, 32, 33, 255, 256, 257 \
+ }; \
+ type *data; \
+ \
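+    /* map two pages, put the test data at the end of the first one and   \
+     * make the second inaccessible, so any read past the requested       \
+     * maxcount faults immediately */                                     \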
+ map = clib_mem_vm_map (0, 2ULL << ps, ps, "test"); \
+ if (map == CLIB_MEM_VM_MAP_FAILED) \
+ return clib_error_return (err, "clib_mem_vm_map failed"); \
+ \
+ data = ((type *) (map + (1ULL << ps))); \
+ data[-1] = 0xfe; \
+ \
+    mprotect (data, 1ULL << ps, PROT_NONE);                                  \
+ \
+ for (u8 d = 0; d < 255; d++) \
+ { \
+ for (int i = 1; i <= (1 << ps) / sizeof (data[0]); i++) \
+ data[-i] = d; \
+ for (int i = 0; i < ARRAY_LEN (lengths); i++) \
+ { \
+ uword rv, len = lengths[i]; \
+ \
+ if ((rv = wfn_##type (data - len, len)) != len) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 1 failed for len %u data %u(rv %u)", len, d, \
+ rv); \
+ goto done; \
+ } \
+ \
+ data[-1] = d + 1; \
+ if (len > 1 && ((rv = wfn_##type (data - len, len)) != len - 1)) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 2 failed for len %u data %u (rv %u)", len, \
+ d, rv); \
+ goto done; \
+ } \
+ data[-1] = d; \
+ \
+ data[-2] = d + 1; \
+ if (len > 2 && ((rv = wfn_##type (data - len, len)) != len - 2)) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 3 failed for len %u data %u (rv %u)", len, \
+ d, rv); \
+ goto done; \
+ } \
+ data[-2] = d; \
+ } \
+ } \
+ \
+ done: \
+ clib_mem_vm_unmap (map); \
+ return err; \
+ }
+
+foreach_clib_count_equal (u8);
+foreach_clib_count_equal (u16);
+foreach_clib_count_equal (u32);
+foreach_clib_count_equal (u64);
+
+REGISTER_TEST (clib_count_equal_u8) = {
+ .name = "clib_count_equal_u8",
+ .fn = test_clib_count_equal_u8,
+};
+
+REGISTER_TEST (clib_count_equal_u16) = {
+ .name = "clib_count_equal_u16",
+ .fn = test_clib_count_equal_u16,
+};
+
+REGISTER_TEST (clib_count_equal_u32) = {
+ .name = "clib_count_equal_u32",
+ .fn = test_clib_count_equal_u32,
+};
+
+REGISTER_TEST (clib_count_equal_u64) = {
+ .name = "clib_count_equal_u64",
+ .fn = test_clib_count_equal_u64,
+};
diff --git a/src/vppinfra/test/crc32c.c b/src/vppinfra/test/crc32c.c
new file mode 100644
index 00000000000..8c0c691e2e8
--- /dev/null
+++ b/src/vppinfra/test/crc32c.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crc32.h>
+
+#ifndef CLIB_MARCH_VARIANT
+char *crc32c_test_string =
+ "The quick brown fox jumped over the lazy dog and stumbled.";
+u32 crc32c_test_values_data[] = {
+ 0x00000000, 0x96bf4dcc, 0x65479df4, 0x60a63889, 0xda99c852, 0x3337e4e2,
+ 0x4651af18, 0x83b586a1, 0x2235e3b5, 0x7f896b6f, 0x1f17a8f3, 0x60dc68bc,
+ 0x6f95458b, 0x24c5aa40, 0xe40de8f0, 0x3e344ed8, 0x798903f4, 0x73ea05e3,
+ 0xcfc61ead, 0xe6ed33a9, 0xfaa20d87, 0x5ce246c4, 0x4022138c, 0x111b090a,
+ 0x1a6b673c, 0x298d6a78, 0x5d3485d5, 0xc6c24fec, 0x91600ac3, 0x877506df,
+ 0xd9702ff7, 0xb7de5f4b, 0xf8f8e606, 0x905bdc1c, 0xb69298ce, 0x3b748c05,
+ 0x1577ee4e, 0xc19389c7, 0x842bc1c7, 0x0db915db, 0x437d7c44, 0xa61f7901,
+ 0x54919807, 0xeb4b5a35, 0xb0f5e17e, 0xfded9015, 0xb6ff2e82, 0xaec598e4,
+ 0x8258fee0, 0xc30f7e3a, 0x390ac90e, 0x1a4376fc, 0xfa5ea3c2, 0xfca2d721,
+ 0x52d74c9f, 0xe06c4bcd, 0x28728122, 0x67f288d5, 0
+};
+u32 *crc32c_test_values = crc32c_test_values_data;
+
+#else
+extern char *crc32c_test_string;
+extern u32 *crc32c_test_values;
+#endif
+
+static clib_error_t *
+test_clib_crc32c (clib_error_t *err)
+{
+ int max_len = strlen (crc32c_test_string);
+ int i;
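+  /* crc32c_test_values[i] is the CRC32C of the first i bytes of the test
+   * string, so every prefix length is verified */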
+ for (i = 0; i < max_len; i++)
+ {
+ u32 expected_crc32c = crc32c_test_values[i];
+ u32 calculated_crc32 = clib_crc32c ((u8 *) crc32c_test_string, i);
+ if (expected_crc32c != calculated_crc32)
+ {
+ return clib_error_return (
+ err,
+ "Bad CRC32C for test case %d: expected 0x%08x, calculated: 0x%08x",
+ i, expected_crc32c, calculated_crc32);
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_crc32c) = {
+ .name = "clib_crc32c",
+ .fn = test_clib_crc32c,
+};
diff --git a/src/vppinfra/test/index_to_ptr.c b/src/vppinfra/test/index_to_ptr.c
new file mode 100644
index 00000000000..06b621c10ff
--- /dev/null
+++ b/src/vppinfra/test/index_to_ptr.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/index_to_ptr.h>
+
+typedef void (wrapper_fn) (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts);
+
+__test_funct_fn void
+clib_index_to_ptr_u32_wrapper (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts)
+{
+ clib_index_to_ptr_u32 (indices, base, shift, ptrs, n_elts);
+}
+
+static wrapper_fn *wfn = &clib_index_to_ptr_u32_wrapper;
+
+static clib_error_t *
+test_clib_index_to_ptr_u32 (clib_error_t *err)
+{
+ void *_ptrs[512 + 128], **ptrs = _ptrs + 64;
+ u32 _indices[512 + 128], *indices = _indices + 64;
+ u16 lengths[] = { 1, 3, 5, 7, 9, 15, 16, 17, 31, 32,
+ 33, 40, 41, 42, 63, 64, 65, 511, 512 };
+
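+  /* ptrs and indices carry 64 elements of slack on each side; ptrs is
+   * poisoned with 0xfe.. before every call so entries the function fails
+   * to write show up as mismatches */
+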
+ for (int i = 0; i < ARRAY_LEN (_indices); i++)
+ _indices[i] = i;
+
+ for (int i = 0; i < ARRAY_LEN (lengths); i++)
+ {
+ u16 len = lengths[i];
+ u8 shift = 6;
+ void *base = (void *) 0x100000000 + i;
+
+ for (int j = -64; j < len + 64; j++)
+ ptrs[j] = (void *) 0xfefefefefefefefe;
+
+ wfn (indices, base, shift, ptrs, len);
+ for (int j = 0; j < len; j++)
+ {
+ void *expected = base + ((u64) indices[j] << shift);
+ if (ptrs[j] != expected)
+ return clib_error_return (err,
+ "testcase failed for length %u "
+ "(offset %u, expected %p, found %p)",
+ len, j, expected, ptrs[j]);
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_index_to_ptr_u32) = {
+ .name = "clib_index_to_ptr_u32",
+ .fn = test_clib_index_to_ptr_u32,
+};
diff --git a/src/vppinfra/test/ip_csum.c b/src/vppinfra/test/ip_csum.c
new file mode 100644
index 00000000000..b8508ee449d
--- /dev/null
+++ b/src/vppinfra/test/ip_csum.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/ip_csum.h>
+
+typedef struct
+{
+ struct
+ {
+ u8 *src;
+ u32 count;
+ } chunk[5];
+ u16 result;
+} ip_csum_test_t;
+
+static u8 test1[] = { 0x45, 0x00, 0x00, 0x73, 0x00, 0x00, 0x40,
+ 0x00, 0x40, 0x11, 0x00, 0x00, 0xc0, 0xa8,
+ 0x00, 0x01, 0xc0, 0xa8, 0x00, 0xc7, 0x00 };
+#define TEST_LEN(x) (ARRAY_LEN (x) - 1)
+
+static ip_csum_test_t tests[] = { {
+ .chunk[0].src = test1,
+ .chunk[0].count = TEST_LEN (test1),
+ .result = 0x61b8,
+ },
+ {
+ .chunk[0].src = test1,
+ .chunk[0].count = 1,
+ .chunk[1].src = test1 + 1,
+ .chunk[1].count = 2,
+ .chunk[2].src = test1 + 3,
+ .chunk[2].count = 3,
+ .chunk[3].src = test1 + 6,
+ .chunk[3].count = 4,
+ .chunk[4].src = test1 + 10,
+ .chunk[4].count = TEST_LEN (test1) - 10,
+ .result = 0x61b8,
+ },
+ {
+ .chunk[0].count = 1,
+ .result = 0xff0f,
+ },
+ {
+ .chunk[0].count = 2,
+ .result = 0x080f,
+ },
+ {
+ .chunk[0].count = 3,
+ .result = 0x0711,
+ },
+ {
+ .chunk[0].count = 4,
+ .result = 0x1210,
+ },
+ {
+ .chunk[0].count = 63,
+ .result = 0xda01,
+ },
+ {
+ .chunk[0].count = 64,
+ .result = 0xe100,
+ },
+ {
+ .chunk[0].count = 65,
+ .result = 0xe010,
+ },
+ {
+ .chunk[0].count = 65535,
+ .result = 0xfc84,
+ },
+ {
+ .chunk[0].count = 65536,
+ .result = 0xffff,
+ } };
+
+static clib_error_t *
+test_clib_ip_csum (clib_error_t *err)
+{
+ u8 *buf;
+ buf = test_mem_alloc (65536);
+ for (int i = 0; i < 65536; i++)
+ buf[i] = 0xf0 + ((i * 7) & 0xf);
+
+ for (int i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ clib_ip_csum_t c = {};
+ ip_csum_test_t *t = tests + i;
+ u16 rv;
+
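+      /* feed each chunk into the running checksum state; chunks without
+       * a src pointer read from the 0xf0.. pattern buffer allocated
+       * above, then the state is folded to a 16-bit result */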
+ for (int j = 0; j < ARRAY_LEN (((ip_csum_test_t *) 0)->chunk); j++)
+ if (t->chunk[j].count > 0)
+ {
+ if (t->chunk[j].src == 0)
+ clib_ip_csum_chunk (&c, buf, t->chunk[j].count);
+ else
+ clib_ip_csum_chunk (&c, t->chunk[j].src, t->chunk[j].count);
+ }
+ rv = clib_ip_csum_fold (&c);
+
+ if (rv != tests[i].result)
+ {
+ err = clib_error_return (err,
+ "bad checksum in test case %u (expected "
+ "0x%04x, calculated 0x%04x)",
+ i, tests[i].result, rv);
+ goto done;
+ }
+ }
+done:
+ return err;
+}
+
+void __test_perf_fn
+perftest_ip4_hdr (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
+ u16 *res = test_mem_alloc (n * sizeof (u16));
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ res[i] = clib_ip_csum (data + i * 20, 20);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_tcp_payload (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ volatile uword *lenp = &tp->arg0;
+ u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
+ u16 *res = test_mem_alloc (n * sizeof (u16));
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ res[i] = clib_ip_csum (data + i * lenp[0], lenp[0]);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_byte (test_perf_t *tp)
+{
+ volatile uword *np = &tp->n_ops;
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0);
+ u16 *res = test_mem_alloc (sizeof (u16));
+
+ test_perf_event_enable (tp);
+ res[0] = clib_ip_csum (data, np[0]);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_ip_csum) = {
+ .name = "clib_ip_csum",
+ .fn = test_clib_ip_csum,
+ .perf_tests = PERF_TESTS (
+ { .name = "fixed size (per IPv4 Header)",
+ .n_ops = 1024,
+ .fn = perftest_ip4_hdr },
+ { .name = "fixed size (per 1460 byte block)",
+ .n_ops = 16,
+ .arg0 = 1460,
+ .fn = perftest_tcp_payload },
+ { .name = "variable size (per byte)", .n_ops = 16384, .fn = perftest_byte }
+
+ ),
+};
diff --git a/src/vppinfra/vector/test/mask_compare.c b/src/vppinfra/test/mask_compare.c
index 64df0ee084a..738b0082dd7 100644
--- a/src/vppinfra/vector/test/mask_compare.c
+++ b/src/vppinfra/test/mask_compare.c
@@ -3,21 +3,27 @@
*/
#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
+#include <vppinfra/test/test.h>
#include <vppinfra/vector/mask_compare.h>
-__clib_test_fn void
+__test_funct_fn void
clib_mask_compare_u16_wrapper (u16 v, u16 *a, u64 *mask, u32 n_elts)
{
clib_mask_compare_u16 (v, a, mask, n_elts);
}
-__clib_test_fn void
+__test_funct_fn void
clib_mask_compare_u32_wrapper (u32 v, u32 *a, u64 *mask, u32 n_elts)
{
clib_mask_compare_u32 (v, a, mask, n_elts);
}
+__test_funct_fn void
+clib_mask_compare_u64_wrapper (u64 v, u64 *a, u64 *mask, u64 n_elts)
+{
+ clib_mask_compare_u64 (v, a, mask, n_elts);
+}
+
static clib_error_t *
test_clib_mask_compare_u16 (clib_error_t *err)
{
@@ -93,3 +99,41 @@ REGISTER_TEST (clib_mask_compare_u32) = {
.name = "clib_mask_compare_u32",
.fn = test_clib_mask_compare_u32,
};
+
+static clib_error_t *
+test_clib_mask_compare_u64 (clib_error_t *err)
+{
+ u64 array[513];
+ u64 mask[10];
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ array[i] = i;
+
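+  /* comparing value i against array[0..i] must set exactly bit i of the
+   * result mask; mask words are pre-filled with a canary so overruns past
+   * the last used mask word are caught */
+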
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ {
+ for (j = 0; j < ARRAY_LEN (mask); j++)
+ mask[j] = 0xa5a5a5a5a5a5a5a5;
+
+ clib_mask_compare_u64_wrapper (i, array, mask, i + 1);
+
+ for (j = 0; j < (i >> 6); j++)
+ {
+ if (mask[j])
+ return clib_error_return (err, "mask at position %u not zero", j);
+ }
+ if (mask[j] != 1ULL << (i & 0x3f))
+ return clib_error_return (err,
+ "mask at position %u is %lx, expected %lx",
+ j, mask[j], 1ULL << (i % 64));
+
+ if (mask[j + 1] != 0xa5a5a5a5a5a5a5a5)
+ return clib_error_return (err, "mask overrun at position %u", j + 1);
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_mask_compare_u64) = {
+ .name = "clib_mask_compare_u64",
+ .fn = test_clib_mask_compare_u64,
+};
diff --git a/src/vppinfra/test/memcpy_x86_64.c b/src/vppinfra/test/memcpy_x86_64.c
new file mode 100644
index 00000000000..41855c39241
--- /dev/null
+++ b/src/vppinfra/test/memcpy_x86_64.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifdef __x86_64__
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/memcpy_x86_64.h>
+
+__test_funct_fn void
+wrapper (u8 *dst, u8 *src, uword n)
+{
+ clib_memcpy_x86_64 (dst, src, n);
+}
+
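+/* in addition to the variable-length wrapper above, a noinline wrapper is
+ * generated for every constant length from 1 to 255, so the constant-size
+ * expansions of clib_memcpy_x86_64 () are exercised as well */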
+/* clang-format off */
+#define foreach_const_n \
+ _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8) _(9) _(10) _(11) _(12) _(13) _(14) \
+ _(15) _(16) _(17) _(18) _(19) _(20) _(21) _(22) _(23) _(24) _(25) _(26) \
+ _(27) _(28) _(29) _(30) _(31) _(32) _(33) _(34) _(35) _(36) _(37) _(38) \
+ _(39) _(40) _(41) _(42) _(43) _(44) _(45) _(46) _(47) _(48) _(49) _(50) \
+ _(51) _(52) _(53) _(54) _(55) _(56) _(57) _(58) _(59) _(60) _(61) _(62) \
+ _(63) _(64) _(65) _(66) _(67) _(68) _(69) _(70) _(71) _(72) _(73) _(74) \
+ _(75) _(76) _(77) _(78) _(79) _(80) _(81) _(82) _(83) _(84) _(85) _(86) \
+ _(87) _(88) _(89) _(90) _(91) _(92) _(93) _(94) _(95) _(96) _(97) _(98) \
+ _(99) _(100) _(101) _(102) _(103) _(104) _(105) _(106) _(107) _(108) \
+ _(109) _(110) _(111) _(112) _(113) _(114) _(115) _(116) _(117) _(118) \
+ _(119) _(120) _(121) _(122) _(123) _(124) _(125) _(126) _(127) _(128) \
+ _(129) _(130) _(131) _(132) _(133) _(134) _(135) _(136) _(137) _(138) \
+ _(139) _(140) _(141) _(142) _(143) _(144) _(145) _(146) _(147) _(148) \
+ _(149) _(150) _(151) _(152) _(153) _(154) _(155) _(156) _(157) _(158) \
+ _(159) _(160) _(161) _(162) _(163) _(164) _(165) _(166) _(167) _(168) \
+ _(169) _(170) _(171) _(172) _(173) _(174) _(175) _(176) _(177) _(178) \
+ _(179) _(180) _(181) _(182) _(183) _(184) _(185) _(186) _(187) _(188) \
+ _(189) _(190) _(191) _(192) _(193) _(194) _(195) _(196) _(197) _(198) \
+ _(199) _(200) _(201) _(202) _(203) _(204) _(205) _(206) _(207) _(208) \
+ _(209) _(210) _(211) _(212) _(213) _(214) _(215) _(216) _(217) _(218) \
+ _(219) _(220) _(221) _(222) _(223) _(224) _(225) _(226) _(227) _(228) \
+ _(229) _(230) _(231) _(232) _(233) _(234) _(235) _(236) _(237) _(238) \
+ _(239) _(240) _(241) _(242) _(243) _(244) _(245) _(246) _(247) _(248) \
+ _(249) _(250) _(251) _(252) _(253) _(254) _(255)
+/* clang-format on */
+
+#define _(n) \
+ static __clib_noinline void wrapper##n (u8 *dst, u8 *src) \
+ { \
+ clib_memcpy_x86_64 (dst, src, n); \
+ }
+
+foreach_const_n;
+#undef _
+
+typedef void (const_fp_t) (u8 *dst, u8 *src);
+typedef struct
+{
+ u16 len;
+ const_fp_t *fp;
+} const_test_t;
+
+static const_test_t const_tests[] = {
+#define _(n) { .fp = wrapper##n, .len = n },
+ foreach_const_n
+#undef _
+};
+
+#define MAX_LEN 1024
+
+static clib_error_t *
+validate_one (clib_error_t *err, u8 *d, u8 *s, u16 n, u8 off, int is_const)
+{
+ for (int i = 0; i < n; i++)
+ if (d[i] != s[i])
+ return clib_error_return (err,
+ "memcpy error at position %d "
+ "(n = %u, off = %u, expected 0x%02x "
+ "found 0x%02x%s)",
+ i, n, off, s[i], d[i],
+ is_const ? ", const" : "");
+ for (int i = -64; i < 0; i++)
+ if (d[i] != 0xfe)
+ return clib_error_return (err,
+ "buffer underrun at position %d "
+ "(n = %u, off = %u, expected 0xfe "
+ "found 0x%02x%s)",
+ i, n, off, d[i], is_const ? ", const" : "");
+ for (int i = n; i < n + 64; i++)
+ if (d[i] != 0xfe)
+ return clib_error_return (err,
+ "buffer overrun at position %d "
+ "(n = %u, off = %u, expected 0xfe "
+ "found 0x%02x%s)",
+ i, n, off, d[i], is_const ? ", const" : "");
+ return err;
+}
+
+static clib_error_t *
+test_clib_memcpy_x86_64 (clib_error_t *err)
+{
+ u8 src[MAX_LEN + 192];
+ u8 dst[MAX_LEN + 192];
+
+ for (int i = 0; i < ARRAY_LEN (src); i++)
+ src[i] = i & 0x7f;
+
+ for (int j = 0; j < ARRAY_LEN (const_tests); j++)
+ {
+ u8 *d = dst + 64;
+ u8 *s = src + 64;
+ u16 n = const_tests[j].len;
+
+ for (int i = 0; i < 128 + n; i++)
+ dst[i] = 0xfe;
+ const_tests[j].fp (d, s);
+ if ((err = validate_one (err, d, s, n, 0, /* is_const */ 1)))
+ return err;
+ }
+
+ for (u16 n = 1; n <= MAX_LEN; n++)
+ {
+ for (int off = 0; off < 64; off += 7)
+ {
+ u8 *d = dst + 64 + off;
+ u8 *s = src + 64;
+
+ for (int i = 0; i < 128 + n + off; i++)
+ dst[i] = 0xfe;
+
+ wrapper (d, s, n);
+
+ if ((err = validate_one (err, d, s, n, off, /* is_const */ 0)))
+ return err;
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_memcpy_x86_64) = {
+ .name = "clib_memcpy_x86_64",
+ .fn = test_clib_memcpy_x86_64,
+};
+#endif
diff --git a/src/vppinfra/test/poly1305.c b/src/vppinfra/test/poly1305.c
new file mode 100644
index 00000000000..34551f84047
--- /dev/null
+++ b/src/vppinfra/test/poly1305.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/poly1305.h>
+
+static const u8 text1[375] = {
+ 0x41, 0x6e, 0x79, 0x20, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6f,
+ 0x6e, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x49, 0x45, 0x54, 0x46,
+ 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74,
+ 0x6f, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63,
+ 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x61, 0x73, 0x20, 0x61, 0x6c, 0x6c, 0x20,
+ 0x6f, 0x72, 0x20, 0x70, 0x61, 0x72, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x6e,
+ 0x20, 0x49, 0x45, 0x54, 0x46, 0x20, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65,
+ 0x74, 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x20, 0x6f, 0x72, 0x20, 0x52, 0x46,
+ 0x43, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x73, 0x74, 0x61,
+ 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x77,
+ 0x69, 0x74, 0x68, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e,
+ 0x74, 0x65, 0x78, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x6e, 0x20, 0x49, 0x45,
+ 0x54, 0x46, 0x20, 0x61, 0x63, 0x74, 0x69, 0x76, 0x69, 0x74, 0x79, 0x20, 0x69,
+ 0x73, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x20,
+ 0x61, 0x6e, 0x20, 0x22, 0x49, 0x45, 0x54, 0x46, 0x20, 0x43, 0x6f, 0x6e, 0x74,
+ 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2e, 0x20, 0x53, 0x75,
+ 0x63, 0x68, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73,
+ 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x20, 0x6f, 0x72, 0x61, 0x6c,
+ 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x20, 0x69,
+ 0x6e, 0x20, 0x49, 0x45, 0x54, 0x46, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
+ 0x6e, 0x73, 0x2c, 0x20, 0x61, 0x73, 0x20, 0x77, 0x65, 0x6c, 0x6c, 0x20, 0x61,
+ 0x73, 0x20, 0x77, 0x72, 0x69, 0x74, 0x74, 0x65, 0x6e, 0x20, 0x61, 0x6e, 0x64,
+ 0x20, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x72, 0x6f, 0x6e, 0x69, 0x63, 0x20, 0x63,
+ 0x6f, 0x6d, 0x6d, 0x75, 0x6e, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73,
+ 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x61, 0x74, 0x20, 0x61, 0x6e, 0x79, 0x20,
+ 0x74, 0x69, 0x6d, 0x65, 0x20, 0x6f, 0x72, 0x20, 0x70, 0x6c, 0x61, 0x63, 0x65,
+ 0x2c, 0x20, 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x61, 0x72, 0x65, 0x20, 0x61,
+ 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x20, 0x74, 0x6f
+};
+
+const static struct
+{
+ char *name;
+ u32 len;
+ const u8 key[32];
+ const u8 *msg;
+ const u8 out[16];
+} test_cases[] = {
+ {
+ .name = "test1",
+ .len = 34,
+ .out = { 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, 0xc2, 0x2b, 0x8b,
+ 0xaf, 0x0c, 0x01, 0x27, 0xa9 },
+ .key = { 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, 0x7f, 0x44, 0x52,
+ 0xfe, 0x42, 0xd5, 0x06, 0xa8, 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d,
+ 0xb2, 0xfd, 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b },
+ .msg = (u8[34]){ 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72, 0x61,
+ 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f, 0x72, 0x75,
+ 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65, 0x61, 0x72, 0x63,
+ 0x68, 0x20, 0x47, 0x72, 0x6f, 0x75, 0x70 },
+ },
+ {
+ .name = "RFC8439 A3 TV1",
+ .len = 64,
+ .out = {},
+ .key = {},
+ .msg = (u8[64]){},
+ },
+ {
+ .name = "RFC8439 A3 TV2",
+ .len = sizeof (text1),
+ .out = { 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, 0xf0, 0xef, 0xca,
+ 0x96, 0x22, 0x7a, 0x86, 0x3e },
+ .key = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0,
+ 0x60, 0x70, 0xf0, 0xef, 0xca, 0x96, 0x22, 0x7a, 0x86, 0x3e },
+ .msg = text1,
+ },
+ {
+ .name = "RFC8439 A3 TV3",
+ .len = sizeof (text1),
+    .out = { 0xf3, 0x47, 0x7e, 0x7c, 0xd9, 0x54, 0x17, 0xaf, 0x89, 0xa6, 0xb8,
+	     0x79, 0x4c, 0x31, 0x0c, 0xf0 },
+ .key = { 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, 0xf0, 0xef, 0xca,
+ 0x96, 0x22, 0x7a, 0x86, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .msg = text1,
+ },
+ {
+ .name = "RFC8439 A3 TV4",
+ .len = 127,
+ .key = { 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, 0xf3, 0x33, 0x88,
+ 0x86, 0x04, 0xf6, 0xb5, 0xf0, 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b,
+ 0x80, 0x09, 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 },
+ .msg =
+ (u8[127]){
+ 0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72, 0x69, 0x6c, 0x6c, 0x69,
+ 0x67, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,
+ 0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f, 0x76, 0x65, 0x73, 0x0a,
+ 0x44, 0x69, 0x64, 0x20, 0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,
+ 0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20, 0x69, 0x6e, 0x20, 0x74,
+ 0x68, 0x65, 0x20, 0x77, 0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,
+ 0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77, 0x65, 0x72, 0x65, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,
+ 0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d,
+ 0x6f, 0x6d, 0x65, 0x20, 0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,
+ 0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e },
+ .out = { 0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61, 0xe7, 0x08, 0xdc,
+ 0x7c, 0xbc, 0xc5, 0xeb, 0x62 },
+ },
+ {
+ /* Test Vector #5:
+ * If one uses 130-bit partial reduction, does the code handle the case
+ * where partially reduced final result is not fully reduced? */
+ .name = "RFC8439 A3 TV5",
+ .len = 16,
+ .key = { 2 },
+ .msg = (u8[16]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .out = { 3 },
+ },
+ {
+ /* Test Vector #6:
+ * What happens if addition of s overflows modulo 2^128? */
+ .name = "RFC8439 A3 TV6",
+ .len = 16,
+ .key = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .msg = (u8[16]){ 2 },
+ .out = { 3 },
+ },
+ {
+ /* Test Vector #7:
+ * What happens if data limb is all ones and there is carry from lower
+ * limb? */
+ .name = "RFC8439 A3 TV7",
+ .len = 48,
+ .key = { 1 },
+ .msg =
+ (u8[48]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .out = { 5 },
+ },
+ {
+ /* Test Vector #8:
+ * What happens if final result from polynomial part is exactly 2^130-5? */
+ .name = "RFC8439 A3 TV8",
+ .len = 48,
+ .key = { 1 },
+ .msg =
+ (u8[48]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xfe, 0xfe, 0xfe,
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+ 0xfe, 0xfe, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 },
+ .out = { 0 },
+ },
+ {
+ /* Test Vector #9:
+ * What happens if final result from polynomial part is exactly 2^130-6? */
+ .name = "RFC8439 A3 TV9",
+ .len = 16,
+ .key = { 2 },
+ .msg = (u8[16]){ 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .out = { 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff },
+ },
+ {
+ /* Test Vector #10:
+ * What happens if 5*H+L-type reduction produces 131-bit intermediate
+ * result? */
+ .name = "RFC8439 A3 TV10",
+ .len = 64,
+ .key = { [0] = 1, [8] = 4 },
+ .msg =
+ (u8[64]){ 0xE3, 0x35, 0x94, 0xD7, 0x50, 0x5E, 0x43, 0xB9, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x94, 0xD7, 0x50,
+ 0x5E, 0x43, 0x79, 0xCD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 },
+ .out = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00 },
+ },
+ {
+ /* Test Vector #11:
+ * What happens if 5*H+L-type reduction produces 131-bit final result? */
+ .name = "RFC8439 A3 TV11",
+ .len = 48,
+ .key = { [0] = 1, [8] = 4 },
+ .msg =
+ (u8[48]){ 0xE3, 0x35, 0x94, 0xD7, 0x50, 0x5E, 0x43, 0xB9, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x94, 0xD7, 0x50,
+ 0x5E, 0x43, 0x79, 0xCD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .out = { 0x13 },
+ }
+};
+
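+/* the first vector is the Poly1305 example from RFC 8439 section 2.5.2;
+ * the named TVs are from RFC 8439 appendix A.3 */
+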
+static clib_error_t *
+test_clib_poly1305 (clib_error_t *err)
+{
+ u8 out[16] = {};
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_poly1305 (tc->key, tc->msg, tc->len, out);
+ if (memcmp (out, tc->out, 16) != 0)
+ err = clib_error_return (
+ err,
+ "\ntest: %s"
+ "\nkey: %U"
+ "\ndata: %U"
+ "\nexp out: %U"
+ "\ncalc out: %U\n",
+ tc->name, format_hexdump, tc->key, 32, format_hexdump, tc->msg,
+ tc->len, format_hexdump, tc->out, 16, format_hexdump, out, 16);
+ }
+ return err;
+}
+
+void __test_perf_fn
+perftest_64byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *m = test_mem_alloc_and_fill_inc_u8 (n * 64, 0, 0);
+ u8 *k = test_mem_alloc_and_fill_inc_u8 (n * 32, 0, 0);
+ u8 *t = test_mem_alloc (n * 16);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++, t += 16, k += 32, m += 64)
+ clib_poly1305 (k, m, 64, t);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+
+ u8 *m = test_mem_alloc_and_fill_inc_u8 (n, 0, 0);
+ u8 *k = test_mem_alloc_and_fill_inc_u8 (32, 0, 0);
+ u8 *t = test_mem_alloc (16);
+
+ test_perf_event_enable (tp);
+ clib_poly1305 (k, m, n, t);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_poly1305) = {
+ .name = "clib_poly1305",
+ .fn = test_clib_poly1305,
+ .perf_tests = PERF_TESTS (
+ { .name = "fixed size (64 bytes)", .n_ops = 1024, .fn = perftest_64byte },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_byte }),
+};
diff --git a/src/vppinfra/test/sha2.c b/src/vppinfra/test/sha2.c
new file mode 100644
index 00000000000..d5da2b61706
--- /dev/null
+++ b/src/vppinfra/test/sha2.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/sha2.h>
+
+typedef struct
+{
+ const u8 *msg;
+ const u8 *key;
+ int tc;
+ u32 msg_len;
+ u32 key_len;
+ u8 digest_224[28];
+ u8 digest_256[32];
+ u8 digest_384[48];
+ u8 digest_512[64];
+ u8 digest_224_len;
+ u8 digest_256_len;
+ u8 digest_384_len;
+ u8 digest_512_len;
+} sha2_test_t;
+
+#ifndef CLIB_MARCH_VARIANT
+static const u8 key1[20] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+static const u8 key2[4] = "Jefe";
+static const u8 key3[20] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa };
+static const u8 key4[25] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
+ 0x16, 0x17, 0x18, 0x19 };
+static const u8 key5[20] = { 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c };
+static const u8 key6[131] = {
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa
+};
+
+static const u8 msg1[8] = "Hi There";
+static const u8 msg2[28] = "what do ya want for nothing?";
+static const u8 msg3[50] = {
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd
+};
+static const u8 msg4[50] = {
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd
+};
+static const u8 msg6[54] =
+ "Test Using Larger Than Block-Size Key - Hash Key First";
+static const u8 msg7[153] =
+ "This is a test using a larger than block-size key and a larger than "
+ "block-size data. The key needs to be hashed before being used by the "
+ "HMAC algorithm.";
+
+const sha2_test_t sha2_tests[] = {
+ {
+ /* RFC4231 Test Case 1 */
+ .tc = 1,
+ .key = key1,
+ .key_len = sizeof (key1),
+ .msg = msg1,
+ .msg_len = sizeof (msg1),
+ .digest_224 = { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19, 0x68, 0x32,
+ 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f, 0x47, 0xb4, 0xb1, 0x16,
+ 0x99, 0x12, 0xba, 0x4f, 0x53, 0x68, 0x4b, 0x22 },
+ .digest_256 = { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53,
+ 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b,
+ 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7,
+ 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 },
+ .digest_384 = { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, 0x6b, 0x08,
+ 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, 0x15, 0xf9, 0xda, 0xdb,
+ 0xe4, 0x10, 0x1e, 0xc6, 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb,
+ 0xc5, 0x9c, 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f,
+ 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 },
+ .digest_512 = { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d, 0x4f, 0xf0,
+ 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0, 0x23, 0x79, 0xf4, 0xe2,
+ 0xce, 0x4e, 0xc2, 0x78, 0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1,
+ 0x7c, 0xde, 0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02,
+ 0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4, 0xbe, 0x9d,
+ 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70, 0x2e, 0x69, 0x6c, 0x20,
+ 0x3a, 0x12, 0x68, 0x54 },
+ },
+ {
+ /* RFC4231 Test Case 2 */
+ .tc = 2,
+ .key = key2,
+ .key_len = sizeof (key2),
+ .msg = msg2,
+ .msg_len = sizeof (msg2),
+ .digest_224 = { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf, 0x45, 0x69,
+ 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f, 0x8b, 0xbe, 0xa2, 0xa3,
+ 0x9e, 0x61, 0x48, 0x00, 0x8f, 0xd0, 0x5e, 0x44 },
+ .digest_256 = { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e,
+ 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7,
+ 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83,
+ 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 },
+ .digest_384 = { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, 0x61, 0x7f,
+ 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, 0x9c, 0x7e, 0xf4, 0x64,
+ 0xf5, 0xa0, 0x1b, 0x47, 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22,
+ 0x44, 0x5e, 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7,
+ 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 },
+ .digest_512 = { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2, 0xe3, 0x95,
+ 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3, 0x87, 0xbd, 0x64, 0x22,
+ 0x2e, 0x83, 0x1f, 0xd6, 0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25,
+ 0x05, 0x54, 0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a,
+ 0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd, 0xca, 0xea,
+ 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b, 0x63, 0x6e, 0x07, 0x0a,
+ 0x38, 0xbc, 0xe7, 0x37 },
+ },
+ { /* RFC4231 Test Case 3 */
+ .tc = 3,
+ .key = key3,
+ .key_len = sizeof (key3),
+ .msg = msg3,
+ .msg_len = sizeof (msg3),
+ .digest_224 = { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6, 0xff, 0xa9,
+ 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64, 0x93, 0x65, 0xb0, 0xc1,
+ 0xf6, 0x5d, 0x69, 0xd1, 0xec, 0x83, 0x33, 0xea },
+ .digest_256 = { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46,
+ 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7,
+ 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22,
+ 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe },
+ .digest_384 = { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a, 0x0a, 0xa2,
+ 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, 0x0a, 0xa6, 0x35, 0xd9,
+ 0x47, 0xac, 0x9f, 0xeb, 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66,
+ 0x14, 0x4b, 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9,
+ 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 },
+ .digest_512 = { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84, 0xef, 0xb0,
+ 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9, 0xb1, 0xb5, 0xdb, 0xdd,
+ 0x8e, 0xe8, 0x1a, 0x36, 0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27,
+ 0x9d, 0x39, 0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8,
+ 0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07, 0xb9, 0x46,
+ 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26, 0x74, 0x27, 0x88, 0x59,
+ 0xe1, 0x32, 0x92, 0xfb } },
+ {
+ /* RFC4231 Test Case 4 */
+ .tc = 4,
+ .key = key4,
+ .key_len = sizeof (key4),
+ .msg = msg4,
+ .msg_len = sizeof (msg4),
+ .digest_224 = { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac, 0x6a, 0x2a,
+ 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c, 0xec, 0x6a, 0x90, 0xd8,
+ 0x6e, 0xfc, 0x01, 0x2d, 0xe7, 0xaf, 0xec, 0x5a },
+ .digest_256 = { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e,
+ 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a,
+ 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07,
+ 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b },
+ .digest_384 = { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85, 0x19, 0x33,
+ 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7, 0x7a, 0x99, 0x81, 0x48,
+ 0x08, 0x50, 0x00, 0x9c, 0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57,
+ 0x3b, 0x4e, 0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79,
+ 0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb },
+ .digest_512 = { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69, 0x90, 0xe5,
+ 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7, 0xe5, 0x76, 0xd9, 0x7f,
+ 0xf9, 0x4b, 0x87, 0x2d, 0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e,
+ 0xe3, 0xdb, 0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4,
+ 0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63, 0xa5, 0xf1,
+ 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d, 0xe2, 0xad, 0xeb, 0xeb,
+ 0x10, 0xa2, 0x98, 0xdd },
+ },
+ {
+ /* RFC4231 Test Case 5 */
+ .tc = 5,
+ .key = key5,
+ .key_len = sizeof (key5),
+ .msg = (u8 *) "Test With Truncation",
+ .msg_len = 20,
+ .digest_224 = { 0x0e, 0x2a, 0xea, 0x68, 0xa9, 0x0c, 0x8d, 0x37, 0xc9, 0x88,
+ 0xbc, 0xdb, 0x9f, 0xca, 0x6f, 0xa8 },
+ .digest_224_len = 16,
+ .digest_256 = { 0xa3, 0xb6, 0x16, 0x74, 0x73, 0x10, 0x0e, 0xe0, 0x6e, 0x0c,
+ 0x79, 0x6c, 0x29, 0x55, 0x55, 0x2b },
+ .digest_256_len = 16,
+ .digest_384 = { 0x3a, 0xbf, 0x34, 0xc3, 0x50, 0x3b, 0x2a, 0x23, 0xa4, 0x6e,
+ 0xfc, 0x61, 0x9b, 0xae, 0xf8, 0x97 },
+ .digest_384_len = 16,
+ .digest_512 = { 0x41, 0x5f, 0xad, 0x62, 0x71, 0x58, 0x0a, 0x53, 0x1d, 0x41,
+ 0x79, 0xbc, 0x89, 0x1d, 0x87, 0xa6 },
+ .digest_512_len = 16,
+ },
+ { /* RFC4231 Test Case 6 */
+ .tc = 6,
+ .key = key6,
+ .key_len = sizeof (key6),
+ .msg = msg6,
+ .msg_len = sizeof (msg6),
+ .digest_224 = { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad, 0xae, 0xbe,
+ 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2, 0xd4, 0x99, 0xf1, 0x12,
+ 0xf2, 0xd2, 0xb7, 0x27, 0x3f, 0xa6, 0x87, 0x0e },
+ .digest_256 = { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f,
+ 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f,
+ 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14,
+ 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 },
+ .digest_384 = { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90, 0x88, 0xd2,
+ 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4, 0x4f, 0x9e, 0xf1, 0x01,
+ 0x2a, 0x2b, 0x58, 0x8f, 0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a,
+ 0xc4, 0xc6, 0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82,
+ 0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 },
+ .digest_512 = { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb, 0xb7, 0x14,
+ 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4, 0x9b, 0x46, 0xd1, 0xf4,
+ 0x1b, 0x4a, 0xee, 0xc1, 0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8,
+ 0xf3, 0x52, 0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98,
+ 0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52, 0x95, 0xe6,
+ 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec, 0x8b, 0x91, 0x5a, 0x98,
+ 0x5d, 0x78, 0x65, 0x98 } },
+ {
+ /* RFC4231 Test Case 7 */
+ .tc = 7,
+ .key = key6,
+ .key_len = sizeof (key6),
+ .msg = msg7,
+ .msg_len = sizeof (msg7) - 1,
+ .digest_224 = { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02, 0x3f, 0x54,
+ 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd, 0x94, 0x67, 0x70, 0xdb,
+ 0x9c, 0x2b, 0x95, 0xc9, 0xf6, 0xf5, 0x65, 0xd1 },
+ .digest_256 = { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb,
+ 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44,
+ 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93,
+ 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 },
+ .digest_384 = { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d, 0x35, 0x1e,
+ 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c, 0x60, 0x24, 0x20, 0xfe,
+ 0xb0, 0xb8, 0xfb, 0x9a, 0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e,
+ 0x99, 0xc5, 0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d,
+ 0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e },
+ .digest_512 = { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba, 0xa4, 0xdf,
+ 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd, 0xde, 0xbd, 0x71, 0xf8,
+ 0x86, 0x72, 0x89, 0x86, 0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd,
+ 0xc9, 0x44, 0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1,
+ 0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15, 0x13, 0x46,
+ 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60, 0x65, 0xc9, 0x74, 0x40,
+ 0xfa, 0x8c, 0x6a, 0x58 },
+ },
+ {}
+};
+#else
+extern const sha2_test_t sha2_tests[];
+#endif
+
+static clib_error_t *
+check_digest (clib_error_t *err, int tc, u8 *calculated, const u8 *expected,
+ u8 len)
+{
+ if (memcmp (expected, calculated, len) != 0)
+ err = clib_error_return (err,
+			     "Bad %u-byte HMAC-SHA2 digest for test case "
+			     "%u:\nExpected:\n%U\nCalculated:\n%U\n",
+			     len, tc, format_hexdump, expected, len,
+ format_hexdump, calculated, len);
+ return err;
+}
+
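+/* The X-macro below stamps out, for each digest width, a functional test
+ * that walks the RFC 4231 vectors (honoring the truncated-output lengths
+ * from test case 5 via digest_*_len) plus a one-shot HMAC perf test whose
+ * key length arrives in arg0. */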
+#define _(bits) \
+ static clib_error_t *test_clib_hmac_sha##bits (clib_error_t *err) \
+ { \
+ u8 digest[64]; \
+ const sha2_test_t *t = sha2_tests; \
+ \
+ while (t->key) \
+ { \
+ u8 digest_len = t->digest_##bits##_len; \
+ if (digest_len == 0) \
+ digest_len = sizeof (t->digest_##bits); \
+ clib_memset_u8 (digest, 0xfe, sizeof (digest)); \
+ clib_hmac_sha##bits (t->key, t->key_len, t->msg, t->msg_len, digest); \
+ if ((err = check_digest (err, t->tc, digest, t->digest_##bits, \
+ digest_len))) \
+ return err; \
+ t++; \
+ } \
+ \
+ return err; \
+ } \
+ \
+ void __test_perf_fn perftest_sha##bits##_byte (test_perf_t *tp) \
+ { \
+ volatile uword *np = &tp->n_ops; \
+ volatile uword *kl = &tp->arg0; \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (*kl, 32, 0); \
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0); \
+ u8 *digest = test_mem_alloc (64); \
+ \
+ test_perf_event_enable (tp); \
+ clib_hmac_sha##bits (key, *kl, data, *np, digest); \
+ test_perf_event_disable (tp); \
+ } \
+ REGISTER_TEST (clib_hmac_sha##bits) = { \
+ .name = "clib_hmac_sha" #bits, \
+ .fn = test_clib_hmac_sha##bits, \
+ .perf_tests = PERF_TESTS ({ .name = "byte", \
+ .n_ops = 16384, \
+ .arg0 = 20, \
+ .fn = perftest_sha##bits##_byte }) \
+ }
+
+_ (224);
+_ (256);
+_ (384);
+_ (512);
+#undef _
diff --git a/src/vppinfra/test/test.c b/src/vppinfra/test/test.c
new file mode 100644
index 00000000000..55c2ae7a11f
--- /dev/null
+++ b/src/vppinfra/test/test.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/error.h>
+
+test_main_t test_main;
+
+int
+test_march_supported (clib_march_variant_type_t type)
+{
+#define _(s, n) \
+ if (CLIB_MARCH_VARIANT_TYPE_##s == type) \
+ return clib_cpu_march_priority_##s ();
+ foreach_march_variant
+#undef _
+ return 0;
+}
+
+clib_error_t *
+test_funct (test_main_t *tm)
+{
+ for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
+ {
+ test_registration_t *r = tm->registrations[i];
+
+ if (r == 0 || test_march_supported (i) < 0)
+ continue;
+
+ fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
+ fformat (stdout,
+ "-------------------------------------------------------\n");
+ while (r)
+ {
+ clib_error_t *err;
+ if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
+ goto next;
+ err = (r->fn) (0);
+ fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS");
+ for (int i = 0; i < vec_len (tm->allocated_mem); i++)
+ clib_mem_free (tm->allocated_mem[i]);
+ vec_free (tm->allocated_mem);
+ if (err)
+ {
+ clib_error_report (err);
+ fformat (stdout, "\n");
+ }
+ next:
+ r = r->next;
+ }
+ }
+
+ fformat (stdout, "\n");
+ return 0;
+}
+
+#if 0
+static u8 *
+format_test_perf_bundle_core_power (u8 *s, va_list *args)
+{
+ test_perf_event_bundle_t __clib_unused *b =
+ va_arg (*args, test_perf_event_bundle_t *);
+ test_perf_t __clib_unused *tp = va_arg (*args, test_perf_t *);
+ u64 *data = va_arg (*args, u64 *);
+
+ if (data)
+ s = format (s, "%7.1f %%", (f64) 100 * data[1] / data[0]);
+ else
+ s = format (s, "%9s", "Level 0");
+
+ if (data)
+ s = format (s, "%8.1f %%", (f64) 100 * data[2] / data[0]);
+ else
+ s = format (s, "%9s", "Level 1");
+
+ if (data)
+ s = format (s, "%7.1f %%", (f64) 100 * data[3] / data[0]);
+ else
+ s = format (s, "%9s", "Level 2");
+
+ return s;
+}
+
+#ifdef __x86_64__
+#define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
+ ,
+ {
+ .name = "core-power",
+ .desc =
+ "Core cycles where the core was running under specific turbo schedule.",
+ .type = PERF_TYPE_RAW,
+ .config[0] = PERF_INTEL_CODE (0x3c, 0x00),
+ .config[1] = PERF_INTEL_CODE (0x28, 0x07),
+ .config[2] = PERF_INTEL_CODE (0x28, 0x18),
+ .config[3] = PERF_INTEL_CODE (0x28, 0x20),
+ .config[4] = PERF_INTEL_CODE (0x28, 0x40),
+ .n_events = 5,
+ .format_fn = format_test_perf_bundle_core_power,
+ }
+#endif
+};
+#endif
+
+#ifdef __linux__
+clib_error_t *
+test_perf (test_main_t *tm)
+{
+ clib_error_t *err = 0;
+ clib_perfmon_ctx_t _ctx, *ctx = &_ctx;
+
+ if ((err = clib_perfmon_init_by_bundle_name (
+ ctx, "%s", tm->bundle ? (char *) tm->bundle : "default")))
+ return err;
+
+ fformat (stdout, "Warming up...\n");
+ clib_perfmon_warmup (ctx);
+
+ for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
+ {
+ test_registration_t *r = tm->registrations[i];
+
+ if (r == 0 || test_march_supported (i) < 0)
+ continue;
+
+ fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
+ fformat (stdout,
+ "-------------------------------------------------------\n");
+ while (r)
+ {
+ if (r->perf_tests)
+ {
+ test_perf_t *pt = r->perf_tests;
+ if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
+ goto next;
+
+ clib_perfmon_capture_group (ctx, "%s", r->name);
+ do
+ {
+ for (int i = 0; i < tm->repeat; i++)
+ {
+ pt->fd = ctx->group_fd;
+ clib_perfmon_reset (ctx);
+ pt->fn (pt);
+ clib_perfmon_capture (ctx, pt->n_ops, "%0s", pt->name);
+ for (int i = 0; i < vec_len (tm->allocated_mem); i++)
+ clib_mem_free (tm->allocated_mem[i]);
+ vec_free (tm->allocated_mem);
+ }
+ }
+ while ((++pt)->fn);
+ }
+ next:
+ r = r->next;
+ }
+ fformat (stdout, "%U\n", format_perfmon_bundle, ctx);
+ clib_perfmon_clear (ctx);
+ }
+
+ clib_perfmon_free (ctx);
+ return err;
+}
+#elif __FreeBSD__
+clib_error_t *
+test_perf (test_main_t *tm)
+{
+ return NULL;
+}
+#endif
+
+int
+main (int argc, char *argv[])
+{
+ test_main_t *tm = &test_main;
+ unformat_input_t _i = {}, *i = &_i;
+ clib_mem_init (0, 64ULL << 20);
+ clib_error_t *err;
+ int perf = 0;
+
+ /* defaults */
+ tm->repeat = 3;
+
+ unformat_init_command_line (i, argv);
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "perf"))
+ perf = 1;
+ else if (unformat (i, "filter %s", &tm->filter))
+ ;
+ else if (unformat (i, "bundle %s", &tm->bundle))
+ ;
+ else if (unformat (i, "repeat %d", &tm->repeat))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ exit (1);
+ }
+ }
+
+ if (perf)
+ err = test_perf (tm);
+ else
+ err = test_funct (tm);
+
+ if (err)
+ {
+ clib_error_report (err);
+ fformat (stderr, "\n");
+ return 1;
+ }
+ return 0;
+}
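+
+/* Example invocations (the binary name below is illustrative; it depends on
+ * how the test application is built):
+ *
+ *   test_infra                      # run all functional tests
+ *   test_infra filter poly1305      # only tests whose name matches
+ *   test_infra perf repeat 5        # perf mode, 5 repetitions per test
+ *   test_infra perf bundle default  # choose the perfmon bundle
+ */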
+
+void *
+test_mem_alloc (uword size)
+{
+ void *rv;
+ size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+ rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset_u8 (rv, 0, size);
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
+void *
+test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask)
+{
+ u8 *rv;
+ mask = mask ? mask : 0xff;
+ size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+ rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ for (uword i = 0; i < size; i++)
+ rv[i] = ((u8) i + start) & mask;
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
+void *
+test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt)
+{
+ u8 *rv, *e;
+ uword data_size = elt_size * n_elts;
+ uword alloc_size = round_pow2 (data_size, CLIB_CACHE_LINE_BYTES);
+ e = rv = clib_mem_alloc_aligned (alloc_size, CLIB_CACHE_LINE_BYTES);
+ while (e - rv < data_size)
+ {
+ clib_memcpy_fast (e, elt, elt_size);
+ e += elt_size;
+ }
+
+ if (data_size < alloc_size)
+ clib_memset_u8 (e, 0, alloc_size - data_size);
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
diff --git a/src/vppinfra/test/test.h b/src/vppinfra/test/test.h
new file mode 100644
index 00000000000..8d756366163
--- /dev/null
+++ b/src/vppinfra/test/test.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_test_test_h
+#define included_test_test_h
+
+#include <vppinfra/cpu.h>
+#include <vppinfra/perfmon/perfmon.h>
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#endif
+
+typedef clib_error_t *(test_fn_t) (clib_error_t *);
+
+struct test_perf_;
+typedef void (test_perf_fn_t) (struct test_perf_ *tp);
+
+typedef struct test_perf_
+{
+ int fd;
+ u64 n_ops;
+ union
+ {
+ u64 arg0;
+ void *ptr0;
+ };
+ union
+ {
+ u64 arg1;
+ void *ptr1;
+ };
+ union
+ {
+ u64 arg2;
+ void *ptr2;
+ };
+ char *name;
+ test_perf_fn_t *fn;
+} test_perf_t;
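+
+/* The anonymous unions above let a perf test receive up to three scalar or
+ * pointer parameters from its registration; e.g. the HMAC tests pass the
+ * key length in arg0. */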
+
+typedef struct test_registration_
+{
+ char *name;
+ u8 multiarch : 1;
+ test_fn_t *fn;
+ test_perf_t *perf_tests;
+ u32 n_perf_tests;
+ struct test_registration_ *next;
+} test_registration_t;
+
+typedef struct
+{
+ test_registration_t *registrations[CLIB_MARCH_TYPE_N_VARIANTS];
+ u32 repeat;
+ u8 *filter;
+ u8 *bundle;
+ f64 ref_clock;
+ void **allocated_mem;
+} test_main_t;
+extern test_main_t test_main;
+
+#define __test_funct_fn \
+ static __clib_noinline __clib_noclone __clib_section (".test_func")
+#define __test_perf_fn \
+ static __clib_noinline __clib_noclone __clib_section (".test_perf")
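+
+/* Both attribute sets keep test bodies out of line and in dedicated ELF
+ * sections: __clib_noinline and __clib_noclone stop the compiler from
+ * folding or specializing the code under test into its caller, so each
+ * march variant is genuinely exercised and timed. */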
+
+#define REGISTER_TEST(x) \
+ test_registration_t CLIB_MARCH_SFX (__test_##x); \
+ static void __clib_constructor CLIB_MARCH_SFX (__test_registration_##x) ( \
+ void) \
+ { \
+ test_registration_t *r = &CLIB_MARCH_SFX (__test_##x); \
+ r->next = \
+ test_main.registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)]; \
+ test_main.registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)] = r; \
+ } \
+ test_registration_t CLIB_MARCH_SFX (__test_##x)
+
+#define PERF_TESTS(...) \
+ (test_perf_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
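+
+/* REGISTER_TEST relies on a constructor that runs before main(): each march
+ * variant of a test file prepends its test_registration_t onto the matching
+ * list in test_main. PERF_TESTS builds a compound-literal array terminated
+ * by a zeroed element, which the harness walks via (++pt)->fn. A minimal,
+ * hypothetical registration (my_test is not a real test) looks like:
+ *
+ *   static clib_error_t *my_test (clib_error_t *err) { return err; }
+ *   REGISTER_TEST (my_test) = { .name = "my_test", .fn = my_test };
+ */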
+
+#ifdef __linux__
+static_always_inline void
+test_perf_event_reset (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_RESET);
+}
+static_always_inline void
+test_perf_event_enable (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_ENABLE);
+}
+static_always_inline void
+test_perf_event_disable (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_DISABLE);
+}
+#elif __FreeBSD__
+static_always_inline void
+test_perf_event_reset (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+test_perf_event_enable (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+test_perf_event_disable (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+#endif
+
+void *test_mem_alloc (uword size);
+void *test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask);
+void *test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt);
+
+#endif
diff --git a/src/vppinfra/test/toeplitz.c b/src/vppinfra/test/toeplitz.c
new file mode 100644
index 00000000000..708fd0e60fc
--- /dev/null
+++ b/src/vppinfra/test/toeplitz.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/toeplitz.h>
+
+/* secret key and test cases taken from:
+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/network/verifying-the-rss-hash-calculation
+ */
+
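+/* For orientation, the hash these vectors exercise can be computed with a
+ * bit-at-a-time reference: a 32-bit window slides across the secret key,
+ * one bit per message bit, and is XORed into the result whenever that
+ * message bit is set. A minimal, unoptimized sketch (assuming the key
+ * buffer is at least n_bytes + 4 bytes long, as the tests below arrange):
+ */
+static __clib_unused u32
+toeplitz_hash_ref (const u8 *key, const u8 *data, u32 n_bytes)
+{
+  u32 hash = 0;
+  u64 v = (u64) key[0] << 24 | key[1] << 16 | key[2] << 8 | key[3];
+
+  for (u32 i = 0; i < n_bytes; i++)
+    {
+      /* low 40 bits of v now hold key bytes i .. i + 4 */
+      v = v << 8 | key[i + 4];
+      for (int b = 7; b >= 0; b--)
+	if (data[i] & (1 << b))
+	  hash ^= (u32) (v >> (b + 1));
+    }
+  return hash;
+}
+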
+typedef struct
+{
+ u32 sip, dip;
+ u16 sport, dport;
+} __clib_packed ip4_key_t;
+
+typedef struct
+{
+ ip4_key_t key;
+ u32 hash_2t, hash_4t;
+} ip4_test_t;
+
+typedef struct
+{
+ u16 sip[8], dip[8];
+ u16 sport, dport;
+} __clib_packed ip6_key_t;
+
+typedef struct
+{
+ ip6_key_t key;
+ u32 hash_2t, hash_4t;
+} ip6_test_t;
+
+#define N_IP4_TESTS 5
+#define N_IP6_TESTS 3
+#define N_LENGTH_TESTS 240
+
+#ifndef CLIB_MARCH_VARIANT
+#define _IP4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define _IP6(a, b, c, d, e, f, g, h) \
+ { \
+ (u16) ((a) << 8) | (u8) ((a) >> 8), (u16) ((b) << 8) | (u8) ((b) >> 8), \
+ (u16) ((c) << 8) | (u8) ((c) >> 8), (u16) ((d) << 8) | (u8) ((d) >> 8), \
+ (u16) ((e) << 8) | (u8) ((e) >> 8), (u16) ((f) << 8) | (u8) ((f) >> 8), \
+ (u16) ((g) << 8) | (u8) ((g) >> 8), (u16) ((h) << 8) | (u8) ((h) >> 8), \
+ }
+#define _PORT(a) ((a) >> 8 | (((a) &0xff) << 8))
+
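+/* As written, these helpers assume a little-endian host: they pre-swap so
+ * the values land in memory in network byte order, as they would sit in a
+ * packet. For example, _PORT (2794) yields 0xea0a, stored as the bytes
+ * 0x0a 0xea, i.e. big-endian 2794. */
+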
+const ip4_test_t ip4_tests[N_IP4_TESTS] = {
+ /* ipv4 tests */
+ {
+ .key.sip = _IP4 (66, 9, 149, 187),
+ .key.dip = _IP4 (161, 142, 100, 80),
+ .key.sport = _PORT (2794),
+ .key.dport = _PORT (1766),
+ .hash_2t = 0x323e8fc2,
+ .hash_4t = 0x51ccc178,
+ },
+ {
+ .key.sip = _IP4 (199, 92, 111, 2),
+ .key.dip = _IP4 (65, 69, 140, 83),
+ .key.sport = _PORT (14230),
+ .key.dport = _PORT (4739),
+ .hash_2t = 0xd718262a,
+ .hash_4t = 0xc626b0ea,
+ },
+ {
+ .key.sip = _IP4 (24, 19, 198, 95),
+ .key.dip = _IP4 (12, 22, 207, 184),
+ .key.sport = _PORT (12898),
+ .key.dport = _PORT (38024),
+ .hash_2t = 0xd2d0a5de,
+ .hash_4t = 0x5c2b394a,
+ },
+ {
+ .key.sip = _IP4 (38, 27, 205, 30),
+ .key.dip = _IP4 (209, 142, 163, 6),
+ .key.sport = _PORT (48228),
+ .key.dport = _PORT (2217),
+ .hash_2t = 0x82989176,
+ .hash_4t = 0xafc7327f,
+ },
+ {
+ .key.sip = _IP4 (153, 39, 163, 191),
+ .key.dip = _IP4 (202, 188, 127, 2),
+ .key.sport = _PORT (44251),
+ .key.dport = _PORT (1303),
+ .hash_2t = 0x5d1809c5,
+ .hash_4t = 0x10e828a2,
+ }
+};
+
+const ip6_test_t ip6_tests[N_IP6_TESTS] = {
+ {
+ .key.sip = _IP6 (0x3ffe, 0x2501, 0x200, 0x1fff, 0, 0, 0, 7),
+ .key.dip = _IP6 (0x3ffe, 0x2501, 0x200, 3, 0, 0, 0, 1),
+ .key.sport = _PORT (2794),
+ .key.dport = _PORT (1766),
+ .hash_2t = 0x2cc18cd5,
+ .hash_4t = 0x40207d3d,
+ },
+ {
+ .key.sip = _IP6 (0x3ffe, 0x501, 8, 0, 0x260, 0x97ff, 0xfe40, 0xefab),
+ .key.dip = _IP6 (0xff02, 0, 0, 0, 0, 0, 0, 1),
+ .key.sport = _PORT (14230),
+ .key.dport = _PORT (4739),
+ .hash_2t = 0x0f0c461c,
+ .hash_4t = 0xdde51bbf,
+ },
+ {
+ .key.sip = _IP6 (0x3ffe, 0x1900, 0x4545, 3, 0x200, 0xf8ff, 0xfe21, 0x67cf),
+ .key.dip = _IP6 (0xfe80, 0, 0, 0, 0x200, 0xf8ff, 0xfe21, 0x67cf),
+ .key.sport = _PORT (44251),
+ .key.dport = _PORT (38024),
+ .hash_2t = 0x4b61e985,
+ .hash_4t = 0x02d1feef,
+ }
+};
+
+const u32 length_test_hashes[N_LENGTH_TESTS] = {
+ 0x00000000, 0x00000000, 0x2b6d12ad, 0x9de4446e, 0x061f00bf, 0xad7ed8f7,
+ 0x4bc7b068, 0x231fc545, 0xdbd97a33, 0xcdab29e7, 0x2d665c0c, 0x31e28ed7,
+ 0x14e19218, 0x5aa89f0f, 0xd47de07f, 0x355ec712, 0x7e1cbfc0, 0xf84de19d,
+ 0xbcf66bd3, 0x104086c6, 0x71900b34, 0xcd2f9819, 0xeae68ebb, 0x54d63b4c,
+ 0x5f865a2c, 0x9d6ded08, 0xe00b0912, 0x3fcf07a6, 0x3bd9ca93, 0x3f4f3bbb,
+ 0xd0b82624, 0xa28a08e1, 0xa585969f, 0x0c8f4a71, 0x5dce7bdd, 0x4fcf2a6d,
+ 0x91c89ae9, 0xbef8a24d, 0x8e3d30fe, 0xc8027848, 0xc1e7e513, 0xa12bd3d9,
+ 0x46700bb4, 0xc6339dab, 0x970805ad, 0xfcb50ac8, 0xc6db4f44, 0x792e2987,
+ 0xacfb7836, 0xa25ec529, 0x957d7beb, 0x6732809a, 0x891836ed, 0xeefb83b2,
+ 0xca96b40b, 0x93fd5abd, 0x9076f922, 0x59adb4eb, 0x9705aafb, 0x282719b1,
+ 0xdda9cb8a, 0x3f499131, 0x47491130, 0x30ef0759, 0xad1cf855, 0x428aa312,
+ 0x4200240a, 0x71a72857, 0x16b30c36, 0x10cca9a3, 0x166f091e, 0x30e00560,
+ 0x8acd20ba, 0xfa633d76, 0x0fe32eb7, 0xdcc0122f, 0x20aa8ab0, 0x62b2a9af,
+ 0x7a6c80a6, 0x27e87268, 0x95b797a8, 0x25d18ccd, 0x68a7fb00, 0xc54bcdad,
+ 0x3bd0e717, 0xf0df54c9, 0x780daadf, 0x7b435605, 0x150c1e10, 0x8a892e54,
+ 0x9d27cb25, 0xe23383a5, 0x57aac408, 0x83b8abf8, 0x560f33af, 0xd5cb3307,
+ 0x79ae8edc, 0x9b127665, 0x320f18bd, 0x385d636b, 0xbd1b2dbf, 0x97679888,
+ 0x738894a4, 0xeba2afb0, 0xfa7c2d50, 0xb6741aa1, 0x28922bba, 0x7783242b,
+ 0xa694cca2, 0xa32781c0, 0x696cd670, 0xa714d72f, 0xea34d35a, 0xc5aed81e,
+ 0x0438433a, 0xc1939ab2, 0xb51c123a, 0x121426b9, 0x1add93ba, 0x50c56b6a,
+ 0x7e90902a, 0xae3abd85, 0x2f7a0088, 0xb45cf6f9, 0x80070094, 0x8bd46467,
+ 0xdfd1b762, 0x0bb25856, 0x48eefe84, 0x0989dbb9, 0xfc32472b, 0x965fec6b,
+ 0x5a256bd0, 0x6df7127a, 0x7856d0d6, 0xedc82bd3, 0x1b563b96, 0xc73eace7,
+ 0xba4c0a93, 0xdfd6dd97, 0x923c41db, 0x14926ca6, 0x22e52ab1, 0x22852a66,
+ 0x79606b9c, 0xb0f22b23, 0xb46354ba, 0x9c3cd931, 0x03a92bd6, 0x84000834,
+ 0x5425df65, 0xf4dd3fc9, 0x391cc873, 0xa560b52e, 0x828037d9, 0x31323dd5,
+ 0x5c6e3147, 0x28e21f85, 0xa431eb51, 0xf468c4a3, 0x9bea1d2e, 0x43d9109c,
+ 0x5bb9b081, 0xe0825675, 0xc9c92591, 0xd29fc812, 0x03136bc9, 0x5e005a1f,
+ 0x6d821ed8, 0x3f0bfcc4, 0x24774162, 0x893bde94, 0x6475efea, 0x6711538e,
+ 0xc4755f6d, 0x9425ebe2, 0xacf471b4, 0xb947ab0c, 0x1f78c455, 0x372b3ed7,
+ 0xb3ec24d7, 0x18c4459f, 0xa8ff3695, 0xe4aa2b85, 0x8a52ad7e, 0xe05e8177,
+ 0x7aa348ed, 0x3e4ac6aa, 0x17dcf8a5, 0x93b933b0, 0x8f7413ec, 0xc77bfe61,
+ 0xfdb72874, 0x4370f138, 0xdf3462ad, 0xc8970a59, 0xb4a9fed8, 0xa2ddc39b,
+ 0xd61db62a, 0x95c5fc1b, 0x7b22e6e0, 0x1969702c, 0x7992aebb, 0x59d7c225,
+ 0x0e16db0b, 0x9f2afc21, 0x246cf66b, 0xb3d6569d, 0x29c532d7, 0xe155747a,
+ 0xe38d7872, 0xea704969, 0xb69095b0, 0x1b198efd, 0x55daab76, 0xa2a377b6,
+ 0xb31aa2fa, 0x48b73c41, 0xf0cc501a, 0x9c9ca831, 0x1b591b99, 0xb2d8d22f,
+ 0xab4b5f69, 0x4fe00e71, 0xdf5480bd, 0x982540d7, 0x7f34ea4f, 0xd7be66e1,
+ 0x9d2ab1ba, 0x1ba62e12, 0xee3fb36c, 0xf28d7c5a, 0x756311eb, 0xc68567f2,
+ 0x7b6ea177, 0xc398d9f3
+};
+
+#else
+extern const ip4_test_t ip4_tests[N_IP4_TESTS];
+extern const ip6_test_t ip6_tests[N_IP6_TESTS];
+extern const u32 length_test_hashes[N_LENGTH_TESTS];
+#endif
+
+__test_funct_fn u32
+wrapper (clib_toeplitz_hash_key_t *k, u8 *data, u32 n_bytes)
+{
+ return clib_toeplitz_hash (k, data, n_bytes);
+}
+
+__test_funct_fn void
+wrapper_x4 (clib_toeplitz_hash_key_t *k, u8 *d0, u8 *d1, u8 *d2, u8 *d3,
+ u32 *h0, u32 *h1, u32 *h2, u32 *h3, u32 n_bytes)
+{
+ clib_toeplitz_hash_x4 (k, d0, d1, d2, d3, h0, h1, h2, h3, n_bytes);
+}
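+
+/* These wrappers put the hash computation behind a non-inlinable boundary
+ * (__test_funct_fn expands to static, noinline, noclone, in .test_func),
+ * so the march-variant code under test is what actually runs rather than
+ * a copy folded into the generic test driver. */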
+
+static clib_error_t *
+test_clib_toeplitz_hash (clib_error_t *err)
+{
+ u32 r;
+ int n_key_copies, bigkey_len, bigdata_len;
+ u8 *bigkey, *bigdata;
+ clib_toeplitz_hash_key_t *k;
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+
+ for (int i = 0; i < N_IP4_TESTS; i++)
+ {
+ r = wrapper (k, (u8 *) &ip4_tests[i].key, 8);
+ if (ip4_tests[i].hash_2t != r)
+ return clib_error_return (err,
+ "wrong IPv4 2 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+				      i, r, ip4_tests[i].hash_2t);
+
+ r = wrapper (k, (u8 *) &ip4_tests[i].key, 12);
+ if (ip4_tests[i].hash_4t != r)
+ return clib_error_return (err,
+ "wrong IPv4 4 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+				      i, r, ip4_tests[i].hash_4t);
+ }
+
+ for (int i = 0; i < N_IP6_TESTS; i++)
+ {
+ r = wrapper (k, (u8 *) &ip6_tests[i].key, 32);
+ if (ip6_tests[i].hash_2t != r)
+ return clib_error_return (err,
+ "wrong IPv6 2 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+				      i, r, ip6_tests[i].hash_2t);
+
+ r = wrapper (k, (u8 *) &ip6_tests[i].key, 36);
+ if (ip6_tests[i].hash_4t != r)
+ return clib_error_return (err,
+ "wrong IPv6 4 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+				      i, r, ip6_tests[i].hash_4t);
+ }
+
+ n_key_copies = 6;
+ bigkey_len = k->key_length * n_key_copies;
+ bigdata_len = bigkey_len - 4;
+ bigkey = test_mem_alloc_and_splat (k->key_length, n_key_copies, k->data);
+ bigdata = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ u32 key_len = k->key_length;
+
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (bigkey, n_key_copies * key_len);
+
+ for (int i = 0; i < N_LENGTH_TESTS - 4; i++)
+ {
+ r = wrapper (k, bigdata, i);
+ if (length_test_hashes[i] != r)
+ {
+ err = clib_error_return (err,
+ "wrong length test hash for length %u, "
+ "calculated 0x%08x expected 0x%08x "
+ "xor 0x%08x",
+ i, r, length_test_hashes[i],
+ r ^ length_test_hashes[i]);
+ goto done;
+ }
+ }
+
+done:
+ clib_toeplitz_hash_key_free (k);
+ return err;
+}
+
+void __test_perf_fn
+perftest_fixed_12byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
+ u8 *res = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ ((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 12, 12);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_fixed_36byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[0].key);
+ u8 *res = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ ((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 36, 36);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_variable_size (test_perf_t *tp)
+{
+ u32 key_len, n_keys, n = tp->n_ops;
+ u8 *key, *data = test_mem_alloc (n);
+ u32 *res = test_mem_alloc (sizeof (u32));
+  clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ key_len = k->key_length;
+ n_keys = ((n + 4) / k->key_length) + 1;
+  key = test_mem_alloc_and_splat (key_len, n_keys, k->data);
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
+
+ test_perf_event_enable (tp);
+ res[0] = clib_toeplitz_hash (k, data, n);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+REGISTER_TEST (clib_toeplitz_hash) = {
+ .name = "clib_toeplitz_hash",
+ .fn = test_clib_toeplitz_hash,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_12byte },
+ { .name = "fixed (per 36 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_36byte },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_variable_size }),
+};
+
+static clib_error_t *
+test_clib_toeplitz_hash_x4 (clib_error_t *err)
+{
+ u32 r[4];
+ int n_key_copies, bigkey_len, bigdata_len;
+ u8 *bigkey, *bigdata0, *bigdata1, *bigdata2, *bigdata3;
+ clib_toeplitz_hash_key_t *k;
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+
+ wrapper_x4 (k, (u8 *) &ip4_tests[0].key, (u8 *) &ip4_tests[1].key,
+ (u8 *) &ip4_tests[2].key, (u8 *) &ip4_tests[3].key, r, r + 1,
+ r + 2, r + 3, 8);
+
+ if (ip4_tests[0].hash_2t != r[0] || ip4_tests[1].hash_2t != r[1] ||
+ ip4_tests[2].hash_2t != r[2] || ip4_tests[3].hash_2t != r[3])
+ return clib_error_return (err,
+ "wrong IPv4 2 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+				  r[0], r[1], r[2], r[3],
+				  ip4_tests[0].hash_2t, ip4_tests[1].hash_2t,
+				  ip4_tests[2].hash_2t, ip4_tests[3].hash_2t);
+
+ wrapper_x4 (k, (u8 *) &ip4_tests[0].key, (u8 *) &ip4_tests[1].key,
+ (u8 *) &ip4_tests[2].key, (u8 *) &ip4_tests[3].key, r, r + 1,
+ r + 2, r + 3, 12);
+
+ if (ip4_tests[0].hash_4t != r[0] || ip4_tests[1].hash_4t != r[1] ||
+ ip4_tests[2].hash_4t != r[2] || ip4_tests[3].hash_4t != r[3])
+ return clib_error_return (err,
+ "wrong IPv4 4 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+				  r[0], r[1], r[2], r[3],
+				  ip4_tests[0].hash_4t, ip4_tests[1].hash_4t,
+				  ip4_tests[2].hash_4t, ip4_tests[3].hash_4t);
+
+ wrapper_x4 (k, (u8 *) &ip6_tests[0].key, (u8 *) &ip6_tests[1].key,
+ (u8 *) &ip6_tests[2].key, (u8 *) &ip6_tests[0].key, r, r + 1,
+ r + 2, r + 3, 32);
+
+ if (ip6_tests[0].hash_2t != r[0] || ip6_tests[1].hash_2t != r[1] ||
+ ip6_tests[2].hash_2t != r[2] || ip6_tests[0].hash_2t != r[3])
+ return clib_error_return (err,
+ "wrong IPv6 2 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+				  r[0], r[1], r[2], r[3],
+				  ip6_tests[0].hash_2t, ip6_tests[1].hash_2t,
+				  ip6_tests[2].hash_2t, ip6_tests[0].hash_2t);
+
+ wrapper_x4 (k, (u8 *) &ip6_tests[0].key, (u8 *) &ip6_tests[1].key,
+ (u8 *) &ip6_tests[2].key, (u8 *) &ip6_tests[0].key, r, r + 1,
+ r + 2, r + 3, 36);
+
+ if (ip6_tests[0].hash_4t != r[0] || ip6_tests[1].hash_4t != r[1] ||
+ ip6_tests[2].hash_4t != r[2] || ip6_tests[0].hash_4t != r[3])
+ return clib_error_return (err,
+ "wrong IPv6 4 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+				  r[0], r[1], r[2], r[3],
+				  ip6_tests[0].hash_4t, ip6_tests[1].hash_4t,
+				  ip6_tests[2].hash_4t, ip6_tests[0].hash_4t);
+
+ n_key_copies = 6;
+ bigkey_len = k->key_length * n_key_copies;
+ bigdata_len = bigkey_len - 4;
+ bigkey = test_mem_alloc_and_splat (k->key_length, n_key_copies, k->data);
+ bigdata0 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata1 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata2 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata3 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ u32 key_len = k->key_length;
+
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (bigkey, n_key_copies * key_len);
+
+ for (int i = 0; i < N_LENGTH_TESTS - 4; i++)
+ {
+ wrapper_x4 (k, bigdata0, bigdata1, bigdata2, bigdata3, r, r + 1, r + 2,
+ r + 3, i);
+ if (length_test_hashes[i] != r[0] || length_test_hashes[i] != r[1] ||
+ length_test_hashes[i] != r[2] || length_test_hashes[i] != r[3])
+ {
+ err = clib_error_return (
+ err,
+ "wrong length test hash x4 for length %u, "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x }, expected 0x%08x",
+ i, r[0], r[1], r[2], r[3], length_test_hashes[i]);
+ goto done;
+ }
+ }
+
+done:
+ clib_toeplitz_hash_key_free (k);
+ return err;
+}
+
+void __test_perf_fn
+perftest_fixed_12byte_x4 (test_perf_t *tp)
+{
+ u32 n = tp->n_ops / 4;
+ u8 *d0 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
+ u8 *d1 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[1].key);
+ u8 *d2 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[2].key);
+ u8 *d3 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[3].key);
+ u32 *h0 = test_mem_alloc (4 * n);
+ u32 *h1 = test_mem_alloc (4 * n);
+ u32 *h2 = test_mem_alloc (4 * n);
+ u32 *h3 = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ clib_toeplitz_hash_x4 (k, d0 + i * 12, d1 + i * 12, d2 + i * 12,
+ d3 + i * 12, h0 + i, h1 + i, h2 + i, h3 + i, 12);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_fixed_36byte_x4 (test_perf_t *tp)
+{
+ u32 n = tp->n_ops / 4;
+  u8 *d0 = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[0].key);
+  u8 *d1 = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[1].key);
+  u8 *d2 = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[2].key);
+  u8 *d3 = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[0].key);
+ u32 *h0 = test_mem_alloc (4 * n);
+ u32 *h1 = test_mem_alloc (4 * n);
+ u32 *h2 = test_mem_alloc (4 * n);
+ u32 *h3 = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ clib_toeplitz_hash_x4 (k, d0 + i * 36, d1 + i * 36, d2 + i * 36,
+ d3 + i * 36, h0 + i, h1 + i, h2 + i, h3 + i, 36);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_variable_size_x4 (test_perf_t *tp)
+{
+ u32 key_len, n_keys, n = tp->n_ops / 4;
+ u8 *key;
+ u8 *d0 = test_mem_alloc (n);
+ u8 *d1 = test_mem_alloc (n);
+ u8 *d2 = test_mem_alloc (n);
+ u8 *d3 = test_mem_alloc (n);
+ u32 *h0 = test_mem_alloc (sizeof (u32));
+ u32 *h1 = test_mem_alloc (sizeof (u32));
+ u32 *h2 = test_mem_alloc (sizeof (u32));
+ u32 *h3 = test_mem_alloc (sizeof (u32));
+  clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ key_len = k->key_length;
+ n_keys = ((n + 4) / k->key_length) + 1;
+  key = test_mem_alloc_and_splat (key_len, n_keys, k->data);
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
+
+ test_perf_event_enable (tp);
+ clib_toeplitz_hash_x4 (k, d0, d1, d2, d3, h0, h1, h2, h3, n);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+REGISTER_TEST (clib_toeplitz_hash_x4) = {
+ .name = "clib_toeplitz_hash_x4",
+ .fn = test_clib_toeplitz_hash_x4,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_12byte_x4 },
+ { .name = "fixed (per 36 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_36byte_x4 },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_variable_size_x4 }),
+};
diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c
index ffed5c73287..17cc05629ae 100644
--- a/src/vppinfra/test_bihash_template.c
+++ b/src/vppinfra/test_bihash_template.c
@@ -247,6 +247,59 @@ test_bihash_threads (test_main_t * tm)
return 0;
}
+static clib_error_t *
+test_bihash_vanilla_overwrite (test_main_t *tm)
+{
+ int i;
+ BVT (clib_bihash) * h;
+ BVT (clib_bihash_kv) kv;
+
+ h = &tm->hash;
+
+#if BIHASH_32_64_SVM
+ BV (clib_bihash_initiator_init_svm)
+ (h, "test", tm->nbuckets, 0x30000000 /* base_addr */, tm->hash_memory_size);
+#else
+ BV (clib_bihash_init) (h, "test", tm->nbuckets, tm->hash_memory_size);
+#endif
+
+ for (i = 0; i < 100; i++)
+ {
+ kv.key = 12345;
+ kv.value = i;
+
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */);
+ }
+
+  fformat (stdout, "End of run, should be one item...\n");
+ fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */);
+ BV (clib_bihash_free) (h);
+ return 0;
+}
+
+static clib_error_t *
+test_bihash_value_assert (test_main_t *tm)
+{
+ BVT (clib_bihash) * h;
+ BVT (clib_bihash_kv) kv;
+
+ h = &tm->hash;
+
+#if BIHASH_32_64_SVM
+ BV (clib_bihash_initiator_init_svm)
+ (h, "test", tm->nbuckets, 0x30000000 /* base_addr */, tm->hash_memory_size);
+#else
+ BV (clib_bihash_init) (h, "test", tm->nbuckets, tm->hash_memory_size);
+#endif
+
+ kv.key = 12345;
+ kv.value = 0xFEEDFACE8BADF00DULL;
+
+ fformat (stderr, "The following add should ASSERT...\n");
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */);
+
+ return 0;
+}
static clib_error_t *
test_bihash (test_main_t * tm)
@@ -338,7 +391,7 @@ test_bihash (test_main_t * tm)
for (i = 0; i < tm->nitems; i++)
{
/* Prefetch buckets 8 iterations ahead */
- if (1 && (i < (tm->nitems - 8)))
+ if (1 && (i < ((i64) tm->nitems - 8)))
{
BVT (clib_bihash_kv) pref_kv;
u64 pref_hash;
@@ -422,7 +475,7 @@ test_bihash (test_main_t * tm)
for (j = 0; j < tm->nitems; j++)
{
/* Prefetch buckets 8 iterations ahead */
- if (1 && (j < (tm->nitems - 8)))
+ if (1 && (j < ((i64) tm->nitems - 8)))
{
BVT (clib_bihash_kv) pref_kv;
u64 pref_hash;
@@ -514,6 +567,10 @@ test_bihash_main (test_main_t * tm)
tm->verbose = 1;
else if (unformat (i, "stale-overwrite"))
which = 3;
+ else if (unformat (i, "overwrite"))
+ which = 4;
+ else if (unformat (i, "value-assert"))
+ which = 5;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, i);
@@ -522,8 +579,7 @@ test_bihash_main (test_main_t * tm)
/* Preallocate hash table, key vector */
tm->key_hash = hash_create (tm->nitems, sizeof (uword));
vec_validate (tm->keys, tm->nitems - 1);
- _vec_len (tm->keys) = 0;
-
+ vec_set_len (tm->keys, 0);
switch (which)
{
@@ -543,6 +599,14 @@ test_bihash_main (test_main_t * tm)
error = test_bihash_stale_overwrite (tm);
break;
+ case 4:
+ error = test_bihash_vanilla_overwrite (tm);
+ break;
+
+ case 5:
+ error = test_bihash_value_assert (tm);
+ break;
+
default:
return clib_error_return (0, "no such test?");
}
diff --git a/src/vppinfra/test_fifo.c b/src/vppinfra/test_fifo.c
index 45392bc35eb..2d3cad33119 100644
--- a/src/vppinfra/test_fifo.c
+++ b/src/vppinfra/test_fifo.c
@@ -105,12 +105,10 @@ test_fifo_main (unformat_input_t * input)
ASSERT (clib_fifo_elts (as) == n_added - n_removed);
j = 0;
- /* *INDENT-OFF* */
clib_fifo_foreach (a, as, {
ASSERT (A_is_valid (a, n_removed + j));
j++;
});
- /* *INDENT-ON* */
ASSERT (j == clib_fifo_elts (as));
}
diff --git a/src/vppinfra/test_fpool.c b/src/vppinfra/test_fpool.c
index e2d67f16907..02d9d219717 100644
--- a/src/vppinfra/test_fpool.c
+++ b/src/vppinfra/test_fpool.c
@@ -30,7 +30,7 @@ main (int argc, char *argv[])
clib_mem_init (0, 3ULL << 30);
vec_validate (indices, NELTS - 1);
- _vec_len (indices) = 0;
+ vec_set_len (indices, 0);
pool_init_fixed (tp, NELTS);
diff --git a/src/vppinfra/test_hash.c b/src/vppinfra/test_hash.c
index 95ced448c13..25adff3443b 100644
--- a/src/vppinfra/test_hash.c
+++ b/src/vppinfra/test_hash.c
@@ -86,14 +86,12 @@ hash_next_test (word * h)
hash_pair_t *p0, *p1;
clib_error_t *error = 0;
- /* *INDENT-OFF* */
hash_foreach_pair (p0, h, {
p1 = hash_next (h, &hn);
error = CLIB_ERROR_ASSERT (p0 == p1);
if (error)
break;
});
- /* *INDENT-ON* */
if (!error)
error = CLIB_ERROR_ASSERT (!hash_next (h, &hn));
@@ -176,12 +174,10 @@ test_word_key (hash_test_t * ht)
hash_pair_t *p;
uword ki;
- /* *INDENT-OFF* */
hash_foreach_pair (p, h, {
ki = p->value[0];
ASSERT (keys[ki] == p->key);
});
- /* *INDENT-ON* */
}
if ((error = hash_validate (h)))
diff --git a/src/vppinfra/test_heap.c b/src/vppinfra/test_heap.c
index 0fd6bf74245..da3ad24a820 100644
--- a/src/vppinfra/test_heap.c
+++ b/src/vppinfra/test_heap.c
@@ -61,14 +61,13 @@ main (int argc, char *argv[])
uword *objects = 0;
uword *handles = 0;
uword objects_used;
- uword align, fixed_size;
+ uword align;
clib_mem_init (0, 10 << 20);
n = 10;
seed = (u32) getpid ();
check_mask = 0;
- fixed_size = 0;
if (argc > 1)
{
@@ -100,13 +99,6 @@ main (int argc, char *argv[])
objects_used = 0;
- if (fixed_size)
- {
- uword max_len = 1024 * 1024;
- void *memory = clib_mem_alloc (max_len * sizeof (h[0]));
- h = heap_create_from_memory (memory, max_len, sizeof (h[0]));
- }
-
for (i = 0; i < n; i++)
{
while (1)
@@ -188,9 +180,6 @@ main (int argc, char *argv[])
vec_free (objects);
vec_free (handles);
- if (fixed_size)
- vec_free_h (h, sizeof (heap_header_t));
-
if (verbose)
fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
diff --git a/src/vppinfra/test_interrupt.c b/src/vppinfra/test_interrupt.c
new file mode 100644
index 00000000000..133692d1bd0
--- /dev/null
+++ b/src/vppinfra/test_interrupt.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Graphiant, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+#include <vppinfra/interrupt.h>
+
+#define MAX_INTS 2048
+
+int debug = 0;
+
+#define debug(format, args...) \
+ if (debug) \
+ { \
+ fformat (stdout, format, ##args); \
+ }
+
+void
+set_and_check_bits (void *interrupts, int num_ints)
+{
+ for (int step = 1; step < num_ints; step++)
+ {
+ int int_num = -1;
+ int expected = 0;
+
+ debug (" Step of %d\n", step);
+ for (int i = 0; i < num_ints; i += step)
+ {
+ debug (" Setting %d\n", i);
+ clib_interrupt_set (interrupts, i);
+ }
+
+ while ((int_num =
+ clib_interrupt_get_next_and_clear (interrupts, int_num)) != -1)
+ {
+ debug (" Got %d, expecting %d\n", int_num, expected);
+ ASSERT (int_num == expected);
+ expected += step;
+ }
+ int_num = clib_interrupt_get_next_and_clear (interrupts, -1);
+ ASSERT (int_num == -1);
+ }
+}
+
+int
+main (int argc, char *argv[])
+{
+ clib_mem_init (0, 3ULL << 30);
+
+ debug = (argc > 1);
+
+ void *interrupts = NULL;
+
+ for (int num_ints = 0; num_ints < MAX_INTS; num_ints++)
+ {
+ clib_interrupt_resize (&interrupts, num_ints);
+ debug ("Size now %d\n", num_ints);
+
+ set_and_check_bits (interrupts, num_ints);
+ }
+
+ return 0;
+}
diff --git a/src/vppinfra/test_longjmp.c b/src/vppinfra/test_longjmp.c
index 01debe2ac37..50dc24b48b0 100644
--- a/src/vppinfra/test_longjmp.c
+++ b/src/vppinfra/test_longjmp.c
@@ -82,27 +82,25 @@ test_longjmp_main (unformat_input_t * input)
static uword
f3 (uword arg)
{
- uword i, j, array[10];
-
- for (i = 0; i < ARRAY_LEN (array); i++)
- array[i] = arg + i;
-
- j = 0;
- for (i = 0; i < ARRAY_LEN (array); i++)
- j ^= array[i];
-
- return j;
+ return (uword) __builtin_frame_address (0);
}
static void
test_calljmp (unformat_input_t * input)
{
- static u8 stack[32 * 1024] __attribute__ ((aligned (16)));
- uword v;
+ u8 stack[4096] __attribute__ ((aligned (16))) = {};
+ uword start, end, v;
+
+ start = pointer_to_uword (stack);
+ end = start + ARRAY_LEN (stack);
+
+ v = f3 (0);
+ if (!(v < start || v > end))
+ clib_panic ("something went wrong in the calljmp test");
v = clib_calljmp (f3, 0, stack + sizeof (stack));
- ASSERT (v == f3 (0));
- if_verbose ("calljump ok");
+ if_verbose ("calljump %s",
+ v >= start && v < (end - sizeof (uword)) ? "ok" : "fail");
}
#ifdef CLIB_UNIX
diff --git a/src/vppinfra/test_mhash.c b/src/vppinfra/test_mhash.c
new file mode 100644
index 00000000000..70be2b9b382
--- /dev/null
+++ b/src/vppinfra/test_mhash.c
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Yandex LLC.
+ */
+
+#ifdef CLIB_LINUX_KERNEL
+#include <linux/unistd.h>
+#endif
+
+#ifdef CLIB_UNIX
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <vppinfra/time.h>
+#endif
+
+#include <vppinfra/random.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/bitmap.h>
+
+static int verbose;
+#define if_verbose(format, args...) \
+ if (verbose) \
+ { \
+ clib_warning (format, ##args); \
+ }
+
+typedef struct
+{
+ int n_iterations;
+
+ int n_iterations_per_print;
+
+ /* Number of pairs to insert into mhash. */
+ int n_pairs;
+
+ /* True to validate correctness of mhash functions. */
+ int n_iterations_per_validate;
+
+ /* Verbosity level for mhash formats. */
+ int verbose;
+
+ /* Random number seed. */
+ u32 seed;
+} mhash_test_t;
+
+static clib_error_t *
+mhash_next_test (mhash_t *h)
+{
+ hash_next_t hn = { 0 };
+ hash_pair_t *p0, *p1;
+ clib_error_t *error = 0;
+
+ hash_foreach_pair (p0, h->hash, {
+ p1 = hash_next (h->hash, &hn);
+ error = CLIB_ERROR_ASSERT (p0 == p1);
+ if (error)
+ break;
+ });
+
+ if (!error)
+ error = CLIB_ERROR_ASSERT (!hash_next (h->hash, &hn));
+
+ return error;
+}
+
+static clib_error_t *
+test_word_key (mhash_test_t *ht)
+{
+ mhash_t _h = { 0 }, *h = &_h;
+ word i, j;
+
+ word *keys = 0, *vals = 0;
+ uword *is_inserted = 0;
+
+ clib_error_t *error = 0;
+
+ vec_resize (keys, ht->n_pairs);
+ vec_resize (vals, vec_len (keys));
+
+ mhash_init (h, sizeof (vals[0]), sizeof (keys[0]));
+ /* borrow 0 elt to make index keys non-zero */
+ vec_validate (h->key_vector_or_heap, 0);
+
+ {
+ uword *unique = 0;
+ u32 k;
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ do
+ {
+ k = random_u32 (&ht->seed) & 0xfffff;
+ }
+ while (clib_bitmap_get (unique, k));
+ unique = clib_bitmap_ori (unique, k);
+ keys[i] = k;
+ vals[i] = i;
+ }
+
+ clib_bitmap_free (unique);
+ }
+
+ for (i = 0; i < ht->n_iterations; i++)
+ {
+ u32 vi = random_u32 (&ht->seed) % vec_len (keys);
+
+ if (clib_bitmap_get (is_inserted, vi))
+ {
+ mhash_unset (h, &keys[vi], 0);
+ mhash_unset (h, &keys[vi], 0);
+ }
+ else
+ {
+ mhash_set (h, &keys[vi], vals[vi], 0);
+ mhash_set (h, &keys[vi], vals[vi], 0);
+ }
+
+ is_inserted = clib_bitmap_xori (is_inserted, vi);
+
+ if (ht->n_iterations_per_print > 0 &&
+ ((i + 1) % ht->n_iterations_per_print) == 0)
+ if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose);
+
+ if (ht->n_iterations_per_validate == 0 ||
+ (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ {
+ uword ki, *k, *v;
+
+ mhash_foreach (k, v, h, {
+ ki = v[0];
+ ASSERT (keys[ki] == k[0]);
+ });
+ }
+
+ if ((error = hash_validate (h->hash)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = mhash_get (h, &keys[j]);
+ if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+ if ((error = mhash_next_test (h)))
+ goto done;
+
+ if_verbose ("%U", format_mhash, h, ht->verbose);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ if (!clib_bitmap_get (is_inserted, i))
+ continue;
+
+ mhash_unset (h, &keys[i], 0);
+ mhash_unset (h, &keys[i], 0);
+ is_inserted = clib_bitmap_xori (is_inserted, i);
+
+ if (ht->n_iterations_per_validate == 0 ||
+ (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ if ((error = hash_validate (h->hash)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = mhash_get (h, &keys[j]);
+ if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+done:
+ mhash_free (h);
+ vec_free (keys);
+ vec_free (vals);
+ clib_bitmap_free (is_inserted);
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ return error;
+}
+
+static u8 *
+test2_format (u8 *s, va_list *args)
+{
+ void *CLIB_UNUSED (user_arg) = va_arg (*args, void *);
+ void *v = va_arg (*args, void *);
+ hash_pair_t *p = va_arg (*args, hash_pair_t *);
+ hash_t *h = hash_header (v);
+ mhash_t *mh = uword_to_pointer (h->user, mhash_t *);
+
+ return format (s, "0x%8U <- %U", format_hex_bytes, &p->value[0],
+ hash_value_bytes (h), format_mhash_key, mh, (u32) p->key);
+}
+
+static clib_error_t *
+test_string_key (mhash_test_t *ht, uword is_c_string)
+{
+ mhash_t _h = { 0 }, *h = &_h;
+ word i, j;
+
+ u8 **keys = 0;
+ word *vals = 0;
+ uword *is_inserted = 0;
+
+ clib_error_t *error = 0;
+
+ vec_resize (keys, ht->n_pairs);
+ vec_resize (vals, vec_len (keys));
+
+ if (is_c_string)
+ mhash_init_c_string (h, sizeof (vals[0]));
+ else
+ mhash_init_vec_string (h, sizeof (vals[0]));
+ hash_set_pair_format (h->hash, test2_format, 0);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ keys[i] = random_string (&ht->seed, 5 + (random_u32 (&ht->seed) & 0xf));
+ keys[i] = format (keys[i], "%x", i);
+ if (is_c_string)
+ vec_terminate_c_string (keys[i]);
+ vals[i] = random_u32 (&ht->seed);
+ }
+
+ for (i = 0; i < ht->n_iterations; i++)
+ {
+ u32 vi = random_u32 (&ht->seed) % vec_len (keys);
+
+ if (clib_bitmap_get (is_inserted, vi))
+ {
+ mhash_unset (h, keys[vi], 0);
+ mhash_unset (h, keys[vi], 0);
+ }
+ else
+ {
+ mhash_set (h, keys[vi], vals[vi], 0);
+ mhash_set (h, keys[vi], vals[vi], 0);
+ }
+
+ is_inserted = clib_bitmap_xori (is_inserted, vi);
+
+ if (ht->n_iterations_per_print > 0 &&
+ ((i + 1) % ht->n_iterations_per_print) == 0)
+ if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose);
+
+ if (ht->n_iterations_per_validate == 0 ||
+ (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ if ((error = hash_validate (h->hash)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = mhash_get (h, keys[j]);
+ if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+ if ((error = mhash_next_test (h)))
+ goto done;
+
+ if_verbose ("%U", format_mhash, h, ht->verbose);
+
+ for (i = 0; i < vec_len (keys); i++)
+ {
+ if (!clib_bitmap_get (is_inserted, i))
+ continue;
+
+ mhash_unset (h, keys[i], 0);
+ mhash_unset (h, keys[i], 0);
+ is_inserted = clib_bitmap_xori (is_inserted, i);
+
+ if (ht->n_iterations_per_validate == 0 ||
+ (i + 1) % ht->n_iterations_per_validate)
+ continue;
+
+ if ((error = hash_validate (h->hash)))
+ goto done;
+
+ for (j = 0; j < vec_len (keys); j++)
+ {
+ uword *v;
+ v = mhash_get (h, keys[j]);
+ if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) ==
+ (v != 0))))
+ goto done;
+ if (v)
+ {
+ if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j])))
+ goto done;
+ }
+ }
+ }
+
+done:
+ mhash_free (h);
+ vec_free (vals);
+ clib_bitmap_free (is_inserted);
+
+ for (i = 0; i < vec_len (keys); i++)
+ vec_free (keys[i]);
+ vec_free (keys);
+
+ if (verbose)
+ fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
+
+ return error;
+}
+
+int
+test_mhash_main (unformat_input_t *input)
+{
+ mhash_test_t _ht = { 0 }, *ht = &_ht;
+ clib_error_t *error;
+
+ ht->n_iterations = 100;
+ ht->n_pairs = 10;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (0 == unformat (input, "iter %d", &ht->n_iterations) &&
+ 0 == unformat (input, "print %d", &ht->n_iterations_per_print) &&
+ 0 == unformat (input, "elts %d", &ht->n_pairs) &&
+ 0 == unformat (input, "seed %d", &ht->seed) &&
+ 0 == unformat (input, "verbose %=", &ht->verbose, 1) &&
+ 0 == unformat (input, "valid %d", &ht->n_iterations_per_validate))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, input);
+ return 1;
+ }
+ }
+
+ if (!ht->seed)
+ ht->seed = random_default_seed ();
+
+ if_verbose ("testing %d iterations, seed %d", ht->n_iterations, ht->seed);
+
+ error = test_word_key (ht);
+ if (error)
+ clib_error_report (error);
+
+ error = test_string_key (ht, 0);
+ if (error)
+ clib_error_report (error);
+
+ error = test_string_key (ht, 1);
+ if (error)
+ clib_error_report (error);
+
+ return 0;
+}
+
+#ifdef CLIB_UNIX
+int
+main (int argc, char *argv[])
+{
+ unformat_input_t i;
+ int ret;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ verbose = (argc > 1);
+ unformat_init_command_line (&i, argv);
+ ret = test_mhash_main (&i);
+ unformat_free (&i);
+
+ return ret;
+}
+#endif /* CLIB_UNIX */
diff --git a/src/vppinfra/test_mheap.c b/src/vppinfra/test_mheap.c
deleted file mode 100644
index ae0c58a6a74..00000000000
--- a/src/vppinfra/test_mheap.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef CLIB_LINUX_KERNEL
-#include <linux/unistd.h>
-#endif
-
-#ifdef CLIB_UNIX
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h> /* scanf */
-#endif
-
-#include <vppinfra/format.h>
-#include <vppinfra/random.h>
-#include <vppinfra/time.h>
-
-static int verbose = 0;
-#define if_verbose(format,args...) \
- if (verbose) { clib_warning(format, ## args); }
-
-int
-test1 (void)
-{
- clib_time_t clib_time;
- void *h_mem = clib_mem_alloc (2ULL << 30);
- void *h;
- uword *objects = 0;
- int i;
- f64 before, after;
-
- clib_time_init (&clib_time);
-
- vec_validate (objects, 2000000 - 1);
-
- h = mheap_alloc (h_mem, (uword) (2 << 30));
-
- before = clib_time_now (&clib_time);
-
- for (i = 0; i < vec_len (objects); i++)
- {
- h = mheap_get_aligned (h, 24 /* size */ ,
- 64 /* align */ ,
- 16 /* align at offset */ , &objects[i]);
- }
-
- after = clib_time_now (&clib_time);
-
- fformat (stdout, "alloc: %u objects in %.2f seconds, %.2f objects/second\n",
- vec_len (objects), (after - before),
- ((f64) vec_len (objects)) / (after - before));
-
- return 0;
-}
-
-
-int
-test_mheap_main (unformat_input_t * input)
-{
- int i, j, k, n_iterations;
- void *h, *h_mem;
- uword *objects = 0;
- u32 objects_used, really_verbose, n_objects, max_object_size;
- u32 check_mask, seed, trace, use_vm;
- u32 print_every = 0;
- u32 *data;
- mheap_t *mh;
-
- /* Validation flags. */
- check_mask = 0;
-#define CHECK_VALIDITY 1
-#define CHECK_DATA 2
-#define CHECK_ALIGN 4
-#define TEST1 8
-
- n_iterations = 10;
- seed = 0;
- max_object_size = 100;
- n_objects = 1000;
- trace = 0;
- really_verbose = 0;
- use_vm = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (0 == unformat (input, "iter %d", &n_iterations)
- && 0 == unformat (input, "count %d", &n_objects)
- && 0 == unformat (input, "size %d", &max_object_size)
- && 0 == unformat (input, "seed %d", &seed)
- && 0 == unformat (input, "print %d", &print_every)
- && 0 == unformat (input, "validdata %|",
- &check_mask, CHECK_DATA | CHECK_VALIDITY)
- && 0 == unformat (input, "valid %|",
- &check_mask, CHECK_VALIDITY)
- && 0 == unformat (input, "verbose %=", &really_verbose, 1)
- && 0 == unformat (input, "trace %=", &trace, 1)
- && 0 == unformat (input, "vm %=", &use_vm, 1)
- && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN)
- && 0 == unformat (input, "test1 %|", &check_mask, TEST1))
- {
- clib_warning ("unknown input `%U'", format_unformat_error, input);
- return 1;
- }
- }
-
- /* Zero seed means use default. */
- if (!seed)
- seed = random_default_seed ();
-
- if (check_mask & TEST1)
- {
- return test1 ();
- }
-
- if_verbose
- ("testing %d iterations, %d %saligned objects, max. size %d, seed %d",
- n_iterations, n_objects, (check_mask & CHECK_ALIGN) ? "randomly " : "un",
- max_object_size, seed);
-
- vec_resize (objects, n_objects);
- if (vec_bytes (objects) > 0) /* stupid warning be gone */
- clib_memset (objects, ~0, vec_bytes (objects));
- objects_used = 0;
-
- /* Allocate initial heap. */
- {
- uword size =
- max_pow2 (2 * n_objects * max_object_size * sizeof (data[0]));
-
- h_mem = clib_mem_alloc (size);
- if (!h_mem)
- return 0;
-
- h = mheap_alloc (h_mem, size);
- }
-
- if (trace)
- mheap_trace (h, trace);
-
- mh = mheap_header (h);
-
- if (use_vm)
- mh->flags &= ~MHEAP_FLAG_DISABLE_VM;
- else
- mh->flags |= MHEAP_FLAG_DISABLE_VM;
-
- if (check_mask & CHECK_VALIDITY)
- mh->flags |= MHEAP_FLAG_VALIDATE;
-
- for (i = 0; i < n_iterations; i++)
- {
- while (1)
- {
- j = random_u32 (&seed) % vec_len (objects);
- if (objects[j] != ~0 || i + objects_used < n_iterations)
- break;
- }
-
- if (objects[j] != ~0)
- {
- mheap_put (h, objects[j]);
- objects_used--;
- objects[j] = ~0;
- }
- else
- {
- uword size, align, align_offset;
-
- size = (random_u32 (&seed) % max_object_size) * sizeof (data[0]);
- align = align_offset = 0;
- if (check_mask & CHECK_ALIGN)
- {
- align = 1 << (random_u32 (&seed) % 10);
- align_offset = round_pow2 (random_u32 (&seed) & (align - 1),
- sizeof (u32));
- }
-
- h = mheap_get_aligned (h, size, align, align_offset, &objects[j]);
-
- if (align > 0)
- ASSERT (0 == ((objects[j] + align_offset) & (align - 1)));
-
- ASSERT (objects[j] != ~0);
- objects_used++;
-
- /* Set newly allocated object with test data. */
- if (check_mask & CHECK_DATA)
- {
- uword len;
-
- data = (void *) h + objects[j];
- len = mheap_len (h, data);
-
- ASSERT (size <= mheap_data_bytes (h, objects[j]));
-
- data[0] = len;
- for (k = 1; k < len; k++)
- data[k] = objects[j] + k;
- }
- }
-
- /* Verify that all used objects have correct test data. */
- if (check_mask & 2)
- {
- for (j = 0; j < vec_len (objects); j++)
- if (objects[j] != ~0)
- {
- u32 *data = h + objects[j];
- uword len = data[0];
- for (k = 1; k < len; k++)
- ASSERT (data[k] == objects[j] + k);
- }
- }
- if (print_every != 0 && i > 0 && (i % print_every) == 0)
- fformat (stderr, "iteration %d: %U\n", i, format_mheap, h,
- really_verbose);
- }
-
- if (verbose)
- fformat (stderr, "%U\n", format_mheap, h, really_verbose);
- mheap_free (h);
- clib_mem_free (h_mem);
- vec_free (objects);
-
- return 0;
-}
-
-#ifdef CLIB_UNIX
-int
-main (int argc, char *argv[])
-{
- unformat_input_t i;
- int ret;
-
- clib_mem_init (0, 3ULL << 30);
-
- verbose = (argc > 1);
- unformat_init_command_line (&i, argv);
- ret = test_mheap_main (&i);
- unformat_free (&i);
-
- return ret;
-}
-#endif /* CLIB_UNIX */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/test_pool_alloc.c b/src/vppinfra/test_pool_alloc.c
new file mode 100644
index 00000000000..57b78b8ad9e
--- /dev/null
+++ b/src/vppinfra/test_pool_alloc.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Yandex LLC.
+ */
+
+#include <vppinfra/pool.h>
+
+/* number of pool elements; can be made very large for stress testing */
+#define NELTS 1024
+
+int
+main (int argc, char *argv[])
+{
+ u32 *junk = 0;
+ int i;
+ u32 *tp = 0;
+ u32 *indices = 0;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ vec_validate (indices, NELTS - 1);
+ vec_set_len (indices, 0);
+
+ /* zero size allocation is ok */
+ pool_alloc (tp, 0);
+
+ fformat (stdout, "%d pool elts of empty pool\n", pool_elts (tp));
+
+ pool_validate (tp);
+
+ pool_alloc (tp, NELTS);
+
+ for (i = 0; i < NELTS; i++)
+ {
+ pool_get (tp, junk);
+ vec_add1 (indices, junk - tp);
+ *junk = i;
+ }
+
+ for (i = 0; i < NELTS; i++)
+ {
+ junk = pool_elt_at_index (tp, indices[i]);
+ ASSERT (*junk == i);
+ }
+
+ fformat (stdout, "%d pool elts before deletes\n", pool_elts (tp));
+
+ pool_put_index (tp, indices[12]);
+ pool_put_index (tp, indices[43]);
+
+ fformat (stdout, "%d pool elts after deletes\n", pool_elts (tp));
+
+ pool_validate (tp);
+
+ pool_free (tp);
+ return 0;
+}
diff --git a/src/vppinfra/test_pool_iterate.c b/src/vppinfra/test_pool_iterate.c
index bcbd235ba71..fc4be6d6fe1 100644
--- a/src/vppinfra/test_pool_iterate.c
+++ b/src/vppinfra/test_pool_iterate.c
@@ -77,7 +77,6 @@ main (int argc, char *argv[])
}
while (next != ~0);
- /* *INDENT-OFF* */
pool_foreach (junk, tp)
{
int is_free;
@@ -94,7 +93,6 @@ main (int argc, char *argv[])
clib_warning ("oops, busy index %d reported free", i);
}
}
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/vppinfra/test_serialize.c b/src/vppinfra/test_serialize.c
index 5c931b76023..0dcff031364 100644
--- a/src/vppinfra/test_serialize.c
+++ b/src/vppinfra/test_serialize.c
@@ -136,6 +136,46 @@ typedef struct
serialize_main_t unserialize_main;
} test_serialize_main_t;
+u8 *test_pattern;
+
+int
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+void
+test_serialize_not_inline_double_vector_expand (void)
+{
+ serialize_main_t _m, *m = &_m;
+ u8 *serialized = 0;
+ u64 *magic;
+ void *p;
+ int i;
+
+ vec_validate (test_pattern, 1023);
+
+ for (i = 0; i < vec_len (test_pattern); i++)
+ test_pattern[i] = i & 0xff;
+
+ serialize_open_vector (m, serialized);
+ p = serialize_get (m, 61);
+ clib_memcpy_fast (p, test_pattern, 61);
+ serialize_integer (m, 0xDEADBEEFFEEDFACEULL, 8);
+ p = serialize_get (m, vec_len (test_pattern) - 62);
+ clib_memcpy_fast (p, test_pattern + 61, vec_len (test_pattern) - 62);
+ serialized = serialize_close_vector (m);
+
+ magic = (u64 *) (serialized + 61);
+
+ if (*magic != clib_net_to_host_u64 (0xDEADBEEFFEEDFACEULL))
+ {
+ fformat (stderr, "BUG!\n");
+ exit (1);
+ }
+ return;
+}
+
int
test_serialize_main (unformat_input_t * input)
{
@@ -168,6 +208,12 @@ test_serialize_main (unformat_input_t * input)
;
else if (unformat (input, "verbose %=", &tm->verbose, 1))
;
+ else if (unformat (input, "double-expand"))
+ {
+ test_serialize_not_inline_double_vector_expand ();
+ clib_warning ("serialize_not_inline double vector expand OK");
+ exit (0);
+ }
else
{
error = clib_error_create ("unknown input `%U'\n",
diff --git a/src/vppinfra/test_socket.c b/src/vppinfra/test_socket.c
index ea0ae658943..3a0e6b29ce6 100644
--- a/src/vppinfra/test_socket.c
+++ b/src/vppinfra/test_socket.c
@@ -99,7 +99,7 @@ test_socket_main (unformat_input_t * input)
break;
if_verbose ("%v", s->rx_buffer);
- _vec_len (s->rx_buffer) = 0;
+ vec_set_len (s->rx_buffer, 0);
}
error = clib_socket_close (s);
diff --git a/src/vppinfra/test_tw_timer.c b/src/vppinfra/test_tw_timer.c
index 47e5e49bf1f..e9f4251a7b1 100644
--- a/src/vppinfra/test_tw_timer.c
+++ b/src/vppinfra/test_tw_timer.c
@@ -316,7 +316,6 @@ test2_single (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_2t_1w_2048sl (&tm->single_wheel, e->stop_timer_handle);
@@ -324,7 +323,6 @@ test2_single (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -374,14 +372,12 @@ test2_single (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel);
@@ -455,7 +451,6 @@ test2_double (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_16t_2w_512sl (&tm->double_wheel, e->stop_timer_handle);
@@ -463,7 +458,6 @@ test2_double (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -512,14 +506,12 @@ test2_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -590,7 +582,6 @@ test2_double_updates (tw_timer_test_main_t * tm)
j = 0;
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
expiration_time = get_expiration_time (tm);
@@ -602,7 +593,6 @@ test2_double_updates (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto done;
}
- /* *INDENT-ON* */
done:
updates += j;
@@ -623,14 +613,12 @@ test2_double_updates (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -706,7 +694,6 @@ test2_triple (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_4t_3w_256sl (&tm->triple_wheel, e->stop_timer_handle);
@@ -714,7 +701,6 @@ test2_triple (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -763,14 +749,12 @@ test2_triple (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
@@ -846,7 +830,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_1t_3w_1024sl_ov (&tm->triple_ov_wheel,
@@ -855,7 +838,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -904,7 +886,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
TWT (tw_timer) * t;
@@ -915,7 +896,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
t = pool_elt_at_index (tm->triple_ov_wheel.timers, e->stop_timer_handle);
fformat (stdout, " expiration_time %lld\n", t->expiration_time);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_1t_3w_1024sl_ov (&tm->triple_ov_wheel);
@@ -972,14 +952,12 @@ test1_single (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1030,14 +1008,12 @@ test1_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1088,14 +1064,12 @@ test1_two_timer_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1168,14 +1142,12 @@ test3_triple_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
@@ -1252,14 +1224,12 @@ test4_double_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -1336,14 +1306,12 @@ test5_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
diff --git a/src/vppinfra/test_vec.c b/src/vppinfra/test_vec.c
index c6f97fb984d..9f336a0a095 100644
--- a/src/vppinfra/test_vec.c
+++ b/src/vppinfra/test_vec.c
@@ -207,33 +207,35 @@ dump_call_stats (uword * stats)
more sensible value later. */
#define MAX_VEC_LEN 10
-#define create_random_vec_wh(elt_type, len, hdr_bytes, seed) \
-({ \
- elt_type * _v(v) = NULL; \
- uword _v(l) = (len); \
- uword _v(h) = (hdr_bytes); \
- u8 * _v(hdr); \
- \
- if (_v(l) == 0) \
- goto __done__; \
- \
- /* ~0 means select random length between 0 and MAX_VEC_LEN. */ \
- if (_v(l) == ~0) \
- _v(l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \
- \
- _v(v) = _vec_resize (NULL, _v(l), _v(l) * sizeof (elt_type), _v(h), 0); \
- fill_with_random_data (_v(v), vec_bytes (_v(v)), (seed)); \
- \
- /* Fill header with random data as well. */ \
- if (_v(h) > 0) \
- { \
- _v(hdr) = vec_header (_v(v), _v(h)); \
- fill_with_random_data (_v(hdr), _v(h), (seed)); \
- } \
- \
-__done__: \
- _v(v); \
-})
+#define create_random_vec_wh(elt_type, len, hdr_bytes, seed) \
+ ({ \
+ elt_type *_v (v) = NULL; \
+ uword _v (l) = (len); \
+ vec_attr_t _v (attr) = { .hdr_sz = (hdr_bytes), \
+ .elt_sz = sizeof (elt_type) }; \
+ uword _v (h) = (hdr_bytes); \
+ u8 *_v (hdr); \
+ \
+ if (_v (l) == 0) \
+ goto __done__; \
+ \
+ /* ~0 means select random length between 0 and MAX_VEC_LEN. */ \
+ if (_v (l) == ~0) \
+ _v (l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \
+ \
+ _v (v) = _vec_alloc_internal (_v (l), &_v (attr)); \
+ fill_with_random_data (_v (v), vec_bytes (_v (v)), (seed)); \
+ \
+ /* Fill header with random data as well. */ \
+ if (_v (h) > 0) \
+ { \
+ _v (hdr) = vec_header (_v (v)); \
+ fill_with_random_data (_v (hdr), _v (h), (seed)); \
+ } \
+ \
+ __done__: \
+ _v (v); \
+ })
#define create_random_vec(elt_type, len, seed) \
create_random_vec_wh (elt_type, len, 0, seed)
@@ -258,7 +260,7 @@ validate_vec_free (elt_t * vec)
static elt_t *
validate_vec_free_h (elt_t * vec, uword hdr_bytes)
{
- vec_free_h (vec, hdr_bytes);
+ vec_free (vec);
ASSERT (vec == NULL);
return vec;
}
@@ -274,8 +276,8 @@ validate_vec_hdr (elt_t * vec, uword hdr_bytes)
return;
vh = _vec_find (vec);
- hdr = vec_header (vec, hdr_bytes);
- hdr_end = vec_header_end (hdr, hdr_bytes);
+ hdr = vec_header (vec);
+ hdr_end = vec_header_end (hdr);
ASSERT (hdr_end == (u8 *) vec);
ASSERT ((u8 *) vh - (u8 *) hdr >= hdr_bytes);
@@ -335,8 +337,7 @@ validate_vec (elt_t * vec, uword hdr_bytes)
else
{
if (hdr_bytes > 0)
- VERBOSE3 ("Header: %U\n",
- format_hex_bytes, vec_header (vec, sizeof (vec[0])),
+ VERBOSE3 ("Header: %U\n", format_hex_bytes, vec_header (vec),
sizeof (vec[0]));
VERBOSE3 ("%U\n\n",
@@ -371,7 +372,7 @@ validate_vec_resize_h (elt_t * vec, uword num_elts, uword hdr_bytes)
len1 = vec_len (vec);
if (vec)
- hdr = vec_header (vec, hdr_bytes);
+ hdr = vec_header (vec);
hash = compute_vec_hash (0, vec);
hdr_hash = compute_mem_hash (0, hdr, hdr_bytes);
@@ -391,7 +392,7 @@ validate_vec_resize_h (elt_t * vec, uword num_elts, uword hdr_bytes)
}
if (vec)
- hdr = vec_header (vec, hdr_bytes);
+ hdr = vec_header (vec);
ASSERT (compute_vec_hash (hash, vec) == 0);
ASSERT (compute_mem_hash (hdr_hash, hdr, hdr_bytes) == 0);
@@ -677,7 +678,7 @@ validate_vec_init_h (uword num_elts, uword hdr_bytes)
uword len;
elt_t *new;
- new = vec_new_ha (elt_t, num_elts, hdr_bytes, 0);
+ new = vec_new_generic (elt_t, num_elts, hdr_bytes, 0, 0);
len = vec_len (new);
ASSERT (len == num_elts);
@@ -687,7 +688,7 @@ validate_vec_init_h (uword num_elts, uword hdr_bytes)
{
if (i == 0)
{
- ptr = (u8 *) vec_header (new, hdr_bytes);
+ ptr = (u8 *) vec_header (new);
end = ptr + hdr_bytes;
}
else
@@ -799,7 +800,7 @@ run_validator_wh (uword iter)
{
case OP_IS_VEC_INIT_H:
num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
- vec_free_h (vec, sizeof (hdr_t));
+ vec_free (vec);
VERBOSE2 ("vec_init_h(), new elts %d\n", num_elts);
vec = validate_vec_init_h (num_elts, sizeof (hdr_t));
break;
@@ -840,7 +841,7 @@ run_validator_wh (uword iter)
}
validate_vec (vec, sizeof (hdr_t));
- vec_free_h (vec, sizeof (hdr_t));
+ vec_free (vec);
}
static void
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c
index 3377828bbc5..f1736499a0a 100644
--- a/src/vppinfra/time.c
+++ b/src/vppinfra/time.c
@@ -74,35 +74,35 @@ clock_frequency_from_proc_filesystem (void)
{
f64 cpu_freq = 1e9; /* better than 40... */
f64 ppc_timebase = 0; /* warnings be gone */
- int fd;
unformat_input_t input;
-/* $$$$ aarch64 kernel doesn't report "cpu MHz" */
-#if defined(__aarch64__)
+#if defined(__x86_64__)
+ if (clib_cpu_supports_aperfmperf ())
+ return 0.0;
+#elif defined(__aarch64__)
+ /* $$$$ aarch64 kernel doesn't report "cpu MHz" */
return 0.0;
#endif
cpu_freq = 0;
- fd = open ("/proc/cpuinfo", 0);
- if (fd < 0)
- return cpu_freq;
-
- unformat_init_clib_file (&input, fd);
ppc_timebase = 0;
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_init_file (&input, "/proc/cpuinfo"))
{
- if (unformat (&input, "cpu MHz : %f", &cpu_freq))
- cpu_freq *= 1e6;
- else if (unformat (&input, "timebase : %f", &ppc_timebase))
- ;
- else
- unformat_skip_line (&input);
- }
-
- unformat_free (&input);
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "cpu MHz : %f", &cpu_freq))
+ cpu_freq *= 1e6;
+ else if (unformat (&input, "timebase : %f", &ppc_timebase))
+ ;
+ else
+ unformat_skip_line (&input);
+ }
- close (fd);
+ unformat_free (&input);
+ }
+ else
+ return cpu_freq;
/* Override CPU frequency with time base for PPC. */
if (ppc_timebase != 0)
@@ -117,21 +117,19 @@ static f64
clock_frequency_from_sys_filesystem (void)
{
f64 cpu_freq = 0.0;
- int fd;
unformat_input_t input;
/* Time stamp always runs at max frequency. */
cpu_freq = 0;
- fd = open ("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 0);
- if (fd < 0)
- goto done;
-
- unformat_init_clib_file (&input, fd);
- (void) unformat (&input, "%f", &cpu_freq);
- cpu_freq *= 1e3; /* measured in kHz */
- unformat_free (&input);
- close (fd);
-done:
+
+ if (unformat_init_file (
+ &input, "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"))
+ {
+ if (unformat (&input, "%f", &cpu_freq))
+ cpu_freq *= 1e3; /* measured in kHz */
+ unformat_free (&input);
+ }
+
return cpu_freq;
}
diff --git a/src/vppinfra/time.h b/src/vppinfra/time.h
index 4d8997f0a9e..761dbed3fe8 100644
--- a/src/vppinfra/time.h
+++ b/src/vppinfra/time.h
@@ -192,6 +192,15 @@ clib_cpu_time_now (void)
return result;
}
+#elif defined(__riscv)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u64 result;
+ asm volatile("rdcycle %0\n" : "=r"(result));
+ return result;
+}
#else
#error "don't know how to read CPU time stamp"
diff --git a/src/vppinfra/time_range.c b/src/vppinfra/time_range.c
index 4b5e1303763..54f5629641a 100644
--- a/src/vppinfra/time_range.c
+++ b/src/vppinfra/time_range.c
@@ -264,11 +264,10 @@ format_clib_timebase_time (u8 * s, va_list * args)
clib_timebase_time_to_components (now, cp);
- s = format (s, "%s, %u %s %u %u:%02u:%02u",
- day_names_epoch_order[cp->day_name_index],
- cp->day,
- month_short_names[cp->month],
- cp->year, cp->hour, cp->minute, cp->second);
+ s = format (s, "%s, %02u %s %u %02u:%02u:%02u",
+ day_names_epoch_order[cp->day_name_index], cp->day,
+ month_short_names[cp->month], cp->year, cp->hour, cp->minute,
+ cp->second);
return (s);
}
diff --git a/src/vppinfra/timing_wheel.c b/src/vppinfra/timing_wheel.c
index 2c46d72a2fe..830888a19c1 100644
--- a/src/vppinfra/timing_wheel.c
+++ b/src/vppinfra/timing_wheel.c
@@ -185,7 +185,7 @@ free_elt_vector (timing_wheel_t * w, timing_wheel_elt_t * ev)
/* Poison free elements so we never use them by mistake. */
if (CLIB_DEBUG > 0)
clib_memset (ev, ~0, vec_len (ev) * sizeof (ev[0]));
- _vec_len (ev) = 0;
+ vec_set_len (ev, 0);
vec_add1 (w->free_elt_vectors, ev);
}
@@ -302,23 +302,19 @@ timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time, u32 user_data)
/* Delete elts with given user data so that stale events don't expire. */
vec_foreach (l, w->levels)
{
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, l->occupancy_bitmap) {
l->elts[wi] = delete_user_data (l->elts[wi], user_data);
if (vec_len (l->elts[wi]) == 0)
l->occupancy_bitmap = clib_bitmap_andnoti (l->occupancy_bitmap, wi);
}
- /* *INDENT-ON* */
}
{
timing_wheel_overflow_elt_t *oe;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool) {
if (oe->user_data == user_data)
pool_put (w->overflow_pool, oe);
}
- /* *INDENT-ON* */
}
hash_unset (w->deleted_user_data_hash, user_data);
@@ -397,10 +393,8 @@ timing_wheel_next_expiring_elt_time (timing_wheel_t * w)
if (min_dt != ~0)
min_t = w->cpu_time_base + min_dt;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool)
{ min_t = clib_min (min_t, oe->cpu_time); }
- /* *INDENT-ON* */
done:
return min_t;
@@ -459,7 +453,7 @@ expire_bin (timing_wheel_t * w,
/* Adjust for deleted elts. */
if (j < e_len)
- _vec_len (expired_user_data) -= e_len - j;
+ vec_dec_len (expired_user_data, e_len - j);
free_elt_vector (w, e);
@@ -485,7 +479,6 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
vec_foreach (l, w->levels)
{
uword wi;
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, l->occupancy_bitmap) {
vec_foreach (e, l->elts[wi])
{
@@ -496,13 +489,11 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
e->cpu_time_relative_to_base -= delta;
}
}
- /* *INDENT-ON* */
}
/* See which overflow elements fit now. */
{
timing_wheel_overflow_elt_t *oe;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool) {
/* It fits now into 32 bits. */
if (0 == ((oe->cpu_time - w->cpu_time_base) >> BITS (e->cpu_time_relative_to_base)))
@@ -521,7 +512,6 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
pool_put (w->overflow_pool, oe);
}
}
- /* *INDENT-ON* */
}
return expired_user_data;
}
@@ -613,7 +603,7 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
if (PREDICT_FALSE (current_ti != advance_ti))
{
if (w->unexpired_elts_pending_insert)
- _vec_len (w->unexpired_elts_pending_insert) = 0;
+ vec_set_len (w->unexpired_elts_pending_insert, 0);
level_index = 0;
while (current_ti != advance_ti)
@@ -647,12 +637,10 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
break;
level = vec_elt_at_index (w->levels, level_index);
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, level->occupancy_bitmap) {
expired_user_data = expire_bin (w, level_index, wi, advance_cpu_time,
expired_user_data);
}
- /* *INDENT-ON* */
}
if (PREDICT_TRUE (level_index < vec_len (w->levels)))
@@ -684,7 +672,7 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
{
timing_wheel_elt_t *e;
vec_foreach (e, w->unexpired_elts_pending_insert) insert_elt (w, e);
- _vec_len (w->unexpired_elts_pending_insert) = 0;
+ vec_set_len (w->unexpired_elts_pending_insert, 0);
}
/* Don't advance until necessary. */
diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c
index 97c70b223ce..6e8a58dbfaf 100644
--- a/src/vppinfra/tw_timer_template.c
+++ b/src/vppinfra/tw_timer_template.c
@@ -424,7 +424,7 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw,
tw->ticks_per_second = 1.0 / timer_interval_in_seconds;
vec_validate (tw->expired_timer_handles, 0);
- _vec_len (tw->expired_timer_handles) = 0;
+ vec_set_len (tw->expired_timer_handles, 0);
for (ring = 0; ring < TW_TIMER_WHEELS; ring++)
{
@@ -536,7 +536,7 @@ static inline
if (callback_vector_arg == 0)
{
- _vec_len (tw->expired_timer_handles) = 0;
+ vec_set_len (tw->expired_timer_handles, 0);
callback_vector = tw->expired_timer_handles;
}
else
diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h
index c5e7f09ef23..ad85af35ac9 100644
--- a/src/vppinfra/types.h
+++ b/src/vppinfra/types.h
@@ -57,12 +57,8 @@ typedef unsigned char u8;
typedef unsigned short u16;
#endif /* ! CLIB_LINUX_KERNEL */
-#if defined (__x86_64__)
-#ifndef __COVERITY__
-typedef signed int i128 __attribute__ ((mode (TI)));
-typedef unsigned int u128 __attribute__ ((mode (TI)));
-#endif
-#endif
+typedef signed __int128 i128;
+typedef unsigned __int128 u128;
#if (defined(i386) || (defined(_mips) && __mips != 64) || defined(powerpc) || defined (__SPU__) || defined(__sparc__) || defined(__arm__) || defined (__xtensa__) || defined(__TMS320C6X__))
typedef signed int i32;
@@ -73,7 +69,9 @@ typedef unsigned int u32;
typedef unsigned long long u64;
#endif /* CLIB_AVOID_CLASH_WITH_LINUX_TYPES */
-#elif defined(alpha) || (defined(_mips) && __mips == 64) || defined(__x86_64__) || defined (__powerpc64__) || defined (__aarch64__)
+#elif defined(alpha) || (defined(_mips) && __mips == 64) || \
+ defined(__x86_64__) || defined(__powerpc64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64)
typedef signed int i32;
typedef signed long i64;
@@ -123,6 +121,27 @@ typedef u64 clib_address_t;
typedef u32 clib_address_t;
#endif
+#define CLIB_I8_MAX __INT8_MAX__
+#define CLIB_I16_MAX __INT16_MAX__
+#define CLIB_I32_MAX __INT32_MAX__
+#define CLIB_I64_MAX __INT64_MAX__
+
+#define CLIB_U8_MAX __UINT8_MAX__
+#define CLIB_U16_MAX __UINT16_MAX__
+#define CLIB_U32_MAX __UINT32_MAX__
+#define CLIB_U64_MAX __UINT64_MAX__
+
+#define CLIB_F64_MAX __DBL_MAX__
+#define CLIB_F32_MAX __FLT_MAX__
+
+#if clib_address_bits == 64
+#define CLIB_WORD_MAX CLIB_I64_MAX
+#define CLIB_UWORD_MAX CLIB_U64_MAX
+#else
+#define CLIB_WORD_MAX CLIB_I32_MAX
+#define CLIB_UWORD_MAX CLIB_U32_MAX
+#endif
+
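Editor's note: these limits map directly onto compiler-predefined macros, so they are usable in preprocessor conditionals as well as in code. A minimal hedged sketch of the typical saturate-instead-of-truncate use (the helper name is illustrative, not part of the patch):

  static inline u32
  clamp_to_u32 (u64 n)
  {
    /* saturate rather than silently truncate */
    return n > CLIB_U32_MAX ? CLIB_U32_MAX : (u32) n;
  }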
/* These are needed to convert between pointers and machine words.
MIPS is currently the only machine that can have different sized
pointers and machine words (but only when compiling with 64 bit
@@ -133,6 +152,14 @@ pointer_to_uword (const void *p)
return (uword) (clib_address_t) p;
}
+static inline __attribute__ ((always_inline)) uword
+pointer_is_aligned (void *p, uword align)
+{
+ if ((pointer_to_uword (p) & (align - 1)) == 0)
+ return 1;
+ return 0;
+}
+
#define uword_to_pointer(u,type) ((type) (clib_address_t) (u))
/* Any type: can be either word or pointer. */
@@ -163,6 +190,27 @@ typedef f64 fword;
__attribute__ ((aligned (align), packed)); \
} *) (addr))->_data)
+typedef u16 u16u __attribute__ ((aligned (1), __may_alias__));
+typedef u32 u32u __attribute__ ((aligned (1), __may_alias__));
+typedef u64 u64u __attribute__ ((aligned (1), __may_alias__));
+typedef i16 i16u __attribute__ ((aligned (1), __may_alias__));
+typedef i32 i32u __attribute__ ((aligned (1), __may_alias__));
+typedef i64 i64u __attribute__ ((aligned (1), __may_alias__));
+typedef word wordu __attribute__ ((aligned (1), __may_alias__));
+typedef uword uwordu __attribute__ ((aligned (1), __may_alias__));
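Editor's note: these __may_alias__ typedefs give well-defined unaligned loads and stores under strict aliasing. A small sketch of the intended use (the helper name is illustrative, not part of the patch):

  static inline u32
  load_u32_unaligned (const void *p)
  {
    /* u32u is aligned(1) + may_alias, so this dereference is valid on any
       byte boundary and does not violate strict-aliasing rules */
    return *(const u32u *) p;
  }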
+
+#define foreach_int(__var, ...) \
+ for (int __int_array[] = { __VA_ARGS__, 0 }, *__int_ptr = __int_array, \
+ __var = *__int_ptr; \
+ __int_ptr - (ARRAY_LEN (__int_array) - 1) < __int_array; \
+ __var = *++__int_ptr)
+
+#define foreach_pointer(__var, ...) \
+ for (void *__ptr_array[] = { __VA_ARGS__, 0 }, **__ptr_ptr = __ptr_array, \
+ *__var = *__ptr_ptr; \
+ __ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array; \
+ __var = *++__ptr_ptr)
+
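Editor's note: a brief usage sketch of these iterators, assuming the usual vppinfra includes; the trailing 0 each macro appends is a sentinel and is never visited:

  static void
  show_sizes (void)
  {
    /* visits 64, 128, 256 exactly once each */
    foreach_int (sz, 64, 128, 256)
      fformat (stdout, "size %d\n", sz);
  }

  static void
  zero_two_buffers (u8 *a, u8 *b)
  {
    /* p is a void * bound to each listed pointer in turn */
    foreach_pointer (p, a, b)
      clib_memset (p, 0, 8);
  }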
#endif /* included_clib_types_h */
/*
diff --git a/src/vppinfra/unformat.c b/src/vppinfra/unformat.c
index 172182f8a34..522517888c3 100644
--- a/src/vppinfra/unformat.c
+++ b/src/vppinfra/unformat.c
@@ -36,6 +36,7 @@
*/
#include <vppinfra/format.h>
+#include <fcntl.h>
/* Call user's function to fill input buffer. */
__clib_export uword
@@ -70,22 +71,6 @@ _unformat_fill_input (unformat_input_t * i)
return i->index;
}
-always_inline uword
-is_white_space (uword c)
-{
- switch (c)
- {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- return 1;
-
- default:
- return 0;
- }
-}
-
/* Format function for dumping input stream. */
__clib_export u8 *
format_unformat_error (u8 * s, va_list * va)
@@ -968,7 +953,7 @@ parse_fail:
if (!input_matches_format)
input->index = input->buffer_marks[l - 1];
- _vec_len (input->buffer_marks) = l - 1;
+ vec_set_len (input->buffer_marks, l - 1);
}
return input_matches_format;
@@ -1003,7 +988,7 @@ unformat_user (unformat_input_t * input, unformat_function_t * func, ...)
if (!result && input->index != UNFORMAT_END_OF_INPUT)
input->index = input->buffer_marks[l];
- _vec_len (input->buffer_marks) = l;
+ vec_set_len (input->buffer_marks, l);
return result;
}
@@ -1026,7 +1011,8 @@ unformat_init_command_line (unformat_input_t * input, char *argv[])
}
__clib_export void
-unformat_init_string (unformat_input_t * input, char *string, int string_len)
+unformat_init_string (unformat_input_t *input, const char *string,
+ int string_len)
{
unformat_init (input, 0, 0);
if (string_len > 0)
@@ -1052,7 +1038,7 @@ clib_file_fill_buffer (unformat_input_t * input)
vec_resize (input->buffer, 4096);
n = read (fd, input->buffer + l, 4096);
if (n > 0)
- _vec_len (input->buffer) = l + n;
+ vec_set_len (input->buffer, l + n);
if (n <= 0)
return UNFORMAT_END_OF_INPUT;
@@ -1060,6 +1046,13 @@ clib_file_fill_buffer (unformat_input_t * input)
return input->index;
}
+static void
+unformat_close_fd (unformat_input_t *input)
+{
+ int fd = pointer_to_uword (input->fill_buffer_arg);
+ close (fd);
+}
+
__clib_export void
unformat_init_clib_file (unformat_input_t * input, int file_descriptor)
{
@@ -1067,6 +1060,31 @@ unformat_init_clib_file (unformat_input_t * input, int file_descriptor)
uword_to_pointer (file_descriptor, void *));
}
+__clib_export uword
+unformat_init_file (unformat_input_t *input, char *fmt, ...)
+{
+ va_list va;
+ u8 *path;
+ int fd;
+
+ va_start (va, fmt);
+ path = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (path, 0);
+
+ fd = open ((char *) path, 0);
+ vec_free (path);
+
+ if (fd >= 0)
+ {
+ unformat_init (input, clib_file_fill_buffer,
+ uword_to_pointer (fd, void *));
+ input->free = unformat_close_fd;
+ return 1;
+ }
+ return 0;
+}
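Editor's note: unformat_init_file registers unformat_close_fd as the input's free callback, so unformat_free also closes the descriptor and callers no longer open/close the fd themselves. A hedged sketch mirroring the time.c conversion elsewhere in this patch:

  static f64
  read_max_freq (void)
  {
    unformat_input_t in;
    f64 freq = 0;

    if (unformat_init_file (
	  &in, "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_max_freq", 0))
      {
	if (unformat (&in, "%f", &freq))
	  freq *= 1e3; /* sysfs value is in kHz */
	unformat_free (&in); /* also closes the fd via input->free */
      }
    return freq;
  }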
+
/* Take input from Unix environment variable. */
uword
unformat_init_unix_env (unformat_input_t * input, char *var)
@@ -1101,8 +1119,97 @@ unformat_data_size (unformat_input_t * input, va_list * args)
return 1;
}
+__clib_export uword
+unformat_c_string_array (unformat_input_t *input, va_list *va)
+{
+ char *str = va_arg (*va, char *);
+ u32 array_len = va_arg (*va, u32);
+ uword c, rv = 0;
+ u8 *s = 0;
+
+ if (unformat (input, "%v", &s) == 0)
+ return 0;
+
+ c = vec_len (s);
+
+ if (c > 0 && c < array_len)
+ {
+ clib_memcpy (str, s, c);
+ str[c] = 0;
+ rv = 1;
+ }
+
+ vec_free (s);
+ return rv;
+}
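Editor's note: a usage sketch — parse a token into a fixed-size C array, failing the match when the token would not fit together with its NUL terminator (the field name is illustrative):

  static int
  parse_ifname (unformat_input_t *input, char *ifname, u32 len)
  {
    /* succeeds only for 1..len-1 characters; ifname is NUL-terminated */
    return unformat (input, "interface %U", unformat_c_string_array, ifname,
		     len) ? 0 : -1;
  }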
+
+static uword
+__unformat_quoted_string (unformat_input_t *input, u8 **sp, char quote)
+{
+ u8 *s = 0;
+ uword c, p = 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ if (!is_white_space (c))
+ break;
+
+ if (c != quote)
+ return 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (c == quote && p != '\\')
+ {
+ *sp = s;
+ return 1;
+ }
+ vec_add1 (s, c);
+ p = c;
+ }
+ vec_free (s);
+
+ return 0;
+}
+
+__clib_export uword
+unformat_single_quoted_string (unformat_input_t *input, va_list *va)
+{
+ return __unformat_quoted_string (input, va_arg (*va, u8 **), '\'');
+}
+
+__clib_export uword
+unformat_double_quoted_string (unformat_input_t *input, va_list *va)
+{
+ return __unformat_quoted_string (input, va_arg (*va, u8 **), '"');
+}
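Editor's note: both helpers skip leading whitespace, require the opening quote, and return the unquoted body as a vector; a quote preceded by a backslash does not terminate the string. A hedged example:

  static void
  parse_quoted_name (unformat_input_t *input)
  {
    u8 *name = 0;

    /* matches e.g.:  name "some file.txt"  */
    if (unformat (input, "name %U", unformat_double_quoted_string, &name))
      {
	fformat (stdout, "parsed '%v'\n", name);
	vec_free (name);
      }
  }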
+
#endif /* CLIB_UNIX */
+__clib_export uword
+unformat_u8 (unformat_input_t *input, va_list *args)
+{
+ u8 *d = va_arg (*args, u8 *);
+
+ u32 tmp;
+ if (!unformat (input, "%u", &tmp) || tmp > CLIB_U8_MAX)
+ return 0;
+
+ *d = tmp;
+ return 1;
+}
+
+__clib_export uword
+unformat_u16 (unformat_input_t *input, va_list *args)
+{
+ u16 *d = va_arg (*args, u16 *);
+
+ u32 tmp;
+ if (!unformat (input, "%u", &tmp) || tmp > CLIB_U16_MAX)
+ return 0;
+
+ *d = tmp;
+ return 1;
+}
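Editor's note: unlike parsing %d straight into a narrow variable, these helpers range-check against CLIB_U8_MAX / CLIB_U16_MAX before storing, so out-of-range input fails the match instead of silently truncating. A usage sketch:

  static int
  parse_ttl_port (unformat_input_t *input, u8 *ttl, u16 *port)
  {
    return unformat (input, "ttl %U port %U", unformat_u8, ttl, unformat_u16,
		     port) ? 0 : -1;
  }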
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c
index fd112675fa7..1a101e04aee 100644
--- a/src/vppinfra/unix-formats.c
+++ b/src/vppinfra/unix-formats.c
@@ -67,7 +67,9 @@
#include <linux/types.h>
#include <linux/netlink.h>
#endif
-#endif
+#elif __FreeBSD__
+#include <netlink/netlink.h>
+#endif /* __linux__ */
#endif /* ! __KERNEL__ */
@@ -91,7 +93,6 @@
# include <netinet/if_ether.h>
#endif /* __KERNEL__ */
-#include <vppinfra/bitops.h> /* foreach_set_bit */
#include <vppinfra/format.h>
#include <vppinfra/error.h>
@@ -410,7 +411,9 @@ u8 * format_signal (u8 * s, va_list * args)
_ (SIGPROF);
_ (SIGWINCH);
_ (SIGIO);
+#ifdef __linux__
_ (SIGPWR);
+#endif /* __linux__ */
#ifdef SIGSYS
_ (SIGSYS);
#endif
@@ -431,12 +434,15 @@ u8 * format_ucontext_pc (u8 * s, va_list * args)
uc = va_arg (*args, ucontext_t *);
+#ifdef __linux__
#if defined (powerpc)
regs = &uc->uc_mcontext.uc_regs->gregs[0];
#elif defined (powerpc64)
regs = &uc->uc_mcontext.uc_regs->gp_regs[0];
#elif defined (i386) || defined (__x86_64__)
regs = (void *) &uc->uc_mcontext.gregs[0];
+#elif defined(__aarch64__)
+ regs = (void *) &uc->uc_mcontext.pc;
#endif
#if defined (powerpc) || defined (powerpc64)
@@ -445,10 +451,19 @@ u8 * format_ucontext_pc (u8 * s, va_list * args)
reg_no = REG_EIP;
#elif defined (__x86_64__)
reg_no = REG_RIP;
+#elif defined(__aarch64__)
+ reg_no = 0;
#else
reg_no = 0;
regs = 0;
#endif
+#elif __FreeBSD__
+#if defined(__amd64__)
+ reg_no = 0;
+ regs = (void *) &uc->uc_mcontext.mc_rip;
+#else
+#endif /* __amd64__ */
+#endif /* __linux__ */
if (! regs)
return format (s, "unsupported");
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
index 5559a2392fe..05ca2f901c6 100644
--- a/src/vppinfra/unix-misc.c
+++ b/src/vppinfra/unix-misc.c
@@ -35,19 +35,40 @@
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include <vppinfra/error.h>
#include <vppinfra/os.h>
+#include <vppinfra/bitmap.h>
#include <vppinfra/unix.h>
+#include <vppinfra/format.h>
+#ifdef __linux__
+#include <vppinfra/linux/sysfs.h>
+#include <sched.h>
+#elif defined(__FreeBSD__)
+#define _WANT_FREEBSD_BITSET
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/cpuset.h>
+#include <sys/domainset.h>
+#include <sys/sysctl.h>
+#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h> /* writev */
#include <fcntl.h>
#include <stdio.h> /* for sprintf */
+#include <limits.h>
__clib_export __thread uword __os_thread_index = 0;
__clib_export __thread uword __os_numa_index = 0;
+__clib_export clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid);
+
clib_error_t *
clib_file_n_bytes (char *file, uword * result)
{
@@ -131,6 +152,37 @@ clib_file_contents (char *file, u8 ** result)
return error;
}
+__clib_export u8 *
+clib_file_get_resolved_basename (char *fmt, ...)
+{
+ va_list va;
+ char *p, buffer[PATH_MAX];
+ u8 *link, *s = 0;
+ int r;
+
+ va_start (va, fmt);
+ link = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (link, 0);
+
+ r = readlink ((char *) link, buffer, sizeof (buffer) - 1);
+ vec_free (link);
+
+ if (r < 1)
+ return 0;
+
+ buffer[r] = 0;
+ p = buffer + r - 1;
+ while (p > buffer && p[-1] != '/')
+ p--;
+
+ while (p[0])
+ vec_add1 (s, p++[0]);
+
+ vec_add1 (s, 0);
+ return s;
+}
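Editor's note: the helper formats a path, resolves it with readlink() and returns the final path component as a NUL-terminated vector, so the result can be printed with %s and must be freed with vec_free(). A hedged sketch; the sysfs path and device address are illustrative:

  static u8 *
  pci_driver_name (const char *pci_addr)
  {
    /* e.g. which kernel driver is bound to a PCI device */
    return clib_file_get_resolved_basename ("/sys/bus/pci/devices/%s/driver",
					    pci_addr);
  }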
+
clib_error_t *
unix_proc_file_contents (char *file, u8 ** result)
{
@@ -158,7 +210,7 @@ unix_proc_file_contents (char *file, u8 ** result)
if (bytes == 0)
{
- _vec_len (rv) = pos;
+ vec_set_len (rv, pos);
break;
}
pos += bytes;
@@ -169,27 +221,20 @@ unix_proc_file_contents (char *file, u8 ** result)
return 0;
}
-void os_panic (void) __attribute__ ((weak));
-
-__clib_export void
+__clib_export __clib_weak void
os_panic (void)
{
abort ();
}
-void os_exit (int) __attribute__ ((weak));
-
-void
+__clib_export __clib_weak void
os_exit (int code)
{
exit (code);
}
-void os_puts (u8 * string, uword string_length, uword is_error)
- __attribute__ ((weak));
-
-void
-os_puts (u8 * string, uword string_length, uword is_error)
+__clib_export __clib_weak void
+os_puts (u8 *string, uword string_length, uword is_error)
{
int cpu = os_get_thread_index ();
int nthreads = os_get_nthreads ();
@@ -227,6 +272,141 @@ os_get_nthreads (void)
return 1;
}
+__clib_export clib_bitmap_t *
+os_get_online_cpu_core_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/cpu/online");
+#elif defined(__FreeBSD__)
+ return os_get_cpu_affinity_bitmap (0);
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_cpu_affinity_bitmap (int pid)
+{
+#if __linux
+ int index, ret;
+ cpu_set_t cpuset;
+ uword *affinity_cpus;
+
+  clib_bitmap_alloc (affinity_cpus, CPU_SETSIZE);
+ clib_bitmap_zero (affinity_cpus);
+
+ CPU_ZERO_S (sizeof (cpu_set_t), &cpuset);
+
+ ret = sched_getaffinity (0, sizeof (cpu_set_t), &cpuset);
+
+ if (ret < 0)
+ {
+ clib_bitmap_free (affinity_cpus);
+ return 0;
+ }
+
+  /* iterate bit indices, not bytes: cpu_set_t holds CPU_SETSIZE bits */
+  for (index = 0; index < CPU_SETSIZE; index++)
+ if (CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset))
+ clib_bitmap_set (affinity_cpus, index, 1);
+ return affinity_cpus;
+#elif defined(__FreeBSD__)
+ cpuset_t mask;
+ uword *r = NULL;
+
+  clib_bitmap_alloc (r, CPU_SETSIZE);
+ clib_bitmap_zero (r);
+
+ if (cpuset_getaffinity (CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, -1,
+ sizeof (mask), &mask) != 0)
+ {
+ clib_bitmap_free (r);
+ return NULL;
+ }
+
+ for (int bit = 0; bit < CPU_SETSIZE; bit++)
+ clib_bitmap_set (r, bit, CPU_ISSET (bit, &mask));
+
+ return r;
+#else
+ return NULL;
+#endif
+}
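Editor's note: the Linux branch passes 0 to sched_getaffinity() regardless of the pid argument, i.e. it always queries the calling thread. A usage sketch:

  static void
  show_affinity (void)
  {
    uword cpu;
    clib_bitmap_t *cpus = os_get_cpu_affinity_bitmap (0);

    if (cpus)
      {
	clib_bitmap_foreach (cpu, cpus)
	  fformat (stdout, "may run on cpu %u\n", cpu);
	clib_bitmap_free (cpus);
      }
  }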
+
+__clib_export clib_bitmap_t *
+os_get_online_cpu_node_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/online");
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_cpu_on_node_bitmap (int node)
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/node%u/cpulist",
+ node);
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_cpu_with_memory_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/has_memory");
+#else
+ return 0;
+#endif
+}
+
+__clib_export int
+os_get_cpu_phys_core_id (int cpu_id)
+{
+#if __linux
+ int core_id = -1;
+ clib_error_t *err;
+ u8 *p;
+
+ p =
+ format (0, "/sys/devices/system/cpu/cpu%u/topology/core_id%c", cpu_id, 0);
+ err = clib_sysfs_read ((char *) p, "%d", &core_id);
+ vec_free (p);
+ if (err)
+ {
+ clib_error_free (err);
+ return -1;
+ }
+ return core_id;
+#else
+ return -1;
+#endif
+}
+
+__clib_export u8 *
+os_get_exec_path ()
+{
+ u8 *rv = 0;
+#ifdef __linux__
+ char tmp[PATH_MAX];
+ ssize_t sz = readlink ("/proc/self/exe", tmp, sizeof (tmp));
+
+ if (sz <= 0)
+ return 0;
+#else
+ char tmp[MAXPATHLEN];
+ int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+ size_t sz = MAXPATHLEN;
+
+ if (sysctl (mib, 4, tmp, &sz, NULL, 0) == -1)
+ return 0;
+#endif
+ vec_add (rv, tmp, sz);
+ return rv;
+}
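Editor's note: as the unix.h comment below states, the returned vector is not NUL-terminated, so print it with %v or add a terminator before treating it as a C string. A sketch:

  static void
  show_exec_path (void)
  {
    u8 *exe = os_get_exec_path ();

    if (exe)
      {
	fformat (stdout, "executable: %v\n", exe);
	vec_free (exe);
      }
  }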
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h
index 5b82c23a3c0..d0ddb93a46f 100644
--- a/src/vppinfra/unix.h
+++ b/src/vppinfra/unix.h
@@ -53,6 +53,25 @@ clib_error_t *clib_file_contents (char *file, u8 ** result);
/* As above but for /proc file system on Linux. */
clib_error_t *unix_proc_file_contents (char *file, u8 ** result);
+/* Retrieve bitmap of online cpu cores */
+clib_bitmap_t *os_get_online_cpu_core_bitmap ();
+
+/* Retrieve bitmap of online cpu nodes (sockets) */
+clib_bitmap_t *os_get_online_cpu_node_bitmap ();
+
+/* Retrieve bitmap of cpus with memory */
+clib_bitmap_t *os_get_cpu_with_memory_bitmap ();
+
+/* Retrieve bitmap of cpus on specific node */
+clib_bitmap_t *os_get_cpu_on_node_bitmap (int node);
+
+/* Retrieve physical core id of specific cpu, -1 if not available */
+int os_get_cpu_phys_core_id (int cpu);
+
+/* Retrieve the path of the current executable as a vector (not
+ * null-terminated). */
+u8 *os_get_exec_path ();
+
#endif /* included_clib_unix_h */
/*
diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c
index 970f7f7bfa8..dbaadad2dd5 100644
--- a/src/vppinfra/vec.c
+++ b/src/vppinfra/vec.c
@@ -1,39 +1,6 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
*/
-/*
- Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
#include <vppinfra/vec.h>
#include <vppinfra/mem.h>
@@ -42,106 +9,125 @@
#define CLIB_VECTOR_GROW_BY_ONE 0
#endif
-/* Vector resize operator. Called as needed by various macros such as
- vec_add1() when we need to allocate memory. */
-__clib_export void *
-vec_resize_allocate_memory (void *v,
- word length_increment,
- uword data_bytes,
- uword header_bytes, uword data_align,
- uword numa_id)
+__clib_export uword
+vec_mem_size (void *v)
{
- vec_header_t *vh = _vec_find (v);
- uword old_alloc_bytes, new_alloc_bytes;
- void *old, *new;
- void *oldheap;
-
- header_bytes = vec_header_bytes (header_bytes);
-
- data_bytes += header_bytes;
-
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- {
- oldheap = clib_mem_get_per_cpu_heap ();
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id));
- }
+ return v ? clib_mem_size (v - vec_get_header_size (v)) : 0;
+}
- if (!v)
+__clib_export void *
+_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr)
+{
+ uword req_size, alloc_size, data_offset, align;
+ uword elt_sz = attr->elt_sz;
+ void *p, *v, *heap = attr->heap;
+
+ /* alignment must be power of 2 */
+ align = clib_max (attr->align, VEC_MIN_ALIGN);
+ ASSERT (count_set_bits (align) == 1);
+
+ /* calc offset where vector data starts */
+ data_offset = attr->hdr_sz + sizeof (vec_header_t);
+ data_offset += heap ? sizeof (void *) : 0;
+ data_offset = round_pow2 (data_offset, align);
+
+ req_size = data_offset + n_elts * elt_sz;
+ p = clib_mem_heap_alloc_aligned (heap, req_size, align);
+
+  /* zero out whole allocation */
+ alloc_size = clib_mem_size (p);
+ clib_mem_unpoison (p, alloc_size);
+ clib_memset_u8 (p, 0, alloc_size);
+
+ /* fill vector header */
+ v = p + data_offset;
+ _vec_find (v)->len = n_elts;
+ _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN;
+ _vec_find (v)->log2_align = min_log2 (align);
+ if (heap)
{
- new = clib_mem_alloc_aligned_at_offset (data_bytes, data_align, header_bytes, 1 /* yes, call os_out_of_memory */
- );
- new_alloc_bytes = clib_mem_size (new);
- CLIB_MEM_UNPOISON (new + data_bytes, new_alloc_bytes - data_bytes);
- clib_memset (new, 0, new_alloc_bytes);
- CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes);
- v = new + header_bytes;
- _vec_len (v) = length_increment;
- _vec_numa (v) = numa_id;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
+ _vec_find (v)->default_heap = 0;
+ _vec_heap (v) = heap;
}
+ else
+ _vec_find (v)->default_heap = 1;
- vh->len += length_increment;
- old = v - header_bytes;
-
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (old));
-
- old_alloc_bytes = clib_mem_size (old);
+ /* poison extra space given by allocator */
+ clib_mem_poison (p + req_size, alloc_size - req_size);
+ _vec_set_grow_elts (v, (alloc_size - req_size) / elt_sz);
+ return v;
+}
- /* Need to resize? */
- if (data_bytes <= old_alloc_bytes)
- {
- CLIB_MEM_UNPOISON (v, data_bytes);
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
- }
+static inline void
+_vec_update_len (void *v, uword n_elts, uword elt_sz, uword n_data_bytes,
+ uword unused_bytes)
+{
+ _vec_find (v)->len = n_elts;
+ _vec_set_grow_elts (v, unused_bytes / elt_sz);
+ clib_mem_unpoison (v, n_data_bytes);
+ clib_mem_poison (v + n_data_bytes, unused_bytes);
+}
-#if CLIB_VECTOR_GROW_BY_ONE > 0
- new_alloc_bytes = data_bytes;
-#else
- new_alloc_bytes = (old_alloc_bytes * 3) / 2;
- if (new_alloc_bytes < data_bytes)
- new_alloc_bytes = data_bytes;
-#endif
+__clib_export void *
+_vec_realloc_internal (void *v, uword n_elts, const vec_attr_t *const attr)
+{
+ uword old_alloc_sz, new_alloc_sz, new_data_size, n_data_bytes, data_offset;
+ uword elt_sz;
- new =
- clib_mem_alloc_aligned_at_offset (new_alloc_bytes, data_align,
- header_bytes,
- 1 /* yes, call os_out_of_memory */ );
+ if (PREDICT_FALSE (v == 0))
+ return _vec_alloc_internal (n_elts, attr);
- /* FIXME fail gracefully. */
- if (!new)
- clib_panic
- ("vec_resize fails, length increment %d, data bytes %d, alignment %d",
- length_increment, data_bytes, data_align);
+ elt_sz = attr->elt_sz;
+ n_data_bytes = n_elts * elt_sz;
+ data_offset = vec_get_header_size (v);
+ new_data_size = data_offset + n_data_bytes;
+ new_alloc_sz = old_alloc_sz = clib_mem_size (vec_header (v));
- CLIB_MEM_UNPOISON (old, old_alloc_bytes);
- clib_memcpy_fast (new, old, old_alloc_bytes);
- clib_mem_free (old);
+ /* realloc if new size cannot fit into existing allocation */
+ if (old_alloc_sz < new_data_size)
+ {
+ uword n_bytes, req_size = new_data_size;
+ void *p = v - data_offset;
- /* Allocator may give a bit of extra room. */
- new_alloc_bytes = clib_mem_size (new);
- v = new;
+ req_size += CLIB_VECTOR_GROW_BY_ONE ? 0 : n_data_bytes / 2;
- /* Zero new memory. */
- CLIB_MEM_UNPOISON (new + data_bytes, new_alloc_bytes - data_bytes);
- memset (v + old_alloc_bytes, 0, new_alloc_bytes - old_alloc_bytes);
- CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes);
+ p = clib_mem_heap_realloc_aligned (vec_get_heap (v), p, req_size,
+ vec_get_align (v));
+ new_alloc_sz = clib_mem_size (p);
+ v = p + data_offset;
- _vec_numa ((v + header_bytes)) = numa_id;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
+ /* zero out new allocation */
+ n_bytes = new_alloc_sz - old_alloc_sz;
+ clib_mem_unpoison (p + old_alloc_sz, n_bytes);
+ clib_memset_u8 (p + old_alloc_sz, 0, n_bytes);
+ }
- return v + header_bytes;
+ _vec_update_len (v, n_elts, elt_sz, n_data_bytes,
+ new_alloc_sz - new_data_size);
+ return v;
}
-__clib_export uword
-clib_mem_is_vec_h (void *v, uword header_bytes)
+__clib_export void *
+_vec_resize_internal (void *v, uword n_elts, const vec_attr_t *const attr)
{
- return clib_mem_is_heap_object (vec_header (v, header_bytes));
+ uword elt_sz = attr->elt_sz;
+ if (PREDICT_TRUE (v != 0))
+ {
+ uword hs = _vec_find (v)->hdr_size * VEC_MIN_ALIGN;
+ uword alloc_sz = clib_mem_size (v - hs);
+ uword n_data_bytes = elt_sz * n_elts;
+ word unused_bytes = alloc_sz - (n_data_bytes + hs);
+
+ if (PREDICT_TRUE (unused_bytes >= 0))
+ {
+ _vec_update_len (v, n_elts, elt_sz, n_data_bytes, unused_bytes);
+ return v;
+ }
+ }
+
+  /* this should emit a tail jump and likely avoid stack usage inside this
+   * function */
+ return _vec_realloc_internal (v, n_elts, attr);
}
__clib_export u32
@@ -155,62 +141,3 @@ vec_free_not_inline (void *v)
{
vec_free (v);
}
-
-/** \cond */
-
-#ifdef TEST
-
-#include <stdio.h>
-
-void
-main (int argc, char *argv[])
-{
- word n = atoi (argv[1]);
- word i, *x = 0;
-
- typedef struct
- {
- word x, y, z;
- } FOO;
-
- FOO *foos = vec_init (FOO, 10), *f;
-
- vec_validate (foos, 100);
- foos[100].x = 99;
-
- _vec_len (foos) = 0;
- for (i = 0; i < n; i++)
- {
- vec_add1 (x, i);
- vec_add2 (foos, f, 1);
- f->x = 2 * i;
- f->y = 3 * i;
- f->z = 4 * i;
- }
-
- {
- word n = 2;
- word m = 42;
- vec_delete (foos, n, m);
- }
-
- {
- word n = 2;
- word m = 42;
- vec_insert (foos, n, m);
- }
-
- vec_free (x);
- vec_free (foos);
- exit (0);
-}
-#endif
-/** \endcond */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h
index d19ff998137..1a64a69a1e6 100644
--- a/src/vppinfra/vec.h
+++ b/src/vppinfra/vec.h
@@ -52,11 +52,13 @@
The memory layout looks like this:
~~~~~~~~
- user header (aligned to uword boundary)
- vector length: number of elements
+ user header (start of memory allocation)
+ padding
+ heap pointer (optional, only if default_heap == 0)
+ vector header: number of elements, header size
user's pointer-> vector element #0
- vector element #1
- ...
+ vector element #1
+ ...
~~~~~~~~
The user pointer contains the address of vector element # 0. Null
@@ -70,8 +72,9 @@
Typically, the header is not present. Headers allow for other
data structures to be built atop CLIB vectors.
- Users may specify the alignment for first data element of a vector
- via the vec_*_aligned macros.
+   While users may specify the alignment for the first data element of a
+   vector via the vec_*_aligned macros, that is typically not needed, as the
+   alignment is set based on the natural alignment of the data type used.
Vector elements can be any C type e.g. (int, double, struct bar).
This is also true for data types built atop vectors (e.g. heap,
@@ -89,123 +92,130 @@
which are invariant.
*/
-/** \brief Low-level resize allocation function, usually not called directly
+/** \brief Low-level (re)allocation function, usually not called directly
@param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @param numa_id numa id (may be zero)
+ @param n_elts requested number of elements
+ @param elt_sz requested size of one element
+ @param hdr_sz header size in bytes (may be zero)
+ @param align alignment (may be zero)
@return v_prime pointer to resized vector, may or may not equal v
*/
-void *vec_resize_allocate_memory (void *v,
- word length_increment,
- uword data_bytes,
- uword header_bytes, uword data_align,
- uword numa_id);
-/** \brief Low-level vector resize function, usually not called directly
-
- @param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @param numa_id (may be ~0)
- @return v_prime pointer to resized vector, may or may not equal v
-*/
+typedef struct
+{
+ void *heap;
+ u32 elt_sz;
+ u16 hdr_sz;
+ u16 align;
+} vec_attr_t;
+
+void *_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr);
+void *_vec_realloc_internal (void *v, uword n_elts,
+ const vec_attr_t *const attr);
+void *_vec_resize_internal (void *v, uword n_elts,
+ const vec_attr_t *const attr);
+
+/* calculate the minimum alignment from the data's natural alignment and the
+ * provided value; the result is never < VEC_MIN_ALIGN */
+static_always_inline uword
+__vec_align (uword data_align, uword configured_align)
+{
+  data_align = clib_max (data_align, configured_align);
+ ASSERT (count_set_bits (data_align) == 1);
+ return clib_max (VEC_MIN_ALIGN, data_align);
+}
-#define _vec_resize_numa(V,L,DB,HB,A,S) \
-({ \
- __typeof__ ((V)) _V; \
- _V = _vec_resize_inline((void *)V,L,DB,HB,clib_max((__alignof__((V)[0])),(A)),(S)); \
- _V; \
-})
+/* function used to catch cases where vec_* macros are used on void * */
+static_always_inline uword
+__vec_elt_sz (uword elt_sz, int is_void)
+{
+ /* vector macro operations on void * are not allowed */
+ ASSERT (is_void == 0);
+ return elt_sz;
+}
-#define _vec_resize(V,L,DB,HB,A) \
- _vec_resize_numa(V,L,DB,HB,A,VEC_NUMA_UNSPECIFIED)
+static_always_inline void
+_vec_update_pointer (void **vp, void *v)
+{
+ /* avoid store if not needed */
+ if (v != vp[0])
+ vp[0] = v;
+}
-always_inline void *
-_vec_resize_inline (void *v,
- word length_increment,
- uword data_bytes, uword header_bytes, uword data_align,
- uword numa_id)
+static_always_inline void *
+vec_get_heap (void *v)
{
- vec_header_t *vh = _vec_find (v);
- uword new_data_bytes, aligned_header_bytes;
- void *oldheap;
+ if (v == 0 || _vec_find (v)->default_heap == 1)
+ return 0;
+ return _vec_heap (v);
+}
- aligned_header_bytes = vec_header_bytes (header_bytes);
+static_always_inline uword
+vec_get_align (void *v)
+{
+ return 1ULL << _vec_find (v)->log2_align;
+}
- new_data_bytes = data_bytes + aligned_header_bytes;
+static_always_inline void
+_vec_prealloc (void **vp, uword n_elts, uword hdr_sz, uword align, void *heap,
+ uword elt_sz)
+{
+ const vec_attr_t va = {
+ .elt_sz = elt_sz, .hdr_sz = hdr_sz, .align = align, .heap = heap
+ };
+ void *v;
- if (PREDICT_TRUE (v != 0))
- {
- void *p = v - aligned_header_bytes;
-
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- {
- oldheap = clib_mem_get_per_cpu_heap ();
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id));
- }
-
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (p));
-
- /* Typically we'll not need to resize. */
- if (new_data_bytes <= clib_mem_size (p))
- {
- CLIB_MEM_UNPOISON (v, data_bytes);
- vh->len += length_increment;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
- }
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- }
+ ASSERT (vp[0] == 0);
- /* Slow path: call helper function. */
- return vec_resize_allocate_memory (v, length_increment, data_bytes,
- header_bytes,
- clib_max (sizeof (vec_header_t),
- data_align), numa_id);
+ v = _vec_alloc_internal (n_elts, &va);
+ _vec_set_len (v, 0, elt_sz);
+ _vec_update_pointer (vp, v);
}
-/** \brief Determine if vector will resize with next allocation
+/** \brief Pre-allocate a vector (generic version)
- @param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @return 1 if vector will resize 0 otherwise
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @param H header size in bytes (may be zero)
+ @param A alignment (zero means default alignment of the data structure)
+ @param P heap (zero means default heap)
+ @return V (value-result macro parameter)
*/
-always_inline int
-_vec_resize_will_expand (void *v,
- word length_increment,
- uword data_bytes, uword header_bytes,
- uword data_align)
-{
- uword new_data_bytes, aligned_header_bytes;
+#define vec_prealloc_hap(V, N, H, A, P) \
+ _vec_prealloc ((void **) &(V), N, H, _vec_align (V, A), P, _vec_elt_sz (V))
- aligned_header_bytes = vec_header_bytes (header_bytes);
+/** \brief Pre-allocate a vector (simple version)
+
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @return V (value-result macro parameter)
+*/
+#define vec_prealloc(V, N) vec_prealloc_hap (V, N, 0, 0, 0)
- new_data_bytes = data_bytes + aligned_header_bytes;
+/** \brief Pre-allocate a vector (heap version)
- if (PREDICT_TRUE (v != 0))
- {
- void *p = v - aligned_header_bytes;
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @param P heap (zero means default heap)
+ @return V (value-result macro parameter)
+*/
+#define vec_prealloc_heap(V, N, P) vec_prealloc_hap (V, N, 0, 0, P)
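A hedged usage sketch: vec_prealloc reserves capacity up front without
changing the vector length, avoiding repeated reallocation on later adds:

  u32 *v = 0;
  vec_prealloc (v, 1024); /* capacity for 1024 elements */
  ASSERT (vec_len (v) == 0);
  vec_free (v);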
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (p));
+always_inline int
+_vec_resize_will_expand (void *v, uword n_elts, uword elt_sz)
+{
+ if (v == 0)
+ return 1;
+
+ /* Vector header must start heap object. */
+ ASSERT (clib_mem_heap_is_heap_object (vec_get_heap (v), vec_header (v)));
+
+ n_elts += _vec_len (v);
+ if ((n_elts * elt_sz) <= vec_max_bytes (v))
+ return 0;
- /* Typically we'll not need to resize. */
- if (new_data_bytes <= clib_mem_size (p))
- return 0;
- }
return 1;
}
@@ -217,34 +227,7 @@ _vec_resize_will_expand (void *v,
*/
#define vec_resize_will_expand(V, N) \
- ({ \
- word _v (n) = (N); \
- word _v (l) = vec_len (V); \
- _vec_resize_will_expand ((V), _v (n), \
- (_v (l) + _v (n)) * sizeof ((V)[0]), 0, 0); \
- })
-
-/** \brief Predicate function, says whether the supplied vector is a clib heap
- object (general version).
-
- @param v pointer to a vector
- @param header_bytes vector header size in bytes (may be zero)
- @return 0 or 1
-*/
-uword clib_mem_is_vec_h (void *v, uword header_bytes);
-
-
-/** \brief Predicate function, says whether the supplied vector is a clib heap
- object
-
- @param v pointer to a vector
- @return 0 or 1
-*/
-always_inline uword
-clib_mem_is_vec (void *v)
-{
- return clib_mem_is_vec_h (v, 0);
-}
+ _vec_resize_will_expand (V, N, _vec_elt_sz (V))
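Illustrative sketch of the intended use: checking, before an append, whether
pointers into the vector are about to be invalidated by a reallocation:

  static int
  will_move (u32 *v)
  {
    /* nonzero if adding one element reallocates (and may move) the vector */
    return vec_resize_will_expand (v, 1);
  }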
/* Local variable naming macro (prevents collisions with other macro naming). */
#define _v(var) _vec_##var
@@ -258,31 +241,36 @@ clib_mem_is_vec (void *v)
@param N number of elements to add
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
- @param S numa_id (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_resize_has(V,N,H,A,S) \
-do { \
- word _v(n) = (N); \
- word _v(l) = vec_len (V); \
- V = _vec_resize_numa ((V), _v(n), \
- (_v(l) + _v(n)) * sizeof ((V)[0]), \
- (H), (A),(S)); \
-} while (0)
+static_always_inline void
+_vec_resize (void **vp, uword n_add, uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = *vp;
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = _vec_alloc_internal (n_add, &va);
+ return;
+ }
-/** \brief Resize a vector (less general version).
- Add N elements to end of given vector V, return pointer to start of vector.
- Vector will have room for H header bytes and will have user's data aligned
- at alignment A (rounded to next power of 2).
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, _vec_len (v) + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, _vec_len (v) + n_add, elt_sz);
+}
- @param V pointer to a vector
- @param N number of elements to add
- @param H header size in bytes (may be zero)
- @param A alignment (may be zero)
- @return V (value-result macro parameter)
-*/
-#define vec_resize_ha(V,N,H,A) vec_resize_has(V,N,H,A,VEC_NUMA_UNSPECIFIED)
+#define vec_resize_ha(V, N, H, A) \
+ _vec_resize ((void **) &(V), N, H, _vec_align (V, A), _vec_elt_sz (V))
/** \brief Resize a vector (no header, unspecified alignment)
Add N elements to end of given vector V, return pointer to start of vector.
@@ -317,12 +305,14 @@ do { \
@return V (value-result macro parameter)
*/
-#define vec_alloc_ha(V,N,H,A) \
-do { \
- uword _v(l) = vec_len (V); \
- vec_resize_ha (V, N, H, A); \
- _vec_len (V) = _v(l); \
-} while (0)
+#define vec_alloc_ha(V, N, H, A) \
+ do \
+ { \
+ uword _v (l) = vec_len (V); \
+ vec_resize_ha (V, N, H, A); \
+ vec_set_len (V, _v (l)); \
+ } \
+ while (0)
/** \brief Allocate space for N more elements
(no header, unspecified alignment)
@@ -347,13 +337,14 @@ do { \
@param N number of elements to add
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
+ @param P heap (may be zero)
@return V new vector
*/
-#define vec_new_ha(T,N,H,A) \
-({ \
- word _v(n) = (N); \
- (T *)_vec_resize ((T *) 0, _v(n), _v(n) * sizeof (T), (H), (A)); \
-})
+#define vec_new_generic(T, N, H, A, P) \
+ _vec_alloc_internal (N, &((vec_attr_t){ .align = _vec_align ((T *) 0, A), \
+ .hdr_sz = (H), \
+ .heap = (P), \
+ .elt_sz = sizeof (T) }))
/** \brief Create new vector of given type and length
(unspecified alignment, no header).
@@ -362,7 +353,7 @@ do { \
@param N number of elements to add
@return V new vector
*/
-#define vec_new(T,N) vec_new_ha(T,N,0,0)
+#define vec_new(T, N) vec_new_generic (T, N, 0, 0, 0)
/** \brief Create new vector of given type and length
(alignment specified, no header).
@@ -371,28 +362,32 @@ do { \
@param A alignment (may be zero)
@return V new vector
*/
-#define vec_new_aligned(T,N,A) vec_new_ha(T,N,0,A)
-
-/** \brief Free vector's memory (general version)
+#define vec_new_aligned(T, N, A) vec_new_generic (T, N, 0, A, 0)
+/** \brief Create new vector of given type and length
+ (heap specified, no header).
- @param V pointer to a vector
- @param H size of header in bytes
- @return V (value-result parameter, V=0)
+ @param T type of elements in new vector
+ @param N number of elements to add
+ @param P heap (may be zero)
+ @return V new vector
*/
-#define vec_free_h(V,H) \
-do { \
- if (V) \
- { \
- clib_mem_free (vec_header ((V), (H))); \
- V = 0; \
- } \
-} while (0)
+#define vec_new_heap(T, N, P) vec_new_generic (T, N, 0, 0, P)
/** \brief Free vector's memory (no header).
@param V pointer to a vector
@return V (value-result parameter, V=0)
*/
-#define vec_free(V) vec_free_h(V,0)
+
+static_always_inline void
+_vec_free (void **vp)
+{
+ if (vp[0] == 0)
+ return;
+ clib_mem_heap_free (vec_get_heap (vp[0]), vec_header (vp[0]));
+ vp[0] = 0;
+}
+
+#define vec_free(V) _vec_free ((void **) &(V))
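Basic lifecycle as a hedged sketch (vec_add1 is defined further below; a NULL
pointer is a valid empty vector):

  u32 *v = 0;
  for (u32 i = 0; i < 10; i++)
    vec_add1 (v, i); /* may reallocate; the macro updates v in place */
  ASSERT (vec_len (v) == 10);
  vec_free (v); /* frees the memory and resets v to 0 */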
void vec_free_not_inline (void *v);
@@ -407,34 +402,27 @@ void vec_free_not_inline (void *v);
@param V pointer to a vector
@param H size of header in bytes
@param A alignment (may be zero)
- @param S numa (may be VEC_NUMA_UNSPECIFIED)
@return Vdup copy of vector
*/
-#define vec_dup_ha_numa(V,H,A,S) \
-({ \
- __typeof__ ((V)[0]) * _v(v) = 0; \
- uword _v(l) = vec_len (V); \
- if (_v(l) > 0) \
- { \
- vec_resize_has (_v(v), _v(l), (H), (A), (S)); \
- clib_memcpy_fast (_v(v), (V), _v(l) * sizeof ((V)[0]));\
- } \
- _v(v); \
-})
-
-/** \brief Return copy of vector (VEC_NUMA_UNSPECIFIED).
-
- @param V pointer to a vector
- @param H size of header in bytes
- @param A alignment (may be zero)
+static_always_inline void *
+_vec_dup (void *v, uword hdr_size, uword align, uword elt_sz)
+{
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align };
+ void *n = 0;
- @return Vdup copy of vector
-*/
-#define vec_dup_ha(V,H,A) \
- vec_dup_ha_numa(V,H,A,VEC_NUMA_UNSPECIFIED)
+ if (len)
+ {
+ n = _vec_alloc_internal (len, &va);
+ clib_memcpy_fast (n, v, len * elt_sz);
+ }
+ return n;
+}
+#define vec_dup_ha(V, H, A) \
+ _vec_dup ((void *) (V), H, _vec_align (V, A), _vec_elt_sz (V))
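Usage sketch, assuming the usual no-header wrapper vec_dup(V) (equivalent to
vec_dup_ha(V, 0, 0)):

  u32 *v = vec_new (u32, 8); /* vector of 8 u32 elements */
  u32 *copy = vec_dup (v);   /* independent copy of the contents */
  vec_free (v);
  vec_free (copy);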
/** \brief Return copy of vector (no header, no alignment)
@@ -467,12 +455,16 @@ void vec_free_not_inline (void *v);
@param NEW_V pointer to new vector
@param OLD_V pointer to old vector
*/
-#define vec_clone(NEW_V,OLD_V) \
-do { \
- (NEW_V) = 0; \
- (NEW_V) = _vec_resize ((NEW_V), vec_len (OLD_V), \
- vec_len (OLD_V) * sizeof ((NEW_V)[0]), (0), (0)); \
-} while (0)
+
+static_always_inline void
+_vec_clone (void **v1p, void *v2, uword align, uword elt_sz)
+{
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align };
+ v1p[0] = _vec_alloc_internal (vec_len (v2), &va);
+}
+#define vec_clone(NEW_V, OLD_V) \
+ _vec_clone ((void **) &(NEW_V), OLD_V, _vec_align (NEW_V, 0), \
+ _vec_elt_sz (NEW_V))
/** \brief Make sure vector is long enough for given index (general version).
@@ -480,39 +472,53 @@ do { \
@param I vector index which will be valid upon return
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
- @param N numa_id (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_validate_han(V,I,H,A,N) \
-do { \
- void *oldheap; \
- STATIC_ASSERT(A==0 || ((A % sizeof(V[0]))==0) \
- || ((sizeof(V[0]) % A) == 0), \
- "vector validate aligned on incorrectly sized object"); \
- word _v(i) = (I); \
- word _v(l) = vec_len (V); \
- if (_v(i) >= _v(l)) \
- { \
- /* switch to the per-numa heap if directed */ \
- if (PREDICT_FALSE(N != VEC_NUMA_UNSPECIFIED)) \
- { \
- oldheap = clib_mem_get_per_cpu_heap(); \
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap(N)); \
- } \
- \
- vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
- /* Must zero new space since user may have previously \
- used e.g. _vec_len (v) -= 10 */ \
- clib_memset ((V) + _v(l), 0, \
- (1 + (_v(i) - _v(l))) * sizeof ((V)[0])); \
- /* Switch back to the global heap */ \
- if (PREDICT_FALSE (N != VEC_NUMA_UNSPECIFIED)) \
- clib_mem_set_per_cpu_heap (oldheap); \
- } \
-} while (0)
+always_inline void
+_vec_zero_elts (void *v, uword first, uword count, uword elt_sz)
+{
+ clib_memset_u8 (v + (first * elt_sz), 0, count * elt_sz);
+}
+#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, sizeof ((V)[0]))
-#define vec_validate_ha(V,I,H,A) vec_validate_han(V,I,H,A,VEC_NUMA_UNSPECIFIED)
+static_always_inline void
+_vec_validate (void **vp, uword index, uword header_size, uword align,
+ void *heap, uword elt_sz)
+{
+ void *v = *vp;
+ uword vl, n_elts = index + 1;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+      const vec_attr_t va = { .elt_sz = elt_sz,
+                              .align = align,
+                              .hdr_sz = header_size,
+                              .heap = heap };
+ *vp = _vec_alloc_internal (n_elts, &va);
+ return;
+ }
+
+ vl = _vec_len (v);
+
+ if (PREDICT_FALSE (index < vl))
+ return;
+
+ if (PREDICT_FALSE (index >= _vec_find (v)->grow_elts + vl))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = header_size };
+ v = _vec_resize_internal (v, n_elts, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, n_elts, elt_sz);
+
+ _vec_zero_elts (v, vl, n_elts - vl, elt_sz);
+}
+
+#define vec_validate_hap(V, I, H, A, P)                                       \
+  _vec_validate ((void **) &(V), I, H, _vec_align (V, A), P, _vec_elt_sz (V))
/** \brief Make sure vector is long enough for given index
(no header, unspecified alignment)
@@ -521,7 +527,7 @@ do { \
@param I vector index which will be valid upon return
@return V (value-result macro parameter)
*/
-#define vec_validate(V,I) vec_validate_ha(V,I,0,0)
+#define vec_validate(V, I) vec_validate_hap (V, I, 0, 0, 0)
/** \brief Make sure vector is long enough for given index
(no header, specified alignment)
@@ -532,7 +538,18 @@ do { \
@return V (value-result macro parameter)
*/
-#define vec_validate_aligned(V,I,A) vec_validate_ha(V,I,0,A)
+#define vec_validate_aligned(V, I, A) vec_validate_hap (V, I, 0, A, 0)
+
+/** \brief Make sure vector is long enough for given index
+ (no header, specified heap)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param H heap (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_validate_heap(V, I, P) vec_validate_hap (V, I, 0, 0, P)
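Hedged usage sketch: vec_validate grows the vector as needed and zeroes the
new elements, so a previously out-of-range index becomes safe to touch:

  u64 *counters = 0;
  vec_validate (counters, 100); /* len becomes 101, new elements zeroed */
  counters[100] += 1;
  vec_free (counters);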
/** \brief Make sure vector is long enough for given index
and initialize empty space (general version)
@@ -544,20 +561,22 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_validate_init_empty_ha(V,I,INIT,H,A) \
-do { \
- word _v(i) = (I); \
- word _v(l) = vec_len (V); \
- if (_v(i) >= _v(l)) \
- { \
- vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
- while (_v(l) <= _v(i)) \
- { \
- (V)[_v(l)] = (INIT); \
- _v(l)++; \
- } \
- } \
-} while (0)
+#define vec_validate_init_empty_ha(V, I, INIT, H, A) \
+ do \
+ { \
+ word _v (i) = (I); \
+ word _v (l) = vec_len (V); \
+ if (_v (i) >= _v (l)) \
+ { \
+ vec_resize_ha (V, 1 + (_v (i) - _v (l)), H, A); \
+ while (_v (l) <= _v (i)) \
+ { \
+ (V)[_v (l)] = (INIT); \
+ _v (l)++; \
+ } \
+ } \
+ } \
+ while (0)
/** \brief Make sure vector is long enough for given index
and initialize empty space (no header, unspecified alignment)
@@ -591,12 +610,40 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_add1_ha(V,E,H,A) \
-do { \
- word _v(l) = vec_len (V); \
- V = _vec_resize ((V), 1, (_v(l) + 1) * sizeof ((V)[0]), (H), (A)); \
- (V)[_v(l)] = (E); \
-} while (0)
+
+static_always_inline void *
+_vec_add1 (void **vp, uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ return *vp = _vec_alloc_internal (1, &va);
+ }
+
+ len = _vec_len (v);
+
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + 1, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + 1, elt_sz);
+
+ return v + len * elt_sz;
+}
+
+#define vec_add1_ha(V, E, H, A) \
+ ((__typeof__ ((V)[0]) *) _vec_add1 ((void **) &(V), H, _vec_align (V, A), \
+ _vec_elt_sz (V)))[0] = (E)
/** \brief Add 1 element to end of vector (unspecified alignment).
@@ -625,13 +672,41 @@ do { \
@param A alignment (may be zero)
@return V and P (value-result macro parameters)
*/
-#define vec_add2_ha(V,P,N,H,A) \
-do { \
- word _v(n) = (N); \
- word _v(l) = vec_len (V); \
- V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
- P = (V) + _v(l); \
-} while (0)
+
+static_always_inline void
+_vec_add2 (void **vp, void **pp, uword n_add, uword hdr_sz, uword align,
+ uword elt_sz)
+{
+ void *v = *vp;
+ uword len;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = *pp = _vec_alloc_internal (n_add, &va);
+ return;
+ }
+
+ len = _vec_len (v);
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + n_add, elt_sz);
+
+ *pp = v + len * elt_sz;
+}
+
+#define vec_add2_ha(V, P, N, H, A) \
+ _vec_add2 ((void **) &(V), (void **) &(P), N, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
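Usage sketch for the no-header wrapper vec_add2 (as used in the deleted test
code above): reserve N elements and obtain a pointer to them in one step:

  typedef struct { int x, y; } point_t; /* hypothetical element type */
  point_t *pts = 0, *p;
  vec_add2 (pts, p, 4); /* pts may move; p points at the 4 new slots */
  for (int i = 0; i < 4; i++)
    p[i].x = p[i].y = i;
  vec_free (pts);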
/** \brief Add N elements to end of vector V,
return pointer to new elements in P. (no header, unspecified alignment)
@@ -665,19 +740,47 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
+static_always_inline void
+_vec_add (void **vp, void *e, word n_add, uword hdr_sz, uword align,
+ uword elt_sz)
+{
+ void *v = *vp;
+ uword len;
+
+ ASSERT (n_add >= 0);
+
+ if (n_add < 1)
+ return;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = v = _vec_alloc_internal (n_add, &va);
+ clib_memcpy_fast (v, e, n_add * elt_sz);
+ return;
+ }
+
+ len = _vec_len (v);
+
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + n_add, elt_sz);
+
+ clib_memcpy_fast (v + len * elt_sz, e, n_add * elt_sz);
+}
+
#define vec_add_ha(V, E, N, H, A) \
- do \
- { \
- word _v (n) = (N); \
- if (PREDICT_TRUE (_v (n) > 0)) \
- { \
- word _v (l) = vec_len (V); \
- V = _vec_resize ((V), _v (n), (_v (l) + _v (n)) * sizeof ((V)[0]), \
- (H), (A)); \
- clib_memcpy_fast ((V) + _v (l), (E), _v (n) * sizeof ((V)[0])); \
- } \
- } \
- while (0)
+ _vec_add ((void **) &(V), (void *) (E), N, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
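Usage sketch, assuming the usual no-header wrapper vec_add(V, E, N):
bulk-append from a plain C array:

  u32 tmp[4] = { 1, 2, 3, 4 };
  u32 *v = 0;
  vec_add (v, tmp, 4); /* v = { 1, 2, 3, 4 } */
  vec_free (v);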
/** \brief Add N elements to end of vector V (no header, unspecified alignment)
@@ -703,14 +806,16 @@ do { \
@param V pointer to a vector
@return E element removed from the end of the vector
*/
-#define vec_pop(V) \
-({ \
- uword _v(l) = vec_len (V); \
- ASSERT (_v(l) > 0); \
- _v(l) -= 1; \
- _vec_len (V) = _v (l); \
- (V)[_v(l)]; \
-})
+#define vec_pop(V) \
+ ({ \
+ uword _v (l) = vec_len (V); \
+ __typeof__ ((V)[0]) _v (rv); \
+ ASSERT (_v (l) > 0); \
+ _v (l) -= 1; \
+ _v (rv) = (V)[_v (l)]; \
+ vec_set_len (V, _v (l)); \
+ (_v (rv)); \
+ })
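Usage sketch: the reworked vec_pop copies the element out before shrinking the
vector, so the returned value is read before the slot is poisoned:

  u32 *stack = 0;
  vec_add1 (stack, 42);
  u32 top = vec_pop (stack); /* top == 42, vec_len (stack) == 0 */
  vec_free (stack);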
/** \brief Set E to the last element of a vector, decrement vector length
@param V pointer to a vector
@@ -737,21 +842,27 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_insert_init_empty_ha(V,N,M,INIT,H,A) \
-do { \
- word _v(l) = vec_len (V); \
- word _v(n) = (N); \
- word _v(m) = (M); \
- V = _vec_resize ((V), \
- _v(n), \
- (_v(l) + _v(n))*sizeof((V)[0]), \
- (H), (A)); \
- ASSERT (_v(m) <= _v(l)); \
- memmove ((V) + _v(m) + _v(n), \
- (V) + _v(m), \
- (_v(l) - _v(m)) * sizeof ((V)[0])); \
- clib_memset ((V) + _v(m), INIT, _v(n) * sizeof ((V)[0])); \
-} while (0)
+
+static_always_inline void
+_vec_insert (void **vp, uword n_insert, uword ins_pt, u8 init, uword hdr_sz,
+ uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
+
+ ASSERT (ins_pt <= len);
+
+ v = _vec_resize_internal (v, len + n_insert, &va);
+ clib_memmove (v + va.elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
+ (len - ins_pt) * elt_sz);
+  clib_memset_u8 (v + ins_pt * elt_sz, init, n_insert * elt_sz);
+ _vec_update_pointer (vp, v);
+}
+
+#define vec_insert_init_empty_ha(V, N, M, INIT, H, A) \
+ _vec_insert ((void **) &(V), N, M, INIT, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Insert N vector elements starting at element M,
initialize new elements to zero (general version)
@@ -825,23 +936,27 @@ do { \
@return V (value-result macro parameter)
*/
+static_always_inline void
+_vec_insert_elts (void **vp, void *e, uword n_insert, uword ins_pt,
+ uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
+
+ ASSERT (ins_pt <= len);
+
+ v = _vec_resize_internal (v, len + n_insert, &va);
+ clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
+ (len - ins_pt) * elt_sz);
+ _vec_zero_elts (v, ins_pt, n_insert, elt_sz);
+ clib_memcpy_fast (v + ins_pt * elt_sz, e, n_insert * elt_sz);
+ _vec_update_pointer (vp, v);
+}
+
#define vec_insert_elts_ha(V, E, N, M, H, A) \
- do \
- { \
- word _v (n) = (N); \
- if (PREDICT_TRUE (_v (n) > 0)) \
- { \
- word _v (l) = vec_len (V); \
- word _v (m) = (M); \
- V = _vec_resize ((V), _v (n), (_v (l) + _v (n)) * sizeof ((V)[0]), \
- (H), (A)); \
- ASSERT (_v (m) <= _v (l)); \
- memmove ((V) + _v (m) + _v (n), (V) + _v (m), \
- (_v (l) - _v (m)) * sizeof ((V)[0])); \
- clib_memcpy_fast ((V) + _v (m), (E), _v (n) * sizeof ((V)[0])); \
- } \
- } \
- while (0)
+ _vec_insert_elts ((void **) &(V), E, N, M, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Insert N vector elements starting at element M,
insert given elements (no header, unspecified alignment)
@@ -873,57 +988,66 @@ do { \
@param M first element to delete
@return V (value-result macro parameter)
*/
-#define vec_delete(V,N,M) \
-do { \
- word _v(l) = vec_len (V); \
- word _v(n) = (N); \
- word _v(m) = (M); \
- /* Copy over deleted elements. */ \
- if (_v(l) - _v(n) - _v(m) > 0) \
- memmove ((V) + _v(m), (V) + _v(m) + _v(n), \
- (_v(l) - _v(n) - _v(m)) * sizeof ((V)[0])); \
- /* Zero empty space at end (for future re-allocation). */ \
- if (_v(n) > 0) \
- clib_memset ((V) + _v(l) - _v(n), 0, _v(n) * sizeof ((V)[0])); \
- _vec_len (V) -= _v(n); \
- CLIB_MEM_POISON(vec_end(V), _v(n) * sizeof ((V)[0])); \
-} while (0)
+
+static_always_inline void
+_vec_delete (void *v, uword n_del, uword first, uword elt_sz)
+{
+ word n_bytes_del, n_bytes_to_move, len = vec_len (v);
+ u8 *dst;
+
+ if (n_del == 0)
+ return;
+
+ ASSERT (first + n_del <= len);
+
+ n_bytes_del = n_del * elt_sz;
+ n_bytes_to_move = (len - first - n_del) * elt_sz;
+ dst = v + first * elt_sz;
+
+ if (n_bytes_to_move > 0)
+ clib_memmove (dst, dst + n_bytes_del, n_bytes_to_move);
+ clib_memset (dst + n_bytes_to_move, 0, n_bytes_del);
+
+ _vec_set_len (v, _vec_len (v) - n_del, elt_sz);
+}
+
+#define vec_delete(V, N, M) _vec_delete ((void *) (V), N, M, _vec_elt_sz (V))
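Usage sketch pairing the no-header wrappers (both appear in the deleted test
code above): vec_insert opens a zero-filled gap, vec_delete closes one:

  u32 *v = 0;
  vec_add1 (v, 1);
  vec_add1 (v, 2);
  vec_insert (v, 2, 1); /* v = { 1, 0, 0, 2 } */
  vec_delete (v, 2, 1); /* v = { 1, 2 } again */
  vec_free (v);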
/** \brief Delete the element at index I
@param V pointer to a vector
@param I index to delete
*/
-#define vec_del1(v,i) \
-do { \
- uword _vec_del_l = _vec_len (v) - 1; \
- uword _vec_del_i = (i); \
- if (_vec_del_i < _vec_del_l) \
- (v)[_vec_del_i] = (v)[_vec_del_l]; \
- _vec_len (v) = _vec_del_l; \
- CLIB_MEM_POISON(vec_end(v), sizeof ((v)[0])); \
-} while (0)
-/** \brief Append v2 after v1. Result in v1.
- @param V1 target vector
- @param V2 vector to append
-*/
+static_always_inline void
+_vec_del1 (void *v, uword index, uword elt_sz)
+{
+ uword len = _vec_len (v) - 1;
-#define vec_append(v1, v2) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ((v1), _v (l2), \
- (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, 0); \
- clib_memcpy_fast ((v1) + _v (l1), (v2), \
- _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ if (index < len)
+ clib_memcpy_fast (v + index * elt_sz, v + len * elt_sz, elt_sz);
+
+ _vec_set_len (v, len, elt_sz);
+}
+
+#define vec_del1(v, i) _vec_del1 ((void *) (v), i, _vec_elt_sz (v))
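Usage sketch: vec_del1 is O(1) because it fills the hole with the last
element, so element order is not preserved:

  u32 *v = 0;
  vec_add1 (v, 1);
  vec_add1 (v, 2);
  vec_add1 (v, 3);
  vec_del1 (v, 0); /* v = { 3, 2 } */
  vec_free (v);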
+
+static_always_inline void
+_vec_append (void **v1p, void *v2, uword v1_elt_sz, uword v2_elt_sz,
+ uword align)
+{
+ void *v1 = v1p[0];
+ uword len1 = vec_len (v1);
+ uword len2 = vec_len (v2);
+
+ if (PREDICT_TRUE (len2 > 0))
+ {
+ const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+ v1 = _vec_resize_internal (v1, len1 + len2, &va);
+ clib_memcpy_fast (v1 + len1 * v1_elt_sz, v2, len2 * v2_elt_sz);
+ _vec_update_pointer (v1p, v1);
+ }
+}
/** \brief Append v2 after v1. Result in v1. Specified alignment.
@param V1 target vector
@@ -932,72 +1056,66 @@ do { \
*/
#define vec_append_aligned(v1, v2, align) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ( \
- (v1), _v (l2), (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, align); \
- clib_memcpy_fast ((v1) + _v (l1), (v2), \
- _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ _vec_append ((void **) &(v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2), _vec_align (v1, align))
-/** \brief Prepend v2 before v1. Result in v1.
+/** \brief Append v2 after v1. Result in v1.
@param V1 target vector
- @param V2 vector to prepend
+ @param V2 vector to append
*/
-#define vec_prepend(v1, v2) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ((v1), _v (l2), \
- (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, 0); \
- memmove ((v1) + _v (l2), (v1), _v (l1) * sizeof ((v1)[0])); \
- clib_memcpy_fast ((v1), (v2), _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+#define vec_append(v1, v2) vec_append_aligned (v1, v2, 0)
+
+static_always_inline void
+_vec_prepend (void *restrict *v1p, void *restrict v2, uword v1_elt_sz,
+ uword v2_elt_sz, uword align)
+{
+ void *restrict v1 = v1p[0];
+ uword len1 = vec_len (v1);
+ uword len2 = vec_len (v2);
+
+ if (PREDICT_TRUE (len2 > 0))
+ {
+ /* prepending vector to itself would result in use-after-free */
+ ASSERT (v1 != v2);
+ const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+ v1 = _vec_resize_internal (v1, len1 + len2, &va);
+ clib_memmove (v1 + len2 * v2_elt_sz, v1, len1 * v1_elt_sz);
+ clib_memcpy_fast (v1, v2, len2 * v2_elt_sz);
+ _vec_update_pointer ((void **) v1p, v1);
+ }
+}
/** \brief Prepend v2 before v1. Result in v1. Specified alignment
@param V1 target vector
- @param V2 vector to prepend
+ @param V2 vector to prepend, V1 != V2
@param align required alignment
*/
#define vec_prepend_aligned(v1, v2, align) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ( \
- (v1), _v (l2), (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, align); \
- memmove ((v1) + _v (l2), (v1), _v (l1) * sizeof ((v1)[0])); \
- clib_memcpy_fast ((v1), (v2), _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ _vec_prepend ((void **) &(v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2), _vec_align (v1, align))
+
+/** \brief Prepend v2 before v1. Result in v1.
+ @param V1 target vector
+ @param V2 vector to prepend, V1 != V2
+*/
+
+#define vec_prepend(v1, v2) vec_prepend_aligned (v1, v2, 0)
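Usage sketch for the pair; the new ASSERT in _vec_prepend makes the V1 != V2
requirement explicit:

  u8 *a = 0, *b = 0;
  vec_add1 (a, 1);
  vec_add1 (b, 2);
  vec_append (a, b);  /* a = { 1, 2 } */
  vec_prepend (a, b); /* a = { 2, 1, 2 } */
  vec_free (a);
  vec_free (b);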
/** \brief Zero all vector elements. Null-pointer tolerant.
@param var Vector to zero
*/
-#define vec_zero(var) \
-do { \
- if (var) \
- clib_memset ((var), 0, vec_len (var) * sizeof ((var)[0])); \
-} while (0)
+static_always_inline void
+_vec_zero (void *v, uword elt_sz)
+{
+ uword len = vec_len (v);
+
+ if (len)
+ clib_memset_u8 (v, 0, len * elt_sz);
+}
+
+#define vec_zero(var) _vec_zero ((void *) (var), _vec_elt_sz (var))
/** \brief Set all vector elements to given value. Null-pointer tolerant.
@param v vector to set
@@ -1021,8 +1139,23 @@ do { \
@param v2 Pointer to a vector
@return 1 if equal, 0 if unequal
*/
-#define vec_is_equal(v1,v2) \
- (vec_len (v1) == vec_len (v2) && ! memcmp ((v1), (v2), vec_len (v1) * sizeof ((v1)[0])))
+static_always_inline int
+_vec_is_equal (void *v1, void *v2, uword v1_elt_sz, uword v2_elt_sz)
+{
+ uword vec_len_v1 = vec_len (v1);
+
+ if ((vec_len_v1 != vec_len (v2)) || (v1_elt_sz != v2_elt_sz))
+ return 0;
+
+ if ((vec_len_v1 == 0) || (memcmp (v1, v2, vec_len_v1 * v1_elt_sz) == 0))
+ return 1;
+
+ return 0;
+}
+
+#define vec_is_equal(v1, v2) \
+ _vec_is_equal ((void *) (v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2))
/** \brief Compare two vectors (only applicable to vectors of signed numbers).
Used in qsort compare functions.
@@ -1107,15 +1240,16 @@ do { \
@param S pointer to string buffer.
@param L string length (NOT including the terminating NULL; a la strlen())
*/
-#define vec_validate_init_c_string(V, S, L) \
- do { \
- vec_reset_length (V); \
- vec_validate ((V), (L)); \
- if ((S) && (L)) \
- clib_memcpy_fast ((V), (S), (L)); \
- (V)[(L)] = 0; \
- } while (0)
-
+#define vec_validate_init_c_string(V, S, L) \
+ do \
+ { \
+ vec_reset_length (V); \
+ vec_validate (V, (L)); \
+ if ((S) && (L)) \
+ clib_memcpy_fast (V, (S), (L)); \
+ (V)[(L)] = 0; \
+ } \
+ while (0)
/** \brief Test whether a vector is a NULL terminated c-string.
@@ -1130,23 +1264,12 @@ do { \
@param V (possibly NULL) pointer to a vector.
@return V (value-result macro parameter)
*/
-#define vec_terminate_c_string(V) \
- do { \
- u32 vl = vec_len ((V)); \
- if (!vec_c_string_is_terminated(V)) \
- { \
- vec_validate ((V), vl); \
- (V)[vl] = 0; \
- } \
- } while (0)
+#define vec_terminate_c_string(V) \
+ do \
+ { \
+ if (!vec_c_string_is_terminated (V)) \
+ vec_add1 (V, 0); \
+ } \
+ while (0)
#endif /* included_vec_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
index 5cf5d3b76a1..5d386b1eaad 100644
--- a/src/vppinfra/vec_bootstrap.h
+++ b/src/vppinfra/vec_bootstrap.h
@@ -55,12 +55,15 @@
typedef struct
{
u32 len; /**< Number of elements in vector (NOT its allocated length). */
- u8 numa_id; /**< NUMA id */
- u8 vpad[3]; /**< pad to 8 bytes */
+ u8 hdr_size; /**< header size divided by VEC_MIN_ALIGN */
+ u8 log2_align : 7; /**< data alignment */
+ u8 default_heap : 1; /**< vector uses default heap */
+ u8 grow_elts; /**< number of elts vector can grow without realloc */
+ u8 vpad[1]; /**< pad to 8 bytes */
u8 vector_data[0]; /**< Vector data . */
} vec_header_t;
-#define VEC_NUMA_UNSPECIFIED (0xFF)
+#define VEC_MIN_ALIGN 8
/** \brief Find the vector header
@@ -71,15 +74,23 @@ typedef struct
@return pointer to the vector's vector_header_t
*/
#define _vec_find(v) ((vec_header_t *) (v) - 1)
+#define _vec_heap(v) (((void **) (_vec_find (v)))[-1])
+
+always_inline uword __vec_align (uword data_align, uword configured_align);
+always_inline uword __vec_elt_sz (uword elt_sz, int is_void);
#define _vec_round_size(s) \
(((s) + sizeof (uword) - 1) &~ (sizeof (uword) - 1))
+#define _vec_is_void(P) \
+ __builtin_types_compatible_p (__typeof__ ((P)[0]), void)
+#define _vec_elt_sz(V) __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V))
+#define _vec_align(V, A) __vec_align (__alignof__((V)[0]), A)
-always_inline uword
-vec_header_bytes (uword header_bytes)
+always_inline __clib_nosanitize_addr uword
+vec_get_header_size (void *v)
{
- return round_pow2 (header_bytes + sizeof (vec_header_t),
- sizeof (vec_header_t));
+ uword header_size = _vec_find (v)->hdr_size * VEC_MIN_ALIGN;
+ return header_size;
}
/** \brief Find a user vector header
@@ -89,9 +100,9 @@ vec_header_bytes (uword header_bytes)
*/
always_inline void *
-vec_header (void *v, uword header_bytes)
+vec_header (void *v)
{
- return v - vec_header_bytes (header_bytes);
+ return v ? v - vec_get_header_size (v) : 0;
}
/** \brief Find the end of user vector header
@@ -101,92 +112,94 @@ vec_header (void *v, uword header_bytes)
*/
always_inline void *
-vec_header_end (void *v, uword header_bytes)
+vec_header_end (void *v)
{
- return v + vec_header_bytes (header_bytes);
+ return v + vec_get_header_size (v);
}
-always_inline uword
-vec_aligned_header_bytes (uword header_bytes, uword align)
-{
- return round_pow2 (header_bytes + sizeof (vec_header_t), align);
-}
+/** \brief Number of elements in vector (rvalue-only, NULL tolerant)
-always_inline void *
-vec_aligned_header (void *v, uword header_bytes, uword align)
-{
- return v - vec_aligned_header_bytes (header_bytes, align);
-}
+ vec_len (v) checks for NULL, but cannot be used as an lvalue.
+ If in doubt, use vec_len...
+*/
-always_inline void *
-vec_aligned_header_end (void *v, uword header_bytes, uword align)
+static_always_inline u32
+__vec_len (void *v)
{
- return v + vec_aligned_header_bytes (header_bytes, align);
+ return _vec_find (v)->len;
}
+#define _vec_len(v) __vec_len ((void *) (v))
+#define vec_len(v) ((v) ? _vec_len(v) : 0)
-/** \brief Number of elements in vector (lvalue-capable)
-
- _vec_len (v) does not check for null, but can be used as an lvalue
- (e.g. _vec_len (v) = 99).
-*/
-
-#define _vec_len(v) (_vec_find(v)->len)
+u32 vec_len_not_inline (void *v);
-/** \brief Number of elements in vector (rvalue-only, NULL tolerant)
+/** \brief Number of data bytes in vector. */
- vec_len (v) checks for NULL, but cannot be used as an lvalue.
- If in doubt, use vec_len...
-*/
+#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
-#define vec_len(v) ((v) ? _vec_len(v) : 0)
-u32 vec_len_not_inline (void *v);
+/**
+ * Return size of memory allocated for the vector
+ *
+ * @param v vector
+ * @return memory size allocated for the vector
+ */
-/** \brief Vector's NUMA id (lvalue-capable)
+uword vec_mem_size (void *v);
- _vec_numa(v) does not check for null, but can be used as an lvalue
- (e.g. _vec_numa(v) = 1).
-*/
+/**
+ * Number of elements that can fit into generic vector
+ *
+ * @param v vector
+ * @param b extra header bytes
+ * @return number of elements that can fit into vector
+ */
-#define _vec_numa(v) (_vec_find(v)->numa_id)
+always_inline uword
+vec_max_bytes (void *v)
+{
+ return v ? vec_mem_size (v) - vec_get_header_size (v) : 0;
+}
-/** \brief Return vector's NUMA ID (rvalue-only, NULL tolerant)
- vec_numa(v) checks for NULL, but cannot be used as an lvalue.
-*/
-#define vec_numa(v) ((v) ? _vec_numa(v) : 0)
+always_inline uword
+_vec_max_len (void *v, uword elt_sz)
+{
+ return vec_max_bytes (v) / elt_sz;
+}
+#define vec_max_len(v) _vec_max_len (v, _vec_elt_sz (v))
-/** \brief Number of data bytes in vector. */
+static_always_inline void
+_vec_set_grow_elts (void *v, uword n_elts)
+{
+ uword max = pow2_mask (BITS (_vec_find (0)->grow_elts));
-#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
+ if (PREDICT_FALSE (n_elts > max))
+ n_elts = max;
-/** \brief Total number of bytes that can fit in vector with current allocation. */
+ _vec_find (v)->grow_elts = n_elts;
+}
-#define vec_capacity(v,b) \
-({ \
- void * _vec_capacity_v = (void *) (v); \
- uword _vec_capacity_b = (b); \
- _vec_capacity_b = sizeof (vec_header_t) + _vec_round_size (_vec_capacity_b); \
- _vec_capacity_v ? clib_mem_size (_vec_capacity_v - _vec_capacity_b) : 0; \
-})
+always_inline void
+_vec_set_len (void *v, uword len, uword elt_sz)
+{
+ ASSERT (v);
+ ASSERT (len <= _vec_max_len (v, elt_sz));
+ uword old_len = _vec_len (v);
+ uword grow_elts = _vec_find (v)->grow_elts;
+
+ if (len > old_len)
+ clib_mem_unpoison (v + old_len * elt_sz, (len - old_len) * elt_sz);
+ else if (len < old_len)
+ clib_mem_poison (v + len * elt_sz, (old_len - len) * elt_sz);
+
+ _vec_set_grow_elts (v, old_len + grow_elts - len);
+ _vec_find (v)->len = len;
+}
-/** \brief Total number of elements that can fit into vector. */
-#define vec_max_len(v) \
- ((v) ? (vec_capacity (v,0) - vec_header_bytes (0)) / sizeof (v[0]) : 0)
-
-/** \brief Set vector length to a user-defined value */
-#ifndef __COVERITY__ /* Coverity gets confused by ASSERT() */
-#define vec_set_len(v, l) do { \
- ASSERT(v); \
- ASSERT((l) <= vec_max_len(v)); \
- CLIB_MEM_POISON_LEN((void *)(v), _vec_len(v) * sizeof((v)[0]), (l) * sizeof((v)[0])); \
- _vec_len(v) = (l); \
-} while (0)
-#else /* __COVERITY__ */
-#define vec_set_len(v, l) do { \
- _vec_len(v) = (l); \
-} while (0)
-#endif /* __COVERITY__ */
+#define vec_set_len(v, l) _vec_set_len ((void *) v, l, _vec_elt_sz (v))
+#define vec_inc_len(v, l) vec_set_len (v, _vec_len (v) + (l))
+#define vec_dec_len(v, l) vec_set_len (v, _vec_len (v) - (l))
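Usage sketch: vec_set_len replaces the old `_vec_len (v) = n` lvalue idiom and
keeps the ASAN poison state and grow_elts accounting consistent:

  u32 *v = 0;
  vec_validate (v, 15); /* len 16 */
  vec_set_len (v, 0);   /* keep the allocation, drop the contents */
  vec_inc_len (v, 4);   /* len 4 again, within the existing capacity */
  vec_free (v);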
/** \brief Reset vector length to zero
NULL-pointer tolerant
@@ -213,26 +226,22 @@ u32 vec_len_not_inline (void *v);
#define vec_foreach(var,vec) for (var = (vec); var < vec_end (vec); var++)
/** \brief Vector iterator (reverse) */
-#define vec_foreach_backwards(var,vec) \
-for (var = vec_end (vec) - 1; var >= (vec); var--)
+#define vec_foreach_backwards(var, vec) \
+ if (vec) \
+ for (var = vec_end (vec) - 1; var >= (vec); var--)
/** \brief Iterate over vector indices. */
#define vec_foreach_index(var,v) for ((var) = 0; (var) < vec_len (v); (var)++)
/** \brief Iterate over vector indices (reverse). */
-#define vec_foreach_index_backwards(var,v) \
- for ((var) = vec_len((v)) - 1; (var) >= 0; (var)--)
-
-/** \brief return the NUMA index for a vector */
-always_inline uword
-vec_get_numa (void *v)
-{
- vec_header_t *vh;
- if (v == 0)
- return 0;
- vh = _vec_find (v);
- return vh->numa_id;
-}
+#define vec_foreach_index_backwards(var, v) \
+ if (v) \
+ for ((var) = vec_len ((v)) - 1; (var) >= 0; (var)--)
+
+#define vec_foreach_pointer(e, v) \
+ if (v) \
+ for (typeof (**v) **__ep = (v), **__end = vec_end (v), *(e) = *__ep; \
+ __ep < __end; __ep++, (e) = __ep < __end ? *__ep : (e))
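Usage sketch for the new pointer-vector iterator; the element variable is
declared by the macro itself:

  typedef struct { int id; } obj_t; /* hypothetical element type */
  obj_t o1 = { 1 }, o2 = { 2 };
  obj_t **objs = 0;
  vec_add1 (objs, &o1);
  vec_add1 (objs, &o2);
  vec_foreach_pointer (e, objs)
    e->id = 0;
  vec_free (objs);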
#endif /* included_clib_vec_bootstrap_h */
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h
index 6a6635b4c93..b5544c4b975 100644
--- a/src/vppinfra/vector.h
+++ b/src/vppinfra/vector.h
@@ -65,8 +65,9 @@
#define CLIB_HAVE_VEC512
#endif
-#define _vector_size(n) __attribute__ ((vector_size (n)))
-#define _vector_size_unaligned(n) __attribute__ ((vector_size (n), __aligned__ (1)))
+#define _vector_size(n) __attribute__ ((vector_size (n), __may_alias__))
+#define _vector_size_unaligned(n) \
+ __attribute__ ((vector_size (n), __aligned__ (1), __may_alias__))
#define foreach_vec64i _(i,8,8) _(i,16,4) _(i,32,2)
#define foreach_vec64u _(u,8,8) _(u,16,4) _(u,32,2)
@@ -97,22 +98,53 @@
#define foreach_vec foreach_int_vec foreach_uint_vec foreach_float_vec
-/* *INDENT-OFF* */
-
/* Type Definitions */
-#define _(t,s,c) \
-typedef t##s t##s##x##c _vector_size (s/8*c); \
-typedef t##s t##s##x##c##u _vector_size_unaligned (s/8*c); \
-typedef union { \
- t##s##x##c as_##t##s##x##c; \
- t##s as_##t##s[c]; \
-} t##s##x##c##_union_t;
+#define _(t, s, c) \
+ typedef t##s t##s##x##c _vector_size (s / 8 * c); \
+ typedef t##s t##s##x##c##u _vector_size_unaligned (s / 8 * c); \
+ typedef union \
+ { \
+ t##s##x##c as_##t##s##x##c; \
+ t##s as_##t##s[c]; \
+ } t##s##x##c##_union_t;
+/* clang-format off */
foreach_vec64i foreach_vec64u foreach_vec64f
foreach_vec128i foreach_vec128u foreach_vec128f
foreach_vec256i foreach_vec256u foreach_vec256f
foreach_vec512i foreach_vec512u foreach_vec512f
+/* clang-format on */
+#undef _
+
+ typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec128_t;
+
+typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec256i foreach_vec256u foreach_vec256f
+#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec256_t;
+
+typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec512i foreach_vec512u foreach_vec512f
#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
+ foreach_vec256i foreach_vec256u foreach_vec256f
+#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[4];
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec512_t;
/* universal inlines */
#define _(t, s, c) \
@@ -125,6 +157,68 @@ foreach_vec
#undef _vector_size
+ /* _shuffle and _shuffle2 */
+#if defined(__GNUC__) && !defined(__clang__)
+#define __builtin_shufflevector(v1, v2, ...) \
+ __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
+#endif
+
+#define u8x16_shuffle(v1, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
+#define u8x32_shuffle(v1, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
+#define u8x64_shuffle(v1, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)
+
+#define u16x8_shuffle(v1, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
+#define u16x16_shuffle(v1, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
+#define u16x32_shuffle(v1, ...) \
+  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__)
+
+#define u32x4_shuffle(v1, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
+#define u32x8_shuffle(v1, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
+#define u32x16_shuffle(v1, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)
+
+#define u64x2_shuffle(v1, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
+#define u64x4_shuffle(v1, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
+#define u64x8_shuffle(v1, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)
+
+#define u8x16_shuffle2(v1, v2, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
+#define u8x32_shuffle2(v1, v2, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
+#define u8x64_shuffle2(v1, v2, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)
+
+#define u16x8_shuffle2(v1, v2, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
+#define u16x16_shuffle2(v1, v2, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
+#define u16x32_shuffle2(v1, v2, ...) \
+  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__)
+
+#define u32x4_shuffle2(v1, v2, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
+#define u32x8_shuffle2(v1, v2, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
+#define u32x16_shuffle2(v1, v2, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)
+
+#define u64x2_shuffle2(v1, v2, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
+#define u64x4_shuffle2(v1, v2, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
+#define u64x8_shuffle2(v1, v2, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
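Illustrative sketch: with compile-time constant indices these compile down to
single shuffle instructions; e.g. reversing the bytes of a 128-bit vector:

  u8x16 v = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
  u8x16 r = u8x16_shuffle (v, 15, 14, 13, 12, 11, 10, 9, 8,
                           7, 6, 5, 4, 3, 2, 1, 0);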
+
#define VECTOR_WORD_TYPE(t) t##x
#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
@@ -166,13 +260,4 @@ t##s##x##c##_splat (t##s x) \
#undef _
#endif
-/* *INDENT-ON* */
-
#endif /* included_clib_vector_h */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vector/array_mask.h b/src/vppinfra/vector/array_mask.h
index 8f2e1d7d88c..3d4a82ac01b 100644
--- a/src/vppinfra/vector/array_mask.h
+++ b/src/vppinfra/vector/array_mask.h
@@ -17,59 +17,114 @@
static_always_inline void
clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
{
- u32 i;
#if defined(CLIB_HAVE_VEC512)
u32x16 mask16 = u32x16_splat (mask);
-
- for (i = 0; i + 16 <= n_elts; i += 16)
- *((u32x16u *) (src + i)) &= mask16;
- n_elts -= i;
- if (n_elts)
+ if (n_elts <= 16)
{
- u16 m = pow2_mask (n_elts);
- u32x16_mask_store (u32x16_mask_load_zero (src + i, m) & mask16, src + i,
- m);
+ u32 m = pow2_mask (n_elts);
+ u32x16 r = u32x16_mask_load_zero (src, m);
+ u32x16_mask_store (r & mask16, src, m);
+ return;
}
- return;
+ for (; n_elts >= 16; n_elts -= 16, src += 16)
+ *((u32x16u *) src) &= mask16;
+ *((u32x16u *) (src + n_elts - 16)) &= mask16;
#elif defined(CLIB_HAVE_VEC256)
u32x8 mask8 = u32x8_splat (mask);
-
- for (i = 0; i + 8 <= n_elts; i += 8)
- *((u32x8u *) (src + i)) &= mask8;
- n_elts -= i;
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
- if (n_elts)
+ if (n_elts <= 8)
{
- u8 m = pow2_mask (n_elts);
- u32x8_mask_store (u32x8_mask_load_zero (src + i, m) & mask8, src + i, m);
+ u32 m = pow2_mask (n_elts);
+ u32x8 r = u32x8_mask_load_zero (src, m);
+ u32x8_mask_store (r & mask8, src, m);
+ return;
+ }
+#else
+ if (PREDICT_FALSE (n_elts < 4))
+ {
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
+ }
+ if (n_elts <= 8)
+ {
+ u32x4 mask4 = u32x4_splat (mask);
+ *(u32x4u *) src &= mask4;
+ *(u32x4u *) (src + n_elts - 4) &= mask4;
+ return;
}
- return;
#endif
+
+ for (; n_elts >= 8; n_elts -= 8, src += 8)
+ *((u32x8u *) src) &= mask8;
+ *((u32x8u *) (src + n_elts - 8)) &= mask8;
#elif defined(CLIB_HAVE_VEC128)
u32x4 mask4 = u32x4_splat (mask);
- for (i = 0; i + 4 <= n_elts; i += 4)
- *((u32x4u *) (src + i)) &= mask4;
- n_elts -= i;
- switch (n_elts)
+ if (PREDICT_FALSE (n_elts < 4))
{
- case 3:
- src[2] &= mask;
- case 2:
- src[1] &= mask;
- case 1:
- src[0] &= mask;
- case 0:
- default:;
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
}
+
+ for (; n_elts >= 4; n_elts -= 4, src += 4)
+ *((u32x4u *) src) &= mask4;
+ *((u32x4u *) (src + n_elts - 4)) &= mask4;
return;
-#endif
+#else
while (n_elts > 0)
{
src[0] &= mask;
src++;
n_elts--;
}
+#endif
+}
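Usage sketch: mask every element of an array in place, e.g. to wrap
ring-buffer indices (note the overlapping-tail store above handles the
remainder without a scalar loop):

  u32 slots[5] = { 16, 17, 18, 19, 20 };
  clib_array_mask_u32 (slots, 15, 5); /* slots = { 0, 1, 2, 3, 4 } */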
+
+static_always_inline void
+clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
+{
+#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u32x16 r = u32x16_splat (v);
+ for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
+ u32x16_mask_store (r, a, bmp);
+#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8 r = u32x8_splat (v);
+ for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
+ u32x8_mask_store (r, a, bmp);
+#else
+ while (bmp)
+ {
+ a[get_lowest_set_bit_index (bmp)] = v;
+ bmp = clear_lowest_set_bit (bmp);
+ }
+#endif
+}
+
+static_always_inline void
+clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
+{
+ while (n_elts >= uword_bits)
+ {
+ clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
+ a += uword_bits;
+ n_elts -= uword_bits;
+ }
+
+ clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
}
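Usage sketch: store a value at every index whose bitmap bit is set:

  u32 a[63] = { 0 };
  uword bmp[1] = { 0x5 }; /* indices 0 and 2 */
  clib_array_mask_set_u32 (a, 7, bmp, 63);
  /* a[0] == 7, a[2] == 7, all other elements untouched */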
#endif
diff --git a/src/vppinfra/vector/compress.h b/src/vppinfra/vector/compress.h
index 1d5d84e77ea..5429113984b 100644
--- a/src/vppinfra/vector/compress.h
+++ b/src/vppinfra/vector/compress.h
@@ -7,6 +7,101 @@
#include <vppinfra/clib.h>
#include <vppinfra/memcpy.h>
+static_always_inline u64 *
+clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS)
+ u64x8u *sv = (u64x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u64x8_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#elif defined(CLIB_HAVE_VEC256_COMPRESS)
+ u64x4u *sv = (u64x4u *) src;
+ for (int i = 0; i < 16; i++)
+ {
+ u64x4_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 (((u8) mask) & 0x0f);
+ mask >>= 4;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u64 *
+clib_compress_u64_x64_masked (u64 *dst, u64 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u64x8u *sv = (u64x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u64x8u s = u64x8_mask_load_zero (&sv[i], mask);
+ u64x8_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4u *sv = (u64x4u *) src;
+ for (int i = 0; i < 16; i++)
+ {
+ u64x4u s = u64x4_mask_load_zero (&sv[i], mask);
+ u64x4_compress_store (s, mask, dst);
+ dst += _popcnt32 (((u8) mask) & 0x0f);
+ mask >>= 4;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 64-bit elements into destination array based on
+ * mask
+
+ @param dst destination array of u64 elements
+ @param src source array of u64 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+  @return number of elements stored in destination array
+*/
+
+static_always_inline u32
+clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
+{
+ u64 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64 * sizeof (u64));
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u64_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u64_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
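Usage sketch: mask bits are consumed LSB-first, 64 per u64 word:

  u64 src[4] = { 10, 20, 30, 40 };
  u64 dst[4];
  u64 mask = 0x5; /* keep indices 0 and 2 */
  u32 n = clib_compress_u64 (dst, src, &mask, 4);
  /* n == 2, dst = { 10, 30 } */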
+
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
{
@@ -14,9 +109,8 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
u32x16u *sv = (u32x16u *) src;
for (int i = 0; i < 4; i++)
{
- int cnt = _popcnt32 ((u16) mask);
u32x16_compress_store (sv[i], mask, dst);
- dst += cnt;
+ dst += _popcnt32 ((u16) mask);
mask >>= 16;
}
@@ -24,18 +118,46 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
u32x8u *sv = (u32x8u *) src;
for (int i = 0; i < 8; i++)
{
- int cnt = _popcnt32 ((u8) mask);
u32x8_compress_store (sv[i], mask, dst);
- dst += cnt;
+ dst += _popcnt32 ((u8) mask);
mask >>= 8;
}
#else
- while (mask)
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u32 *
+clib_compress_u32_x64_masked (u32 *dst, u32 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u32x16u *sv = (u32x16u *) src;
+ for (int i = 0; i < 4; i++)
{
- u16 bit = count_trailing_zeros (mask);
- mask = clear_lowest_set_bit (mask);
- dst++[0] = src[bit];
+ u32x16u s = u32x16_mask_load_zero (&sv[i], mask);
+ u32x16_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u16) mask);
+ mask >>= 16;
}
+
+#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8u *sv = (u32x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u32x8u s = u32x8_mask_load_zero (&sv[i], mask);
+ u32x8_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
#endif
return dst;
}
@@ -72,7 +194,154 @@ clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return dst - dst0;
- return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
+ return clib_compress_u32_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
+
+static_always_inline u16 *
+clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
+ u16x32u *sv = (u16x32u *) src;
+ for (int i = 0; i < 2; i++)
+ {
+ u16x32_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 ((u32) mask);
+ mask >>= 32;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u16 *
+clib_compress_u16_x64_masked (u16 *dst, u16 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u16x32u *sv = (u16x32u *) src;
+ for (int i = 0; i < 2; i++)
+ {
+ u16x32u s = u16x32_mask_load_zero (&sv[i], mask);
+ u16x32_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u32) mask);
+ mask >>= 32;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 16-bit elements into destination array based on
+ * mask
+
+ @param dst destination array of u16 elements
+ @param src source array of u16 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+  @return number of elements stored in destination array
+*/
+
+static_always_inline u32
+clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
+{
+ u16 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64 * sizeof (u16));
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u16_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u16_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
+
+static_always_inline u8 *
+clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64_compress_store (sv[0], mask, dst);
+ dst += _popcnt64 (mask);
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u8 *
+clib_compress_u8_x64_masked (u8 *dst, u8 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64u s = u8x64_mask_load_zero (sv, mask);
+ u8x64_compress_store (s, mask, dst);
+ dst += _popcnt64 (mask);
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 8-bit elements into destination array based on
+ * mask
+
+ @param dst destination array of u8 elements
+ @param src source array of u8 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+  @return number of elements stored in destination array
+*/
+
+static_always_inline u32
+clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
+{
+ u8 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64);
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u8_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u8_x64_masked (dst, src, mask[0] & pow2_mask (n_elts)) -
+ dst0;
}
#endif
diff --git a/src/vppinfra/vector/count_equal.h b/src/vppinfra/vector/count_equal.h
new file mode 100644
index 00000000000..ca2fbb7fd39
--- /dev/null
+++ b/src/vppinfra/vector/count_equal.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_count_equal_h
+#define included_vector_count_equal_h
+#include <vppinfra/clib.h>
+
+static_always_inline uword
+clib_count_equal_u64 (u64 *data, uword max_count)
+{
+ uword count;
+ u64 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u64x4 splat = u64x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 8;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u32 (u32 *data, uword max_count)
+{
+ uword count;
+ u32 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ u32x16 splat = u32x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u32 bmp;
+ bmp = u32x16_is_equal_mask (u32x16_load_unaligned (data), splat);
+ if (bmp != pow2_mask (16))
+ return count + count_trailing_zeros (~bmp);
+
+ data += 16;
+ count += 16;
+ }
+ if (count == max_count)
+ return count;
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u32 bmp =
+ u32x16_is_equal_mask (u32x16_mask_load_zero (data, mask), splat) &
+ mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ u32x8 splat = u32x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u32 bmp;
+#ifdef __AVX512F__
+ bmp = u32x8_is_equal_mask (u32x8_load_unaligned (data), splat);
+ if (bmp != pow2_mask (8))
+ return count + count_trailing_zeros (~bmp);
+#else
+ bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return count + count_trailing_zeros (~bmp) / 4;
+#endif
+
+ data += 8;
+ count += 8;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u32 bmp =
+ u32x8_is_equal_mask (u32x8_mask_load_zero (data, mask), splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u32x4 splat = u32x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+ if (bmp != pow2_mask (4 * 4))
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u16 (u16 *data, uword max_count)
+{
+ uword count;
+ u16 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u16x16 splat = u16x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u16x8 splat = u16x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u8 (u8 *data, uword max_count)
+{
+ uword count;
+ u8 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ u8x64 splat = u8x64_splat (first);
+ while (count + 63 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x64_is_equal_mask (u8x64_load_unaligned (data), splat);
+ if (bmp != -1)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 64;
+ count += 64;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ else
+ {
+ u64 mask = pow2_mask (max_count - count);
+ u64 bmp =
+ u8x64_is_equal_mask (u8x64_mask_load_zero (data, mask), splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC256)
+ u8x32 splat = u8x32_splat (first);
+ while (count + 31 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 32;
+ count += 32;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u64 bmp =
+ u8x32_msb_mask (u8x32_mask_load_zero (data, mask) == splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u8x16 splat = u8x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 16;
+ count += 16;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+#endif
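
The count_equal helpers return the length of the leading run of identical
elements; a typical caller batches work for elements sharing a value, as in
this hedged sketch:

    #include <vppinfra/vector/count_equal.h>

    /* walk an array of next-node indices in runs of equal values */
    static void
    process_in_runs (u32 *nexts, u32 n)
    {
      while (n)
        {
          uword run = clib_count_equal_u32 (nexts, n);
          /* ... handle `run` elements that all equal nexts[0] ... */
          nexts += run;
          n -= run;
        }
    }
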
diff --git a/src/vppinfra/vector/index_to_ptr.h b/src/vppinfra/vector/index_to_ptr.h
new file mode 100644
index 00000000000..3985b757d54
--- /dev/null
+++ b/src/vppinfra/vector/index_to_ptr.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_index_to_ptr_h
+#define included_vector_index_to_ptr_h
+#include <vppinfra/clib.h>
+
+#ifdef CLIB_HAVE_VEC128
+static_always_inline void
+clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
+{
+ u32x4 iv4 = u32x4_load_unaligned (indices + i);
+ u64x2 pv2;
+ pv2 = u64x2_from_u32x4 (iv4);
+ u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
+#ifdef __aarch64__
+ pv2 = u64x2_from_u32x4_high (iv4);
+#else
+ pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
+#endif
+ u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
+}
+#endif
+
+/** \brief Convert array of indices to pointers with base and shift
+
+ @param indices source array of u32 indices
+ @param base base pointer
+   @param shift number of bits to shift indices by
+   @param ptrs destination array of pointers
+ @param n_elts number of elements in the source array
+*/
+
+static_always_inline void
+clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts)
+{
+#if defined CLIB_HAVE_VEC512
+ if (n_elts >= 8)
+ {
+ u64x8 off = u64x8_splat ((u64) base);
+ u64x8 b0, b1, b2, b3, b4, b5, b6, b7;
+
+ while (n_elts >= 64)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
+ b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
+ b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
+ b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
+ b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
+ b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
+ u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
+ u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
+ u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
+ u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
+ u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
+ ptrs += 64;
+ indices += 64;
+ n_elts -= 64;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ if (n_elts >= 32)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
+ b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
+ u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
+ ptrs += 32;
+ indices += 32;
+ n_elts -= 32;
+ }
+ if (n_elts >= 16)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ ptrs += 16;
+ indices += 16;
+ n_elts -= 16;
+ }
+ if (n_elts >= 8)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ ptrs += 8;
+ indices += 8;
+ n_elts -= 8;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
+ }
+ else
+ {
+ u32 mask = pow2_mask (n_elts);
+ u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
+ u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
+ return;
+ }
+#elif defined CLIB_HAVE_VEC256
+ if (n_elts >= 4)
+ {
+ u64x4 off = u64x4_splat ((u64) base);
+ u64x4 b0, b1, b2, b3, b4, b5, b6, b7;
+
+ while (n_elts >= 32)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
+ b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
+ b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
+ b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
+ b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
+ b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
+ u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
+ u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
+ u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
+ u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
+ u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
+ ptrs += 32;
+ indices += 32;
+ n_elts -= 32;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ if (n_elts >= 16)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
+ b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
+ u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
+ ptrs += 16;
+ indices += 16;
+ n_elts -= 16;
+ }
+ if (n_elts >= 8)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ ptrs += 8;
+ indices += 8;
+ n_elts -= 8;
+ }
+ if (n_elts > 4)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ ptrs += 4;
+ indices += 4;
+ n_elts -= 4;
+ }
+
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
+ return;
+ }
+#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
+ else
+ {
+ u32 mask = pow2_mask (n_elts);
+ u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
+ u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
+ return;
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128)
+ if (n_elts >= 4)
+ {
+ u64x2 ov = u64x2_splat ((u64) base);
+ u32 *i = (u32 *) indices;
+ void **p = (void **) ptrs;
+ u32 n = n_elts;
+
+ while (n >= 32)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
+ indices += 32;
+ ptrs += 32;
+ n -= 32;
+ }
+
+ if (n == 0)
+ return;
+
+ if (n >= 16)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
+ indices += 16;
+ ptrs += 16;
+ n -= 16;
+ }
+
+ if (n >= 8)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ indices += 8;
+ ptrs += 8;
+ n -= 8;
+ }
+
+ if (n > 4)
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+
+ clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
+ return;
+ }
+#endif
+ while (n_elts)
+ {
+ ptrs[0] = base + ((u64) indices[0] << shift);
+ ptrs += 1;
+ indices += 1;
+ n_elts -= 1;
+ }
+}
+
+#endif
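
A usage sketch for clib_index_to_ptr_u32, assuming a 2 KB object stride
(shift = 11); the names are illustrative:

    #include <vppinfra/vector/index_to_ptr.h>

    /* ptrs[i] = base + ((u64) indices[i] << shift) for each i */
    static void
    ptrs_from_indices (void *base, u32 *indices, void **ptrs, u32 n)
    {
      clib_index_to_ptr_u32 (indices, base, 11, ptrs, n);
    }
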
diff --git a/src/vppinfra/vector/ip_csum.h b/src/vppinfra/vector/ip_csum.h
new file mode 100644
index 00000000000..2cea9b448ea
--- /dev/null
+++ b/src/vppinfra/vector/ip_csum.h
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_ip_csum_h
+#define included_vector_ip_csum_h
+#include <vppinfra/clib.h>
+typedef struct
+{
+ u64 sum;
+ u8 odd;
+} clib_ip_csum_t;
+
+#if defined(CLIB_HAVE_VEC128)
+static_always_inline u64x2
+clib_ip_csum_cvt_and_add_4 (u32x4 v)
+{
+ return ((u64x2) u32x4_interleave_lo ((u32x4) v, u32x4_zero ()) +
+ (u64x2) u32x4_interleave_hi ((u32x4) v, u32x4_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_2 (u64x2 v)
+{
+ return v[0] + v[1];
+}
+#endif
+
+#if defined(CLIB_HAVE_VEC256)
+static_always_inline u64x4
+clib_ip_csum_cvt_and_add_8 (u32x8 v)
+{
+ return ((u64x4) u32x8_interleave_lo ((u32x8) v, u32x8_zero ()) +
+ (u64x4) u32x8_interleave_hi ((u32x8) v, u32x8_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_4 (u64x4 v)
+{
+ return clib_ip_csum_hadd_2 (u64x4_extract_lo (v) + u64x4_extract_hi (v));
+}
+#endif
+
+#if defined(CLIB_HAVE_VEC512)
+static_always_inline u64x8
+clib_ip_csum_cvt_and_add_16 (u32x16 v)
+{
+ return ((u64x8) u32x16_interleave_lo ((u32x16) v, u32x16_zero ()) +
+ (u64x8) u32x16_interleave_hi ((u32x16) v, u32x16_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_8 (u64x8 v)
+{
+ return clib_ip_csum_hadd_4 (u64x8_extract_lo (v) + u64x8_extract_hi (v));
+}
+#endif
+
+static_always_inline void
+clib_ip_csum_inline (clib_ip_csum_t *c, u8 *dst, u8 *src, u16 count,
+ int is_copy)
+{
+ if (c->odd)
+ {
+ c->odd = 0;
+ c->sum += (u16) src[0] << 8;
+ count--;
+      /* copy the odd byte before src is advanced */
+      if (is_copy)
+	dst++[0] = src[0];
+      count--;
+      src++;
+
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 sum8 = {};
+
+ while (count >= 512)
+ {
+ u32x16u *s = (u32x16u *) src;
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[0]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[1]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[2]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[3]);
+      sum8 += clib_ip_csum_cvt_and_add_16 (s[4]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[5]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[6]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[7]);
+ count -= 512;
+ src += 512;
+ if (is_copy)
+ {
+ u32x16u *d = (u32x16u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 512;
+ }
+ }
+
+ while (count >= 64)
+ {
+ u32x16u *s = (u32x16u *) src;
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[0]);
+ count -= 64;
+ src += 64;
+ if (is_copy)
+ {
+ u32x16u *d = (u32x16u *) dst;
+ d[0] = s[0];
+	  dst += 64;
+ }
+ }
+
+#ifdef CLIB_HAVE_VEC512_MASK_LOAD_STORE
+ if (count)
+ {
+ u64 mask = pow2_mask (count);
+ u32x16 v = (u32x16) u8x64_mask_load_zero (src, mask);
+ sum8 += clib_ip_csum_cvt_and_add_16 (v);
+ c->odd = count & 1;
+ if (is_copy)
+ u32x16_mask_store (v, dst, mask);
+ }
+ c->sum += clib_ip_csum_hadd_8 (sum8);
+ return;
+#endif
+
+ c->sum += clib_ip_csum_hadd_8 (sum8);
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 sum4 = {};
+
+ while (count >= 256)
+ {
+ u32x8u *s = (u32x8u *) src;
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[0]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[1]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[2]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[3]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[4]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[5]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[6]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[7]);
+ count -= 256;
+ src += 256;
+ if (is_copy)
+ {
+ u32x8u *d = (u32x8u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 256;
+ }
+ }
+
+ while (count >= 32)
+ {
+ u32x8u *s = (u32x8u *) src;
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[0]);
+ count -= 32;
+ src += 32;
+ if (is_copy)
+ {
+ u32x8u *d = (u32x8u *) dst;
+ d[0] = s[0];
+ dst += 32;
+ }
+ }
+
+#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
+ if (count)
+ {
+ u32 mask = pow2_mask (count);
+ u32x8 v = (u32x8) u8x32_mask_load_zero (src, mask);
+ sum4 += clib_ip_csum_cvt_and_add_8 (v);
+ c->odd = count & 1;
+ if (is_copy)
+ u32x8_mask_store (v, dst, mask);
+ }
+ c->sum += clib_ip_csum_hadd_4 (sum4);
+ return;
+#endif
+
+ c->sum += clib_ip_csum_hadd_4 (sum4);
+#elif defined(CLIB_HAVE_VEC128)
+ u64x2 sum2 = {};
+
+ while (count >= 128)
+ {
+ u32x4u *s = (u32x4u *) src;
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[0]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[1]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[2]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[3]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[4]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[5]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[6]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[7]);
+ count -= 128;
+ src += 128;
+ if (is_copy)
+ {
+ u32x4u *d = (u32x4u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 128;
+ }
+ }
+
+ while (count >= 16)
+ {
+ u32x4u *s = (u32x4u *) src;
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[0]);
+ count -= 16;
+ src += 16;
+ if (is_copy)
+ {
+ u32x4u *d = (u32x4u *) dst;
+ d[0] = s[0];
+ dst += 16;
+ }
+ }
+ c->sum += clib_ip_csum_hadd_2 (sum2);
+#else
+ while (count >= 4)
+ {
+ u32 v = *((u32 *) src);
+ c->sum += v;
+ count -= 4;
+ src += 4;
+ if (is_copy)
+ {
+ *(u32 *) dst = v;
+ dst += 4;
+ }
+ }
+#endif
+ while (count >= 2)
+ {
+ u16 v = *((u16 *) src);
+ c->sum += v;
+ count -= 2;
+ src += 2;
+ if (is_copy)
+ {
+ *(u16 *) dst = v;
+ dst += 2;
+ }
+ }
+
+ if (count)
+ {
+ c->odd = 1;
+ c->sum += (u16) src[0];
+ if (is_copy)
+ dst[0] = src[0];
+ }
+}
+
+static_always_inline u16
+clib_ip_csum_fold (clib_ip_csum_t *c)
+{
+ u64 sum = c->sum;
+#if defined(__x86_64__) && defined(__BMI2__)
+ u64 tmp = sum;
+ asm volatile(
+    /* using ADC is much faster than the mov, shift, add sequence
+     * the compiler produces */
+ "shr $32, %[sum] \n\t"
+ "add %k[tmp], %k[sum] \n\t"
+ "mov $16, %k[tmp] \n\t"
+ "shrx %k[tmp], %k[sum], %k[tmp] \n\t"
+ "adc %w[tmp], %w[sum] \n\t"
+ "adc $0, %w[sum] \n\t"
+ : [ sum ] "+&r"(sum), [ tmp ] "+&r"(tmp));
+#else
+ sum = ((u32) sum) + (sum >> 32);
+ sum = ((u16) sum) + (sum >> 16);
+ sum = ((u16) sum) + (sum >> 16);
+#endif
+ return (~((u16) sum));
+}
+
+static_always_inline void
+clib_ip_csum_chunk (clib_ip_csum_t *c, u8 *src, u16 count)
+{
+ return clib_ip_csum_inline (c, 0, src, count, 0);
+}
+
+static_always_inline void
+clib_ip_csum_and_copy_chunk (clib_ip_csum_t *c, u8 *src, u8 *dst, u16 count)
+{
+ return clib_ip_csum_inline (c, dst, src, count, 1);
+}
+
+static_always_inline u16
+clib_ip_csum (u8 *src, u16 count)
+{
+ clib_ip_csum_t c = {};
+ if (COMPILE_TIME_CONST (count) && count == 12)
+ {
+ for (int i = 0; i < 3; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else if (COMPILE_TIME_CONST (count) && count == 20)
+ {
+ for (int i = 0; i < 5; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else if (COMPILE_TIME_CONST (count) && count == 40)
+ {
+ for (int i = 0; i < 10; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else
+ clib_ip_csum_inline (&c, 0, src, count, 0);
+ return clib_ip_csum_fold (&c);
+}
+
+static_always_inline u16
+clib_ip_csum_and_copy (u8 *dst, u8 *src, u16 count)
+{
+ clib_ip_csum_t c = {};
+ clib_ip_csum_inline (&c, dst, src, count, 1);
+ return clib_ip_csum_fold (&c);
+}
+
+#endif
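
The chunked API lets a caller accumulate an IP checksum across several
buffers, with the `odd` flag carrying byte alignment between chunks; a
hedged sketch:

    #include <vppinfra/vector/ip_csum.h>

    /* checksum a header followed by a payload held elsewhere */
    static u16
    csum_two_chunks (u8 *hdr, u16 hdr_len, u8 *payload, u16 payload_len)
    {
      clib_ip_csum_t c = {};
      clib_ip_csum_chunk (&c, hdr, hdr_len);
      clib_ip_csum_chunk (&c, payload, payload_len);
      return clib_ip_csum_fold (&c); /* fold to 16 bits and invert */
    }
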
diff --git a/src/vppinfra/vector/mask_compare.h b/src/vppinfra/vector/mask_compare.h
index cac48a31f47..fc72d7dac35 100644
--- a/src/vppinfra/vector/mask_compare.h
+++ b/src/vppinfra/vector/mask_compare.h
@@ -8,7 +8,7 @@
#include <vppinfra/memcpy.h>
static_always_inline u64
-clib_mask_compare_u16_x64 (u16 v, u16 *a, u32 n_elts)
+clib_mask_compare_u16_x64 (u16 v, u16 *a)
{
u64 mask = 0;
#if defined(CLIB_HAVE_VEC512)
@@ -47,6 +47,38 @@ clib_mask_compare_u16_x64 (u16 v, u16 *a, u32 n_elts)
(u64) i8x16_msb_mask (i8x16_pack (v8 == av[4], v8 == av[5])) << 32 |
(u64) i8x16_msb_mask (i8x16_pack (v8 == av[6], v8 == av[7])) << 48);
#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u16_x64_n (u16 v, u16 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u16x32 v32 = u16x32_splat (v);
+ u16x32u *av = (u16x32u *) a;
+ mask = ((u64) u16x32_is_equal_mask (
+ u16x32_mask_load_zero (&av[0], data_mask), v32) |
+ (u64) u16x32_is_equal_mask (
+ u16x32_mask_load_zero (&av[1], data_mask >> 32), v32)
+ << 32);
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u16x16 v16 = u16x16_splat (v);
+ u16x16u *av = (u16x16u *) a;
+ i8x32 x;
+
+ x = i8x32_pack (v16 == u16x16_mask_load_zero (&av[0], data_mask),
+ v16 == u16x16_mask_load_zero (&av[1], data_mask >> 16));
+ mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3));
+ x = i8x32_pack (v16 == u16x16_mask_load_zero (&av[2], data_mask >> 32),
+ v16 == u16x16_mask_load_zero (&av[3], data_mask >> 48));
+ mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32;
+#else
for (int i = 0; i < n_elts; i++)
if (a[i] == v)
mask |= 1ULL << i;
@@ -68,7 +100,7 @@ clib_mask_compare_u16 (u16 v, u16 *a, u64 *mask, u32 n_elts)
{
while (n_elts >= 64)
{
- mask++[0] = clib_mask_compare_u16_x64 (v, a, 64);
+ mask++[0] = clib_mask_compare_u16_x64 (v, a);
n_elts -= 64;
a += 64;
}
@@ -76,11 +108,11 @@ clib_mask_compare_u16 (u16 v, u16 *a, u64 *mask, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return;
- mask[0] = clib_mask_compare_u16_x64 (v, a, n_elts) & pow2_mask (n_elts);
+ mask[0] = clib_mask_compare_u16_x64_n (v, a, n_elts) & pow2_mask (n_elts);
}
static_always_inline u64
-clib_mask_compare_u32_x64 (u32 v, u32 *a, u32 n_elts)
+clib_mask_compare_u32_x64 (u32 v, u32 *a)
{
u64 mask = 0;
#if defined(CLIB_HAVE_VEC512)
@@ -131,6 +163,57 @@ clib_mask_compare_u32_x64 (u32 v, u32 *a, u32 n_elts)
}
#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u32_x64_n (u32 v, u32 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u32x16 v16 = u32x16_splat (v);
+ u32x16u *av = (u32x16u *) a;
+ mask = ((u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[0], data_mask), v16) |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[1], data_mask >> 16), v16)
+ << 16 |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[2], data_mask >> 32), v16)
+ << 32 |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[3], data_mask >> 48), v16)
+ << 48);
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8 v8 = u32x8_splat (v);
+ u32x8u *av = (u32x8u *) a;
+ u32x8 m = { 0, 4, 1, 5, 2, 6, 3, 7 };
+ i8x32 c;
+
+ c = i8x32_pack (
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[0], data_mask)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[1], data_mask >> 8))),
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[2], data_mask >> 16)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[3], data_mask >> 24))));
+ mask = i8x32_msb_mask ((i8x32) u32x8_permute ((u32x8) c, m));
+
+ c = i8x32_pack (
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[4], data_mask >> 32)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[5], data_mask >> 40))),
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[6], data_mask >> 48)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[7], data_mask >> 56))));
+ mask |= (u64) i8x32_msb_mask ((i8x32) u32x8_permute ((u32x8) c, m)) << 32;
+#else
for (int i = 0; i < n_elts; i++)
if (a[i] == v)
mask |= 1ULL << i;
@@ -152,7 +235,119 @@ clib_mask_compare_u32 (u32 v, u32 *a, u64 *bitmap, u32 n_elts)
{
while (n_elts >= 64)
{
- bitmap++[0] = clib_mask_compare_u32_x64 (v, a, 64);
+ bitmap++[0] = clib_mask_compare_u32_x64 (v, a);
+ n_elts -= 64;
+ a += 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return;
+
+ bitmap[0] = clib_mask_compare_u32_x64_n (v, a, n_elts) & pow2_mask (n_elts);
+}
+
+static_always_inline u64
+clib_mask_compare_u64_x64 (u64 v, u64 *a)
+{
+ u64 mask = 0;
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 v8 = u64x8_splat (v);
+ u64x8u *av = (u64x8u *) a;
+ mask = ((u64) u64x8_is_equal_mask (av[0], v8) |
+ (u64) u64x8_is_equal_mask (av[1], v8) << 8 |
+ (u64) u64x8_is_equal_mask (av[2], v8) << 16 |
+ (u64) u64x8_is_equal_mask (av[3], v8) << 24 |
+ (u64) u64x8_is_equal_mask (av[4], v8) << 32 |
+ (u64) u64x8_is_equal_mask (av[5], v8) << 40 |
+ (u64) u64x8_is_equal_mask (av[6], v8) << 48 |
+ (u64) u64x8_is_equal_mask (av[7], v8) << 56);
+
+#elif defined(CLIB_HAVE_VEC256) && defined(__BMI2__)
+ u64x4 v4 = u64x4_splat (v);
+ u64x4u *av = (u64x4u *) a;
+
+ for (int i = 0; i < 16; i += 2)
+ {
+ u64 l = u8x32_msb_mask (v4 == av[i]);
+ u64 h = u8x32_msb_mask (v4 == av[i + 1]);
+ mask |= _pext_u64 (l | h << 32, 0x0101010101010101) << (i * 4);
+ }
+#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u64_x64_n (u64 v, u64 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 v8 = u64x8_splat (v);
+ u64x8u *av = (u64x8u *) a;
+ mask =
+ ((u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[0], data_mask), v8) |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[1], data_mask >> 8),
+ v8)
+ << 8 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[2], data_mask >> 16),
+ v8)
+ << 16 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[3], data_mask >> 24),
+ v8)
+ << 24 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[4], data_mask >> 32),
+ v8)
+ << 32 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[5], data_mask >> 40),
+ v8)
+ << 40 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[6], data_mask >> 48),
+ v8)
+ << 48 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[7], data_mask >> 56),
+ v8)
+ << 56);
+
+#elif defined(CLIB_HAVE_VEC256) && defined(__BMI2__) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4 v4 = u64x4_splat (v);
+ u64x4u *av = (u64x4u *) a;
+
+ for (int i = 0; i < 16; i += 2)
+ {
+ u64 l = u8x32_msb_mask (v4 == u64x4_mask_load_zero (&av[i], data_mask));
+ u64 h = u8x32_msb_mask (
+ v4 == u64x4_mask_load_zero (&av[i + 1], data_mask >> 4));
+ mask |= _pext_u64 (l | h << 32, 0x0101010101010101) << (i * 4);
+ data_mask >>= 8;
+ }
+#else
+ for (int i = 0; i < n_elts; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+/** \brief Compare 64-bit elements with provided value and return bitmap
+
+ @param v value to compare elements with
+ @param a array of u64 elements
+   @param bitmap array of u64 where the resulting mask will be stored
+ @param n_elts number of elements in the array
+ @return none
+*/
+
+static_always_inline void
+clib_mask_compare_u64 (u64 v, u64 *a, u64 *bitmap, u32 n_elts)
+{
+ while (n_elts >= 64)
+ {
+ bitmap++[0] = clib_mask_compare_u64_x64 (v, a);
n_elts -= 64;
a += 64;
}
@@ -160,7 +355,7 @@ clib_mask_compare_u32 (u32 v, u32 *a, u64 *bitmap, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return;
- bitmap[0] = clib_mask_compare_u32_x64 (v, a, n_elts) & pow2_mask (n_elts);
+ bitmap[0] = clib_mask_compare_u64_x64_n (v, a, n_elts) & pow2_mask (n_elts);
}
#endif
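
The compare and compress families compose naturally: build a bitmask of
matches, then compact the matching elements. A sketch assuming n_elts fits
the mask array:

    #include <vppinfra/vector/mask_compare.h>
    #include <vppinfra/vector/compress.h>

    /* gather all u32 elements equal to v into dst, return their count */
    static u32
    collect_equal_u32 (u32 *dst, u32 *src, u32 v, u32 n_elts)
    {
      u64 mask[4] = {}; /* covers n_elts <= 256 */
      clib_mask_compare_u32 (v, src, mask, n_elts);
      return clib_compress_u32 (dst, src, mask, n_elts);
    }
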
diff --git a/src/vppinfra/vector/test/compress.c b/src/vppinfra/vector/test/compress.c
deleted file mode 100644
index 7e3eba9892d..00000000000
--- a/src/vppinfra/vector/test/compress.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
-#include <vppinfra/vector/compress.h>
-
-__clib_test_fn u32
-clib_compress_u32_wrapper (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
-{
- return clib_compress_u32 (dst, src, mask, n_elts);
-}
-
-typedef struct
-{
- u64 mask[10];
- u32 n_elts;
-} compress_test_t;
-
-static compress_test_t tests[] = {
- { .mask = { 1 }, .n_elts = 1 },
- { .mask = { 2 }, .n_elts = 2 },
- { .mask = { 3 }, .n_elts = 2 },
- { .mask = { 0, 1 }, .n_elts = 66 },
- { .mask = { 0, 2 }, .n_elts = 69 },
- { .mask = { 0, 3 }, .n_elts = 66 },
- { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 62 },
- { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 255 },
- { .mask = { ~0ULL, 1, 1, ~0ULL }, .n_elts = 256 },
-};
-
-static clib_error_t *
-test_clib_compress_u32 (clib_error_t *err)
-{
- u32 src[513];
- u32 dst[513];
- u32 i, j;
-
- for (i = 0; i < ARRAY_LEN (src); i++)
- src[i] = i;
-
- for (i = 0; i < ARRAY_LEN (tests); i++)
- {
- compress_test_t *t = tests + i;
- u32 *dp = dst;
- u32 r;
-
- for (j = 0; j < ARRAY_LEN (dst); j++)
- dst[j] = 0xa5a5a5a5;
-
- r = clib_compress_u32_wrapper (dst, src, t->mask, t->n_elts);
-
- for (j = 0; j < t->n_elts; j++)
- {
- if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
- continue;
-
- if (dp[0] != src[j])
- return clib_error_return (err,
- "wrong data in testcase %u at "
- "(dst[%u] = 0x%x, src[%u] = 0x%x)",
- i, dp - dst, dp[0], j, src[j]);
- dp++;
- }
-
- if (dst[dp - dst + 1] != 0xa5a5a5a5)
- return clib_error_return (err, "buffer overrun in testcase %u", i);
-
- if (dp - dst != r)
- return clib_error_return (err, "wrong number of elts in testcase %u",
- i);
- }
-
- return err;
-}
-
-REGISTER_TEST (clib_compress_u32) = {
- .name = "clib_compress_u32",
- .fn = test_clib_compress_u32,
-};
diff --git a/src/vppinfra/vector/test/test.c b/src/vppinfra/vector/test/test.c
deleted file mode 100644
index 1a8b9d6ea10..00000000000
--- a/src/vppinfra/vector/test/test.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
-
-test_registration_t *test_registrations[CLIB_MARCH_TYPE_N_VARIANTS] = {};
-
-int
-test_march_supported (clib_march_variant_type_t type)
-{
-#define _(s, n) \
- if (CLIB_MARCH_VARIANT_TYPE_##s == type) \
- return clib_cpu_march_priority_##s ();
- foreach_march_variant
-#undef _
- return 0;
-}
-
-int
-main (int argc, char *argv[])
-{
- clib_mem_init (0, 64ULL << 20);
-
- for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
- {
- test_registration_t *r = test_registrations[i];
-
- if (r == 0 || test_march_supported (i) < 0)
- continue;
-
- fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
- fformat (stdout,
- "-------------------------------------------------------\n");
- while (r)
- {
- clib_error_t *err;
- err = (r->fn) (0);
- fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS");
- if (err)
- {
- clib_error_report (err);
- fformat (stdout, "\n");
- }
-
- r = r->next;
- }
- }
-
- fformat (stdout, "\n");
- return 0;
-}
diff --git a/src/vppinfra/vector/test/test.h b/src/vppinfra/vector/test/test.h
deleted file mode 100644
index bc499fb24e8..00000000000
--- a/src/vppinfra/vector/test/test.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#ifndef included_test_test_h
-#define included_test_test_h
-
-#include <vppinfra/cpu.h>
-
-typedef clib_error_t *(test_fn_t) (clib_error_t *);
-
-typedef struct test_registration_
-{
- char *name;
- u8 multiarch : 1;
- test_fn_t *fn;
- struct test_registration_ *next;
-} test_registration_t;
-
-extern test_registration_t *test_registrations[CLIB_MARCH_TYPE_N_VARIANTS];
-
-#define __clib_test_fn static __clib_noinline __clib_section (".test_wrapper")
-
-#define REGISTER_TEST(x) \
- test_registration_t CLIB_MARCH_SFX (__test_##x); \
- static void __clib_constructor CLIB_MARCH_SFX (__test_registration_##x) ( \
- void) \
- { \
- test_registration_t *r = &CLIB_MARCH_SFX (__test_##x); \
- r->next = test_registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)]; \
- test_registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)] = r; \
- } \
- test_registration_t CLIB_MARCH_SFX (__test_##x)
-
-#endif
diff --git a/src/vppinfra/vector/toeplitz.c b/src/vppinfra/vector/toeplitz.c
new file mode 100644
index 00000000000..fcc4b64ad19
--- /dev/null
+++ b/src/vppinfra/vector/toeplitz.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/vector/toeplitz.h>
+
+static u8 default_key[40] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
+ 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
+ 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
+ 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};
+
+#ifdef __x86_64__
+static_always_inline void
+clib_toeplitz_hash_key_expand_8 (u64x2 kv, u64x8u *m)
+{
+ u64x8 kv4, a, b, shift = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+ kv4 = (u64x8){ kv[0], kv[1], kv[0], kv[1], kv[0], kv[1], kv[0], kv[1] };
+
+ /* clang-format off */
+ /* create 8 byte-swapped copies of the bytes 0 - 7 */
+ a = (u64x8) u8x64_shuffle (kv4,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0);
+ /* create 8 byte-swapped copies of the bytes 4 - 11 */
+ b = (u64x8) u8x64_shuffle (kv4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4);
+ /* clang-format on */
+
+ /* shift each 64-bit element for 0 - 7 bits */
+ a <<= shift;
+ b <<= shift;
+
+ /* clang-format off */
+  /* construct eight 8x8 bit matrices used by gf2p8affine */
+  *m = (u64x8) u8x64_shuffle2 (a, b,
+ 0x07, 0x0f, 0x17, 0x1f, 0x27, 0x2f, 0x37, 0x3f,
+ 0x06, 0x0e, 0x16, 0x1e, 0x26, 0x2e, 0x36, 0x3e,
+ 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d,
+ 0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3c,
+ 0x47, 0x4f, 0x57, 0x5f, 0x67, 0x6f, 0x77, 0x7f,
+ 0x46, 0x4e, 0x56, 0x5e, 0x66, 0x6e, 0x76, 0x7e,
+ 0x45, 0x4d, 0x55, 0x5d, 0x65, 0x6d, 0x75, 0x7d,
+ 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c);
+ /* clang-format on */
+}
+
+void
+clib_toeplitz_hash_key_expand (u64 *matrixes, u8 *key, int size)
+{
+ u64x8u *m = (u64x8u *) matrixes;
+ u64x2 kv = {}, zero = {};
+
+ while (size >= 8)
+ {
+ kv = *(u64x2u *) key;
+ clib_toeplitz_hash_key_expand_8 (kv, m);
+ key += 8;
+ m++;
+ size -= 8;
+ }
+
+ kv = u64x2_shuffle2 (kv, zero, 1, 2);
+ clib_toeplitz_hash_key_expand_8 (kv, m);
+}
+#endif
+
+__clib_export clib_toeplitz_hash_key_t *
+clib_toeplitz_hash_key_init (u8 *key, u32 keylen)
+{
+ clib_toeplitz_hash_key_t *k;
+ u32 size, gfni_size = 0;
+
+ if (key == 0)
+ {
+ key = default_key;
+ keylen = sizeof (default_key);
+ }
+
+ size =
+ round_pow2 (sizeof (clib_toeplitz_hash_key_t) + round_pow2 (keylen, 16),
+ CLIB_CACHE_LINE_BYTES);
+#ifdef __x86_64__
+ gfni_size = round_pow2 ((keylen + 1) * 8, CLIB_CACHE_LINE_BYTES);
+#endif
+
+ k = clib_mem_alloc_aligned (size + gfni_size, CLIB_CACHE_LINE_BYTES);
+ clib_memset_u8 (k, 0, size + gfni_size);
+ k->key_length = keylen;
+ k->gfni_offset = size;
+ clib_memcpy_fast (k->data, key, keylen);
+
+#ifdef __x86_64__
+ clib_toeplitz_hash_key_expand ((u64 *) ((u8 *) k + k->gfni_offset), k->data,
+ k->key_length);
+#endif
+
+ return k;
+}
+
+__clib_export void
+clib_toeplitz_hash_key_free (clib_toeplitz_hash_key_t *k)
+{
+ clib_mem_free (k);
+}
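
Key lifecycle in two calls; passing a NULL key selects the built-in 40-byte
default key above, and on x86 the init path also precomputes the GFNI
matrices:

    #include <vppinfra/vector/toeplitz.h>

    clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
    /* ... hash packets with clib_toeplitz_hash (k, data, n_bytes) ... */
    clib_toeplitz_hash_key_free (k);
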
diff --git a/src/vppinfra/vector/toeplitz.h b/src/vppinfra/vector/toeplitz.h
new file mode 100644
index 00000000000..76297f05195
--- /dev/null
+++ b/src/vppinfra/vector/toeplitz.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_toeplitz_h
+#define included_vector_toeplitz_h
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ u16 key_length;
+ u16 gfni_offset;
+ u8 data[];
+} clib_toeplitz_hash_key_t;
+
+clib_toeplitz_hash_key_t *clib_toeplitz_hash_key_init (u8 *key, u32 keylen);
+void clib_toeplitz_hash_key_free (clib_toeplitz_hash_key_t *k);
+
+#ifdef CLIB_HAVE_VEC256
+static_always_inline u32x8
+toeplitz_hash_one_x8 (u32x8 hash, u64x4 v4, u8 data, u8 off)
+{
+ u32x8 v8 = u32x8_shuffle2 (v4 << (off * 8), v4 << (off * 8 + 4),
+			     /* upper 32 bits of each u64 in reverse order */
+ 15, 13, 11, 9, 7, 5, 3, 1);
+
+#ifdef CLIB_HAVE_VEC256_MASK_BITWISE_OPS
+ return u32x8_mask_xor (hash, v8, data);
+#else
+ static const u32x8 bits = { 1, 2, 4, 8, 16, 32, 64, 128 };
+ return hash ^ (((u32x8_splat (data) & bits) != u32x8_zero ()) & v8);
+#endif
+}
+#endif
+
+#if defined(__GFNI__) && defined(__AVX512F__)
+static const u8x64 __clib_toeplitz_hash_gfni_permute = {
+ /* clang-format off */
+ 0x00, 0x01, 0x02, 0x03, 0x40, 0x41, 0x42, 0x43,
+ 0x01, 0x02, 0x03, 0x04, 0x41, 0x42, 0x43, 0x44,
+ 0x02, 0x03, 0x04, 0x05, 0x42, 0x43, 0x44, 0x45,
+ 0x03, 0x04, 0x05, 0x06, 0x43, 0x44, 0x45, 0x46,
+ 0x04, 0x05, 0x06, 0x07, 0x44, 0x45, 0x46, 0x47,
+ 0x05, 0x06, 0x07, 0x08, 0x45, 0x46, 0x47, 0x48,
+ 0x06, 0x07, 0x08, 0x09, 0x46, 0x47, 0x48, 0x49,
+ 0x07, 0x08, 0x09, 0x0a, 0x47, 0x48, 0x49, 0x4a
+ /* clang-format on */
+};
+static_always_inline u64x8
+clib_toeplitz_hash_gfni_one (u8x64 d0, u64x8 m, int i)
+{
+
+ d0 = i == 1 ? (u8x64) u64x8_align_right (d0, d0, 1) : d0;
+ d0 = i == 2 ? (u8x64) u64x8_align_right (d0, d0, 2) : d0;
+ d0 = i == 3 ? (u8x64) u64x8_align_right (d0, d0, 3) : d0;
+ d0 = i == 4 ? (u8x64) u64x8_align_right (d0, d0, 4) : d0;
+ d0 = i == 5 ? (u8x64) u64x8_align_right (d0, d0, 5) : d0;
+ d0 = i == 6 ? (u8x64) u64x8_align_right (d0, d0, 6) : d0;
+
+ d0 = u8x64_permute (__clib_toeplitz_hash_gfni_permute, d0);
+
+ return (u64x8) _mm512_gf2p8affine_epi64_epi8 ((__m512i) d0, (__m512i) m, 0);
+}
+
+static_always_inline u64x8
+clib_toeplitz_hash_gfni_two (u8x64 d0, u8x64 d1, u64x8 m, int i)
+{
+
+ d0 = i == 1 ? (u8x64) u64x8_align_right (d0, d0, 1) : d0;
+ d1 = i == 1 ? (u8x64) u64x8_align_right (d1, d1, 1) : d1;
+ d0 = i == 2 ? (u8x64) u64x8_align_right (d0, d0, 2) : d0;
+ d1 = i == 2 ? (u8x64) u64x8_align_right (d1, d1, 2) : d1;
+ d0 = i == 3 ? (u8x64) u64x8_align_right (d0, d0, 3) : d0;
+ d1 = i == 3 ? (u8x64) u64x8_align_right (d1, d1, 3) : d1;
+ d0 = i == 4 ? (u8x64) u64x8_align_right (d0, d0, 4) : d0;
+ d1 = i == 4 ? (u8x64) u64x8_align_right (d1, d1, 4) : d1;
+ d0 = i == 5 ? (u8x64) u64x8_align_right (d0, d0, 5) : d0;
+ d1 = i == 5 ? (u8x64) u64x8_align_right (d1, d1, 5) : d1;
+ d0 = i == 6 ? (u8x64) u64x8_align_right (d0, d0, 6) : d0;
+ d1 = i == 6 ? (u8x64) u64x8_align_right (d1, d1, 6) : d1;
+
+ d0 = u8x64_permute2 (__clib_toeplitz_hash_gfni_permute, d0, d1);
+
+ return (u64x8) _mm512_gf2p8affine_epi64_epi8 ((__m512i) d0, (__m512i) m, 0);
+}
+#endif
+
+static_always_inline u32
+clib_toeplitz_hash (clib_toeplitz_hash_key_t *k, u8 *data, int n_bytes)
+{
+ u8 *key = k->data;
+ /* key must be 4 bytes longer than data */
+ ASSERT (k->key_length - n_bytes >= 4);
+
+#if defined(__GFNI__) && defined(__AVX512F__)
+ u8x64 d0;
+ u64x8 h0 = {};
+ u64x8u *m = (u64x8u *) ((u8 *) k + k->gfni_offset);
+
+  /* move data pointer back 3 bytes so mask load "prepends" three zeros */
+ data -= 3;
+ n_bytes += 3;
+
+ if (n_bytes < 64)
+ {
+ d0 = u8x64_mask_load_zero ((u8 *) data, pow2_mask (n_bytes - 3) << 3);
+ goto last8;
+ }
+
+ d0 = u8x64_mask_load_zero ((u8 *) data, -1ULL << 3);
+next56:
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[0], 0),
+ clib_toeplitz_hash_gfni_one (d0, m[1], 1));
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[2], 2),
+ clib_toeplitz_hash_gfni_one (d0, m[3], 3));
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[4], 4),
+ clib_toeplitz_hash_gfni_one (d0, m[5], 5));
+ h0 ^= clib_toeplitz_hash_gfni_one (d0, m[6], 6);
+ n_bytes -= 56;
+ data += 56;
+ m += 7;
+
+ if (n_bytes >= 64)
+ {
+ d0 = *(u8x64u *) data;
+ goto next56;
+ }
+
+ if (n_bytes == 0)
+ goto done;
+
+ d0 = u8x64_mask_load_zero ((u8 *) data, pow2_mask (n_bytes));
+last8:
+ h0 ^= clib_toeplitz_hash_gfni_one (d0, m[0], 0);
+ n_bytes -= 8;
+
+ if (n_bytes > 0)
+ {
+ m += 1;
+ d0 = (u8x64) u64x8_align_right (u64x8_zero (), d0, 1);
+ goto last8;
+ }
+
+done:
+ return u64x8_hxor (h0);
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 v4, shift = { 0, 1, 2, 3 };
+ u32x8 h0 = {};
+
+ while (n_bytes >= 4)
+ {
+ v4 = u64x4_splat (clib_net_to_host_u64 (*(u64u *) key)) << shift;
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[2], 2);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[3], 3);
+
+ data += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ u64 v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+
+ if (n_bytes == 3)
+ {
+ v |= (u64) key[5] << 16;
+ v |= (u64) key[6] << 8;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[2], 2);
+ }
+ else if (n_bytes == 2)
+ {
+ v |= (u64) key[5] << 16;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ }
+ else
+ {
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ }
+ }
+
+ return u32x8_hxor (h0);
+#endif
+ u64 v, hash = 0;
+
+ while (n_bytes >= 4)
+ {
+ v = clib_net_to_host_u64 (*(u64u *) key);
+
+ for (u8 bit = 1 << 7, byte = data[0]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[1]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[2]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[3]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+
+ data += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+ for (u8 bit = 1 << 7, byte = data[0]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ if (n_bytes > 1)
+ {
+ v |= (u64) key[5] << 24;
+ for (u8 bit = 1 << 7, byte = data[1]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ }
+ if (n_bytes > 2)
+ {
+ v |= (u64) key[6] << 24;
+ for (u8 bit = 1 << 7, byte = data[2]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ }
+ }
+ return hash >> 32;
+}
+
+static_always_inline void
+clib_toeplitz_hash_x4 (clib_toeplitz_hash_key_t *k, u8 *data0, u8 *data1,
+ u8 *data2, u8 *data3, u32 *hash0, u32 *hash1,
+ u32 *hash2, u32 *hash3, int n_bytes)
+{
+ /* key must be 4 bytes longer than data */
+ ASSERT (k->key_length - n_bytes >= 4);
+#if defined(__GFNI__) && defined(__AVX512F__)
+ u64x8u *m = (u64x8u *) ((u8 *) k + k->gfni_offset);
+ u8x64 d0, d1, d2, d3;
+ u64x8 h0 = {}, h2 = {};
+ u64 h, mask;
+
+  /* move data pointers back 3 bytes so mask loads "prepend" three zeros */
+ data0 -= 3;
+ data1 -= 3;
+ data2 -= 3;
+ data3 -= 3;
+ n_bytes += 3;
+
+ if (n_bytes < 64)
+ {
+ mask = pow2_mask (n_bytes - 3) << 3;
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+ goto last8;
+ }
+
+ mask = -1ULL << 3;
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+next56:
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[0], 0),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[1], 1));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[0], 0),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[1], 1));
+
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[2], 2),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[3], 3));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[2], 2),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[3], 3));
+
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[4], 4),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[5], 5));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[4], 4),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[5], 5));
+
+ h0 ^= clib_toeplitz_hash_gfni_two (d0, d1, m[6], 6);
+ h2 ^= clib_toeplitz_hash_gfni_two (d2, d3, m[6], 6);
+
+ n_bytes -= 56;
+ data0 += 56;
+ data1 += 56;
+ data2 += 56;
+ data3 += 56;
+ m += 7;
+
+ if (n_bytes >= 64)
+ {
+ d0 = *(u8x64u *) data0;
+ d1 = *(u8x64u *) data1;
+ d2 = *(u8x64u *) data2;
+ d3 = *(u8x64u *) data3;
+ goto next56;
+ }
+
+ if (n_bytes == 0)
+ goto done;
+
+ mask = pow2_mask (n_bytes);
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+last8:
+ h0 ^= clib_toeplitz_hash_gfni_two (d0, d1, m[0], 0);
+ h2 ^= clib_toeplitz_hash_gfni_two (d2, d3, m[0], 0);
+ n_bytes -= 8;
+
+ if (n_bytes > 0)
+ {
+ u64x8 zero = {};
+ m += 1;
+ d0 = (u8x64) u64x8_align_right (zero, d0, 1);
+ d1 = (u8x64) u64x8_align_right (zero, d1, 1);
+ d2 = (u8x64) u64x8_align_right (zero, d2, 1);
+ d3 = (u8x64) u64x8_align_right (zero, d3, 1);
+ goto last8;
+ }
+
+done:
+ h = u64x8_hxor (h0);
+ *hash0 = h;
+ *hash1 = h >> 32;
+ h = u64x8_hxor (h2);
+ *hash2 = h;
+ *hash3 = h >> 32;
+#elif defined(CLIB_HAVE_VEC256)
+ u8 *key = k->data;
+ u64x4 v4, shift = { 0, 1, 2, 3 };
+ u32x8 h0 = {}, h1 = {}, h2 = {}, h3 = {};
+
+ while (n_bytes >= 4)
+ {
+ v4 = u64x4_splat (clib_net_to_host_u64 (*(u64u *) key)) << shift;
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[2], 2);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[2], 2);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[2], 2);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[2], 2);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[3], 3);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[3], 3);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[3], 3);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[3], 3);
+
+ data0 += 4;
+ data1 += 4;
+ data2 += 4;
+ data3 += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ u64 v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+
+ if (n_bytes == 3)
+ {
+ v |= (u64) key[5] << 16;
+ v |= (u64) key[6] << 8;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[2], 2);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[2], 2);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[2], 2);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[2], 2);
+ }
+ else if (n_bytes == 2)
+ {
+ v |= (u64) key[5] << 16;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+ }
+ else
+ {
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+ }
+ }
+
+ *hash0 = u32x8_hxor (h0);
+ *hash1 = u32x8_hxor (h1);
+ *hash2 = u32x8_hxor (h2);
+ *hash3 = u32x8_hxor (h3);
+#else
+ u8 *key = k->data;
+ u64 v, h0 = 0, h1 = 0, h2 = 0, h3 = 0;
+
+ while (n_bytes >= 4)
+ {
+ v = clib_net_to_host_u64 (*(u64u *) key);
+
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[0] & bit ? v : 0;
+ h1 ^= data1[0] & bit ? v : 0;
+ h2 ^= data2[0] & bit ? v : 0;
+ h3 ^= data3[0] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[1] & bit ? v : 0;
+ h1 ^= data1[1] & bit ? v : 0;
+ h2 ^= data2[1] & bit ? v : 0;
+ h3 ^= data3[1] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[2] & bit ? v : 0;
+ h1 ^= data1[2] & bit ? v : 0;
+ h2 ^= data2[2] & bit ? v : 0;
+ h3 ^= data3[2] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[3] & bit ? v : 0;
+ h1 ^= data1[3] & bit ? v : 0;
+ h2 ^= data2[3] & bit ? v : 0;
+ h3 ^= data3[3] & bit ? v : 0;
+ }
+
+ data0 += 4;
+ data1 += 4;
+ data2 += 4;
+ data3 += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[0] & bit ? v : 0;
+ h1 ^= data1[0] & bit ? v : 0;
+ h2 ^= data2[0] & bit ? v : 0;
+ h3 ^= data3[0] & bit ? v : 0;
+ }
+ if (n_bytes > 1)
+ {
+ v |= (u64) key[5] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[1] & bit ? v : 0;
+ h1 ^= data1[1] & bit ? v : 0;
+ h2 ^= data2[1] & bit ? v : 0;
+ h3 ^= data3[1] & bit ? v : 0;
+ }
+ }
+ if (n_bytes > 2)
+ {
+ v |= (u64) key[6] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[2] & bit ? v : 0;
+ h1 ^= data1[2] & bit ? v : 0;
+ h2 ^= data2[2] & bit ? v : 0;
+ h3 ^= data3[2] & bit ? v : 0;
+ }
+ }
+ }
+ *hash0 = h0 >> 32;
+ *hash1 = h1 >> 32;
+ *hash2 = h2 >> 32;
+ *hash3 = h3 >> 32;
+#endif
+}
+
+#endif
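
An RSS-style example over an IPv4 address/port tuple (12 bytes of input);
note the key must be at least 4 bytes longer than the data, which the
default 40-byte key easily satisfies:

    #include <vppinfra/string.h>
    #include <vppinfra/vector/toeplitz.h>

    static u32
    rss_hash_ipv4_tuple (clib_toeplitz_hash_key_t *k, u32 saddr, u32 daddr,
                         u16 sport, u16 dport)
    {
      u8 data[12];
      clib_memcpy_fast (data, &saddr, 4);
      clib_memcpy_fast (data + 4, &daddr, 4);
      clib_memcpy_fast (data + 8, &sport, 2);
      clib_memcpy_fast (data + 10, &dport, 2);
      return clib_toeplitz_hash (k, data, sizeof (data));
    }
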
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index f38a3bdae73..866c82fcec3 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -19,7 +19,6 @@
#include <vppinfra/clib.h>
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_avx2_vec256i \
_(i,8,32,epi8) _(i,16,16,epi16) _(i,32,8,epi32) _(i,64,4,epi64)
#define foreach_avx2_vec256u \
@@ -67,7 +66,6 @@ t##s##x##c##_interleave_hi (t##s##x##c a, t##s##x##c b) \
foreach_avx2_vec256i foreach_avx2_vec256u
#undef _
-/* *INDENT-ON* */
always_inline u32x8
u32x8_permute (u32x8 v, u32x8 idx)
@@ -80,7 +78,6 @@ u32x8_permute (u32x8 v, u32x8 idx)
(__m256i) v, ((m0) | (m1) << 2 | (m2) << 4 | (m3) << 6))
/* _extract_lo, _extract_hi */
-/* *INDENT-OFF* */
#define _(t1,t2) \
always_inline t1 \
t2##_extract_lo (t2 v) \
@@ -103,7 +100,6 @@ _(u16x8, u16x16)
_(u32x4, u32x8)
_(u64x2, u64x4)
#undef _
-/* *INDENT-ON* */
/* 256 bit packs. */
#define _(f, t, fn) \
@@ -132,7 +128,6 @@ i8x32_msb_mask (i8x32 v)
}
/* _from_ */
-/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
t##_from_##f (f x) \
@@ -151,7 +146,6 @@ _ (i8x16, i16x16, epi8_epi16)
_(i8x16, i32x8, epi8_epi32)
_(i8x16, i64x4, epi8_epi64)
#undef _
-/* *INDENT-ON* */
static_always_inline u64x4
u64x4_byte_swap (u64x4 v)
@@ -183,15 +177,12 @@ u16x16_byte_swap (u16x16 v)
return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap);
}
-static_always_inline u8x32
-u8x32_shuffle (u8x32 v, u8x32 m)
-{
- return (u8x32) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) m);
-}
-
#define u8x32_align_right(a, b, imm) \
(u8x32) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, imm)
+#define u64x4_align_right(a, b, imm) \
+ (u64x4) _mm256_alignr_epi64 ((__m256i) a, (__m256i) b, imm)
+
static_always_inline u32
u32x8_sum_elts (u32x8 sum8)
{
@@ -206,6 +197,36 @@ u32x8_hadd (u32x8 v1, u32x8 v2)
return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2);
}
+static_always_inline u32
+u32x8_hxor (u32x8 v)
+{
+ u32x4 v4;
+ v4 = u32x8_extract_lo (v) ^ u32x8_extract_hi (v);
+ v4 ^= (u32x4) u8x16_align_right (v4, v4, 8);
+ v4 ^= (u32x4) u8x16_align_right (v4, v4, 4);
+ return v4[0];
+}
+
+static_always_inline u8x32
+u8x32_xor3 (u8x32 a, u8x32 b, u8x32 c)
+{
+#if __AVX512F__
+ return (u8x32) _mm256_ternarylogic_epi32 ((__m256i) a, (__m256i) b,
+ (__m256i) c, 0x96);
+#endif
+ return a ^ b ^ c;
+}
+
+static_always_inline u8x32
+u8x32_reflect_u8x16 (u8x32 x)
+{
+ static const u8x32 mask = {
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ };
+ return (u8x32) _mm256_shuffle_epi8 ((__m256i) x, (__m256i) mask);
+}
+
static_always_inline u16x16
u16x16_mask_last (u16x16 v, u8 n_last)
{
@@ -308,11 +329,17 @@ u32x8_scatter_one (u32x8 r, int index, void *p)
*(u32 *) p = r[index];
}
-static_always_inline u8x32
-u8x32_is_greater (u8x32 v1, u8x32 v2)
-{
- return (u8x32) _mm256_cmpgt_epi8 ((__m256i) v1, (__m256i) v2);
-}
+#define u32x8_gather_u32(base, indices, scale) \
+ (u32x8) _mm256_i32gather_epi32 ((const int *) base, (__m256i) indices, scale)
+
+#ifdef __AVX512F__
+#define u32x8_scatter_u32(base, indices, v, scale) \
+ _mm256_i32scatter_epi32 (base, (__m256i) indices, (__m256i) v, scale)
+#else
+#define u32x8_scatter_u32(base, indices, v, scale) \
+ for (u32 i = 0; i < 8; i++) \
+ *((u32u *) ((u8 *) base + (scale) * (indices)[i])) = (v)[i];
+#endif
static_always_inline u8x32
u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
@@ -321,6 +348,11 @@ u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
(__m256i) mask);
}
+#define u8x32_word_shift_left(a, n) \
+ (u8x32) _mm256_bslli_epi128 ((__m256i) a, n)
+#define u8x32_word_shift_right(a, n) \
+ (u8x32) _mm256_bsrli_epi128 ((__m256i) a, n)
+
#define u32x8_permute_lanes(a, b, m) \
(u32x8) _mm256_permute2x128_si256 ((__m256i) a, (__m256i) b, m)
#define u64x4_permute_lanes(a, b, m) \
@@ -390,6 +422,58 @@ u64x4_transpose (u64x4 a[8])
a[3] = u64x4_permute_lanes (r[1], r[3], 0x31);
}
+static_always_inline u8x32
+u8x32_splat_u8x16 (u8x16 a)
+{
+ return (u8x32) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u32x8
+u32x8_splat_u32x4 (u32x4 a)
+{
+ return (u32x8) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u64x4
+u64x4_splat_u64x2 (u64x2 a)
+{
+ return (u64x4) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u8x32
+u8x32_load_partial (u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ return u8x32_mask_load_zero (data, pow2_mask (n));
+#else
+ u8x32 r = {};
+ if (n > 16)
+ {
+ r = u8x32_insert_lo (r, *(u8x16u *) data);
+ r = u8x32_insert_hi (r, u8x16_load_partial (data + 16, n - 16));
+ }
+ else
+ r = u8x32_insert_lo (r, u8x16_load_partial (data, n));
+ return r;
+#endif
+}
+
+static_always_inline void
+u8x32_store_partial (u8x32 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u8x32_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 16)
+ {
+ *(u8x16u *) data = u8x32_extract_lo (r);
+ u8x16_store_partial (u8x32_extract_hi (r), data + 16, n - 16);
+ }
+ else
+ u8x16_store_partial (u8x32_extract_lo (r), data, n);
+#endif
+}
+
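Together, the two partial routines give a branch-light bounded copy of up to 32 bytes. A minimal sketch, assuming 0 < n <= 32; the function name is illustrative:

static_always_inline void
copy_up_to_32_example (u8 *dst, u8 *src, uword n)
{
  u8x32_store_partial (u8x32_load_partial (src, n), dst, n);
}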
#endif /* included_vector_avx2_h */
/*
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index 3a01c1ed824..699afec1212 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -19,7 +19,6 @@
#include <vppinfra/clib.h>
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_avx512_vec512i \
_(i,8,64,epi8) _(i,16,32,epi16) _(i,32,16,epi32) _(i,64,8,epi64)
#define foreach_avx512_vec512u \
@@ -29,55 +28,68 @@
/* splat, load_unaligned, store_unaligned, is_all_zero, is_equal,
is_all_equal, is_zero_mask */
-#define _(t, s, c, i) \
-static_always_inline t##s##x##c \
-t##s##x##c##_splat (t##s x) \
-{ return (t##s##x##c) _mm512_set1_##i (x); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_load_aligned (void *p) \
-{ return (t##s##x##c) _mm512_load_si512 (p); } \
-\
-static_always_inline void \
-t##s##x##c##_store_aligned (t##s##x##c v, void *p) \
-{ _mm512_store_si512 ((__m512i *) p, (__m512i) v); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_load_unaligned (void *p) \
-{ return (t##s##x##c) _mm512_loadu_si512 (p); } \
-\
-static_always_inline void \
-t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
-{ _mm512_storeu_si512 ((__m512i *) p, (__m512i) v); } \
-\
-static_always_inline int \
-t##s##x##c##_is_all_zero (t##s##x##c v) \
-{ return (_mm512_test_epi64_mask ((__m512i) v, (__m512i) v) == 0); } \
-\
-static_always_inline int \
-t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
-{ return t##s##x##c##_is_all_zero (a ^ b); } \
-\
-static_always_inline int \
-t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
-{ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); } \
-\
-static_always_inline u##c \
-t##s##x##c##_is_zero_mask (t##s##x##c v) \
-{ return _mm512_test_##i##_mask ((__m512i) v, (__m512i) v); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_interleave_lo (t##s##x##c a, t##s##x##c b) \
-{ return (t##s##x##c) _mm512_unpacklo_##i ((__m512i) a, (__m512i) b); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_interleave_hi (t##s##x##c a, t##s##x##c b) \
-{ return (t##s##x##c) _mm512_unpackhi_##i ((__m512i) a, (__m512i) b); } \
-
+#define _(t, s, c, i) \
+ static_always_inline t##s##x##c t##s##x##c##_splat (t##s x) \
+ { \
+ return (t##s##x##c) _mm512_set1_##i (x); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_load_aligned (void *p) \
+ { \
+ return (t##s##x##c) _mm512_load_si512 (p); \
+ } \
+ \
+ static_always_inline void t##s##x##c##_store_aligned (t##s##x##c v, \
+ void *p) \
+ { \
+ _mm512_store_si512 ((__m512i *) p, (__m512i) v); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_load_unaligned (void *p) \
+ { \
+ return (t##s##x##c) _mm512_loadu_si512 (p); \
+ } \
+ \
+ static_always_inline void t##s##x##c##_store_unaligned (t##s##x##c v, \
+ void *p) \
+ { \
+ _mm512_storeu_si512 ((__m512i *) p, (__m512i) v); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_all_zero (t##s##x##c v) \
+ { \
+ return (_mm512_test_epi64_mask ((__m512i) v, (__m512i) v) == 0); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
+ { \
+ return (_mm512_cmpneq_epi64_mask ((__m512i) a, (__m512i) b) == 0); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
+ { \
+ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); \
+ } \
+ \
+ static_always_inline u##c t##s##x##c##_is_zero_mask (t##s##x##c v) \
+ { \
+ return _mm512_test_##i##_mask ((__m512i) v, (__m512i) v); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_interleave_lo (t##s##x##c a, \
+ t##s##x##c b) \
+ { \
+ return (t##s##x##c) _mm512_unpacklo_##i ((__m512i) a, (__m512i) b); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_interleave_hi (t##s##x##c a, \
+ t##s##x##c b) \
+ { \
+ return (t##s##x##c) _mm512_unpackhi_##i ((__m512i) a, (__m512i) b); \
+ }
foreach_avx512_vec512i foreach_avx512_vec512u
#undef _
-/* *INDENT-ON* */
static_always_inline u32
u16x32_msb_mask (u16x32 v)
@@ -85,6 +97,9 @@ u16x32_msb_mask (u16x32 v)
return (u32) _mm512_movepi16_mask ((__m512i) v);
}
+#define u64x8_i64gather(index, base, scale) \
+ (u64x8) _mm512_i64gather_epi64 ((__m512i) index, base, scale)
+
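A sketch of the 64-bit gather; index lanes are scaled by scale bytes, so 8 addresses a plain u64 array (names illustrative; pool needs at least 8 entries):

static_always_inline u64x8
u64x8_gather_example (u64 *pool)
{
  u64x8 idx = { 7, 6, 5, 4, 3, 2, 1, 0 };
  return u64x8_i64gather (idx, pool, 8); /* lane i = pool[idx[i]] */
}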
/* 512-bit packs */
#define _(f, t, fn) \
always_inline t t##_pack (f lo, f hi) \
@@ -98,6 +113,18 @@ _ (i32x16, i16x32, _mm512_packs_epi32)
_ (i32x16, u16x32, _mm512_packus_epi32)
#undef _
+static_always_inline u64x8
+u64x8_byte_swap (u64x8 v)
+{
+ u8x64 swap = {
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ };
+ return (u64x8) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) swap);
+}
+
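The 64-bit byte swap mirrors the 16/32-bit variants: each lane is reversed independently, a per-lane equivalent of clib_byte_swap_u64. A sketch for loading big-endian counters on a little-endian host (illustrative helper):

static_always_inline u64x8
u64x8_load_be_example (void *p)
{
  return u64x8_byte_swap (u64x8_load_unaligned (p));
}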
static_always_inline u32x16
u32x16_byte_swap (u32x16 v)
{
@@ -184,6 +211,13 @@ u8x64_xor3 (u8x64 a, u8x64 b, u8x64 c)
(__m512i) c, 0x96);
}
+static_always_inline u64x8
+u64x8_xor3 (u64x8 a, u64x8 b, u64x8 c)
+{
+ return (u64x8) _mm512_ternarylogic_epi32 ((__m512i) a, (__m512i) b,
+ (__m512i) c, 0x96);
+}
+
static_always_inline u8x64
u8x64_reflect_u8x16 (u8x64 x)
{
@@ -196,15 +230,12 @@ u8x64_reflect_u8x16 (u8x64 x)
return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask);
}
-static_always_inline u8x64
-u8x64_shuffle (u8x64 v, u8x64 m)
-{
- return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m);
-}
-
#define u8x64_align_right(a, b, imm) \
(u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
+#define u64x8_align_right(a, b, imm) \
+ (u64x8) _mm512_alignr_epi64 ((__m512i) a, (__m512i) b, imm)
+
static_always_inline u32
u32x16_sum_elts (u32x16 sum16)
{
@@ -243,14 +274,42 @@ _ (u64x4, u8, _mm256, __m256i, epi64)
_ (u64x2, u8, _mm, __m128i, epi64)
#undef _
+#define _(t, m, p, i, e) \
+ static_always_inline t t##_mask_and (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_and_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_andnot (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_andnot_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_xor (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_xor_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_or (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_or_##e ((i) a, mask, (i) a, (i) b); \
+ }
+_ (u32x16, u16, _mm512, __m512i, epi32)
+_ (u32x8, u8, _mm256, __m256i, epi32)
+_ (u32x4, u8, _mm, __m128i, epi32)
+_ (u64x8, u8, _mm512, __m512i, epi64)
+_ (u64x4, u8, _mm256, __m256i, epi64)
+_ (u64x2, u8, _mm, __m128i, epi64)
+#undef _
+
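The merge-masked forms pass a as both the pass-through source and the first operand, so lanes whose mask bit is clear keep their value from a. Sketch:

static_always_inline u32x16
u32x16_mask_xor_example (u32x16 a, u32x16 b)
{
  /* lanes 0..7 become a[i] ^ b[i]; lanes 8..15 keep a[i] */
  return u32x16_mask_xor (a, b, 0x00ff);
}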
#ifdef CLIB_HAVE_VEC512
#define CLIB_HAVE_VEC512_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC512_MASK_BITWISE_OPS
#endif
#ifdef CLIB_HAVE_VEC256
#define CLIB_HAVE_VEC256_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC256_MASK_BITWISE_OPS
#endif
#ifdef CLIB_HAVE_VEC128
#define CLIB_HAVE_VEC128_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC128_MASK_BITWISE_OPS
#endif
static_always_inline u8x64
@@ -265,6 +324,12 @@ u32x16_splat_u32x4 (u32x4 a)
return (u32x16) _mm512_broadcast_i64x2 ((__m128i) a);
}
+static_always_inline u64x8
+u64x8_splat_u64x2 (u64x2 a)
+{
+ return (u64x8) _mm512_broadcast_i64x2 ((__m128i) a);
+}
+
static_always_inline u32x16
u32x16_mask_blend (u32x16 a, u32x16 b, u16 mask)
{
@@ -277,6 +342,19 @@ u8x64_mask_blend (u8x64 a, u8x64 b, u64 mask)
return (u8x64) _mm512_mask_blend_epi8 (mask, (__m512i) a, (__m512i) b);
}
+static_always_inline u8x64
+u8x64_permute (u8x64 idx, u8x64 a)
+{
+ return (u8x64) _mm512_permutexvar_epi8 ((__m512i) idx, (__m512i) a);
+}
+
+static_always_inline u8x64
+u8x64_permute2 (u8x64 idx, u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_permutex2var_epi8 ((__m512i) a, (__m512i) idx,
+ (__m512i) b);
+}
+
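u8x64_permute2 amounts to a 128-byte table lookup: bit 6 of each index byte selects between a and b, and the low 6 bits select the byte within it (requires AVX512VBMI). A sketch with illustrative names:

static_always_inline u8x64
u8x64_lut128_example (u8x64 sel, u8x64 tbl_lo, u8x64 tbl_hi)
{
  /* per byte: out[i] = (sel[i] & 0x40) ? tbl_hi[sel[i] & 0x3f]
					: tbl_lo[sel[i] & 0x3f] */
  return u8x64_permute2 (sel, tbl_lo, tbl_hi);
}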
#define _(t, m, e, p, it) \
static_always_inline m t##_is_equal_mask (t a, t b) \
{ \
@@ -298,6 +376,27 @@ _ (u32x16, u16, epu32, _mm512, __m512i)
_ (u64x8, u8, epu64, _mm512, __m512i)
#undef _
+#define _(t, m, e, p, it) \
+ static_always_inline m t##_is_not_equal_mask (t a, t b) \
+ { \
+ return p##_cmpneq_##e##_mask ((it) a, (it) b); \
+ }
+_ (u8x16, u16, epu8, _mm, __m128i)
+_ (u16x8, u8, epu16, _mm, __m128i)
+_ (u32x4, u8, epu32, _mm, __m128i)
+_ (u64x2, u8, epu64, _mm, __m128i)
+
+_ (u8x32, u32, epu8, _mm256, __m256i)
+_ (u16x16, u16, epu16, _mm256, __m256i)
+_ (u32x8, u8, epu32, _mm256, __m256i)
+_ (u64x4, u8, epu64, _mm256, __m256i)
+
+_ (u8x64, u64, epu8, _mm512, __m512i)
+_ (u16x32, u32, epu16, _mm512, __m512i)
+_ (u32x16, u16, epu32, _mm512, __m512i)
+_ (u64x8, u8, epu64, _mm512, __m512i)
+#undef _
+
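The not-equal masks compose naturally with bit counting, e.g. to count mismatching bytes between two 64-byte blocks; a sketch assuming count_set_bits from <vppinfra/bitops.h>:

static_always_inline uword
count_mismatching_bytes_example (u8 *p1, u8 *p2)
{
  u8x64 a = u8x64_load_unaligned (p1);
  u8x64 b = u8x64_load_unaligned (p2);
  return count_set_bits (u8x64_is_not_equal_mask (a, b));
}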
#define _(f, t, fn, it) \
static_always_inline t t##_from_##f (f x) { return (t) fn ((it) x); }
_ (u16x16, u32x16, _mm512_cvtepi16_epi32, __m256i)
@@ -338,9 +437,17 @@ _ (u8x16, u16, _mm, __m128i, epi8)
#ifdef CLIB_HAVE_VEC256
#define CLIB_HAVE_VEC256_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC256_COMPRESS_U8_U16
+#endif
+
#endif
#ifdef CLIB_HAVE_VEC512
#define CLIB_HAVE_VEC512_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC512_COMPRESS_U8_U16
+#endif
+
#endif
#ifndef __AVX512VBMI2__
@@ -357,17 +464,23 @@ u16x8_compress (u16x8 v, u8 mask)
}
#endif
+static_always_inline u64
+u64x8_hxor (u64x8 v)
+{
+ v ^= u64x8_align_right (v, v, 4);
+ v ^= u64x8_align_right (v, v, 2);
+ return v[0] ^ v[1];
+}
+
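Each align-right folds the upper half of the still-active lanes onto the lower half (8 -> 4 -> 2), so the final v[0] ^ v[1] covers all eight inputs. A scalar reference with the same result (illustrative):

static_always_inline u64
u64x8_hxor_ref_example (u64x8 v)
{
  u64 r = 0;
  for (int i = 0; i < 8; i++)
    r ^= v[i];
  return r;
}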
static_always_inline void
u32x16_transpose (u32x16 m[16])
{
__m512i r[16], a, b, c, d, x, y;
- /* *INDENT-OFF* */
__m512i pm1 = (__m512i) (u64x8) { 0, 1, 8, 9, 4, 5, 12, 13};
__m512i pm2 = (__m512i) (u64x8) { 2, 3, 10, 11, 6, 7, 14, 15};
__m512i pm3 = (__m512i) (u64x8) { 0, 1, 2, 3, 8, 9, 10, 11};
__m512i pm4 = (__m512i) (u64x8) { 4, 5, 6, 7, 12, 13, 14, 15};
- /* *INDENT-ON* */
r[0] = _mm512_unpacklo_epi32 ((__m512i) m[0], (__m512i) m[1]);
r[1] = _mm512_unpacklo_epi32 ((__m512i) m[2], (__m512i) m[3]);
@@ -447,12 +560,10 @@ u64x8_transpose (u64x8 m[8])
{
__m512i r[8], x, y;
- /* *INDENT-OFF* */
__m512i pm1 = (__m512i) (u64x8) { 0, 1, 8, 9, 4, 5, 12, 13};
__m512i pm2 = (__m512i) (u64x8) { 2, 3, 10, 11, 6, 7, 14, 15};
__m512i pm3 = (__m512i) (u64x8) { 0, 1, 2, 3, 8, 9, 10, 11};
__m512i pm4 = (__m512i) (u64x8) { 4, 5, 6, 7, 12, 13, 14, 15};
- /* *INDENT-ON* */
r[0] = _mm512_unpacklo_epi64 ((__m512i) m[0], (__m512i) m[1]);
r[1] = _mm512_unpacklo_epi64 ((__m512i) m[2], (__m512i) m[3]);
@@ -482,6 +593,18 @@ u64x8_transpose (u64x8 m[8])
m[7] = (u64x8) _mm512_permutex2var_epi64 (x, pm4, y);
}
+static_always_inline u8x64
+u8x64_load_partial (u8 *data, uword n)
+{
+ return u8x64_mask_load_zero (data, pow2_mask (n));
+}
+
+static_always_inline void
+u8x64_store_partial (u8x64 r, u8 *data, uword n)
+{
+ u8x64_mask_store (r, data, pow2_mask (n));
+}
+
#endif /* included_vector_avx512_h */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 70b05c60884..48644ddbd98 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -43,7 +43,6 @@ u8x16_compare_byte_mask (u8x16 v)
return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8));
}
-/* *INDENT-OFF* */
#define foreach_neon_vec128i \
_(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32) _(i,64,2,s64)
#define foreach_neon_vec128u \
@@ -88,12 +87,6 @@ u8x16_compare_byte_mask (u8x16 v)
return u8x16_compare_byte_mask (v); \
} \
\
- static_always_inline u##s##x##c t##s##x##c##_is_greater (t##s##x##c a, \
- t##s##x##c b) \
- { \
- return (u##s##x##c) vcgtq_##i (a, b); \
- } \
- \
static_always_inline t##s##x##c t##s##x##c##_add_saturate (t##s##x##c a, \
t##s##x##c b) \
{ \
@@ -115,7 +108,6 @@ u8x16_compare_byte_mask (u8x16 v)
foreach_neon_vec128i foreach_neon_vec128u
#undef _
-/* *INDENT-ON* */
static_always_inline u16x8
u16x8_byte_swap (u16x8 v)
@@ -129,12 +121,6 @@ u32x4_byte_swap (u32x4 v)
return (u32x4) vrev32q_u8 ((u8x16) v);
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) vqtbl1q_u8 (v, m);
-}
-
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{
@@ -211,6 +197,18 @@ u32x4_min_scalar (u32x4 v)
#define u8x16_word_shift_left(x,n) vextq_u8(u8x16_splat (0), x, 16 - n)
#define u8x16_word_shift_right(x,n) vextq_u8(x, u8x16_splat (0), n)
+always_inline u32x4
+u32x4_interleave_hi (u32x4 a, u32x4 b)
+{
+ return (u32x4) vzip2q_u32 (a, b);
+}
+
+always_inline u32x4
+u32x4_interleave_lo (u32x4 a, u32x4 b)
+{
+ return (u32x4) vzip1q_u32 (a, b);
+}
+
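The NEON interleaves map to vzip1q/vzip2q and match the lane order of the x86 unpacklo/unpackhi helpers. An illustrative check of the lane placement:

static_always_inline int
u32x4_interleave_example (void)
{
  u32x4 a = { 0, 1, 2, 3 }, b = { 4, 5, 6, 7 };
  u32x4 lo = u32x4_interleave_lo (a, b); /* { 0, 4, 1, 5 } */
  u32x4 hi = u32x4_interleave_hi (a, b); /* { 2, 6, 3, 7 } */
  return lo[1] == 4 && hi[3] == 7;
}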
static_always_inline u8x16
u8x16_reflect (u8x16 v)
{
@@ -231,6 +229,61 @@ __asm__ ("eor3 %0.16b,%1.16b,%2.16b,%3.16b": "=w" (r): "0" (a), "w" (b), "w" (c)
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+}
+
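Both partial routines avoid a byte loop by using overlapping accesses: for 8 <= n <= 16 the high lane is stored first at data + n - 8, shifted so bytes 8..n-1 land in place, and the full low-lane store then overwrites the overlapping bytes; nothing past data[n - 1] is touched. A plain byte-loop reference with the same observable effect (illustrative):

static_always_inline void
u8x16_store_partial_ref_example (u8x16 r, u8 *data, uword n)
{
  for (uword i = 0; i < n; i++)
    data[i] = r[i];
}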
#define CLIB_HAVE_VEC128_MSB_MASK
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index 7e75ad28710..58d5da90125 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -41,7 +41,6 @@
#include <vppinfra/error_bootstrap.h> /* for ASSERT */
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_sse42_vec128i \
_(i,8,16,epi8) _(i,16,8,epi16) _(i,32,4,epi32) _(i,64,2,epi64x)
#define foreach_sse42_vec128u \
@@ -92,7 +91,6 @@ t##s##x##c##_max (t##s##x##c a, t##s##x##c b) \
_(i,8,16,epi8) _(i,16,8,epi16) _(i,32,4,epi32) _(i,64,2,epi64)
_(u,8,16,epu8) _(u,16,8,epu16) _(u,32,4,epu32) _(u,64,2,epu64)
#undef _
-/* *INDENT-ON* */
#define CLIB_VEC128_SPLAT_DEFINED
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
@@ -411,26 +409,7 @@ u32x4_sum_elts (u32x4 sum4)
return sum4[0];
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) m);
-}
-
-static_always_inline u32x4
-u32x4_shuffle (u32x4 v, const int a, const int b, const int c, const int d)
-{
-#if defined(__clang__) || !__OPTIMIZE__
- u32x4 r = { v[a], v[b], v[c], v[d] };
- return r;
-#else
- return (u32x4) _mm_shuffle_epi32 ((__m128i) v,
- a | b << 2 | c << 4 | d << 6);
-#endif
-}
-
/* _from_ */
-/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
t##_from_##f (f x) \
@@ -450,7 +429,6 @@ _(i16x8, i32x4, epi16_epi32)
_(i16x8, i64x2, epi16_epi64)
_(i32x4, i64x2, epi32_epi64)
#undef _
-/* *INDENT-ON* */
static_always_inline u64x2
u64x2_gather (void *p0, void *p1)
@@ -496,12 +474,6 @@ u32x4_scatter_one (u32x4 r, int index, void *p)
}
static_always_inline u8x16
-u8x16_is_greater (u8x16 v1, u8x16 v2)
-{
- return (u8x16) _mm_cmpgt_epi8 ((__m128i) v1, (__m128i) v2);
-}
-
-static_always_inline u8x16
u8x16_blend (u8x16 v1, u8x16 v2, u8x16 mask)
{
return (u8x16) _mm_blendv_epi8 ((__m128i) v1, (__m128i) v2, (__m128i) mask);
@@ -517,6 +489,68 @@ u8x16_xor3 (u8x16 a, u8x16 b, u8x16 c)
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE)
+ return u8x16_mask_load_zero (data, pow2_mask (n));
+#endif
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE)
+ u8x16_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+#endif
+}
+
#endif /* included_vector_sse2_h */
/*