diff options
-rw-r--r-- | src/plugins/crypto_native/aes_cbc.c | 459 | ||||
-rw-r--r-- | src/plugins/crypto_native/aes_gcm.c | 1164 | ||||
-rw-r--r-- | src/vppinfra/CMakeLists.txt | 8 | ||||
-rw-r--r-- | src/vppinfra/crypto/aes.h (renamed from src/plugins/crypto_native/aes.h) | 41 | ||||
-rw-r--r-- | src/vppinfra/crypto/aes_cbc.h | 549 | ||||
-rw-r--r-- | src/vppinfra/crypto/aes_gcm.h | 975 | ||||
-rw-r--r-- | src/vppinfra/crypto/ghash.h (renamed from src/plugins/crypto_native/ghash.h) | 188 | ||||
-rw-r--r-- | src/vppinfra/perfmon/bundle_default.c | 33 | ||||
-rw-r--r-- | src/vppinfra/sha2.h | 637 | ||||
-rw-r--r-- | src/vppinfra/test/aes_cbc.c | 187 | ||||
-rw-r--r-- | src/vppinfra/test/aes_gcm.c | 1177 | ||||
-rw-r--r-- | src/vppinfra/vector_avx2.h | 55 | ||||
-rw-r--r-- | src/vppinfra/vector_avx512.h | 12 | ||||
-rw-r--r-- | src/vppinfra/vector_neon.h | 55 | ||||
-rw-r--r-- | src/vppinfra/vector_sse42.h | 62 |
15 files changed, 3308 insertions, 2294 deletions
diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c index 02d96b31c79..1f21dc149fa 100644 --- a/src/plugins/crypto_native/aes_cbc.c +++ b/src/plugins/crypto_native/aes_cbc.c @@ -19,7 +19,7 @@ #include <vnet/plugin/plugin.h> #include <vnet/crypto/crypto.h> #include <crypto_native/crypto_native.h> -#include <crypto_native/aes.h> +#include <vppinfra/crypto/aes_cbc.h> #if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0 #pragma GCC optimize ("O3") @@ -48,412 +48,6 @@ #define u32xN_splat u32x4_splat #endif -typedef struct -{ - u8x16 encrypt_key[15]; - u8xN decrypt_key[15]; -} aes_cbc_key_data_t; - -static_always_inline void __clib_unused -aes_cbc_dec (u8x16 * k, u8x16u * src, u8x16u * dst, u8x16u * iv, int count, - int rounds) -{ - u8x16 r[4], c[4], f; - - f = iv[0]; - while (count >= 64) - { - clib_prefetch_load (src + 8); - clib_prefetch_load (dst + 8); - - c[0] = r[0] = src[0]; - c[1] = r[1] = src[1]; - c[2] = r[2] = src[2]; - c[3] = r[3] = src[3]; - -#if __x86_64__ - r[0] ^= k[0]; - r[1] ^= k[0]; - r[2] ^= k[0]; - r[3] ^= k[0]; - - for (int i = 1; i < rounds; i++) - { - r[0] = aes_dec_round (r[0], k[i]); - r[1] = aes_dec_round (r[1], k[i]); - r[2] = aes_dec_round (r[2], k[i]); - r[3] = aes_dec_round (r[3], k[i]); - } - - r[0] = aes_dec_last_round (r[0], k[rounds]); - r[1] = aes_dec_last_round (r[1], k[rounds]); - r[2] = aes_dec_last_round (r[2], k[rounds]); - r[3] = aes_dec_last_round (r[3], k[rounds]); -#else - for (int i = 0; i < rounds - 1; i++) - { - r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i])); - r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i])); - r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i])); - r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i])); - } - r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds]; - r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds]; - r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds]; - r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds]; -#endif - dst[0] = r[0] ^ f; - dst[1] = r[1] ^ c[0]; - dst[2] = r[2] ^ c[1]; - dst[3] = r[3] ^ c[2]; - f = c[3]; - - count -= 64; - src += 4; - dst += 4; - } - - while (count > 0) - { - c[0] = r[0] = src[0]; -#if __x86_64__ - r[0] ^= k[0]; - for (int i = 1; i < rounds; i++) - r[0] = aes_dec_round (r[0], k[i]); - r[0] = aes_dec_last_round (r[0], k[rounds]); -#else - c[0] = r[0] = src[0]; - for (int i = 0; i < rounds - 1; i++) - r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i])); - r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds]; -#endif - dst[0] = r[0] ^ f; - f = c[0]; - - count -= 16; - src += 1; - dst += 1; - } -} - -#if __x86_64__ -#if defined(__VAES__) && defined(__AVX512F__) - -static_always_inline u8x64 -aes_block_load_x4 (u8 * src[], int i) -{ - u8x64 r = { }; - r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0); - r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1); - r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2); - r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3); - return r; -} - -static_always_inline void -aes_block_store_x4 (u8 * dst[], int i, u8x64 r) -{ - aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0)); - aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1)); - aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2)); - aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3)); -} - -static_always_inline u8x64 -aes4_cbc_dec_permute (u8x64 a, u8x64 b) -{ - return (u8x64) u64x8_shuffle2 (a, b, 6, 7, 8, 9, 10, 11, 12, 13); -} - -static_always_inline void -aes4_cbc_dec (u8x64 *k, u8x64u *src, u8x64u *dst, u8x16u *iv, int count, - aes_key_size_t rounds) -{ - u8x64 f, r[4], c[4] = { }; - __mmask8 m; - int i, n_blocks = count >> 4; - - f = (u8x64) _mm512_mask_loadu_epi64 (_mm512_setzero_si512 (), 0xc0, - (__m512i *) (iv - 3)); - - while (n_blocks >= 16) - { - c[0] = src[0]; - c[1] = src[1]; - c[2] = src[2]; - c[3] = src[3]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - r[2] = c[2] ^ k[0]; - r[3] = c[3] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x4 (r[0], k[i]); - r[1] = aes_dec_round_x4 (r[1], k[i]); - r[2] = aes_dec_round_x4 (r[2], k[i]); - r[3] = aes_dec_round_x4 (r[3], k[i]); - } - - r[0] = aes_dec_last_round_x4 (r[0], k[i]); - r[1] = aes_dec_last_round_x4 (r[1], k[i]); - r[2] = aes_dec_last_round_x4 (r[2], k[i]); - r[3] = aes_dec_last_round_x4 (r[3], k[i]); - - dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); - dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]); - dst[3] = r[3] ^= aes4_cbc_dec_permute (c[2], c[3]); - f = c[3]; - - n_blocks -= 16; - src += 4; - dst += 4; - } - - if (n_blocks >= 12) - { - c[0] = src[0]; - c[1] = src[1]; - c[2] = src[2]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - r[2] = c[2] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x4 (r[0], k[i]); - r[1] = aes_dec_round_x4 (r[1], k[i]); - r[2] = aes_dec_round_x4 (r[2], k[i]); - } - - r[0] = aes_dec_last_round_x4 (r[0], k[i]); - r[1] = aes_dec_last_round_x4 (r[1], k[i]); - r[2] = aes_dec_last_round_x4 (r[2], k[i]); - - dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); - dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]); - f = c[2]; - - n_blocks -= 12; - src += 3; - dst += 3; - } - else if (n_blocks >= 8) - { - c[0] = src[0]; - c[1] = src[1]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x4 (r[0], k[i]); - r[1] = aes_dec_round_x4 (r[1], k[i]); - } - - r[0] = aes_dec_last_round_x4 (r[0], k[i]); - r[1] = aes_dec_last_round_x4 (r[1], k[i]); - - dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); - f = c[1]; - - n_blocks -= 8; - src += 2; - dst += 2; - } - else if (n_blocks >= 4) - { - c[0] = src[0]; - - r[0] = c[0] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x4 (r[0], k[i]); - } - - r[0] = aes_dec_last_round_x4 (r[0], k[i]); - - dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); - f = c[0]; - - n_blocks -= 4; - src += 1; - dst += 1; - } - - if (n_blocks > 0) - { - m = (1 << (n_blocks * 2)) - 1; - c[0] = (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m, - (__m512i *) src); - f = aes4_cbc_dec_permute (f, c[0]); - r[0] = c[0] ^ k[0]; - for (i = 1; i < rounds; i++) - r[0] = aes_dec_round_x4 (r[0], k[i]); - r[0] = aes_dec_last_round_x4 (r[0], k[i]); - _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f)); - } -} -#elif defined(__VAES__) - -static_always_inline u8x32 -aes_block_load_x2 (u8 *src[], int i) -{ - u8x32 r = {}; - r = u8x32_insert_lo (r, aes_block_load (src[0] + i)); - r = u8x32_insert_hi (r, aes_block_load (src[1] + i)); - return r; -} - -static_always_inline void -aes_block_store_x2 (u8 *dst[], int i, u8x32 r) -{ - aes_block_store (dst[0] + i, u8x32_extract_lo (r)); - aes_block_store (dst[1] + i, u8x32_extract_hi (r)); -} - -static_always_inline u8x32 -aes2_cbc_dec_permute (u8x32 a, u8x32 b) -{ - return (u8x32) u64x4_shuffle2 ((u64x4) a, (u64x4) b, 2, 3, 4, 5); -} - -static_always_inline void -aes2_cbc_dec (u8x32 *k, u8x32u *src, u8x32u *dst, u8x16u *iv, int count, - aes_key_size_t rounds) -{ - u8x32 f = {}, r[4], c[4] = {}; - int i, n_blocks = count >> 4; - - f = u8x32_insert_hi (f, *iv); - - while (n_blocks >= 8) - { - c[0] = src[0]; - c[1] = src[1]; - c[2] = src[2]; - c[3] = src[3]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - r[2] = c[2] ^ k[0]; - r[3] = c[3] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x2 (r[0], k[i]); - r[1] = aes_dec_round_x2 (r[1], k[i]); - r[2] = aes_dec_round_x2 (r[2], k[i]); - r[3] = aes_dec_round_x2 (r[3], k[i]); - } - - r[0] = aes_dec_last_round_x2 (r[0], k[i]); - r[1] = aes_dec_last_round_x2 (r[1], k[i]); - r[2] = aes_dec_last_round_x2 (r[2], k[i]); - r[3] = aes_dec_last_round_x2 (r[3], k[i]); - - dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); - dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]); - dst[3] = r[3] ^= aes2_cbc_dec_permute (c[2], c[3]); - f = c[3]; - - n_blocks -= 8; - src += 4; - dst += 4; - } - - if (n_blocks >= 6) - { - c[0] = src[0]; - c[1] = src[1]; - c[2] = src[2]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - r[2] = c[2] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x2 (r[0], k[i]); - r[1] = aes_dec_round_x2 (r[1], k[i]); - r[2] = aes_dec_round_x2 (r[2], k[i]); - } - - r[0] = aes_dec_last_round_x2 (r[0], k[i]); - r[1] = aes_dec_last_round_x2 (r[1], k[i]); - r[2] = aes_dec_last_round_x2 (r[2], k[i]); - - dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); - dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]); - f = c[2]; - - n_blocks -= 6; - src += 3; - dst += 3; - } - else if (n_blocks >= 4) - { - c[0] = src[0]; - c[1] = src[1]; - - r[0] = c[0] ^ k[0]; - r[1] = c[1] ^ k[0]; - - for (i = 1; i < rounds; i++) - { - r[0] = aes_dec_round_x2 (r[0], k[i]); - r[1] = aes_dec_round_x2 (r[1], k[i]); - } - - r[0] = aes_dec_last_round_x2 (r[0], k[i]); - r[1] = aes_dec_last_round_x2 (r[1], k[i]); - - dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); - dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); - f = c[1]; - - n_blocks -= 4; - src += 2; - dst += 2; - } - else if (n_blocks >= 2) - { - c[0] = src[0]; - r[0] = c[0] ^ k[0]; - - for (i = 1; i < rounds; i++) - r[0] = aes_dec_round_x2 (r[0], k[i]); - - r[0] = aes_dec_last_round_x2 (r[0], k[i]); - dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); - f = c[0]; - - n_blocks -= 2; - src += 1; - dst += 1; - } - - if (n_blocks > 0) - { - u8x16 rl = *(u8x16u *) src ^ u8x32_extract_lo (k[0]); - for (i = 1; i < rounds; i++) - rl = aes_dec_round (rl, u8x32_extract_lo (k[i])); - rl = aes_dec_last_round (rl, u8x32_extract_lo (k[i])); - *(u8x16 *) dst = rl ^ u8x32_extract_hi (f); - } -} -#endif -#endif - static_always_inline u32 aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops, aes_key_size_t ks) @@ -658,28 +252,6 @@ decrypt: return n_ops; } -static_always_inline void * -aes_cbc_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks) -{ - u8x16 e[15], d[15]; - aes_cbc_key_data_t *kd; - kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); - aes_key_expand (e, key->data, ks); - aes_key_enc_to_dec (e, d, ks); - for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++) - { -#if defined(__VAES__) && defined(__AVX512F__) - kd->decrypt_key[i] = u8x64_splat_u8x16 (d[i]); -#elif defined(__VAES__) - kd->decrypt_key[i] = u8x32_splat_u8x16 (d[i]); -#else - kd->decrypt_key[i] = d[i]; -#endif - kd->encrypt_key[i] = e[i]; - } - return kd; -} - #define foreach_aes_cbc_handler_type _(128) _(192) _(256) #define _(x) \ @@ -689,12 +261,37 @@ static u32 aes_ops_dec_aes_cbc_##x \ static u32 aes_ops_enc_aes_cbc_##x \ (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \ { return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \ -static void * aes_cbc_key_exp_##x (vnet_crypto_key_t *key) \ -{ return aes_cbc_key_exp (key, AES_KEY_##x); } foreach_aes_cbc_handler_type; #undef _ +static void * +aes_cbc_key_exp_128 (vnet_crypto_key_t *key) +{ + aes_cbc_key_data_t *kd; + kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); + clib_aes128_cbc_key_expand (kd, key->data); + return kd; +} + +static void * +aes_cbc_key_exp_192 (vnet_crypto_key_t *key) +{ + aes_cbc_key_data_t *kd; + kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); + clib_aes192_cbc_key_expand (kd, key->data); + return kd; +} + +static void * +aes_cbc_key_exp_256 (vnet_crypto_key_t *key) +{ + aes_cbc_key_data_t *kd; + kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); + clib_aes256_cbc_key_expand (kd, key->data); + return kd; +} + #include <fcntl.h> clib_error_t * diff --git a/src/plugins/crypto_native/aes_gcm.c b/src/plugins/crypto_native/aes_gcm.c index c13665e3fb1..6589d411975 100644 --- a/src/plugins/crypto_native/aes_gcm.c +++ b/src/plugins/crypto_native/aes_gcm.c @@ -19,1098 +19,26 @@ #include <vnet/plugin/plugin.h> #include <vnet/crypto/crypto.h> #include <crypto_native/crypto_native.h> -#include <crypto_native/aes.h> -#include <crypto_native/ghash.h> +#include <vppinfra/crypto/aes_gcm.h> -#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0 -#pragma GCC optimize ("O3") +#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0 +#pragma GCC optimize("O3") #endif -#if defined(__VAES__) && defined(__AVX512F__) -#define NUM_HI 32 -#else -#define NUM_HI 8 -#endif - -typedef struct -{ - /* pre-calculated hash key values */ - const u8x16 Hi[NUM_HI]; - /* extracted AES key */ - const u8x16 Ke[15]; -#if defined(__VAES__) && defined(__AVX512F__) - const u8x64 Ke4[15]; -#endif -} aes_gcm_key_data_t; - -typedef struct -{ - u32 counter; - union - { - u32x4 Y; - u32x16 Y4; - }; -} aes_gcm_counter_t; - -typedef enum -{ - AES_GCM_F_WITH_GHASH = (1 << 0), - AES_GCM_F_LAST_ROUND = (1 << 1), - AES_GCM_F_ENCRYPT = (1 << 2), - AES_GCM_F_DECRYPT = (1 << 3), -} aes_gcm_flags_t; - -static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 }; - -static_always_inline void -aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k, - int n_blocks) -{ - if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks))) - { - for (int i = 0; i < n_blocks; i++) - { - r[i] = k ^ (u8x16) ctr->Y; - ctr->Y += ctr_inv_1; - } - ctr->counter += n_blocks; - } - else - { - for (int i = 0; i < n_blocks; i++) - { - r[i] = k ^ (u8x16) ctr->Y; - ctr->counter++; - ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1); - } - } -} - -static_always_inline void -aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks) -{ - for (int i = 0; i < n_blocks; i++) - r[i] = aes_enc_round (r[i], k); -} - -static_always_inline void -aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k, - int rounds, int n_blocks) -{ - - /* additional ronuds for AES-192 and AES-256 */ - for (int i = 10; i < rounds; i++) - aes_gcm_enc_round (r, k[i], n_blocks); - - for (int i = 0; i < n_blocks; i++) - d[i] ^= aes_enc_last_round (r[i], k[rounds]); -} - -static_always_inline u8x16 -aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd, - u8x16u * in, int n_blocks) -{ - ghash_data_t _gd, *gd = &_gd; - u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks; - ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]); - for (int i = 1; i < n_blocks; i++) - ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]); - ghash_reduce (gd); - ghash_reduce2 (gd); - return ghash_final (gd); -} - -static_always_inline u8x16 -aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left) -{ - - while (n_left >= 128) - { - T = aes_gcm_ghash_blocks (T, kd, in, 8); - n_left -= 128; - in += 8; - } - - if (n_left >= 64) - { - T = aes_gcm_ghash_blocks (T, kd, in, 4); - n_left -= 64; - in += 4; - } - - if (n_left >= 32) - { - T = aes_gcm_ghash_blocks (T, kd, in, 2); - n_left -= 32; - in += 2; - } - - if (n_left >= 16) - { - T = aes_gcm_ghash_blocks (T, kd, in, 1); - n_left -= 16; - in += 1; - } - - if (n_left) - { - u8x16 r = aes_load_partial (in, n_left); - T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]); - } - return T; -} - -static_always_inline __clib_unused u8x16 -aes_gcm_calc (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, - aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds, - int n, int last_block_bytes, aes_gcm_flags_t f) -{ - u8x16 r[n]; - ghash_data_t _gd = { }, *gd = &_gd; - const u8x16 *rk = (u8x16 *) kd->Ke; - int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1; - u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks; - - clib_prefetch_load (inv + 4); - - /* AES rounds 0 and 1 */ - aes_gcm_enc_first_round (r, ctr, rk[0], n); - aes_gcm_enc_round (r, rk[1], n); - - /* load data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - { - for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - d[i] = inv[i]; - - if (f & AES_GCM_F_LAST_ROUND) - d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes); - } - - /* GHASH multiply block 1 */ - if (f & AES_GCM_F_WITH_GHASH) - ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]); - - /* AES rounds 2 and 3 */ - aes_gcm_enc_round (r, rk[2], n); - aes_gcm_enc_round (r, rk[3], n); - - /* GHASH multiply block 2 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]); - - /* AES rounds 4 and 5 */ - aes_gcm_enc_round (r, rk[4], n); - aes_gcm_enc_round (r, rk[5], n); - - /* GHASH multiply block 3 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]); - - /* AES rounds 6 and 7 */ - aes_gcm_enc_round (r, rk[6], n); - aes_gcm_enc_round (r, rk[7], n); - - /* GHASH multiply block 4 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]); - - /* AES rounds 8 and 9 */ - aes_gcm_enc_round (r, rk[8], n); - aes_gcm_enc_round (r, rk[9], n); - - /* GHASH reduce 1st step */ - if (f & AES_GCM_F_WITH_GHASH) - ghash_reduce (gd); - - /* load data - encrypt round */ - if (f & AES_GCM_F_ENCRYPT) - { - for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - d[i] = inv[i]; - - if (f & AES_GCM_F_LAST_ROUND) - d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes); - } - - /* GHASH reduce 2nd step */ - if (f & AES_GCM_F_WITH_GHASH) - ghash_reduce2 (gd); - - /* AES last round(s) */ - aes_gcm_enc_last_round (r, d, rk, rounds, n); - - /* store data */ - for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - outv[i] = d[i]; - - if (f & AES_GCM_F_LAST_ROUND) - aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes); - - /* GHASH final step */ - if (f & AES_GCM_F_WITH_GHASH) - T = ghash_final (gd); - - return T; -} - -static_always_inline __clib_unused u8x16 -aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, - aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, - int rounds, aes_gcm_flags_t f) -{ - u8x16 r[4]; - ghash_data_t _gd, *gd = &_gd; - const u8x16 *rk = (u8x16 *) kd->Ke; - u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8; - - /* AES rounds 0 and 1 */ - aes_gcm_enc_first_round (r, ctr, rk[0], 4); - aes_gcm_enc_round (r, rk[1], 4); - - /* load 4 blocks of data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - { - d[0] = inv[0]; - d[1] = inv[1]; - d[2] = inv[2]; - d[3] = inv[3]; - } - - /* GHASH multiply block 0 */ - ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]); - - /* AES rounds 2 and 3 */ - aes_gcm_enc_round (r, rk[2], 4); - aes_gcm_enc_round (r, rk[3], 4); - - /* GHASH multiply block 1 */ - ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]); - - /* AES rounds 4 and 5 */ - aes_gcm_enc_round (r, rk[4], 4); - aes_gcm_enc_round (r, rk[5], 4); - - /* GHASH multiply block 2 */ - ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]); - - /* AES rounds 6 and 7 */ - aes_gcm_enc_round (r, rk[6], 4); - aes_gcm_enc_round (r, rk[7], 4); - - /* GHASH multiply block 3 */ - ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]); - - /* AES rounds 8 and 9 */ - aes_gcm_enc_round (r, rk[8], 4); - aes_gcm_enc_round (r, rk[9], 4); - - /* load 4 blocks of data - encrypt round */ - if (f & AES_GCM_F_ENCRYPT) - { - d[0] = inv[0]; - d[1] = inv[1]; - d[2] = inv[2]; - d[3] = inv[3]; - } - - /* AES last round(s) */ - aes_gcm_enc_last_round (r, d, rk, rounds, 4); - - /* store 4 blocks of data */ - outv[0] = d[0]; - outv[1] = d[1]; - outv[2] = d[2]; - outv[3] = d[3]; - - /* load next 4 blocks of data data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - { - d[0] = inv[4]; - d[1] = inv[5]; - d[2] = inv[6]; - d[3] = inv[7]; - } - - /* GHASH multiply block 4 */ - ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]); - - /* AES rounds 0, 1 and 2 */ - aes_gcm_enc_first_round (r, ctr, rk[0], 4); - aes_gcm_enc_round (r, rk[1], 4); - aes_gcm_enc_round (r, rk[2], 4); - - /* GHASH multiply block 5 */ - ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]); - - /* AES rounds 3 and 4 */ - aes_gcm_enc_round (r, rk[3], 4); - aes_gcm_enc_round (r, rk[4], 4); - - /* GHASH multiply block 6 */ - ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]); - - /* AES rounds 5 and 6 */ - aes_gcm_enc_round (r, rk[5], 4); - aes_gcm_enc_round (r, rk[6], 4); - - /* GHASH multiply block 7 */ - ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]); - - /* AES rounds 7 and 8 */ - aes_gcm_enc_round (r, rk[7], 4); - aes_gcm_enc_round (r, rk[8], 4); - - /* GHASH reduce 1st step */ - ghash_reduce (gd); - - /* AES round 9 */ - aes_gcm_enc_round (r, rk[9], 4); - - /* load data - encrypt round */ - if (f & AES_GCM_F_ENCRYPT) - { - d[0] = inv[4]; - d[1] = inv[5]; - d[2] = inv[6]; - d[3] = inv[7]; - } - - /* GHASH reduce 2nd step */ - ghash_reduce2 (gd); - - /* AES last round(s) */ - aes_gcm_enc_last_round (r, d, rk, rounds, 4); - - /* store data */ - outv[4] = d[0]; - outv[5] = d[1]; - outv[6] = d[2]; - outv[7] = d[3]; - - /* GHASH final step */ - return ghash_final (gd); -} - -static_always_inline __clib_unused u8x16 -aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, int n_blocks, - int n_bytes) -{ - ghash_data_t _gd, *gd = &_gd; - u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks; - - if (n_bytes) - d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes); - - ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]); - if (n_blocks > 1) - ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]); - if (n_blocks > 2) - ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]); - if (n_blocks > 3) - ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]); - ghash_reduce (gd); - ghash_reduce2 (gd); - return ghash_final (gd); -} - -#if defined(__VAES__) && defined(__AVX512F__) -static const u32x16 ctr_inv_1234 = { - 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24, -}; - -static const u32x16 ctr_inv_4444 = { - 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 -}; - -static const u32x16 ctr_1234 = { - 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, -}; - -static_always_inline void -aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n) -{ - u8 last_byte = (u8) ctr->counter; - int i = 0; - - /* As counter is stored in network byte order for performance reasons we - are incrementing least significant byte only except in case where we - overlow. As we are processing four 512-blocks in parallel except the - last round, overflow can happen only when n == 4 */ - - if (n == 4) - for (; i < 2; i++) - { - r[i] = k ^ (u8x64) ctr->Y4; - ctr->Y4 += ctr_inv_4444; - } - - if (n == 4 && PREDICT_TRUE (last_byte == 241)) - { - u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4); - - for (; i < n; i++) - { - r[i] = k ^ (u8x64) ctr->Y4; - Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234; - Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111); - ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr); - } - } - else - { - for (; i < n; i++) - { - r[i] = k ^ (u8x64) ctr->Y4; - ctr->Y4 += ctr_inv_4444; - } - } - ctr->counter += n * 4; -} - -static_always_inline void -aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks) -{ - for (int i = 0; i < n_blocks; i++) - r[i] = aes_enc_round_x4 (r[i], k); -} - -static_always_inline void -aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k, - int rounds, int n_blocks) -{ - - /* additional ronuds for AES-192 and AES-256 */ - for (int i = 10; i < rounds; i++) - aes4_gcm_enc_round (r, k[i], n_blocks); - - for (int i = 0; i < n_blocks; i++) - d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]); -} - -static_always_inline u8x16 -aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d, - aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out, - int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f) -{ - ghash4_data_t _gd, *gd = &_gd; - const u8x64 *rk = (u8x64 *) kd->Ke4; - int i, ghash_blocks, gc = 1; - u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out; - u8x64 r[4]; - u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes); - - if (f & AES_GCM_F_ENCRYPT) - { - /* during encryption we either hash four 512-bit blocks from previous - round or we don't hash at all */ - ghash_blocks = 4; - Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4); - } - else - { - /* during deccryption we hash 1..4 512-bit blocks from current round */ - ghash_blocks = n; - int n_128bit_blocks = n * 4; - /* if this is last round of decryption, we may have less than 4 - 128-bit blocks in the last 512-bit data block, so we need to adjust - Hi4 pointer accordingly */ - if (f & AES_GCM_F_LAST_ROUND) - n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4; - Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks); - } - - /* AES rounds 0 and 1 */ - aes4_gcm_enc_first_round (r, ctr, rk[0], n); - aes4_gcm_enc_round (r, rk[1], n); - - /* load 4 blocks of data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - { - for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - d[i] = inv[i]; - - if (f & AES_GCM_F_LAST_ROUND) - d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask); - } - - /* GHASH multiply block 0 */ - if (f & AES_GCM_F_WITH_GHASH) - ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^ - u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]); - - /* AES rounds 2 and 3 */ - aes4_gcm_enc_round (r, rk[2], n); - aes4_gcm_enc_round (r, rk[3], n); - - /* GHASH multiply block 1 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]); - - /* AES rounds 4 and 5 */ - aes4_gcm_enc_round (r, rk[4], n); - aes4_gcm_enc_round (r, rk[5], n); - - /* GHASH multiply block 2 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]); - - /* AES rounds 6 and 7 */ - aes4_gcm_enc_round (r, rk[6], n); - aes4_gcm_enc_round (r, rk[7], n); - - /* GHASH multiply block 3 */ - if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]); - - /* load 4 blocks of data - decrypt round */ - if (f & AES_GCM_F_ENCRYPT) - { - for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - d[i] = inv[i]; - - if (f & AES_GCM_F_LAST_ROUND) - d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask); - } - - /* AES rounds 8 and 9 */ - aes4_gcm_enc_round (r, rk[8], n); - aes4_gcm_enc_round (r, rk[9], n); - - /* AES last round(s) */ - aes4_gcm_enc_last_round (r, d, rk, rounds, n); - - /* store 4 blocks of data */ - for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++) - outv[i] = d[i]; - - if (f & AES_GCM_F_LAST_ROUND) - u8x64_mask_store (d[i], outv + i, byte_mask); - - /* GHASH reduce 1st step */ - ghash4_reduce (gd); - - /* GHASH reduce 2nd step */ - ghash4_reduce2 (gd); - - /* GHASH final step */ - return ghash4_final (gd); -} - -static_always_inline u8x16 -aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d, - aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out, - int rounds, aes_gcm_flags_t f) -{ - u8x64 r[4]; - ghash4_data_t _gd, *gd = &_gd; - const u8x64 *rk = (u8x64 *) kd->Ke4; - u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32); - u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out; - - /* AES rounds 0 and 1 */ - aes4_gcm_enc_first_round (r, ctr, rk[0], 4); - aes4_gcm_enc_round (r, rk[1], 4); - - /* load 4 blocks of data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - for (int i = 0; i < 4; i++) - d[i] = inv[i]; - - /* GHASH multiply block 0 */ - ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^ - u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]); - - /* AES rounds 2 and 3 */ - aes4_gcm_enc_round (r, rk[2], 4); - aes4_gcm_enc_round (r, rk[3], 4); - - /* GHASH multiply block 1 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]); - - /* AES rounds 4 and 5 */ - aes4_gcm_enc_round (r, rk[4], 4); - aes4_gcm_enc_round (r, rk[5], 4); - - /* GHASH multiply block 2 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]); - - /* AES rounds 6 and 7 */ - aes4_gcm_enc_round (r, rk[6], 4); - aes4_gcm_enc_round (r, rk[7], 4); - - /* GHASH multiply block 3 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]); - - /* AES rounds 8 and 9 */ - aes4_gcm_enc_round (r, rk[8], 4); - aes4_gcm_enc_round (r, rk[9], 4); - - /* load 4 blocks of data - encrypt round */ - if (f & AES_GCM_F_ENCRYPT) - for (int i = 0; i < 4; i++) - d[i] = inv[i]; - - /* AES last round(s) */ - aes4_gcm_enc_last_round (r, d, rk, rounds, 4); - - /* store 4 blocks of data */ - for (int i = 0; i < 4; i++) - outv[i] = d[i]; - - /* load 4 blocks of data - decrypt round */ - if (f & AES_GCM_F_DECRYPT) - for (int i = 0; i < 4; i++) - d[i] = inv[i + 4]; - - /* GHASH multiply block 3 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]); - - /* AES rounds 0 and 1 */ - aes4_gcm_enc_first_round (r, ctr, rk[0], 4); - aes4_gcm_enc_round (r, rk[1], 4); - - /* GHASH multiply block 5 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]); - - /* AES rounds 2 and 3 */ - aes4_gcm_enc_round (r, rk[2], 4); - aes4_gcm_enc_round (r, rk[3], 4); - - /* GHASH multiply block 6 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]); - - /* AES rounds 4 and 5 */ - aes4_gcm_enc_round (r, rk[4], 4); - aes4_gcm_enc_round (r, rk[5], 4); - - /* GHASH multiply block 7 */ - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]); - - /* AES rounds 6 and 7 */ - aes4_gcm_enc_round (r, rk[6], 4); - aes4_gcm_enc_round (r, rk[7], 4); - - /* GHASH reduce 1st step */ - ghash4_reduce (gd); - - /* AES rounds 8 and 9 */ - aes4_gcm_enc_round (r, rk[8], 4); - aes4_gcm_enc_round (r, rk[9], 4); - - /* GHASH reduce 2nd step */ - ghash4_reduce2 (gd); - - /* load 4 blocks of data - encrypt round */ - if (f & AES_GCM_F_ENCRYPT) - for (int i = 0; i < 4; i++) - d[i] = inv[i + 4]; - - /* AES last round(s) */ - aes4_gcm_enc_last_round (r, d, rk, rounds, 4); - - /* store 4 blocks of data */ - for (int i = 0; i < 4; i++) - outv[i + 4] = d[i]; - - /* GHASH final step */ - return ghash4_final (gd); -} - -static_always_inline u8x16 -aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d, - int n, int last_4block_bytes) -{ - ghash4_data_t _gd, *gd = &_gd; - u8x64u *Hi4; - int n_128bit_blocks; - u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes); - n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4); - Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks); - - d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask); - ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^ - u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]); - if (n > 1) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]); - if (n > 2) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]); - if (n > 3) - ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]); - ghash4_reduce (gd); - ghash4_reduce2 (gd); - return ghash4_final (gd); -} -#endif - -static_always_inline u8x16 -aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr, - u8x16u * inv, u8x16u * outv, u32 n_left, int rounds) -{ - aes_gcm_flags_t f = AES_GCM_F_ENCRYPT; - - if (n_left == 0) - return T; - -#if defined(__VAES__) && defined(__AVX512F__) - u8x64 d4[4]; - if (n_left < 256) - { - f |= AES_GCM_F_LAST_ROUND; - if (n_left > 192) - { - n_left -= 192; - aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 4, n_left); - } - else if (n_left > 128) - { - n_left -= 128; - aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 3, n_left); - } - else if (n_left > 64) - { - n_left -= 64; - aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 2, n_left); - } - else - { - aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 1, n_left); - } - } - - aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 256; - outv += 16; - inv += 16; - - f |= AES_GCM_F_WITH_GHASH; - - while (n_left >= 512) - { - T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f); - - /* next */ - n_left -= 512; - outv += 32; - inv += 32; - } - - while (n_left >= 256) - { - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 256; - outv += 16; - inv += 16; - } - - if (n_left == 0) - return aes4_gcm_ghash_last (T, kd, d4, 4, 64); - - f |= AES_GCM_F_LAST_ROUND; - - if (n_left > 192) - { - n_left -= 192; - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 4, n_left); - } - - if (n_left > 128) - { - n_left -= 128; - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 3, n_left); - } - - if (n_left > 64) - { - n_left -= 64; - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 2, n_left); - } - - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f); - return aes4_gcm_ghash_last (T, kd, d4, 1, n_left); -#else - u8x16 d[4]; - if (n_left < 64) - { - f |= AES_GCM_F_LAST_ROUND; - if (n_left > 48) - { - n_left -= 48; - aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 4, n_left); - } - else if (n_left > 32) - { - n_left -= 32; - aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 3, n_left); - } - else if (n_left > 16) - { - n_left -= 16; - aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 2, n_left); - } - else - { - aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 1, n_left); - } - } - - aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 64; - outv += 4; - inv += 4; - - f |= AES_GCM_F_WITH_GHASH; - - while (n_left >= 128) - { - T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f); - - /* next */ - n_left -= 128; - outv += 8; - inv += 8; - } - - if (n_left >= 64) - { - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 64; - outv += 4; - inv += 4; - } - - if (n_left == 0) - return aes_gcm_ghash_last (T, kd, d, 4, 0); - - f |= AES_GCM_F_LAST_ROUND; - - if (n_left > 48) - { - n_left -= 48; - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 4, n_left); - } - - if (n_left > 32) - { - n_left -= 32; - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 3, n_left); - } - - if (n_left > 16) - { - n_left -= 16; - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 2, n_left); - } - - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f); - return aes_gcm_ghash_last (T, kd, d, 1, n_left); -#endif -} - -static_always_inline u8x16 -aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr, - u8x16u * inv, u8x16u * outv, u32 n_left, int rounds) -{ - aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT; -#if defined(__VAES__) && defined(__AVX512F__) - u8x64 d4[4] = { }; - - while (n_left >= 512) - { - T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f); - - /* next */ - n_left -= 512; - outv += 32; - inv += 32; - } - - while (n_left >= 256) - { - T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 256; - outv += 16; - inv += 16; - } - - if (n_left == 0) - return T; - - f |= AES_GCM_F_LAST_ROUND; - - if (n_left > 192) - return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, - n_left - 192, f); - if (n_left > 128) - return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, - n_left - 128, f); - if (n_left > 64) - return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, - n_left - 64, f); - return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f); -#else - u8x16 d[4] = {}; - while (n_left >= 128) - { - T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f); - - /* next */ - n_left -= 128; - outv += 8; - inv += 8; - } - - if (n_left >= 64) - { - T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f); - - /* next */ - n_left -= 64; - outv += 4; - inv += 4; - } - - if (n_left == 0) - return T; - - f |= AES_GCM_F_LAST_ROUND; - - if (n_left > 48) - return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f); - - if (n_left > 32) - return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f); - - if (n_left > 16) - return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f); - - return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f); -#endif -} - -static_always_inline int -aes_gcm (u8x16u *in, u8x16u *out, u8x16u *addt, u8 *ivp, u8x16u *tag, - u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd, - int aes_rounds, int is_encrypt) -{ - int i; - u8x16 r, T = { }; - vec128_t Y0 = {}; - ghash_data_t _gd, *gd = &_gd; - aes_gcm_counter_t _ctr, *ctr = &_ctr; - - clib_prefetch_load (ivp); - clib_prefetch_load (in); - clib_prefetch_load (in + 4); - - /* calculate ghash for AAD - optimized for ipsec common cases */ - if (aad_bytes == 8) - T = aes_gcm_ghash (T, kd, addt, 8); - else if (aad_bytes == 12) - T = aes_gcm_ghash (T, kd, addt, 12); - else - T = aes_gcm_ghash (T, kd, addt, aad_bytes); - - /* initalize counter */ - ctr->counter = 1; - Y0.as_u64x2[0] = *(u64u *) ivp; - Y0.as_u32x4[2] = *(u32u *) (ivp + 8); - Y0.as_u32x4 += ctr_inv_1; -#if defined(__VAES__) && defined(__AVX512F__) - ctr->Y4 = u32x16_splat_u32x4 (Y0.as_u32x4) + ctr_inv_1234; -#else - ctr->Y = Y0.as_u32x4 + ctr_inv_1; -#endif - - /* ghash and encrypt/edcrypt */ - if (is_encrypt) - T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds); - else - T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds); - - clib_prefetch_load (tag); - - /* Finalize ghash - data bytes and aad bytes converted to bits */ - /* *INDENT-OFF* */ - r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3); - /* *INDENT-ON* */ - - /* interleaved computation of final ghash and E(Y0, k) */ - ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]); - r = kd->Ke[0] ^ Y0.as_u8x16; - for (i = 1; i < 5; i += 1) - r = aes_enc_round (r, kd->Ke[i]); - ghash_reduce (gd); - ghash_reduce2 (gd); - for (; i < 9; i += 1) - r = aes_enc_round (r, kd->Ke[i]); - T = ghash_final (gd); - for (; i < aes_rounds; i += 1) - r = aes_enc_round (r, kd->Ke[i]); - r = aes_enc_last_round (r, kd->Ke[aes_rounds]); - T = u8x16_reflect (T) ^ r; - - /* tag_len 16 -> 0 */ - tag_len &= 0xf; - - if (is_encrypt) - { - /* store tag */ - if (tag_len) - aes_store_partial (tag, T, tag_len); - else - tag[0] = T; - } - else - { - /* check tag */ - u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff; - if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask) - return 0; - } - return 1; -} - static_always_inline u32 -aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], - u32 n_ops, aes_key_size_t ks) +aes_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, + aes_key_size_t ks) { crypto_native_main_t *cm = &crypto_native_main; vnet_crypto_op_t *op = ops[0]; aes_gcm_key_data_t *kd; u32 n_left = n_ops; - next: kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index]; - aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad, - (u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len, - op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1); + aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len, + op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks), + AES_GCM_OP_ENCRYPT); op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; if (--n_left) @@ -1123,7 +51,7 @@ next: } static_always_inline u32 -aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops, +aes_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops, aes_key_size_t ks) { crypto_native_main_t *cm = &crypto_native_main; @@ -1134,10 +62,9 @@ aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops, next: kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index]; - rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad, - (u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len, - op->tag_len, kd, AES_KEY_ROUNDS (ks), - /* is_encrypt */ 0); + rv = aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len, + op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks), + AES_GCM_OP_DECRYPT); if (rv) { @@ -1159,39 +86,34 @@ next: } static_always_inline void * -aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks) +aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks) { aes_gcm_key_data_t *kd; - u8x16 H; kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); - /* expand AES key */ - aes_key_expand ((u8x16 *) kd->Ke, key->data, ks); + clib_aes_gcm_key_expand (kd, key->data, ks); - /* pre-calculate H */ - H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks); - H = u8x16_reflect (H); - ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI); -#if defined(__VAES__) && defined(__AVX512F__) - u8x64 *Ke4 = (u8x64 *) kd->Ke4; - for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++) - Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]); -#endif return kd; } -#define foreach_aes_gcm_handler_type _(128) _(192) _(256) - -#define _(x) \ -static u32 aes_ops_dec_aes_gcm_##x \ -(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \ -{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \ -static u32 aes_ops_enc_aes_gcm_##x \ -(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \ -{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \ -static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \ -{ return aes_gcm_key_exp (key, AES_KEY_##x); } +#define foreach_aes_gcm_handler_type _ (128) _ (192) _ (256) + +#define _(x) \ + static u32 aes_ops_dec_aes_gcm_##x (vlib_main_t *vm, \ + vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \ + } \ + static u32 aes_ops_enc_aes_gcm_##x (vlib_main_t *vm, \ + vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \ + } \ + static void *aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \ + { \ + return aes_gcm_key_exp (key, AES_KEY_##x); \ + } foreach_aes_gcm_handler_type; #undef _ @@ -1199,25 +121,27 @@ foreach_aes_gcm_handler_type; clib_error_t * #if defined(__VAES__) && defined(__AVX512F__) crypto_native_aes_gcm_init_icl (vlib_main_t *vm) +#elif defined(__VAES__) +crypto_native_aes_gcm_init_adl (vlib_main_t *vm) #elif __AVX512F__ -crypto_native_aes_gcm_init_skx (vlib_main_t * vm) +crypto_native_aes_gcm_init_skx (vlib_main_t *vm) #elif __AVX2__ -crypto_native_aes_gcm_init_hsw (vlib_main_t * vm) +crypto_native_aes_gcm_init_hsw (vlib_main_t *vm) #elif __aarch64__ -crypto_native_aes_gcm_init_neon (vlib_main_t * vm) +crypto_native_aes_gcm_init_neon (vlib_main_t *vm) #else -crypto_native_aes_gcm_init_slm (vlib_main_t * vm) +crypto_native_aes_gcm_init_slm (vlib_main_t *vm) #endif { crypto_native_main_t *cm = &crypto_native_main; -#define _(x) \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \ - aes_ops_enc_aes_gcm_##x); \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \ - aes_ops_dec_aes_gcm_##x); \ +#define _(x) \ + vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ + VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \ + aes_ops_enc_aes_gcm_##x); \ + vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ + VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \ + aes_ops_dec_aes_gcm_##x); \ cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x; foreach_aes_gcm_handler_type; #undef _ diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 975bf503827..ad942a258e3 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -128,6 +128,11 @@ set(VPPINFRA_HEADERS clib.h cpu.h crc32.h + crypto/sha2.h + crypto/ghash.h + crypto/aes.h + crypto/aes_cbc.h + crypto/aes_gcm.h dlist.h dlmalloc.h elf_clib.h @@ -168,7 +173,6 @@ set(VPPINFRA_HEADERS random_isaac.h rbtree.h serialize.h - sha2.h smp.h socket.h sparse_vec.h @@ -278,6 +282,8 @@ if(VPP_BUILD_VPPINFRA_TESTS) endif(VPP_BUILD_VPPINFRA_TESTS) set(test_files + test/aes_cbc.c + test/aes_gcm.c test/array_mask.c test/compress.c test/count_equal.c diff --git a/src/plugins/crypto_native/aes.h b/src/vppinfra/crypto/aes.h index 40fe681e2b7..a5e286e4c6e 100644 --- a/src/plugins/crypto_native/aes.h +++ b/src/vppinfra/crypto/aes.h @@ -28,10 +28,6 @@ typedef enum #define AES_KEY_ROUNDS(x) (10 + x * 2) #define AES_KEY_BYTES(x) (16 + x * 8) -static const u8x16 byte_mask_scale = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 -}; - static_always_inline u8x16 aes_block_load (u8 * p) { @@ -132,43 +128,6 @@ aes_block_store (u8 * p, u8x16 r) } static_always_inline u8x16 -aes_byte_mask (u8x16 x, u8 n_bytes) -{ - return x & (u8x16_splat (n_bytes) > byte_mask_scale); -} - -static_always_inline u8x16 -aes_load_partial (u8x16u * p, int n_bytes) -{ - ASSERT (n_bytes <= 16); -#ifdef __AVX512F__ - __m128i zero = { }; - return (u8x16) _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p); -#else - u8x16 v = {}; - CLIB_ASSUME (n_bytes < 16); - clib_memcpy_fast (&v, p, n_bytes); - return v; -#endif -} - -static_always_inline void -aes_store_partial (void *p, u8x16 r, int n_bytes) -{ -#if __aarch64__ - clib_memcpy_fast (p, &r, n_bytes); -#else -#ifdef __AVX512F__ - _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r); -#else - u8x16 mask = u8x16_splat (n_bytes) > byte_mask_scale; - _mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p); -#endif -#endif -} - - -static_always_inline u8x16 aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks) { int rounds = AES_KEY_ROUNDS (ks); diff --git a/src/vppinfra/crypto/aes_cbc.h b/src/vppinfra/crypto/aes_cbc.h new file mode 100644 index 00000000000..5c3054f4a93 --- /dev/null +++ b/src/vppinfra/crypto/aes_cbc.h @@ -0,0 +1,549 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef __crypto_aes_cbc_h__ +#define __crypto_aes_cbc_h__ + +#include <vppinfra/clib.h> +#include <vppinfra/vector.h> +#include <vppinfra/crypto/aes.h> + +typedef struct +{ + const u8x16 encrypt_key[15]; + const u8x16 decrypt_key[15]; +} aes_cbc_key_data_t; + +static_always_inline void +clib_aes_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *src, uword len, + const u8 *iv, aes_key_size_t ks, u8 *dst) +{ + int rounds = AES_KEY_ROUNDS (ks); + u8x16 r, *k = (u8x16 *) kd->encrypt_key; + + r = *(u8x16u *) iv; + + for (int i = 0; i < len; i += 16) + { + int j; +#if __x86_64__ + r = u8x16_xor3 (r, *(u8x16u *) (src + i), k[0]); + for (j = 1; j < rounds; j++) + r = aes_enc_round (r, k[j]); + r = aes_enc_last_round (r, k[rounds]); +#else + r ^= *(u8x16u *) (src + i); + for (j = 1; j < rounds - 1; j++) + r = vaesmcq_u8 (vaeseq_u8 (r, k[j])); + r = vaeseq_u8 (r, k[j]) ^ k[rounds]; +#endif + *(u8x16u *) (dst + i) = r; + } +} + +static_always_inline void +clib_aes128_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext, + uword len, const u8 *iv, u8 *ciphertext) +{ + clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_128, ciphertext); +} + +static_always_inline void +clib_aes192_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext, + uword len, const u8 *iv, u8 *ciphertext) +{ + clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_192, ciphertext); +} + +static_always_inline void +clib_aes256_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext, + uword len, const u8 *iv, u8 *ciphertext) +{ + clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_256, ciphertext); +} + +static_always_inline void __clib_unused +aes_cbc_dec (const u8x16 *k, u8x16u *src, u8x16u *dst, u8x16u *iv, int count, + int rounds) +{ + u8x16 r[4], c[4], f; + + f = iv[0]; + while (count >= 64) + { + c[0] = r[0] = src[0]; + c[1] = r[1] = src[1]; + c[2] = r[2] = src[2]; + c[3] = r[3] = src[3]; + +#if __x86_64__ + r[0] ^= k[0]; + r[1] ^= k[0]; + r[2] ^= k[0]; + r[3] ^= k[0]; + + for (int i = 1; i < rounds; i++) + { + r[0] = aes_dec_round (r[0], k[i]); + r[1] = aes_dec_round (r[1], k[i]); + r[2] = aes_dec_round (r[2], k[i]); + r[3] = aes_dec_round (r[3], k[i]); + } + + r[0] = aes_dec_last_round (r[0], k[rounds]); + r[1] = aes_dec_last_round (r[1], k[rounds]); + r[2] = aes_dec_last_round (r[2], k[rounds]); + r[3] = aes_dec_last_round (r[3], k[rounds]); +#else + for (int i = 0; i < rounds - 1; i++) + { + r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i])); + r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i])); + r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i])); + r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i])); + } + r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds]; + r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds]; + r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds]; + r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds]; +#endif + dst[0] = r[0] ^ f; + dst[1] = r[1] ^ c[0]; + dst[2] = r[2] ^ c[1]; + dst[3] = r[3] ^ c[2]; + f = c[3]; + + count -= 64; + src += 4; + dst += 4; + } + + while (count > 0) + { + c[0] = r[0] = src[0]; +#if __x86_64__ + r[0] ^= k[0]; + for (int i = 1; i < rounds; i++) + r[0] = aes_dec_round (r[0], k[i]); + r[0] = aes_dec_last_round (r[0], k[rounds]); +#else + c[0] = r[0] = src[0]; + for (int i = 0; i < rounds - 1; i++) + r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i])); + r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds]; +#endif + dst[0] = r[0] ^ f; + f = c[0]; + + count -= 16; + src += 1; + dst += 1; + } +} + +#if __x86_64__ +#if defined(__VAES__) && defined(__AVX512F__) + +static_always_inline u8x64 +aes_block_load_x4 (u8 *src[], int i) +{ + u8x64 r = {}; + r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0); + r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1); + r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2); + r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3); + return r; +} + +static_always_inline void +aes_block_store_x4 (u8 *dst[], int i, u8x64 r) +{ + aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0)); + aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1)); + aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2)); + aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3)); +} + +static_always_inline u8x64 +aes4_cbc_dec_permute (u8x64 a, u8x64 b) +{ + return (u8x64) u64x8_shuffle2 (a, b, 6, 7, 8, 9, 10, 11, 12, 13); +} + +static_always_inline void +aes4_cbc_dec (const u8x16 *k, u8x64u *src, u8x64u *dst, u8x16u *iv, int count, + aes_key_size_t rounds) +{ + u8x64 f, k4, r[4], c[4] = {}; + __mmask8 m; + int i, n_blocks = count >> 4; + + f = u8x64_insert_u8x16 (u8x64_zero (), *iv, 3); + + while (n_blocks >= 16) + { + k4 = u8x64_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + c[2] = src[2]; + c[3] = src[3]; + + r[0] = c[0] ^ k4; + r[1] = c[1] ^ k4; + r[2] = c[2] ^ k4; + r[3] = c[3] ^ k4; + + for (i = 1; i < rounds; i++) + { + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x4 (r[0], k4); + r[1] = aes_dec_round_x4 (r[1], k4); + r[2] = aes_dec_round_x4 (r[2], k4); + r[3] = aes_dec_round_x4 (r[3], k4); + } + + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x4 (r[0], k4); + r[1] = aes_dec_last_round_x4 (r[1], k4); + r[2] = aes_dec_last_round_x4 (r[2], k4); + r[3] = aes_dec_last_round_x4 (r[3], k4); + + dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); + dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]); + dst[3] = r[3] ^= aes4_cbc_dec_permute (c[2], c[3]); + f = c[3]; + + n_blocks -= 16; + src += 4; + dst += 4; + } + + if (n_blocks >= 12) + { + k4 = u8x64_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + c[2] = src[2]; + + r[0] = c[0] ^ k4; + r[1] = c[1] ^ k4; + r[2] = c[2] ^ k4; + + for (i = 1; i < rounds; i++) + { + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x4 (r[0], k4); + r[1] = aes_dec_round_x4 (r[1], k4); + r[2] = aes_dec_round_x4 (r[2], k4); + } + + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x4 (r[0], k4); + r[1] = aes_dec_last_round_x4 (r[1], k4); + r[2] = aes_dec_last_round_x4 (r[2], k4); + + dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); + dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]); + f = c[2]; + + n_blocks -= 12; + src += 3; + dst += 3; + } + else if (n_blocks >= 8) + { + k4 = u8x64_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + + r[0] = c[0] ^ k4; + r[1] = c[1] ^ k4; + + for (i = 1; i < rounds; i++) + { + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x4 (r[0], k4); + r[1] = aes_dec_round_x4 (r[1], k4); + } + + k4 = u8x64_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x4 (r[0], k4); + r[1] = aes_dec_last_round_x4 (r[1], k4); + + dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]); + f = c[1]; + + n_blocks -= 8; + src += 2; + dst += 2; + } + else if (n_blocks >= 4) + { + c[0] = src[0]; + + r[0] = c[0] ^ u8x64_splat_u8x16 (k[0]); + + for (i = 1; i < rounds; i++) + r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i])); + + r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i])); + + dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]); + f = c[0]; + + n_blocks -= 4; + src += 1; + dst += 1; + } + + if (n_blocks > 0) + { + k4 = u8x64_splat_u8x16 (k[0]); + m = (1 << (n_blocks * 2)) - 1; + c[0] = + (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m, (__m512i *) src); + f = aes4_cbc_dec_permute (f, c[0]); + r[0] = c[0] ^ k4; + for (i = 1; i < rounds; i++) + r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i])); + r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i])); + _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f)); + } +} +#elif defined(__VAES__) + +static_always_inline u8x32 +aes_block_load_x2 (u8 *src[], int i) +{ + u8x32 r = {}; + r = u8x32_insert_lo (r, aes_block_load (src[0] + i)); + r = u8x32_insert_hi (r, aes_block_load (src[1] + i)); + return r; +} + +static_always_inline void +aes_block_store_x2 (u8 *dst[], int i, u8x32 r) +{ + aes_block_store (dst[0] + i, u8x32_extract_lo (r)); + aes_block_store (dst[1] + i, u8x32_extract_hi (r)); +} + +static_always_inline u8x32 +aes2_cbc_dec_permute (u8x32 a, u8x32 b) +{ + return (u8x32) u64x4_shuffle2 ((u64x4) a, (u64x4) b, 2, 3, 4, 5); +} + +static_always_inline void +aes2_cbc_dec (const u8x16 *k, u8x32u *src, u8x32u *dst, u8x16u *iv, int count, + aes_key_size_t rounds) +{ + u8x32 k2, f = {}, r[4], c[4] = {}; + int i, n_blocks = count >> 4; + + f = u8x32_insert_hi (f, *iv); + + while (n_blocks >= 8) + { + k2 = u8x32_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + c[2] = src[2]; + c[3] = src[3]; + + r[0] = c[0] ^ k2; + r[1] = c[1] ^ k2; + r[2] = c[2] ^ k2; + r[3] = c[3] ^ k2; + + for (i = 1; i < rounds; i++) + { + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x2 (r[0], k2); + r[1] = aes_dec_round_x2 (r[1], k2); + r[2] = aes_dec_round_x2 (r[2], k2); + r[3] = aes_dec_round_x2 (r[3], k2); + } + + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x2 (r[0], k2); + r[1] = aes_dec_last_round_x2 (r[1], k2); + r[2] = aes_dec_last_round_x2 (r[2], k2); + r[3] = aes_dec_last_round_x2 (r[3], k2); + + dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); + dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]); + dst[3] = r[3] ^= aes2_cbc_dec_permute (c[2], c[3]); + f = c[3]; + + n_blocks -= 8; + src += 4; + dst += 4; + } + + if (n_blocks >= 6) + { + k2 = u8x32_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + c[2] = src[2]; + + r[0] = c[0] ^ k2; + r[1] = c[1] ^ k2; + r[2] = c[2] ^ k2; + + for (i = 1; i < rounds; i++) + { + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x2 (r[0], k2); + r[1] = aes_dec_round_x2 (r[1], k2); + r[2] = aes_dec_round_x2 (r[2], k2); + } + + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x2 (r[0], k2); + r[1] = aes_dec_last_round_x2 (r[1], k2); + r[2] = aes_dec_last_round_x2 (r[2], k2); + + dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); + dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]); + f = c[2]; + + n_blocks -= 6; + src += 3; + dst += 3; + } + else if (n_blocks >= 4) + { + k2 = u8x32_splat_u8x16 (k[0]); + c[0] = src[0]; + c[1] = src[1]; + + r[0] = c[0] ^ k2; + r[1] = c[1] ^ k2; + + for (i = 1; i < rounds; i++) + { + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_round_x2 (r[0], k2); + r[1] = aes_dec_round_x2 (r[1], k2); + } + + k2 = u8x32_splat_u8x16 (k[i]); + r[0] = aes_dec_last_round_x2 (r[0], k2); + r[1] = aes_dec_last_round_x2 (r[1], k2); + + dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); + dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]); + f = c[1]; + + n_blocks -= 4; + src += 2; + dst += 2; + } + else if (n_blocks >= 2) + { + k2 = u8x32_splat_u8x16 (k[0]); + c[0] = src[0]; + r[0] = c[0] ^ k2; + + for (i = 1; i < rounds; i++) + r[0] = aes_dec_round_x2 (r[0], u8x32_splat_u8x16 (k[i])); + + r[0] = aes_dec_last_round_x2 (r[0], u8x32_splat_u8x16 (k[i])); + dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]); + f = c[0]; + + n_blocks -= 2; + src += 1; + dst += 1; + } + + if (n_blocks > 0) + { + u8x16 rl = *(u8x16u *) src ^ k[0]; + for (i = 1; i < rounds; i++) + rl = aes_dec_round (rl, k[i]); + rl = aes_dec_last_round (rl, k[i]); + *(u8x16 *) dst = rl ^ u8x32_extract_hi (f); + } +} +#endif +#endif + +static_always_inline void +clib_aes_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key, + aes_key_size_t ks) +{ + u8x16 e[15], d[15]; + aes_key_expand (e, key, ks); + aes_key_enc_to_dec (e, d, ks); + for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++) + { + ((u8x16 *) kd->decrypt_key)[i] = d[i]; + ((u8x16 *) kd->encrypt_key)[i] = e[i]; + } +} + +static_always_inline void +clib_aes128_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key) +{ + clib_aes_cbc_key_expand (kd, key, AES_KEY_128); +} +static_always_inline void +clib_aes192_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key) +{ + clib_aes_cbc_key_expand (kd, key, AES_KEY_192); +} +static_always_inline void +clib_aes256_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key) +{ + clib_aes_cbc_key_expand (kd, key, AES_KEY_256); +} + +static_always_inline void +clib_aes_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext, + uword len, const u8 *iv, aes_key_size_t ks, + u8 *plaintext) +{ + int rounds = AES_KEY_ROUNDS (ks); +#if defined(__VAES__) && defined(__AVX512F__) + aes4_cbc_dec (kd->decrypt_key, (u8x64u *) ciphertext, (u8x64u *) plaintext, + (u8x16u *) iv, (int) len, rounds); +#elif defined(__VAES__) + aes2_cbc_dec (kd->decrypt_key, (u8x32u *) ciphertext, (u8x32u *) plaintext, + (u8x16u *) iv, (int) len, rounds); +#else + aes_cbc_dec (kd->decrypt_key, (u8x16u *) ciphertext, (u8x16u *) plaintext, + (u8x16u *) iv, (int) len, rounds); +#endif +} + +static_always_inline void +clib_aes128_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext, + uword len, const u8 *iv, u8 *plaintext) +{ + clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_128, plaintext); +} + +static_always_inline void +clib_aes192_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext, + uword len, const u8 *iv, u8 *plaintext) +{ + clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_192, plaintext); +} + +static_always_inline void +clib_aes256_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext, + uword len, const u8 *iv, u8 *plaintext) +{ + clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_256, plaintext); +} + +#endif /* __crypto_aes_cbc_h__ */ diff --git a/src/vppinfra/crypto/aes_gcm.h b/src/vppinfra/crypto/aes_gcm.h new file mode 100644 index 00000000000..8a5f76c3b33 --- /dev/null +++ b/src/vppinfra/crypto/aes_gcm.h @@ -0,0 +1,975 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Cisco Systems, Inc. + */ + +#ifndef __crypto_aes_gcm_h__ +#define __crypto_aes_gcm_h__ + +#include <vppinfra/clib.h> +#include <vppinfra/vector.h> +#include <vppinfra/cache.h> +#include <vppinfra/string.h> +#include <vppinfra/crypto/aes.h> +#include <vppinfra/crypto/ghash.h> + +#define NUM_HI 36 +#if defined(__VAES__) && defined(__AVX512F__) +typedef u8x64 aes_data_t; +typedef u8x64u aes_ghash_t; +typedef u8x64u aes_mem_t; +typedef u32x16 aes_gcm_counter_t; +#define N 64 +#define aes_gcm_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n) +#define aes_gcm_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n) +#define aes_gcm_splat(v) u8x64_splat (v) +#define aes_gcm_reflect(r) u8x64_reflect_u8x16 (r) +#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd) +#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd) +#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd) +#elif defined(__VAES__) +typedef u8x32 aes_data_t; +typedef u8x32u aes_ghash_t; +typedef u8x32u aes_mem_t; +typedef u32x8 aes_gcm_counter_t; +#define N 32 +#define aes_gcm_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n) +#define aes_gcm_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n) +#define aes_gcm_splat(v) u8x32_splat (v) +#define aes_gcm_reflect(r) u8x32_reflect_u8x16 (r) +#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd) +#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd) +#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd) +#else +typedef u8x16 aes_data_t; +typedef u8x16 aes_ghash_t; +typedef u8x16u aes_mem_t; +typedef u32x4 aes_gcm_counter_t; +#define N 16 +#define aes_gcm_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n) +#define aes_gcm_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n) +#define aes_gcm_splat(v) u8x16_splat (v) +#define aes_gcm_reflect(r) u8x16_reflect (r) +#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd) +#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd) +#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd) +#endif +#define N_LANES (N / 16) + +typedef enum +{ + AES_GCM_OP_UNKNONW = 0, + AES_GCM_OP_ENCRYPT, + AES_GCM_OP_DECRYPT, + AES_GCM_OP_GMAC +} aes_gcm_op_t; + +typedef union +{ + u8x16 x1; + u8x32 x2; + u8x64 x4; + u8x16 lanes[4]; +} __clib_aligned (64) +aes_gcm_expaned_key_t; + +typedef struct +{ + /* pre-calculated hash key values */ + const u8x16 Hi[NUM_HI]; + /* extracted AES key */ + const aes_gcm_expaned_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1]; +} aes_gcm_key_data_t; + +typedef struct +{ + aes_gcm_op_t operation; + int last; + u8 rounds; + uword data_bytes; + uword aad_bytes; + + u8x16 T; + + /* hash */ + const u8x16 *Hi; + const aes_ghash_t *next_Hi; + + /* expaded keys */ + const aes_gcm_expaned_key_t *Ke; + + /* counter */ + u32 counter; + u8x16 EY0; + aes_gcm_counter_t Y; + + /* ghash */ + ghash_data_t gd; +} aes_gcm_ctx_t; + +static_always_inline void +aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes) +{ + uword hash_offset = NUM_HI - n_lanes; + ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset); +#if N_LANES == 4 + u8x64 tag4 = {}; + tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0); + ghash4_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag4, *ctx->next_Hi++); +#elif N_LANES == 2 + u8x32 tag2 = {}; + tag2 = u8x32_insert_lo (tag2, ctx->T); + ghash2_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag2, *ctx->next_Hi++); +#else + ghash_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ ctx->T, *ctx->next_Hi++); +#endif +} + +static_always_inline void +aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data) +{ +#if N_LANES == 4 + ghash4_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++); +#elif N_LANES == 2 + ghash2_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++); +#else + ghash_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++); +#endif +} + +static_always_inline void +aes_gcm_ghash_mul_bit_len (aes_gcm_ctx_t *ctx) +{ + u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3); +#if N_LANES == 4 + u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0); + u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), r, 0); + ghash4_mul_next (&ctx->gd, r4, h); +#elif N_LANES == 2 + u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]); + u8x32 r2 = u8x32_insert_lo (u8x32_zero (), r); + ghash2_mul_next (&ctx->gd, r2, h); +#else + ghash_mul_next (&ctx->gd, r, ctx->Hi[NUM_HI - 1]); +#endif +} + +static_always_inline void +aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round) +{ + if (aes_round == 0) + ctx->EY0 ^= ctx->Ke[0].x1; + else if (aes_round == ctx->rounds) + ctx->EY0 = aes_enc_last_round (ctx->EY0, ctx->Ke[aes_round].x1); + else + ctx->EY0 = aes_enc_round (ctx->EY0, ctx->Ke[aes_round].x1); +} + +static_always_inline void +aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left) +{ + uword i; + aes_data_t r = {}; + const aes_mem_t *d = (aes_mem_t *) data; + + for (; n_left >= 8 * N; n_left -= 8 * N, d += 8) + { + if (ctx->operation == AES_GCM_OP_GMAC && n_left == N * 8) + { + aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES + 1); + for (i = 1; i < 8; i++) + aes_gcm_ghash_mul_next (ctx, d[i]); + aes_gcm_ghash_mul_bit_len (ctx); + aes_gcm_ghash_reduce (ctx); + aes_gcm_ghash_reduce2 (ctx); + aes_gcm_ghash_final (ctx); + goto done; + } + + aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES); + for (i = 1; i < 8; i++) + aes_gcm_ghash_mul_next (ctx, d[i]); + aes_gcm_ghash_reduce (ctx); + aes_gcm_ghash_reduce2 (ctx); + aes_gcm_ghash_final (ctx); + } + + if (n_left > 0) + { + int n_lanes = (n_left + 15) / 16; + + if (ctx->operation == AES_GCM_OP_GMAC) + n_lanes++; + + if (n_left < N) + { + clib_memcpy_fast (&r, d, n_left); + aes_gcm_ghash_mul_first (ctx, r, n_lanes); + } + else + { + aes_gcm_ghash_mul_first (ctx, d[0], n_lanes); + n_left -= N; + i = 1; + + if (n_left >= 4 * N) + { + aes_gcm_ghash_mul_next (ctx, d[i]); + aes_gcm_ghash_mul_next (ctx, d[i + 1]); + aes_gcm_ghash_mul_next (ctx, d[i + 2]); + aes_gcm_ghash_mul_next (ctx, d[i + 3]); + n_left -= 4 * N; + i += 4; + } + if (n_left >= 2 * N) + { + aes_gcm_ghash_mul_next (ctx, d[i]); + aes_gcm_ghash_mul_next (ctx, d[i + 1]); + n_left -= 2 * N; + i += 2; + } + + if (n_left >= N) + { + aes_gcm_ghash_mul_next (ctx, d[i]); + n_left -= N; + i += 1; + } + + if (n_left) + { + clib_memcpy_fast (&r, d + i, n_left); + aes_gcm_ghash_mul_next (ctx, r); + } + } + + if (ctx->operation == AES_GCM_OP_GMAC) + aes_gcm_ghash_mul_bit_len (ctx); + aes_gcm_ghash_reduce (ctx); + aes_gcm_ghash_reduce2 (ctx); + aes_gcm_ghash_final (ctx); + } + else if (ctx->operation == AES_GCM_OP_GMAC) + { + u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3); + ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]); + } + +done: + /* encrypt counter 0 E(Y0, k) */ + if (ctx->operation == AES_GCM_OP_GMAC) + for (int i = 0; i < ctx->rounds + 1; i += 1) + aes_gcm_enc_ctr0_round (ctx, i); +} + +static_always_inline void +aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks) +{ + const aes_gcm_expaned_key_t Ke0 = ctx->Ke[0]; + uword i = 0; + +#if N_LANES == 4 + const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, + 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 }; + + const u32x16 ctr_4444 = { + 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, + }; + + /* As counter is stored in network byte order for performance reasons we + are incrementing least significant byte only except in case where we + overlow. As we are processing four 512-blocks in parallel except the + last round, overflow can happen only when n == 4 */ + + if (n_blocks == 4) + for (; i < 2; i++) + { + r[i] = Ke0.x4 ^ (u8x64) ctx->Y; + ctx->Y += ctr_inv_4444; + } + + if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242)) + { + u32x16 Yr = (u32x16) aes_gcm_reflect ((u8x64) ctx->Y); + + for (; i < n_blocks; i++) + { + r[i] = Ke0.x4 ^ (u8x64) ctx->Y; + Yr += ctr_4444; + ctx->Y = (u32x16) aes_gcm_reflect ((u8x64) Yr); + } + } + else + { + for (; i < n_blocks; i++) + { + r[i] = Ke0.x4 ^ (u8x64) ctx->Y; + ctx->Y += ctr_inv_4444; + } + } + ctx->counter += n_blocks * 4; +#elif N_LANES == 2 + const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 }; + const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 }; + + /* As counter is stored in network byte order for performance reasons we + are incrementing least significant byte only except in case where we + overlow. As we are processing four 512-blocks in parallel except the + last round, overflow can happen only when n == 4 */ + + if (n_blocks == 4) + for (; i < 2; i++) + { + r[i] = Ke0.x2 ^ (u8x32) ctx->Y; + ctx->Y += ctr_inv_22; + } + + if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250)) + { + u32x8 Yr = (u32x8) aes_gcm_reflect ((u8x32) ctx->Y); + + for (; i < n_blocks; i++) + { + r[i] = Ke0.x2 ^ (u8x32) ctx->Y; + Yr += ctr_22; + ctx->Y = (u32x8) aes_gcm_reflect ((u8x32) Yr); + } + } + else + { + for (; i < n_blocks; i++) + { + r[i] = Ke0.x2 ^ (u8x32) ctx->Y; + ctx->Y += ctr_inv_22; + } + } + ctx->counter += n_blocks * 2; +#else + const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 }; + + if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3) + { + for (; i < n_blocks; i++) + { + r[i] = Ke0.x1 ^ (u8x16) ctx->Y; + ctx->Y += ctr_inv_1; + } + ctx->counter += n_blocks; + } + else + { + r[i++] = Ke0.x1 ^ (u8x16) ctx->Y; + ctx->Y += ctr_inv_1; + ctx->counter += 1; + + for (; i < n_blocks; i++) + { + r[i] = Ke0.x1 ^ (u8x16) ctx->Y; + ctx->counter++; + ctx->Y[3] = clib_host_to_net_u32 (ctx->counter); + } + } +#endif +} + +static_always_inline void +aes_gcm_enc_round (aes_data_t *r, const aes_gcm_expaned_key_t *Ke, + uword n_blocks) +{ + for (int i = 0; i < n_blocks; i++) +#if N_LANES == 4 + r[i] = aes_enc_round_x4 (r[i], Ke->x4); +#elif N_LANES == 2 + r[i] = aes_enc_round_x2 (r[i], Ke->x2); +#else + r[i] = aes_enc_round (r[i], Ke->x1); +#endif +} + +static_always_inline void +aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d, + const aes_gcm_expaned_key_t *Ke, uword n_blocks) +{ + /* additional ronuds for AES-192 and AES-256 */ + for (int i = 10; i < ctx->rounds; i++) + aes_gcm_enc_round (r, Ke + i, n_blocks); + + for (int i = 0; i < n_blocks; i++) +#if N_LANES == 4 + d[i] ^= aes_enc_last_round_x4 (r[i], Ke[ctx->rounds].x4); +#elif N_LANES == 2 + d[i] ^= aes_enc_last_round_x2 (r[i], Ke[ctx->rounds].x2); +#else + d[i] ^= aes_enc_last_round (r[i], Ke[ctx->rounds].x1); +#endif +} + +static_always_inline void +aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n, + u32 n_bytes, int with_ghash) +{ + const aes_gcm_expaned_key_t *k = ctx->Ke; + const aes_mem_t *sv = (aes_mem_t *) src; + aes_mem_t *dv = (aes_mem_t *) dst; + uword ghash_blocks, gc = 1; + aes_data_t r[4]; + u32 i, n_lanes; + + if (ctx->operation == AES_GCM_OP_ENCRYPT) + { + ghash_blocks = 4; + n_lanes = N_LANES * 4; + } + else + { + ghash_blocks = n; + n_lanes = n * N_LANES; +#if N_LANES != 1 + if (ctx->last) + n_lanes = (n_bytes + 15) / 16; +#endif + } + + n_bytes -= (n - 1) * N; + + /* AES rounds 0 and 1 */ + aes_gcm_enc_first_round (ctx, r, n); + aes_gcm_enc_round (r, k + 1, n); + + /* load data - decrypt round */ + if (ctx->operation == AES_GCM_OP_DECRYPT) + { + for (i = 0; i < n - ctx->last; i++) + d[i] = sv[i]; + + if (ctx->last) + d[n - 1] = aes_gcm_load_partial ((u8 *) (sv + n - 1), n_bytes); + } + + /* GHASH multiply block 0 */ + if (with_ghash) + aes_gcm_ghash_mul_first (ctx, d[0], n_lanes); + + /* AES rounds 2 and 3 */ + aes_gcm_enc_round (r, k + 2, n); + aes_gcm_enc_round (r, k + 3, n); + + /* GHASH multiply block 1 */ + if (with_ghash && gc++ < ghash_blocks) + aes_gcm_ghash_mul_next (ctx, (d[1])); + + /* AES rounds 4 and 5 */ + aes_gcm_enc_round (r, k + 4, n); + aes_gcm_enc_round (r, k + 5, n); + + /* GHASH multiply block 2 */ + if (with_ghash && gc++ < ghash_blocks) + aes_gcm_ghash_mul_next (ctx, (d[2])); + + /* AES rounds 6 and 7 */ + aes_gcm_enc_round (r, k + 6, n); + aes_gcm_enc_round (r, k + 7, n); + + /* GHASH multiply block 3 */ + if (with_ghash && gc++ < ghash_blocks) + aes_gcm_ghash_mul_next (ctx, (d[3])); + + /* load 4 blocks of data - decrypt round */ + if (ctx->operation == AES_GCM_OP_ENCRYPT) + { + for (i = 0; i < n - ctx->last; i++) + d[i] = sv[i]; + + if (ctx->last) + d[n - 1] = aes_gcm_load_partial (sv + n - 1, n_bytes); + } + + /* AES rounds 8 and 9 */ + aes_gcm_enc_round (r, k + 8, n); + aes_gcm_enc_round (r, k + 9, n); + + /* AES last round(s) */ + aes_gcm_enc_last_round (ctx, r, d, k, n); + + /* store data */ + for (i = 0; i < n - ctx->last; i++) + dv[i] = d[i]; + + if (ctx->last) + aes_gcm_store_partial (d[n - 1], dv + n - 1, n_bytes); + + /* GHASH reduce 1st step */ + aes_gcm_ghash_reduce (ctx); + + /* GHASH reduce 2nd step */ + if (with_ghash) + aes_gcm_ghash_reduce2 (ctx); + + /* GHASH final step */ + if (with_ghash) + aes_gcm_ghash_final (ctx); +} + +static_always_inline void +aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, + int with_ghash) +{ + const aes_gcm_expaned_key_t *k = ctx->Ke; + const aes_mem_t *sv = (aes_mem_t *) src; + aes_mem_t *dv = (aes_mem_t *) dst; + aes_data_t r[4]; + + /* AES rounds 0 and 1 */ + aes_gcm_enc_first_round (ctx, r, 4); + aes_gcm_enc_round (r, k + 1, 4); + + /* load 4 blocks of data - decrypt round */ + if (ctx->operation == AES_GCM_OP_DECRYPT) + for (int i = 0; i < 4; i++) + d[i] = sv[i]; + + /* GHASH multiply block 0 */ + aes_gcm_ghash_mul_first (ctx, d[0], N_LANES * 8); + + /* AES rounds 2 and 3 */ + aes_gcm_enc_round (r, k + 2, 4); + aes_gcm_enc_round (r, k + 3, 4); + + /* GHASH multiply block 1 */ + aes_gcm_ghash_mul_next (ctx, (d[1])); + + /* AES rounds 4 and 5 */ + aes_gcm_enc_round (r, k + 4, 4); + aes_gcm_enc_round (r, k + 5, 4); + + /* GHASH multiply block 2 */ + aes_gcm_ghash_mul_next (ctx, (d[2])); + + /* AES rounds 6 and 7 */ + aes_gcm_enc_round (r, k + 6, 4); + aes_gcm_enc_round (r, k + 7, 4); + + /* GHASH multiply block 3 */ + aes_gcm_ghash_mul_next (ctx, (d[3])); + + /* AES rounds 8 and 9 */ + aes_gcm_enc_round (r, k + 8, 4); + aes_gcm_enc_round (r, k + 9, 4); + + /* load 4 blocks of data - encrypt round */ + if (ctx->operation == AES_GCM_OP_ENCRYPT) + for (int i = 0; i < 4; i++) + d[i] = sv[i]; + + /* AES last round(s) */ + aes_gcm_enc_last_round (ctx, r, d, k, 4); + + /* store 4 blocks of data */ + for (int i = 0; i < 4; i++) + dv[i] = d[i]; + + /* load next 4 blocks of data data - decrypt round */ + if (ctx->operation == AES_GCM_OP_DECRYPT) + for (int i = 0; i < 4; i++) + d[i] = sv[i + 4]; + + /* GHASH multiply block 4 */ + aes_gcm_ghash_mul_next (ctx, (d[0])); + + /* AES rounds 0 and 1 */ + aes_gcm_enc_first_round (ctx, r, 4); + aes_gcm_enc_round (r, k + 1, 4); + + /* GHASH multiply block 5 */ + aes_gcm_ghash_mul_next (ctx, (d[1])); + + /* AES rounds 2 and 3 */ + aes_gcm_enc_round (r, k + 2, 4); + aes_gcm_enc_round (r, k + 3, 4); + + /* GHASH multiply block 6 */ + aes_gcm_ghash_mul_next (ctx, (d[2])); + + /* AES rounds 4 and 5 */ + aes_gcm_enc_round (r, k + 4, 4); + aes_gcm_enc_round (r, k + 5, 4); + + /* GHASH multiply block 7 */ + aes_gcm_ghash_mul_next (ctx, (d[3])); + + /* AES rounds 6 and 7 */ + aes_gcm_enc_round (r, k + 6, 4); + aes_gcm_enc_round (r, k + 7, 4); + + /* GHASH reduce 1st step */ + aes_gcm_ghash_reduce (ctx); + + /* AES rounds 8 and 9 */ + aes_gcm_enc_round (r, k + 8, 4); + aes_gcm_enc_round (r, k + 9, 4); + + /* GHASH reduce 2nd step */ + aes_gcm_ghash_reduce2 (ctx); + + /* load 4 blocks of data - encrypt round */ + if (ctx->operation == AES_GCM_OP_ENCRYPT) + for (int i = 0; i < 4; i++) + d[i] = sv[i + 4]; + + /* AES last round(s) */ + aes_gcm_enc_last_round (ctx, r, d, k, 4); + + /* store data */ + for (int i = 0; i < 4; i++) + dv[i + 4] = d[i]; + + /* GHASH final step */ + aes_gcm_ghash_final (ctx); +} + +static_always_inline void +aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes) +{ + const union + { + u8 b[64]; + aes_data_t r; + } scale = { + .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }, + }; + + d[0] &= (aes_gcm_splat (n_bytes) > scale.r); +} + +static_always_inline void +aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks, + u32 n_bytes) +{ + int n_lanes = (N_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1; + n_bytes -= (n_blocks - 1) * N; + int i; + + aes_gcm_enc_ctr0_round (ctx, 0); + aes_gcm_enc_ctr0_round (ctx, 1); + + if (n_bytes != N) + aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes); + + aes_gcm_ghash_mul_first (ctx, d[0], n_lanes); + + aes_gcm_enc_ctr0_round (ctx, 2); + aes_gcm_enc_ctr0_round (ctx, 3); + + if (n_blocks > 1) + aes_gcm_ghash_mul_next (ctx, d[1]); + + aes_gcm_enc_ctr0_round (ctx, 4); + aes_gcm_enc_ctr0_round (ctx, 5); + + if (n_blocks > 2) + aes_gcm_ghash_mul_next (ctx, d[2]); + + aes_gcm_enc_ctr0_round (ctx, 6); + aes_gcm_enc_ctr0_round (ctx, 7); + + if (n_blocks > 3) + aes_gcm_ghash_mul_next (ctx, d[3]); + + aes_gcm_enc_ctr0_round (ctx, 8); + aes_gcm_enc_ctr0_round (ctx, 9); + + aes_gcm_ghash_mul_bit_len (ctx); + aes_gcm_ghash_reduce (ctx); + + for (i = 10; i < ctx->rounds; i++) + aes_gcm_enc_ctr0_round (ctx, i); + + aes_gcm_ghash_reduce2 (ctx); + + aes_gcm_ghash_final (ctx); + + aes_gcm_enc_ctr0_round (ctx, i); +} + +static_always_inline void +aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left) +{ + aes_data_t d[4]; + + if (PREDICT_FALSE (n_left == 0)) + { + int i; + for (i = 0; i < ctx->rounds + 1; i++) + aes_gcm_enc_ctr0_round (ctx, i); + return; + } + + if (n_left < 4 * N) + { + ctx->last = 1; + if (n_left > 3 * N) + { + aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0); + aes_gcm_calc_last (ctx, d, 4, n_left); + } + else if (n_left > 2 * N) + { + aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0); + aes_gcm_calc_last (ctx, d, 3, n_left); + } + else if (n_left > N) + { + aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0); + aes_gcm_calc_last (ctx, d, 2, n_left); + } + else + { + aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0); + aes_gcm_calc_last (ctx, d, 1, n_left); + } + return; + } + aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 0); + + /* next */ + n_left -= 4 * N; + dst += 4 * N; + src += 4 * N; + + for (; n_left >= 8 * N; n_left -= 8 * N, src += 8 * N, dst += 8 * N) + aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1); + + if (n_left >= 4 * N) + { + aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1); + + /* next */ + n_left -= 4 * N; + dst += 4 * N; + src += 4 * N; + } + + if (n_left == 0) + { + aes_gcm_calc_last (ctx, d, 4, 4 * N); + return; + } + + ctx->last = 1; + + if (n_left > 3 * N) + { + aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1); + aes_gcm_calc_last (ctx, d, 4, n_left); + } + else if (n_left > 2 * N) + { + aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1); + aes_gcm_calc_last (ctx, d, 3, n_left); + } + else if (n_left > N) + { + aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1); + aes_gcm_calc_last (ctx, d, 2, n_left); + } + else + { + aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1); + aes_gcm_calc_last (ctx, d, 1, n_left); + } +} + +static_always_inline void +aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left) +{ + aes_data_t d[4] = {}; + for (; n_left >= 8 * N; n_left -= 8 * N, dst += 8 * N, src += 8 * N) + aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1); + + if (n_left >= 4 * N) + { + aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1); + + /* next */ + n_left -= 4 * N; + dst += N * 4; + src += N * 4; + } + + if (n_left == 0) + goto done; + + ctx->last = 1; + + if (n_left > 3 * N) + aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1); + else if (n_left > 2 * N) + aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1); + else if (n_left > N) + aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1); + else + aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1); + + u8x16 r; +done: + r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3); + ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]); + + /* encrypt counter 0 E(Y0, k) */ + for (int i = 0; i < ctx->rounds + 1; i += 1) + aes_gcm_enc_ctr0_round (ctx, i); +} + +static_always_inline int +aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag, + u32 data_bytes, u32 aad_bytes, u8 tag_len, + const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op) +{ + u8 *addt = (u8 *) aad; + u32x4 Y0; + + aes_gcm_ctx_t _ctx = { .counter = 2, + .rounds = aes_rounds, + .operation = op, + .data_bytes = data_bytes, + .aad_bytes = aad_bytes, + .Hi = kd->Hi }, + *ctx = &_ctx; + + /* initalize counter */ + Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 }; + Y0[2] = *(u32u *) (ivp + 8); + Y0[3] = 1 << 24; + ctx->EY0 = (u8x16) Y0; + ctx->Ke = kd->Ke; +#if N_LANES == 4 + ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){ + 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24, + }; +#elif N_LANES == 2 + ctx->Y = + u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 }; +#else + ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 }; +#endif + + /* calculate ghash for AAD */ + aes_gcm_ghash (ctx, addt, aad_bytes); + + clib_prefetch_load (tag); + + /* ghash and encrypt/edcrypt */ + if (op == AES_GCM_OP_ENCRYPT) + aes_gcm_enc (ctx, src, dst, data_bytes); + else if (op == AES_GCM_OP_DECRYPT) + aes_gcm_dec (ctx, src, dst, data_bytes); + + /* final tag is */ + ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0; + + /* tag_len 16 -> 0 */ + tag_len &= 0xf; + + if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC) + { + /* store tag */ + if (tag_len) + u8x16_store_partial (ctx->T, tag, tag_len); + else + ((u8x16u *) tag)[0] = ctx->T; + } + else + { + /* check tag */ + if (tag_len) + { + u16 mask = pow2_mask (tag_len); + u8x16 expected = u8x16_load_partial (tag, tag_len); + if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask) + return 1; + } + else + { + if (u8x16_is_equal (ctx->T, *(u8x16u *) tag)) + return 1; + } + } + return 0; +} + +static_always_inline void +clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key, + aes_key_size_t ks) +{ + u8x16 H; + u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1]; + aes_gcm_expaned_key_t *Ke = (aes_gcm_expaned_key_t *) kd->Ke; + + /* expand AES key */ + aes_key_expand (ek, key, ks); + for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++) + Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i]; + + /* pre-calculate H */ + H = aes_encrypt_block (u8x16_zero (), ek, ks); + H = u8x16_reflect (H); + ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi)); +} + +static_always_inline void +clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext, + u32 data_bytes, const u8 *aad, u32 aad_bytes, + const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag) +{ + aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes, + tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT); +} + +static_always_inline void +clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext, + u32 data_bytes, const u8 *aad, u32 aad_bytes, + const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag) +{ + aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes, + tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT); +} + +static_always_inline int +clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext, + u32 data_bytes, const u8 *aad, u32 aad_bytes, + const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext) +{ + return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag, + data_bytes, aad_bytes, tag_bytes, kd, + AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT); +} + +static_always_inline int +clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext, + u32 data_bytes, const u8 *aad, u32 aad_bytes, + const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext) +{ + return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag, + data_bytes, aad_bytes, tag_bytes, kd, + AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT); +} + +static_always_inline void +clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes, + const u8 *iv, u32 tag_bytes, u8 *tag) +{ + aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd, + AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC); +} + +static_always_inline void +clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes, + const u8 *iv, u32 tag_bytes, u8 *tag) +{ + aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd, + AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC); +} + +#endif /* __crypto_aes_gcm_h__ */ diff --git a/src/plugins/crypto_native/ghash.h b/src/vppinfra/crypto/ghash.h index 5f619cfa129..bae8badb5fc 100644 --- a/src/plugins/crypto_native/ghash.h +++ b/src/vppinfra/crypto/ghash.h @@ -86,7 +86,7 @@ * This allows us to improve performance by deferring reduction. For example * to caclulate ghash of 4 128-bit blocks of data (b0, b1, b2, b3), we can do: * - * __i128 Hi[4]; + * u8x16 Hi[4]; * ghash_precompute (H, Hi, 4); * * ghash_data_t _gd, *gd = &_gd; @@ -151,6 +151,8 @@ gmul_hi_hi (u8x16 a, u8x16 b) typedef struct { u8x16 mid, hi, lo, tmp_lo, tmp_hi; + u8x32 hi2, lo2, mid2, tmp_lo2, tmp_hi2; + u8x64 hi4, lo4, mid4, tmp_lo4, tmp_hi4; int pending; } ghash_data_t; @@ -172,7 +174,7 @@ ghash_mul_first (ghash_data_t * gd, u8x16 a, u8x16 b) /* a0 * b0 */ gd->lo = gmul_lo_lo (a, b); /* a0 * b1 ^ a1 * b0 */ - gd->mid = (gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b)); + gd->mid = gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b); /* set gd->pending to 0 so next invocation of ghash_mul_next(...) knows that there is no pending data in tmp_lo and tmp_hi */ @@ -270,12 +272,6 @@ static const u8x64 ghash4_poly2 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, }; -typedef struct -{ - u8x64 hi, lo, mid, tmp_lo, tmp_hi; - int pending; -} ghash4_data_t; - static_always_inline u8x64 gmul4_lo_lo (u8x64 a, u8x64 b) { @@ -300,18 +296,17 @@ gmul4_hi_hi (u8x64 a, u8x64 b) return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11); } - static_always_inline void -ghash4_mul_first (ghash4_data_t * gd, u8x64 a, u8x64 b) +ghash4_mul_first (ghash_data_t *gd, u8x64 a, u8x64 b) { - gd->hi = gmul4_hi_hi (a, b); - gd->lo = gmul4_lo_lo (a, b); - gd->mid = (gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b)); + gd->hi4 = gmul4_hi_hi (a, b); + gd->lo4 = gmul4_lo_lo (a, b); + gd->mid4 = gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b); gd->pending = 0; } static_always_inline void -ghash4_mul_next (ghash4_data_t * gd, u8x64 a, u8x64 b) +ghash4_mul_next (ghash_data_t *gd, u8x64 a, u8x64 b) { u8x64 hi = gmul4_hi_hi (a, b); u8x64 lo = gmul4_lo_lo (a, b); @@ -319,63 +314,62 @@ ghash4_mul_next (ghash4_data_t * gd, u8x64 a, u8x64 b) if (gd->pending) { /* there is peding data from previous invocation so we can XOR */ - gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, hi); - gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, lo); + gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, hi); + gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, lo); gd->pending = 0; } else { /* there is no peding data from previous invocation so we postpone XOR */ - gd->tmp_hi = hi; - gd->tmp_lo = lo; + gd->tmp_hi4 = hi; + gd->tmp_lo4 = lo; gd->pending = 1; } - gd->mid = u8x64_xor3 (gd->mid, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b)); + gd->mid4 = u8x64_xor3 (gd->mid4, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b)); } static_always_inline void -ghash4_reduce (ghash4_data_t * gd) +ghash4_reduce (ghash_data_t *gd) { u8x64 r; /* Final combination: - gd->lo ^= gd->mid << 64 - gd->hi ^= gd->mid >> 64 */ + gd->lo4 ^= gd->mid4 << 64 + gd->hi4 ^= gd->mid4 >> 64 */ - u8x64 midl = u8x64_word_shift_left (gd->mid, 8); - u8x64 midr = u8x64_word_shift_right (gd->mid, 8); + u8x64 midl = u8x64_word_shift_left (gd->mid4, 8); + u8x64 midr = u8x64_word_shift_right (gd->mid4, 8); if (gd->pending) { - gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, midl); - gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, midr); + gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, midl); + gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, midr); } else { - gd->lo ^= midl; - gd->hi ^= midr; + gd->lo4 ^= midl; + gd->hi4 ^= midr; } - r = gmul4_hi_lo (ghash4_poly2, gd->lo); - gd->lo ^= u8x64_word_shift_left (r, 8); - + r = gmul4_hi_lo (ghash4_poly2, gd->lo4); + gd->lo4 ^= u8x64_word_shift_left (r, 8); } static_always_inline void -ghash4_reduce2 (ghash4_data_t * gd) +ghash4_reduce2 (ghash_data_t *gd) { - gd->tmp_lo = gmul4_lo_lo (ghash4_poly2, gd->lo); - gd->tmp_hi = gmul4_lo_hi (ghash4_poly2, gd->lo); + gd->tmp_lo4 = gmul4_lo_lo (ghash4_poly2, gd->lo4); + gd->tmp_hi4 = gmul4_lo_hi (ghash4_poly2, gd->lo4); } static_always_inline u8x16 -ghash4_final (ghash4_data_t * gd) +ghash4_final (ghash_data_t *gd) { u8x64 r; u8x32 t; - r = u8x64_xor3 (gd->hi, u8x64_word_shift_right (gd->tmp_lo, 4), - u8x64_word_shift_left (gd->tmp_hi, 4)); + r = u8x64_xor3 (gd->hi4, u8x64_word_shift_right (gd->tmp_lo4, 4), + u8x64_word_shift_left (gd->tmp_hi4, 4)); /* horizontal XOR of 4 128-bit lanes */ t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r); @@ -383,6 +377,117 @@ ghash4_final (ghash4_data_t * gd) } #endif +#if defined(__VPCLMULQDQ__) + +static const u8x32 ghash2_poly2 = { + 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, +}; + +static_always_inline u8x32 +gmul2_lo_lo (u8x32 a, u8x32 b) +{ + return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x00); +} + +static_always_inline u8x32 +gmul2_hi_lo (u8x32 a, u8x32 b) +{ + return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x01); +} + +static_always_inline u8x32 +gmul2_lo_hi (u8x32 a, u8x32 b) +{ + return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x10); +} + +static_always_inline u8x32 +gmul2_hi_hi (u8x32 a, u8x32 b) +{ + return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x11); +} + +static_always_inline void +ghash2_mul_first (ghash_data_t *gd, u8x32 a, u8x32 b) +{ + gd->hi2 = gmul2_hi_hi (a, b); + gd->lo2 = gmul2_lo_lo (a, b); + gd->mid2 = gmul2_hi_lo (a, b) ^ gmul2_lo_hi (a, b); + gd->pending = 0; +} + +static_always_inline void +ghash2_mul_next (ghash_data_t *gd, u8x32 a, u8x32 b) +{ + u8x32 hi = gmul2_hi_hi (a, b); + u8x32 lo = gmul2_lo_lo (a, b); + + if (gd->pending) + { + /* there is peding data from previous invocation so we can XOR */ + gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, hi); + gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, lo); + gd->pending = 0; + } + else + { + /* there is no peding data from previous invocation so we postpone XOR */ + gd->tmp_hi2 = hi; + gd->tmp_lo2 = lo; + gd->pending = 1; + } + gd->mid2 = u8x32_xor3 (gd->mid2, gmul2_hi_lo (a, b), gmul2_lo_hi (a, b)); +} + +static_always_inline void +ghash2_reduce (ghash_data_t *gd) +{ + u8x32 r; + + /* Final combination: + gd->lo2 ^= gd->mid2 << 64 + gd->hi2 ^= gd->mid2 >> 64 */ + + u8x32 midl = u8x32_word_shift_left (gd->mid2, 8); + u8x32 midr = u8x32_word_shift_right (gd->mid2, 8); + + if (gd->pending) + { + gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, midl); + gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, midr); + } + else + { + gd->lo2 ^= midl; + gd->hi2 ^= midr; + } + + r = gmul2_hi_lo (ghash2_poly2, gd->lo2); + gd->lo2 ^= u8x32_word_shift_left (r, 8); +} + +static_always_inline void +ghash2_reduce2 (ghash_data_t *gd) +{ + gd->tmp_lo2 = gmul2_lo_lo (ghash2_poly2, gd->lo2); + gd->tmp_hi2 = gmul2_lo_hi (ghash2_poly2, gd->lo2); +} + +static_always_inline u8x16 +ghash2_final (ghash_data_t *gd) +{ + u8x32 r; + + r = u8x32_xor3 (gd->hi2, u8x32_word_shift_right (gd->tmp_lo2, 4), + u8x32_word_shift_left (gd->tmp_hi2, 4)); + + /* horizontal XOR of 2 128-bit lanes */ + return u8x32_extract_hi (r) ^ u8x32_extract_lo (r); +} +#endif + static_always_inline void ghash_precompute (u8x16 H, u8x16 * Hi, int n) { @@ -398,9 +503,7 @@ ghash_precompute (u8x16 H, u8x16 * Hi, int n) #else r32[3] = r32[0]; #endif - /* *INDENT-OFF* */ r32 = r32 == (u32x4) {1, 0, 0, 1}; - /* *INDENT-ON* */ Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly); /* calculate H^(i + 1) */ @@ -410,10 +513,3 @@ ghash_precompute (u8x16 H, u8x16 * Hi, int n) #endif /* __ghash_h__ */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/perfmon/bundle_default.c b/src/vppinfra/perfmon/bundle_default.c index b5282c51740..c2118aed974 100644 --- a/src/vppinfra/perfmon/bundle_default.c +++ b/src/vppinfra/perfmon/bundle_default.c @@ -24,25 +24,21 @@ format_perfmon_bundle_default (u8 *s, va_list *args) case 1: return format (s, "%5.2f", (f64) d[2] / d[0]); case 2: - if (c->n_ops > 1) - return format (s, "%8.2f", (f64) d[0] / c->n_ops); - else - return format (s, "%8u", d[0]); + return format (s, "%8u", d[0]); case 3: - if (c->n_ops > 1) - return format (s, "%8.2f", (f64) d[2] / c->n_ops); - else - return format (s, "%8u", d[2]); + return format (s, "%8.2f", (f64) d[0] / c->n_ops); case 4: - if (c->n_ops > 1) - return format (s, "%9.2f", (f64) d[3] / c->n_ops); - else - return format (s, "%9u", d[3]); + return format (s, "%8u", d[2]); case 5: - if (c->n_ops > 1) - return format (s, "%10.2f", (f64) d[4] / c->n_ops); - else - return format (s, "%10u", d[4]); + return format (s, "%8.2f", (f64) d[2] / c->n_ops); + case 6: + return format (s, "%9u", d[3]); + case 7: + return format (s, "%9.2f", (f64) d[3] / c->n_ops); + case 8: + return format (s, "%10u", d[4]); + case 9: + return format (s, "%10.2f", (f64) d[4] / c->n_ops); default: return s; } @@ -59,6 +55,7 @@ CLIB_PERFMON_BUNDLE (default) = { .config[4] = PERF_COUNT_HW_BRANCH_MISSES, .n_events = 5, .format_fn = format_perfmon_bundle_default, - .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks/Op", "Inst/Op", - "Brnch/Op", "BrMiss/Op"), + .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks", "Clks/Op", + "Inst", "Inst/Op", "Brnch", "Brnch/Op", + "BrMiss", "BrMiss/Op"), }; diff --git a/src/vppinfra/sha2.h b/src/vppinfra/sha2.h deleted file mode 100644 index 61fb7f52961..00000000000 --- a/src/vppinfra/sha2.h +++ /dev/null @@ -1,637 +0,0 @@ -/* - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef included_sha2_h -#define included_sha2_h - -#include <vppinfra/clib.h> - -#define SHA224_DIGEST_SIZE 28 -#define SHA224_BLOCK_SIZE 64 - -#define SHA256_DIGEST_SIZE 32 -#define SHA256_BLOCK_SIZE 64 -#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y))) -#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c)) -#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c)) -#define SHA256_CSIGMA0(x) (SHA256_ROTR(x, 2) ^ \ - SHA256_ROTR(x, 13) ^ \ - SHA256_ROTR(x, 22)); -#define SHA256_CSIGMA1(x) (SHA256_ROTR(x, 6) ^ \ - SHA256_ROTR(x, 11) ^ \ - SHA256_ROTR(x, 25)); -#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ \ - SHA256_ROTR (x, 18) ^ \ - (x >> 3)) -#define SHA256_SSIGMA1(x) (SHA256_ROTR (x, 17) ^ \ - SHA256_ROTR (x, 19) ^ \ - (x >> 10)) - -#define SHA256_MSG_SCHED(w, j) \ -{ \ - w[j] = w[j - 7] + w[j - 16]; \ - w[j] += SHA256_SSIGMA0 (w[j - 15]); \ - w[j] += SHA256_SSIGMA1 (w[j - 2]); \ -} - -#define SHA256_TRANSFORM(s, w, i, k) \ -{ \ - __typeof__(s[0]) t1, t2; \ - t1 = k + w[i] + s[7]; \ - t1 += SHA256_CSIGMA1 (s[4]); \ - t1 += SHA256_CH (s[4], s[5], s[6]); \ - t2 = SHA256_CSIGMA0 (s[0]); \ - t2 += SHA256_MAJ (s[0], s[1], s[2]); \ - s[7] = s[6]; \ - s[6] = s[5]; \ - s[5] = s[4]; \ - s[4] = s[3] + t1; \ - s[3] = s[2]; \ - s[2] = s[1]; \ - s[1] = s[0]; \ - s[0] = t1 + t2; \ -} - -#define SHA512_224_DIGEST_SIZE 28 -#define SHA512_224_BLOCK_SIZE 128 - -#define SHA512_256_DIGEST_SIZE 32 -#define SHA512_256_BLOCK_SIZE 128 - -#define SHA384_DIGEST_SIZE 48 -#define SHA384_BLOCK_SIZE 128 - -#define SHA512_DIGEST_SIZE 64 -#define SHA512_BLOCK_SIZE 128 -#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y))) -#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c)) -#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c)) -#define SHA512_CSIGMA0(x) (SHA512_ROTR (x, 28) ^ \ - SHA512_ROTR (x, 34) ^ \ - SHA512_ROTR (x, 39)) -#define SHA512_CSIGMA1(x) (SHA512_ROTR (x, 14) ^ \ - SHA512_ROTR (x, 18) ^ \ - SHA512_ROTR (x, 41)) -#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ \ - SHA512_ROTR (x, 8) ^ \ - (x >> 7)) -#define SHA512_SSIGMA1(x) (SHA512_ROTR (x, 19) ^ \ - SHA512_ROTR (x, 61) ^ \ - (x >> 6)) - -#define SHA512_MSG_SCHED(w, j) \ -{ \ - w[j] = w[j - 7] + w[j - 16]; \ - w[j] += SHA512_SSIGMA0 (w[j - 15]); \ - w[j] += SHA512_SSIGMA1 (w[j - 2]); \ -} - -#define SHA512_TRANSFORM(s, w, i, k) \ -{ \ - __typeof__(s[0]) t1, t2; \ - t1 = k + w[i] + s[7]; \ - t1 += SHA512_CSIGMA1 (s[4]); \ - t1 += SHA512_CH (s[4], s[5], s[6]); \ - t2 = SHA512_CSIGMA0 (s[0]); \ - t2 += SHA512_MAJ (s[0], s[1], s[2]); \ - s[7] = s[6]; \ - s[6] = s[5]; \ - s[5] = s[4]; \ - s[4] = s[3] + t1; \ - s[3] = s[2]; \ - s[2] = s[1]; \ - s[1] = s[0]; \ - s[0] = t1 + t2; \ -} - -static const u32 sha224_h[8] = { - 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, - 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4 -}; - -static const u32 sha256_h[8] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, - 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 -}; - -static const u32 sha256_k[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -static const u64 sha384_h[8] = { - 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, - 0x9159015a3070dd17, 0x152fecd8f70e5939, - 0x67332667ffc00b31, 0x8eb44a8768581511, - 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4 -}; - -static const u64 sha512_h[8] = { - 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, - 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, 0x9b05688c2b3e6c1f, - 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 -}; - -static const u64 sha512_224_h[8] = { - 0x8c3d37c819544da2, 0x73e1996689dcd4d6, - 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf, - 0x0f6d2b697bd44da8, 0x77e36f7304c48942, - 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1 -}; - -static const u64 sha512_256_h[8] = { - 0x22312194fc2bf72c, 0x9f555fa3c84c64c2, - 0x2393b86b6f53b151, 0x963877195940eabd, - 0x96283ee2a88effe3, 0xbe5e1e2553863992, - 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2 -}; - -static const u64 sha512_k[80] = { - 0x428a2f98d728ae22, 0x7137449123ef65cd, - 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, - 0x3956c25bf348b538, 0x59f111f1b605d019, - 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, - 0xd807aa98a3030242, 0x12835b0145706fbe, - 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, - 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, - 0x9bdc06a725c71235, 0xc19bf174cf692694, - 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, - 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, - 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, - 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, - 0x983e5152ee66dfab, 0xa831c66d2db43210, - 0xb00327c898fb213f, 0xbf597fc7beef0ee4, - 0xc6e00bf33da88fc2, 0xd5a79147930aa725, - 0x06ca6351e003826f, 0x142929670a0e6e70, - 0x27b70a8546d22ffc, 0x2e1b21385c26c926, - 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, - 0x650a73548baf63de, 0x766a0abb3c77b2a8, - 0x81c2c92e47edaee6, 0x92722c851482353b, - 0xa2bfe8a14cf10364, 0xa81a664bbc423001, - 0xc24b8b70d0f89791, 0xc76c51a30654be30, - 0xd192e819d6ef5218, 0xd69906245565a910, - 0xf40e35855771202a, 0x106aa07032bbd1b8, - 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, - 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, - 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, - 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, - 0x748f82ee5defb2fc, 0x78a5636f43172f60, - 0x84c87814a1f0ab72, 0x8cc702081a6439ec, - 0x90befffa23631e28, 0xa4506cebde82bde9, - 0xbef9a3f7b2c67915, 0xc67178f2e372532b, - 0xca273eceea26619c, 0xd186b8c721c0c207, - 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, - 0x06f067aa72176fba, 0x0a637dc5a2c898a6, - 0x113f9804bef90dae, 0x1b710b35131c471b, - 0x28db77f523047d84, 0x32caab7b40c72493, - 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, - 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, - 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 -}; - -typedef enum -{ - CLIB_SHA2_224, - CLIB_SHA2_256, - CLIB_SHA2_384, - CLIB_SHA2_512, - CLIB_SHA2_512_224, - CLIB_SHA2_512_256, -} clib_sha2_type_t; - -#define SHA2_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE -#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE - -typedef struct -{ - u64 total_bytes; - u16 n_pending; - u8 block_size; - u8 digest_size; - union - { - u32 h32[8]; - u64 h64[8]; -#if defined(__SHA__) && defined (__x86_64__) - u32x4 h32x4[2]; -#endif - }; - union - { - u8 as_u8[SHA2_MAX_BLOCK_SIZE]; - u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)]; - uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)]; - } - pending; -} -clib_sha2_ctx_t; - -static_always_inline void -clib_sha2_init (clib_sha2_ctx_t * ctx, clib_sha2_type_t type) -{ - const u32 *h32 = 0; - const u64 *h64 = 0; - - ctx->total_bytes = 0; - ctx->n_pending = 0; - - switch (type) - { - case CLIB_SHA2_224: - h32 = sha224_h; - ctx->block_size = SHA224_BLOCK_SIZE; - ctx->digest_size = SHA224_DIGEST_SIZE; - break; - case CLIB_SHA2_256: - h32 = sha256_h; - ctx->block_size = SHA256_BLOCK_SIZE; - ctx->digest_size = SHA256_DIGEST_SIZE; - break; - case CLIB_SHA2_384: - h64 = sha384_h; - ctx->block_size = SHA384_BLOCK_SIZE; - ctx->digest_size = SHA384_DIGEST_SIZE; - break; - case CLIB_SHA2_512: - h64 = sha512_h; - ctx->block_size = SHA512_BLOCK_SIZE; - ctx->digest_size = SHA512_DIGEST_SIZE; - break; - case CLIB_SHA2_512_224: - h64 = sha512_224_h; - ctx->block_size = SHA512_224_BLOCK_SIZE; - ctx->digest_size = SHA512_224_DIGEST_SIZE; - break; - case CLIB_SHA2_512_256: - h64 = sha512_256_h; - ctx->block_size = SHA512_256_BLOCK_SIZE; - ctx->digest_size = SHA512_256_DIGEST_SIZE; - break; - } - if (h32) - for (int i = 0; i < 8; i++) - ctx->h32[i] = h32[i]; - - if (h64) - for (int i = 0; i < 8; i++) - ctx->h64[i] = h64[i]; -} - -#if defined(__SHA__) && defined (__x86_64__) -static inline void -shani_sha256_cycle_w (u32x4 cw[], u8 a, u8 b, u8 c, u8 d) -{ - cw[a] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) cw[a], (__m128i) cw[b]); - cw[a] += (u32x4) _mm_alignr_epi8 ((__m128i) cw[d], (__m128i) cw[c], 4); - cw[a] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) cw[a], (__m128i) cw[d]); -} - -static inline void -shani_sha256_4_rounds (u32x4 cw, u8 n, u32x4 s[]) -{ - u32x4 r = *(u32x4 *) (sha256_k + 4 * n) + cw; - s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1], - (__m128i) r); - r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r); - s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0], - (__m128i) r); -} - -static inline void -shani_sha256_shuffle (u32x4 d[2], u32x4 s[2]) -{ - /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */ - d[0] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0xbb); - d[1] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0x11); -} -#endif - -static inline void -clib_sha256_block (clib_sha2_ctx_t *ctx, const u8 *msg, uword n_blocks) -{ -#if defined(__SHA__) && defined (__x86_64__) - u32x4 h[2], s[2], w[4]; - - shani_sha256_shuffle (h, ctx->h32x4); - - while (n_blocks) - { - w[0] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 0)); - w[1] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 16)); - w[2] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 32)); - w[3] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 48)); - - s[0] = h[0]; - s[1] = h[1]; - - shani_sha256_4_rounds (w[0], 0, s); - shani_sha256_4_rounds (w[1], 1, s); - shani_sha256_4_rounds (w[2], 2, s); - shani_sha256_4_rounds (w[3], 3, s); - - shani_sha256_cycle_w (w, 0, 1, 2, 3); - shani_sha256_4_rounds (w[0], 4, s); - shani_sha256_cycle_w (w, 1, 2, 3, 0); - shani_sha256_4_rounds (w[1], 5, s); - shani_sha256_cycle_w (w, 2, 3, 0, 1); - shani_sha256_4_rounds (w[2], 6, s); - shani_sha256_cycle_w (w, 3, 0, 1, 2); - shani_sha256_4_rounds (w[3], 7, s); - - shani_sha256_cycle_w (w, 0, 1, 2, 3); - shani_sha256_4_rounds (w[0], 8, s); - shani_sha256_cycle_w (w, 1, 2, 3, 0); - shani_sha256_4_rounds (w[1], 9, s); - shani_sha256_cycle_w (w, 2, 3, 0, 1); - shani_sha256_4_rounds (w[2], 10, s); - shani_sha256_cycle_w (w, 3, 0, 1, 2); - shani_sha256_4_rounds (w[3], 11, s); - - shani_sha256_cycle_w (w, 0, 1, 2, 3); - shani_sha256_4_rounds (w[0], 12, s); - shani_sha256_cycle_w (w, 1, 2, 3, 0); - shani_sha256_4_rounds (w[1], 13, s); - shani_sha256_cycle_w (w, 2, 3, 0, 1); - shani_sha256_4_rounds (w[2], 14, s); - shani_sha256_cycle_w (w, 3, 0, 1, 2); - shani_sha256_4_rounds (w[3], 15, s); - - h[0] += s[0]; - h[1] += s[1]; - - /* next */ - msg += SHA256_BLOCK_SIZE; - n_blocks--; - } - - shani_sha256_shuffle (ctx->h32x4, h); -#else - u32 w[64], s[8], i; - - while (n_blocks) - { - for (i = 0; i < 8; i++) - s[i] = ctx->h32[i]; - - for (i = 0; i < 16; i++) - { - w[i] = clib_net_to_host_u32 (*((u32 *) msg + i)); - SHA256_TRANSFORM (s, w, i, sha256_k[i]); - } - - for (i = 16; i < 64; i++) - { - SHA256_MSG_SCHED (w, i); - SHA256_TRANSFORM (s, w, i, sha256_k[i]); - } - - for (i = 0; i < 8; i++) - ctx->h32[i] += s[i]; - - /* next */ - msg += SHA256_BLOCK_SIZE; - n_blocks--; - } -#endif -} - -static_always_inline void -clib_sha512_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks) -{ - u64 w[80], s[8], i; - - while (n_blocks) - { - for (i = 0; i < 8; i++) - s[i] = ctx->h64[i]; - - for (i = 0; i < 16; i++) - { - w[i] = clib_net_to_host_u64 (*((u64 *) msg + i)); - SHA512_TRANSFORM (s, w, i, sha512_k[i]); - } - - for (i = 16; i < 80; i++) - { - SHA512_MSG_SCHED (w, i); - SHA512_TRANSFORM (s, w, i, sha512_k[i]); - } - - for (i = 0; i < 8; i++) - ctx->h64[i] += s[i]; - - /* next */ - msg += SHA512_BLOCK_SIZE; - n_blocks--; - } -} - -static_always_inline void -clib_sha2_update (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_bytes) -{ - uword n_blocks; - if (ctx->n_pending) - { - uword n_left = ctx->block_size - ctx->n_pending; - if (n_bytes < n_left) - { - clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg, - n_bytes); - ctx->n_pending += n_bytes; - return; - } - else - { - clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg, n_left); - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, ctx->pending.as_u8, 1); - else - clib_sha256_block (ctx, ctx->pending.as_u8, 1); - ctx->n_pending = 0; - ctx->total_bytes += ctx->block_size; - n_bytes -= n_left; - msg += n_left; - } - } - - if ((n_blocks = n_bytes / ctx->block_size)) - { - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, msg, n_blocks); - else - clib_sha256_block (ctx, msg, n_blocks); - n_bytes -= n_blocks * ctx->block_size; - msg += n_blocks * ctx->block_size; - ctx->total_bytes += n_blocks * ctx->block_size; - } - - if (n_bytes) - { - clib_memset_u8 (ctx->pending.as_u8, 0, ctx->block_size); - clib_memcpy_fast (ctx->pending.as_u8, msg, n_bytes); - ctx->n_pending = n_bytes; - } - else - ctx->n_pending = 0; -} - -static_always_inline void -clib_sha2_final (clib_sha2_ctx_t * ctx, u8 * digest) -{ - int i; - - ctx->total_bytes += ctx->n_pending; - if (ctx->n_pending == 0) - { - clib_memset (ctx->pending.as_u8, 0, ctx->block_size); - ctx->pending.as_u8[0] = 0x80; - } - else if (ctx->n_pending + sizeof (u64) + sizeof (u8) > ctx->block_size) - { - ctx->pending.as_u8[ctx->n_pending] = 0x80; - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, ctx->pending.as_u8, 1); - else - clib_sha256_block (ctx, ctx->pending.as_u8, 1); - clib_memset (ctx->pending.as_u8, 0, ctx->block_size); - } - else - ctx->pending.as_u8[ctx->n_pending] = 0x80; - - ctx->pending.as_u64[ctx->block_size / 8 - 1] = - clib_net_to_host_u64 (ctx->total_bytes * 8); - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, ctx->pending.as_u8, 1); - else - clib_sha256_block (ctx, ctx->pending.as_u8, 1); - - if (ctx->block_size == SHA512_BLOCK_SIZE) - { - for (i = 0; i < ctx->digest_size / sizeof (u64); i++) - *((u64 *) digest + i) = clib_net_to_host_u64 (ctx->h64[i]); - - /* sha512-224 case - write half of u64 */ - if (i * sizeof (u64) < ctx->digest_size) - *((u32 *) digest + 2 * i) = clib_net_to_host_u32 (ctx->h64[i] >> 32); - } - else - for (i = 0; i < ctx->digest_size / sizeof (u32); i++) - *((u32 *) digest + i) = clib_net_to_host_u32 (ctx->h32[i]); -} - -static_always_inline void -clib_sha2 (clib_sha2_type_t type, const u8 * msg, uword len, u8 * digest) -{ - clib_sha2_ctx_t ctx; - clib_sha2_init (&ctx, type); - clib_sha2_update (&ctx, msg, len); - clib_sha2_final (&ctx, digest); -} - -#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__) -#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__) -#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__) -#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__) -#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__) -#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__) - -static_always_inline void -clib_hmac_sha2 (clib_sha2_type_t type, const u8 * key, uword key_len, - const u8 * msg, uword len, u8 * digest) -{ - clib_sha2_ctx_t _ctx, *ctx = &_ctx; - uword key_data[SHA2_MAX_BLOCK_SIZE / sizeof (uword)]; - u8 i_digest[SHA2_MAX_DIGEST_SIZE]; - int i, n_words; - - clib_sha2_init (ctx, type); - n_words = ctx->block_size / sizeof (uword); - - /* key */ - if (key_len > ctx->block_size) - { - /* key is longer than block, calculate hash of key */ - clib_sha2_update (ctx, key, key_len); - for (i = (ctx->digest_size / sizeof (uword)) / 2; i < n_words; i++) - key_data[i] = 0; - clib_sha2_final (ctx, (u8 *) key_data); - clib_sha2_init (ctx, type); - } - else - { - for (i = 0; i < n_words; i++) - key_data[i] = 0; - clib_memcpy_fast (key_data, key, key_len); - } - - /* ipad */ - for (i = 0; i < n_words; i++) - ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x3636363636363636; - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, ctx->pending.as_u8, 1); - else - clib_sha256_block (ctx, ctx->pending.as_u8, 1); - ctx->total_bytes += ctx->block_size; - - /* message */ - clib_sha2_update (ctx, msg, len); - clib_sha2_final (ctx, i_digest); - - /* opad */ - clib_sha2_init (ctx, type); - for (i = 0; i < n_words; i++) - ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x5c5c5c5c5c5c5c5c; - if (ctx->block_size == SHA512_BLOCK_SIZE) - clib_sha512_block (ctx, ctx->pending.as_u8, 1); - else - clib_sha256_block (ctx, ctx->pending.as_u8, 1); - ctx->total_bytes += ctx->block_size; - - /* digest */ - clib_sha2_update (ctx, i_digest, ctx->digest_size); - clib_sha2_final (ctx, digest); -} - -#define clib_hmac_sha224(...) clib_hmac_sha2 (CLIB_SHA2_224, __VA_ARGS__) -#define clib_hmac_sha256(...) clib_hmac_sha2 (CLIB_SHA2_256, __VA_ARGS__) -#define clib_hmac_sha384(...) clib_hmac_sha2 (CLIB_SHA2_384, __VA_ARGS__) -#define clib_hmac_sha512(...) clib_hmac_sha2 (CLIB_SHA2_512, __VA_ARGS__) -#define clib_hmac_sha512_224(...) clib_hmac_sha2 (CLIB_SHA2_512_224, __VA_ARGS__) -#define clib_hmac_sha512_256(...) clib_hmac_sha2 (CLIB_SHA2_512_256, __VA_ARGS__) - -#endif /* included_sha2_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vppinfra/test/aes_cbc.c b/src/vppinfra/test/aes_cbc.c new file mode 100644 index 00000000000..bff439ab7b5 --- /dev/null +++ b/src/vppinfra/test/aes_cbc.c @@ -0,0 +1,187 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. + */ + +#ifdef __AES__ +#include <vppinfra/format.h> +#include <vppinfra/test/test.h> +#include <vppinfra/crypto/aes_cbc.h> + +static const u8 iv[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, +}; + +static const u8 plaintext[] = { + 0x6B, 0xC1, 0xBE, 0xE2, 0x2E, 0x40, 0x9F, 0x96, 0xE9, 0x3D, 0x7E, 0x11, 0x73, + 0x93, 0x17, 0x2A, 0xAE, 0x2D, 0x8A, 0x57, 0x1E, 0x03, 0xAC, 0x9C, 0x9E, 0xB7, + 0x6F, 0xAC, 0x45, 0xAF, 0x8E, 0x51, 0x30, 0xC8, 0x1C, 0x46, 0xA3, 0x5C, 0xE4, + 0x11, 0xE5, 0xFB, 0xC1, 0x19, 0x1A, 0x0A, 0x52, 0xEF, 0xF6, 0x9F, 0x24, 0x45, + 0xDF, 0x4F, 0x9B, 0x17, 0xAD, 0x2B, 0x41, 0x7B, 0xE6, 0x6C, 0x37, 0x10, +}; + +static const u8 key128[] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6, + 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C }; + +static const u8 key192[24] = { + 0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52, 0xC8, 0x10, 0xF3, 0x2B, + 0x80, 0x90, 0x79, 0xE5, 0x62, 0xF8, 0xEA, 0xD2, 0x52, 0x2C, 0x6B, 0x7B, +}; + +static const u8 ciphertext128[] = { + 0x76, 0x49, 0xAB, 0xAC, 0x81, 0x19, 0xB2, 0x46, 0xCE, 0xE9, 0x8E, 0x9B, 0x12, + 0xE9, 0x19, 0x7D, 0x50, 0x86, 0xCB, 0x9B, 0x50, 0x72, 0x19, 0xEE, 0x95, 0xDB, + 0x11, 0x3A, 0x91, 0x76, 0x78, 0xB2, 0x73, 0xBE, 0xD6, 0xB8, 0xE3, 0xC1, 0x74, + 0x3B, 0x71, 0x16, 0xE6, 0x9E, 0x22, 0x22, 0x95, 0x16, 0x3F, 0xF1, 0xCA, 0xA1, + 0x68, 0x1F, 0xAC, 0x09, 0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7, +}; + +static const u8 ciphertext192[64] = { + 0x4F, 0x02, 0x1D, 0xB2, 0x43, 0xBC, 0x63, 0x3D, 0x71, 0x78, 0x18, 0x3A, 0x9F, + 0xA0, 0x71, 0xE8, 0xB4, 0xD9, 0xAD, 0xA9, 0xAD, 0x7D, 0xED, 0xF4, 0xE5, 0xE7, + 0x38, 0x76, 0x3F, 0x69, 0x14, 0x5A, 0x57, 0x1B, 0x24, 0x20, 0x12, 0xFB, 0x7A, + 0xE0, 0x7F, 0xA9, 0xBA, 0xAC, 0x3D, 0xF1, 0x02, 0xE0, 0x08, 0xB0, 0xE2, 0x79, + 0x88, 0x59, 0x88, 0x81, 0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD, +}; + +static const u8 key256[32] = { + 0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE, 0x2B, 0x73, 0xAE, + 0xF0, 0x85, 0x7D, 0x77, 0x81, 0x1F, 0x35, 0x2C, 0x07, 0x3B, 0x61, + 0x08, 0xD7, 0x2D, 0x98, 0x10, 0xA3, 0x09, 0x14, 0xDF, 0xF4, +}; + +static const u8 ciphertext256[64] = { + 0xF5, 0x8C, 0x4C, 0x04, 0xD6, 0xE5, 0xF1, 0xBA, 0x77, 0x9E, 0xAB, 0xFB, 0x5F, + 0x7B, 0xFB, 0xD6, 0x9C, 0xFC, 0x4E, 0x96, 0x7E, 0xDB, 0x80, 0x8D, 0x67, 0x9F, + 0x77, 0x7B, 0xC6, 0x70, 0x2C, 0x7D, 0x39, 0xF2, 0x33, 0x69, 0xA9, 0xD9, 0xBA, + 0xCF, 0xA5, 0x30, 0xE2, 0x63, 0x04, 0x23, 0x14, 0x61, 0xB2, 0xEB, 0x05, 0xE2, + 0xC3, 0x9B, 0xE9, 0xFC, 0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B, +}; + +#define _(b) \ + static clib_error_t *test_clib_aes##b##_cbc_encrypt (clib_error_t *err) \ + { \ + aes_cbc_key_data_t k; \ + u8 data[512]; \ + clib_aes##b##_cbc_key_expand (&k, key##b); \ + clib_aes##b##_cbc_encrypt (&k, plaintext, sizeof (plaintext), iv, data); \ + if (memcmp (ciphertext##b, data, sizeof (ciphertext##b)) != 0) \ + err = \ + clib_error_return (err, "encrypted data doesn't match plaintext"); \ + return err; \ + } \ + void __test_perf_fn perftest_aes##b##_enc_var_sz (test_perf_t *tp) \ + { \ + u32 n = tp->n_ops; \ + aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \ + u8 *dst = test_mem_alloc (n + 16); \ + u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \ + clib_aes##b##_cbc_key_expand (kd, key##b); \ + \ + test_perf_event_enable (tp); \ + clib_aes##b##_cbc_encrypt (kd, src, n, iv, dst); \ + test_perf_event_disable (tp); \ + } +_ (128) +_ (192) +_ (256) +#undef _ + +REGISTER_TEST (clib_aes128_cbc_encrypt) = { + .name = "clib_aes128_cbc_encrypt", + .fn = test_clib_aes128_cbc_encrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes128_enc_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes128_enc_var_sz }), +}; + +REGISTER_TEST (clib_aes192_cbc_encrypt) = { + .name = "clib_aes192_cbc_encrypt", + .fn = test_clib_aes192_cbc_encrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes192_enc_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes192_enc_var_sz }), +}; + +REGISTER_TEST (clib_aes256_cbc_encrypt) = { + .name = "clib_aes256_cbc_encrypt", + .fn = test_clib_aes256_cbc_encrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes256_enc_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes256_enc_var_sz }), +}; + +#define _(b) \ + static clib_error_t *test_clib_aes##b##_cbc_decrypt (clib_error_t *err) \ + { \ + aes_cbc_key_data_t k; \ + u8 data[512]; \ + clib_aes##b##_cbc_key_expand (&k, key##b); \ + clib_aes##b##_cbc_decrypt (&k, ciphertext##b, sizeof (ciphertext##b), iv, \ + data); \ + if (memcmp (plaintext, data, sizeof (plaintext)) != 0) \ + err = \ + clib_error_return (err, "decrypted data doesn't match plaintext"); \ + return err; \ + } \ + void __test_perf_fn perftest_aes##b##_dec_var_sz (test_perf_t *tp) \ + { \ + u32 n = tp->n_ops; \ + aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \ + u8 *dst = test_mem_alloc (n + 16); \ + u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \ + clib_aes##b##_cbc_key_expand (kd, key##b); \ + \ + test_perf_event_enable (tp); \ + clib_aes##b##_cbc_decrypt (kd, src, n, iv, dst); \ + test_perf_event_disable (tp); \ + } + +_ (128) +_ (192) +_ (256) +#undef _ + +REGISTER_TEST (clib_aes128_cbc_decrypt) = { + .name = "clib_aes128_cbc_decrypt", + .fn = test_clib_aes128_cbc_decrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes128_dec_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes128_dec_var_sz }), +}; + +REGISTER_TEST (clib_aes192_cbc_decrypt) = { + .name = "clib_aes192_cbc_decrypt", + .fn = test_clib_aes192_cbc_decrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes192_dec_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes192_dec_var_sz }), +}; + +REGISTER_TEST (clib_aes256_cbc_decrypt) = { + .name = "clib_aes256_cbc_decrypt", + .fn = test_clib_aes256_cbc_decrypt, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes256_dec_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 9008, + .fn = perftest_aes256_dec_var_sz }), +}; + +#endif diff --git a/src/vppinfra/test/aes_gcm.c b/src/vppinfra/test/aes_gcm.c new file mode 100644 index 00000000000..aeaf7cf8c15 --- /dev/null +++ b/src/vppinfra/test/aes_gcm.c @@ -0,0 +1,1177 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. + */ + +#if defined(__AES__) && defined(__PCLMUL__) +#include <vppinfra/format.h> +#include <vppinfra/test/test.h> +#include <vppinfra/crypto/aes_gcm.h> + +static const u8 tc1_key128[16] = { + 0, +}; + +static const u8 tc1_iv[12] = { + 0, +}; + +static const u8 tc1_tag128[] = { 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, + 0x30, 0x61, 0x36, 0x7f, 0x1d, 0x57, + 0xa4, 0xe7, 0x45, 0x5a }; +static const u8 tc1_key256[32] = { + 0, +}; + +static const u8 tc1_tag256[] = { + 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9, + 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b, +}; + +static const u8 tc2_ciphertext256[] = { 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, + 0x6b, 0x6e, 0x07, 0x4e, 0xc5, 0xd3, + 0xba, 0xf3, 0x9d, 0x18 }; + +static const u8 tc2_tag256[] = { 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, + 0x6b, 0xf0, 0x26, 0x5b, 0x98, 0xb5, + 0xd4, 0x8a, 0xb9, 0x19 }; + +static const u8 tc2_plaintext[16] = { + 0, +}; + +static const u8 tc2_tag128[] = { 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, + 0x13, 0xbd, 0xf5, 0x3a, 0x67, 0xb2, + 0x12, 0x57, 0xbd, 0xdf }; + +static const u8 tc2_ciphertext128[] = { 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, + 0xa3, 0x92, 0xf3, 0x28, 0xc2, 0xb9, + 0x71, 0xb2, 0xfe, 0x78 }; + +static const u8 tc3_key128[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, + 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94, + 0x67, 0x30, 0x83, 0x08 }; + +static const u8 tc3_iv[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, + 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88 }; + +static const u8 tc3_plaintext[] = { + 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5, 0xaf, + 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, 0x2e, 0x4c, + 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, + 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, 0xb1, 0x6a, 0xed, 0xf5, + 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 +}; + +static const u8 tc3_ciphertext128[] = { + 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7, 0x84, + 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, 0x35, 0xc1, + 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, + 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, 0x1b, 0xa3, 0x0b, 0x39, + 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85 +}; + +static const u8 tc3_tag128[] = { 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, + 0x64, 0xa6, 0x2c, 0xf3, 0x5a, 0xbd, + 0x2b, 0xa6, 0xfa, 0xb4 }; + +static const u8 tc3_key256[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, + 0x1c, 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, + 0x83, 0x08, 0xfe, 0xff, 0xe9, 0x92, 0x86, + 0x65, 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94, + 0x67, 0x30, 0x83, 0x08 }; + +static const u8 tc3_ciphertext256[] = { + 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3, 0x2a, + 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, 0x75, 0x98, + 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, + 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, 0xc5, 0xf6, 0x1e, 0x63, + 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad +}; + +static const u8 tc3_tag256[] = { 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, + 0x71, 0xbd, 0xec, 0x1a, 0x50, 0x22, + 0x70, 0xe3, 0xcc, 0x6c }; + +static const u8 tc4_plaintext[] = { + 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5, + 0xaf, 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, + 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95, + 0x95, 0x68, 0x09, 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, + 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39, +}; + +static const u8 tc4_aad[] = { 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, + 0xef, 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, + 0xbe, 0xef, 0xab, 0xad, 0xda, 0xd2 }; + +static const u8 tc4_ciphertext128[] = { + 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7, + 0x84, 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, + 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2, + 0x54, 0x66, 0x93, 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, + 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91 +}; + +static const u8 tc4_tag128[] = { 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, + 0xa5, 0xdb, 0x94, 0xfa, 0xe9, 0x5a, + 0xe7, 0x12, 0x1a, 0x47 }; + +static const u8 tc4_ciphertext256[] = { + 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3, + 0x2a, 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, + 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48, + 0x59, 0x0d, 0xbb, 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, + 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62 +}; + +static const u8 tc4_tag256[] = { 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, + 0x17, 0x68, 0xcd, 0xdf, 0x88, 0x53, + 0xbb, 0x2d, 0x55, 0x1b }; + +static const u8 inc_key[] = { 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2, + 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b, + 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2, + 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b }; +static const u8 inc_iv[] = { 0xe2, 0xe4, 0x3f, 0x29, 0xfe, 0xd4, + 0xbc, 0x31, 0x56, 0xa7, 0x97, 0xf5 }; + +static const struct +{ + const u16 n_bytes; + const u64 tag_gcm_128[2]; + const u64 tag_gcm_256[2]; + const u64 tag_gmac_128[2]; + const u64 tag_gmac_256[2]; + const u8 tag256[16]; +} inc_test_cases[] = { + { + .n_bytes = 0, + .tag_gcm_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 }, + .tag_gcm_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d }, + .tag_gmac_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 }, + .tag_gmac_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d }, + }, + { + .n_bytes = 1, + .tag_gcm_128 = { 0xe89aa5be94fa1db4, 0x70d82ed02542a560 }, + .tag_gcm_256 = { 0xcb0659b38e60d3a7, 0x9758b874959187ff }, + .tag_gmac_128 = { 0xf9be1e7db073c565, 0x3b8a0ecc7a91f09d }, + .tag_gmac_256 = { 0x1e302e97ab394130, 0xef29621c33bdb710 }, + }, + { + .n_bytes = 7, + .tag_gcm_128 = { 0xf4af7cbe57bd2078, 0x063dd60abbe51049 }, + .tag_gcm_256 = { 0x7d231388fe8a19be, 0x59be3e7205269abd }, + .tag_gmac_128 = { 0x27d0a47980eed1c6, 0xe6163485e73d02b3 }, + .tag_gmac_256 = { 0x61ce281b47729f6c, 0x128a6bc0880e5d84 }, + }, + { + .n_bytes = 8, + .tag_gcm_128 = { 0xf45b40961422abc4, 0x0a932b98c4999694 }, + .tag_gcm_256 = { 0xf7f945beed586ee2, 0x67239433a7bd3f23 }, + .tag_gmac_128 = { 0x3a25d38572abe3b1, 0x220798aca96d594a }, + .tag_gmac_256 = { 0x2e0e6d58d1ab41ca, 0x09bbc83e3b7b5e11 }, + }, + { + .n_bytes = 9, + .tag_gcm_128 = { 0x791b0a879d236364, 0xde9553e3ed1b763f }, + .tag_gcm_256 = { 0x24c13ed7b46813cd, 0xe646ce24ea4b281e }, + .tag_gmac_128 = { 0x0e521672b23a4fc7, 0x16f129224dec5fd8 }, + .tag_gmac_256 = { 0x8b9c603789c34043, 0x0a8b626928c9fb6f }, + }, + { + .n_bytes = 15, + .tag_gcm_128 = { 0xb277ef05e2be1cc0, 0x2922fba5e321c81e }, + .tag_gcm_256 = { 0xc3ca9f633fa803dc, 0x96e60b0c3347d744 }, + .tag_gmac_128 = { 0xab99e6327c8e1493, 0x09a9a153045ba43f }, + .tag_gmac_256 = { 0xfc9ec2d6a1ad492b, 0xf0b0ba877663732d }, + }, + { + .n_bytes = 16, + .tag_gcm_128 = { 0x3e3438e8f932ebe3, 0x958e270d56ae588e }, + .tag_gcm_256 = { 0x6ac53524effc8171, 0xccab3a16a0b5813c }, + .tag_gmac_128 = { 0x0eb4a09c6c7db16b, 0x1cdb5573a27a2e4a }, + .tag_gmac_256 = { 0x71752018b31eae33, 0xdc4bd36d44b9fd5d }, + }, + { + .n_bytes = 31, + .tag_gcm_128 = { 0x1f4d4a7a056e4bca, 0x97ac76121dccb4e0 }, + .tag_gcm_256 = { 0x609aea9aec919ab6, 0x1eba3c4998e7abb9 }, + .tag_gmac_128 = { 0x289280f9e8879c68, 0xe6b0e36afc0d2ae1 }, + .tag_gmac_256 = { 0x0b3f61762ba4ed43, 0x293f596a76d63b37 }, + }, + { + .n_bytes = 32, + .tag_gcm_128 = { 0xc4b64505d045818f, 0x72bfd499f0f983b4 }, + .tag_gcm_256 = { 0x3f003fb179b2c480, 0x883876d4904700c2 }, + .tag_gmac_128 = { 0x3dd10ab954d807f0, 0x5ae32ee41675051e }, + .tag_gmac_256 = { 0x1a80ab830fc736c0, 0x51db27630adae337 }, + }, + { + .n_bytes = 47, + .tag_gcm_128 = { 0x3aedb0c6c14f2ea1, 0xe4626626bae641cd }, + .tag_gcm_256 = { 0x9c91b87dfd302880, 0x05bb594dde5abb9c }, + .tag_gmac_128 = { 0xe0fe54f2bdadeba8, 0x6f8f40edb569701f }, + .tag_gmac_256 = { 0x26c5632c7abbdb3f, 0xc18ccc24df8bb239 }, + }, + { + .n_bytes = 48, + .tag_gcm_128 = { 0xdbceb2aed0dbbe27, 0xfef0013e8ebe6ef1 }, + .tag_gcm_256 = { 0x98ad025f30b58ffa, 0xabc8a99857034e42 }, + .tag_gmac_128 = { 0x269518e8584b7f6c, 0x1c9f41410a81799c }, + .tag_gmac_256 = { 0x144807ce7aa8eb61, 0x611a8355b4377dc6 }, + }, + { + .n_bytes = 63, + .tag_gcm_128 = { 0x1769ccf523a2046e, 0x7328e18749a559b4 }, + .tag_gcm_256 = { 0xcdf2f28efa9689ce, 0x636676f6aedea9de }, + .tag_gmac_128 = { 0x4d47537060defce8, 0x0d4819c20ba8e889 }, + .tag_gmac_256 = { 0x7b60615e7bfc9a7a, 0x610633296eb30b94 }, + }, + { + .n_bytes = 64, + .tag_gcm_128 = { 0xa5602f73865b6a77, 0x78317e461ff9b560 }, + .tag_gcm_256 = { 0x5c17a6dcd1f23b65, 0x25331c378256a93e }, + .tag_gmac_128 = { 0x39d941ed85d81ab0, 0xe358a61078628d63 }, + .tag_gmac_256 = { 0x5276fbdd333f380d, 0xb0dc63e68f137e74 }, + }, + { + .n_bytes = 79, + .tag_gcm_128 = { 0x5d32cd75f2e82d84, 0xbc15801c1fe285bd }, + .tag_gcm_256 = { 0xb2b2855f4b1ecf70, 0xa524adc1609c757b }, + .tag_gmac_128 = { 0xa147493f08a8738e, 0xbf07da9f4a88944f }, + .tag_gmac_256 = { 0xfee15e0d4b936bc7, 0x1dc88398c6b168bc }, + }, + { + .n_bytes = 80, + .tag_gcm_128 = { 0xa303b7247b9b00df, 0xe72d6d7063d48b72 }, + .tag_gcm_256 = { 0x7abfffc9ecfa00ec, 0x9c5ffcd753ee4568 }, + .tag_gmac_128 = { 0xc3e61bf9f370b40e, 0x66b1c4a6df3b19d7 }, + .tag_gmac_256 = { 0x0cc7b09a7d602352, 0x29e8a64447a764d2 }, + }, + { + .n_bytes = 95, + .tag_gcm_128 = { 0xf0fb35c36eac3025, 0xa13463307fc48907 }, + .tag_gcm_256 = { 0x283a73a05bd0e3c2, 0x794a181dd07a0fb7 }, + .tag_gmac_128 = { 0x26f3546060d9f958, 0xc1367fca8869ab40 }, + .tag_gmac_256 = { 0xa046e1705100c711, 0xbcf9d6a06f360260 }, + }, + { + .n_bytes = 96, + .tag_gcm_128 = { 0x974bb3c1c258bfb5, 0xcf057344bccb0596 }, + .tag_gcm_256 = { 0x18920d75fcfb702e, 0x18e5f14ba429b7be }, + .tag_gmac_128 = { 0xf43cca4837ad00b8, 0xb1a1585d51838352 }, + .tag_gmac_256 = { 0xce3427dc5123b31f, 0xdcc6e49fa0f6587e }, + }, + { + .n_bytes = 111, + .tag_gcm_128 = { 0x5d73baa8eef0ced3, 0x79339e31d5d813de }, + .tag_gcm_256 = { 0x4cefa311c9c39a86, 0xe809ee78930ef736 }, + .tag_gmac_128 = { 0x452003e6d535a523, 0x723f08581012c62e }, + .tag_gmac_256 = { 0x6ce2e1661db942ca, 0xccd700c9c6d03cfd }, + }, + { + .n_bytes = 112, + .tag_gcm_128 = { 0x189aa61ce15a0d11, 0xc907e6bccbdbb8f9 }, + .tag_gcm_256 = { 0xa41c96c843b791b4, 0x0f9f60953f03e5fc }, + .tag_gmac_128 = { 0x44c75b94dbf8539f, 0xcdebe3ed9c68c840 }, + .tag_gmac_256 = { 0x21a289dd39eadd19, 0x749a038e1ea0711c }, + }, + { + .n_bytes = 127, + .tag_gcm_128 = { 0xc6ea87bfe82d73f6, 0x9d85dbf8072bb051 }, + .tag_gcm_256 = { 0xd5e436b2ddfac9fa, 0x54d7d13fa214703a }, + .tag_gmac_128 = { 0xdc5374b7d7d221c4, 0xa8cf4e11958b9dff }, + .tag_gmac_256 = { 0xc7ad0bba9de54f6a, 0x38ed037fe0924dee }, + }, + { + .n_bytes = 128, + .tag_gcm_128 = { 0x357d4954b7c2b440, 0xb3b07ce0cd143149 }, + .tag_gcm_256 = { 0x5555d09cb247322d, 0xeb9d1cea38b68951 }, + .tag_gmac_128 = { 0x6a77579181663dde, 0xe359157bd4246d3f }, + .tag_gmac_256 = { 0x9fe930d50d661e37, 0xba4a0f3c3a6b63cf }, + }, + { + .n_bytes = 143, + .tag_gcm_128 = { 0x358f897d4783966f, 0x6fa44993a9ed54c4 }, + .tag_gcm_256 = { 0x60e91f959f2ccdbe, 0x116c56fdaa107deb }, + .tag_gmac_128 = { 0x121d26aba8aaee0d, 0xc37cda9c43f51008 }, + .tag_gmac_256 = { 0x06918b1cd20e0abc, 0x42938b1d8e745dcd }, + }, + { + .n_bytes = 144, + .tag_gcm_128 = { 0x8a9efe3df387e069, 0xc0a3f2f7547c704b }, + .tag_gcm_256 = { 0x217d59f53bfbc314, 0x2d8f088d05532b0d }, + .tag_gmac_128 = { 0x382949d56e0e8f05, 0x4e87fb8f83f095a7 }, + .tag_gmac_256 = { 0x75e07060883db37d, 0x5fde7b9bda37d680 }, + }, + { + .n_bytes = 159, + .tag_gcm_128 = { 0x297252081cc8db1e, 0x6357143fa7f756c8 }, + .tag_gcm_256 = { 0x7e8fca9d1b17e003, 0x7bf7dad063b9a5c9 }, + .tag_gmac_128 = { 0x5d0524b130e97547, 0xd6befd8591072437 }, + .tag_gmac_256 = { 0xf5f631d391b635fc, 0xe8f7b6808544f312 }, + }, + { + .n_bytes = 160, + .tag_gcm_128 = { 0x90e034ee0f08a871, 0x002f483eefa24ec9 }, + .tag_gcm_256 = { 0xed24df02e455d6d3, 0x7a7d318ed132cb7f }, + .tag_gmac_128 = { 0xc75f87215ae12a2f, 0xf264e5381d5b0412 }, + .tag_gmac_256 = { 0x1ad3e294fd55b0a6, 0xa1a551e59fd12e2f }, + }, + { + .n_bytes = 175, + .tag_gcm_128 = { 0x8f663955c8e4249e, 0xd9d8d8d7352b18d9 }, + .tag_gcm_256 = { 0xd9af34eae74a35e1, 0xc22e74b34267e5df }, + .tag_gmac_128 = { 0xb54a2e8b186a55db, 0x980f586c6da8afce }, + .tag_gmac_256 = { 0x9cceb31baad18ff1, 0xce97588909ece8af }, + }, + { + .n_bytes = 176, + .tag_gcm_128 = { 0x258ec0df82f003bd, 0x571496e92c966695 }, + .tag_gcm_256 = { 0xa1925cda1fa1dd2c, 0x914038618faecf99 }, + .tag_gmac_128 = { 0xfc384b412bdb05ef, 0x73968cf3b464a997 }, + .tag_gmac_256 = { 0x50d9ce4be242e176, 0x5fb78e9404c9226d }, + }, + { + .n_bytes = 191, + .tag_gcm_128 = { 0x796a90a3edaab614, 0x4bf34c2c6333c736 }, + .tag_gcm_256 = { 0x4ffd3a84b346c6d5, 0x9d4c84c7ac5a191c }, + .tag_gmac_128 = { 0x16c11c6bfad5973e, 0xa0825b9c827137c8 }, + .tag_gmac_256 = { 0x82c144c209c22758, 0x7428b4ac38a65c56 }, + }, + { + .n_bytes = 192, + .tag_gcm_128 = { 0x2a44492af2e06a75, 0xbe4eab62aacfc2d3 }, + .tag_gcm_256 = { 0xb7d4971a8061092d, 0x94da543669369e41 }, + .tag_gmac_128 = { 0xed462726c984b596, 0xd61b317d979f5df8 }, + .tag_gmac_256 = { 0x554dc7f30981dbf6, 0x94447d0fbf9f2c8b }, + }, + { + .n_bytes = 207, + .tag_gcm_128 = { 0xcfac9f67252713c8, 0xd638cf6b74c6acf6 }, + .tag_gcm_256 = { 0x57a4a9d299663925, 0xa802f8453e8bcc5b }, + .tag_gmac_128 = { 0xef03f3cdcb0ea819, 0xeea8f0f7f805c306 }, + .tag_gmac_256 = { 0x3d8cd7d92cf0a212, 0x12c1ddddab7e752c }, + }, + { + .n_bytes = 208, + .tag_gcm_128 = { 0x5467633795b92cf5, 0x6b45fb93e19f9341 }, + .tag_gcm_256 = { 0xaeced4090d4d20bc, 0xd20161cd2617613e }, + .tag_gmac_128 = { 0x02bb88dbe681ab69, 0xaf973bfd0b924144 }, + .tag_gmac_256 = { 0x313020fc5283b45e, 0x1757616d4cf17c7f }, + }, + { + .n_bytes = 223, + .tag_gcm_128 = { 0x2f9c725903c07adf, 0xe01712c7d6d5055d }, + .tag_gcm_256 = { 0xeae53a9b0d03a4f9, 0x42b2375d569d384e }, + .tag_gmac_128 = { 0x6ea092dd400ec00d, 0x23237fa0bd0c1977 }, + .tag_gmac_256 = { 0xa02e0f41f12f0053, 0xfba53430aa616219 }, + }, + { + .n_bytes = 224, + .tag_gcm_128 = { 0x73e40772334901a9, 0xddf6075b357cb307 }, + .tag_gcm_256 = { 0x2eb3450f9462c968, 0xa9fb95f281c117e9 }, + .tag_gmac_128 = { 0x33762525c12dfd1d, 0xcb3d8d0402c23ebf }, + .tag_gmac_256 = { 0x30c6d05fb98c2a84, 0xaa2c9f6303383d3a }, + }, + { + .n_bytes = 239, + .tag_gcm_128 = { 0x184d15fd2e2c63a6, 0x3dfe238b88dd2924 }, + .tag_gcm_256 = { 0x18deafee39975b36, 0xc07761cf4fc16c06 }, + .tag_gmac_128 = { 0x10a48f2bc4e64f87, 0x85eec49ae83d4256 }, + .tag_gmac_256 = { 0x5ac87f47f32770eb, 0x31706ca888dd6d44 }, + }, + { + .n_bytes = 240, + .tag_gcm_128 = { 0x153134f11cfa06ec, 0xd987642cc3688a34 }, + .tag_gcm_256 = { 0x3eb66b6dc0bba824, 0x274c4648d515c844 }, + .tag_gmac_128 = { 0x9e5afe891c7c7dcb, 0xa2b3fa1c026343e2 }, + .tag_gmac_256 = { 0xe9120e4e9ff4b1e1, 0xb88bf68336342598 }, + }, + { + .n_bytes = 255, + .tag_gcm_128 = { 0x2b5e78936d1ace73, 0x15b766bfee18d348 }, + .tag_gcm_256 = { 0xeb3741a345395c97, 0x02e11e0478e4cc5a }, + .tag_gmac_128 = { 0xf7daf525751192df, 0x1b1641c3362905ac }, + .tag_gmac_256 = { 0x0b16a2bb842caaca, 0x996732fedaa6b829 }, + }, + { + .n_bytes = 256, + .tag_gcm_128 = { 0x6d4507e0c354e80a, 0x2345eccddd0bd71e }, + .tag_gcm_256 = { 0xa582b8122d699b63, 0xb16db944f6b073f3 }, + .tag_gmac_128 = { 0xc58bb57544c07b40, 0x1a8dd3d8124cdf39 }, + .tag_gmac_256 = { 0xb0f6db0da52e1dc2, 0xbd3a86a577ed208a }, + }, + { + .n_bytes = 319, + .tag_gcm_128 = { 0x2cd41fdf6f659a6b, 0x2486849d7666d76e }, + .tag_gcm_256 = { 0xb7e416c8a716cb4d, 0xc7abe0d755b48845 }, + .tag_gmac_128 = { 0xad83725394d4a36b, 0x5fdd42e941cad49b }, + .tag_gmac_256 = { 0xbb0b73609b90f7eb, 0xe4d382b8b9b7d43e }, + }, + { + .n_bytes = 320, + .tag_gcm_128 = { 0x064cfe34b7d9f89c, 0xb6c7263f66c89b47 }, + .tag_gcm_256 = { 0x1254c9ae84d8ff50, 0x9faeab423099dc9a }, + .tag_gmac_128 = { 0xd91d60ce71d24533, 0xb1cdfd3b3200b171 }, + .tag_gmac_256 = { 0x921de9e3d353559c, 0x3509d2775817a1de }, + }, + { + .n_bytes = 383, + .tag_gcm_128 = { 0x14788c7531d682e1, 0x8af79effe807a4dc }, + .tag_gcm_256 = { 0x947754a0844b4a4d, 0x9eb3849d93d5048e }, + .tag_gmac_128 = { 0xfa84d3a18ea6f895, 0x9a45c729797a8ac4 }, + .tag_gmac_256 = { 0xe8e61e134e40359a, 0xe8e404d4b523607c }, + }, + { + .n_bytes = 384, + .tag_gcm_128 = { 0xfba3fcfd9022e9a7, 0x257ba59f12055d70 }, + .tag_gcm_256 = { 0x7c6ca4e7fba2bc35, 0x1c590be09b3d549b }, + .tag_gmac_128 = { 0x4ca0f087d812e48f, 0xd1d39c4338d57a04 }, + .tag_gmac_256 = { 0xb0a2257cdec364c7, 0x6a4308976fda4e5d }, + }, + { + .n_bytes = 447, + .tag_gcm_128 = { 0x8fde1490c60f09bf, 0xd2932f04c202c5e4 }, + .tag_gcm_256 = { 0x1845a80cbdcf2e62, 0xc7c49c9864bca732 }, + .tag_gmac_128 = { 0x35aa90d2deb41b9c, 0x516ab85a3f17b71e }, + .tag_gmac_256 = { 0x1db78f8b7b34d9e7, 0xd168177351e601fe }, + }, + { + .n_bytes = 448, + .tag_gcm_128 = { 0xd0a7b75f734a1a7c, 0xc7689b7c571a09bf }, + .tag_gcm_256 = { 0xef3a9118c347118d, 0x282a7736060d7bb5 }, + .tag_gmac_128 = { 0xce2dab9fede53934, 0x27f3d2bb2af9dd2e }, + .tag_gmac_256 = { 0xca3b0cba7b772549, 0x3104ded0d6df7123 }, + }, + { + .n_bytes = 511, + .tag_gcm_128 = { 0x6fb5d366fa97b2d2, 0xed2d955fcc78e556 }, + .tag_gcm_256 = { 0xc2bc52eca9348b7c, 0x0ec18a2eb637446f }, + .tag_gmac_128 = { 0xe3012a4897edd5b5, 0xfe18c3ec617a7e88 }, + .tag_gmac_256 = { 0x00e050eecf184591, 0xba24484f84867f4f }, + }, + { + .n_bytes = 512, + .tag_gcm_128 = { 0x25138f7fe88b54bd, 0xcc078b619c0e83a2 }, + .tag_gcm_256 = { 0x63313c5ebe68fa92, 0xccc78784896cdcc3 }, + .tag_gmac_128 = { 0xc688fe54c5595ec0, 0x5b8a687343c3ef03 }, + .tag_gmac_256 = { 0x807c9f8e1c198242, 0xb1e0befc0b9b8193 }, + }, + { + .n_bytes = 575, + .tag_gcm_128 = { 0x0ce8e0b7332a7076, 0xe4aa7ab60dd0946a }, + .tag_gcm_256 = { 0x585cff3cf78504d4, 0x45f3a9532ea40e8b }, + .tag_gmac_128 = { 0xc06ca34dbad542b4, 0x840508722ff031dc }, + .tag_gmac_256 = { 0xa46e22748f195488, 0x43817a5d4d17408a }, + }, + { + .n_bytes = 576, + .tag_gcm_128 = { 0x45360be81e8323bd, 0x10892d9804b75bb5 }, + .tag_gcm_256 = { 0x66208ae5d809036e, 0x603d0af49475de88 }, + .tag_gmac_128 = { 0xb4f2b1d05fd3a4ec, 0x6a15b7a05c3a5436 }, + .tag_gmac_256 = { 0x8d78b8f7c7daf6ff, 0x925b2a92acb7356a }, + }, + { + .n_bytes = 577, + .tag_gcm_128 = { 0xc7e5cd17251fd138, 0xecfb0e05110303df }, + .tag_gcm_256 = { 0x2939d12c85ea8cf8, 0xea063fba37c92eb5 }, + .tag_gmac_128 = { 0x1fa02b370bec64a0, 0x8c759ca95a8cea85 }, + .tag_gmac_256 = { 0x6a602c2b1fff6617, 0x17e06d829bd24a8d }, + }, + { + .n_bytes = 639, + .tag_gcm_128 = { 0xc679ef7a01e8f14c, 0x281e3b9a9f715cb9 }, + .tag_gcm_256 = { 0x13abd2d67e162f98, 0xf637d467046af949 }, + .tag_gmac_128 = { 0x05037392550b7ae2, 0x5095b4629ba46d40 }, + .tag_gmac_256 = { 0xd8e8045772299aa7, 0x564d72fb58ea9808 }, + }, + { + .n_bytes = 640, + .tag_gcm_128 = { 0xff1a2c922cdd1336, 0xcaa02eab8691bf51 }, + .tag_gcm_256 = { 0xd57e16f169d79da5, 0x3e2b47264f8efe9c }, + .tag_gmac_128 = { 0xb32750b403bf66f8, 0x1b03ef08da0b9d80 }, + .tag_gmac_256 = { 0x80ac3f38e2aacbfa, 0xd4ea7eb88213b629 }, + }, + { + .n_bytes = 703, + .tag_gcm_128 = { 0xefd0804f0155b8f1, 0xb1849ed867269569 }, + .tag_gcm_256 = { 0xf66c5ecbd1a06fa4, 0x55ef36f3fdbe763a }, + .tag_gmac_128 = { 0x725813463d977e5b, 0xd52aaabb923cfabb }, + .tag_gmac_256 = { 0x4add8f86736adc52, 0xf6dabb4596975fd7 }, + }, + { + .n_bytes = 704, + .tag_gcm_128 = { 0x583b29260ea8e49f, 0xfaa93b0db98f9274 }, + .tag_gcm_256 = { 0x0b777f2cd9e2f0ef, 0x01510fc85a99382e }, + .tag_gmac_128 = { 0x89df280b0ec65cf3, 0xa3b3c05a87d2908b }, + .tag_gmac_256 = { 0x9d510cb7732920fc, 0x16b672e611ae2f0a }, + }, + { + .n_bytes = 767, + .tag_gcm_128 = { 0x671ec58ab6d4a210, 0x0845fbe603169eff }, + .tag_gcm_256 = { 0xb3913f7eb9bbdbbb, 0x4cb17aa290f6ab11 }, + .tag_gmac_128 = { 0x3036046580a81443, 0xe18d34bb706e632b }, + .tag_gmac_256 = { 0x4e82bc959349466c, 0x01210641d62bbdda }, + }, + { + .n_bytes = 768, + .tag_gcm_128 = { 0x66993b5de915fc6e, 0x4aaf0b8441040267 }, + .tag_gcm_256 = { 0x958ed0a6c1bf11e0, 0xc29d9f4a8ce8bdc6 }, + .tag_gmac_128 = { 0x02674435b179fddc, 0xe016a6a0540bb9be }, + .tag_gmac_256 = { 0xf562c523b24bf164, 0x257cb21a7b602579 }, + }, + { + .n_bytes = 831, + .tag_gcm_128 = { 0x4914f7980699f93c, 0xc2e44fdba6a839e7 }, + .tag_gcm_256 = { 0xa8fab43ecd572a25, 0x3cd465e491195b81 }, + .tag_gmac_128 = { 0xa6d725516e956d5d, 0x630768e80ac3de3d }, + .tag_gmac_256 = { 0xb4746cdde367c9e2, 0x3ea53280901a0375 }, + }, + { + .n_bytes = 832, + .tag_gcm_128 = { 0xac9a519f06fb8c70, 0xdc1a6544ed2cfcf7 }, + .tag_gcm_256 = { 0x54877a7ccd02c592, 0x1a09a4474d903b56 }, + .tag_gmac_128 = { 0xd24937cc8b938b05, 0x8d17d73a7909bbd7 }, + .tag_gmac_256 = { 0x9d62f65eaba46b95, 0xef7f624f71ba7695 }, + }, + { + .n_bytes = 895, + .tag_gcm_128 = { 0x3d365bf4d44c1071, 0x07ac3129079f2013 }, + .tag_gcm_256 = { 0x608543d4fe6526a1, 0xc78a987b87c8d96c }, + .tag_gmac_128 = { 0xc71cf903f7a557c5, 0x06788583ad2122a5 }, + .tag_gmac_256 = { 0x7cdaa511565b289a, 0xf818a4c85a8bd575 }, + }, + { + .n_bytes = 896, + .tag_gcm_128 = { 0x97000fafd1359a0b, 0xfc226d534866b495 }, + .tag_gcm_256 = { 0x1850ee7af3133326, 0xf198d539eee4b1f5 }, + .tag_gmac_128 = { 0x7138da25a1114bdf, 0x4deedee9ec8ed265 }, + .tag_gmac_256 = { 0x249e9e7ec6d879c7, 0x7abfa88b8072fb54 }, + }, + { + .n_bytes = 959, + .tag_gcm_128 = { 0x17200025564902f2, 0x3f2c3b711ba4086d }, + .tag_gcm_256 = { 0x3d0bf3e8b24e296d, 0x42fe0f54e33deb6d }, + .tag_gmac_128 = { 0x8baae9b6f3bd797a, 0x177e0b6c577f2436 }, + .tag_gmac_256 = { 0x853f961c965f472c, 0x8adc4113b3cf933a }, + }, + { + .n_bytes = 960, + .tag_gcm_128 = { 0x2a30ca7325e7a81b, 0xacbc71832bdceb63 }, + .tag_gcm_256 = { 0x037786319dc22ed7, 0x6730acf359ec3b6e }, + .tag_gmac_128 = { 0x702dd2fbc0ec5bd2, 0x61e7618d42914e06 }, + .tag_gmac_256 = { 0x52b3152d961cbb82, 0x6ab088b034f6e3e7 }, + }, + { + .n_bytes = 1023, + .tag_gcm_128 = { 0x8e8789e6c4c90855, 0x4ec5503d7f953df6 }, + .tag_gcm_256 = { 0xdb0afebe6c085f53, 0x4eb6f07b63b8a020 }, + .tag_gmac_128 = { 0x6e9b48e5ad508180, 0xdc86430db2bad514 }, + .tag_gmac_256 = { 0xbb52b4fbf236b741, 0x47ae63bc836dfba3 }, + }, + { + .n_bytes = 1024, + .tag_gcm_128 = { 0x94e1ccbea0f24089, 0xf51b53b600363bd2 }, + .tag_gcm_256 = { 0x70f3eb3d562f0b34, 0xffd09e1a25d5bef3 }, + .tag_gmac_128 = { 0x65a2b560392ecee3, 0x30079a9a9dbbd3a3 }, + .tag_gmac_256 = { 0x4d361736c43090e6, 0x135810df49dcc981 }, + }, + { + .n_bytes = 1025, + .tag_gcm_128 = { 0x830a99737df5a71a, 0xd9ea6e87c63d3aae }, + .tag_gcm_256 = { 0xa3fc30e0254a5ee2, 0x52e59adc9a75be40 }, + .tag_gmac_128 = { 0xb217556427fc09ab, 0xc32fd72ec886730d }, + .tag_gmac_256 = { 0xeab5a9a02cb0869e, 0xd59e51684bc2839c }, + }, + { + .n_bytes = 1039, + .tag_gcm_128 = { 0x238f229130e92934, 0x52752fc860bca067 }, + .tag_gcm_256 = { 0xae2754bcaed68191, 0xe0770d1e9a7a67f3 }, + .tag_gmac_128 = { 0xe030ad2beb01d85d, 0xf10c78b1b64c27af }, + .tag_gmac_256 = { 0x081b45e126248e85, 0xca0789f30e1c47a1 }, + }, + { + .n_bytes = 1040, + .tag_gcm_128 = { 0x4eebcf7391d66c6f, 0x107d8bef4a93d9c6 }, + .tag_gcm_256 = { 0xbeb02ae5466964f3, 0x8eb90364c5f9e4cb }, + .tag_gmac_128 = { 0x451deb85fbf27da5, 0xe47e8c91106dadda }, + .tag_gmac_256 = { 0x85f0a72f3497699d, 0xe6fce0193cc6c9d1 }, + }, + { + .n_bytes = 1041, + .tag_gcm_128 = { 0xbbddfb0304411d71, 0xe573f63553d7ede4 }, + .tag_gcm_256 = { 0x68e42d2959af0b24, 0x35ac8e73c749e7f4 }, + .tag_gmac_128 = { 0x98d022b9896b68f8, 0x98dfde2a17b2869b }, + .tag_gmac_256 = { 0xb8dac6add35d0d9b, 0x1c55973c6dd769af }, + }, + { + .n_bytes = 1536, + .tag_gcm_128 = { 0x7d8933fd922418bd, 0xc88c2f289c5d3d83 }, + .tag_gcm_256 = { 0x966c103eb6ee69f2, 0x2f6b070b5c0fc66f }, + .tag_gmac_128 = { 0x3b70f6154246e758, 0xd485c0edf236b6e2 }, + .tag_gmac_256 = { 0xfefe1832387b9768, 0xc876712098256ca3 }, + }, + { + .n_bytes = 2047, + .tag_gcm_128 = { 0x15c6bbcb0d835fd4, 0xc33afd1328c1deb1 }, + .tag_gcm_256 = { 0xcde3edeea228ada6, 0x8276721a8662e708 }, + .tag_gmac_128 = { 0xb556b0e42419759e, 0x23b0365cf956a3ad }, + .tag_gmac_256 = { 0x8df762cbbe4b2a04, 0x6841bc61e5702419 }, + }, + { + .n_bytes = 2048, + .tag_gcm_128 = { 0xc5ddbeb8765e3aac, 0x1bad7349fd9f2b50 }, + .tag_gcm_256 = { 0xa2a623dde251a98d, 0xaf905fbd16f6a7d9 }, + .tag_gmac_128 = { 0xe20f1e533df2b3d0, 0x5d170bdbcc278a63 }, + .tag_gmac_256 = { 0x9663185c4342cd4a, 0x82d3c5a3a4998fc6 }, + }, + { + .n_bytes = 2064, + .tag_gcm_128 = { 0x12b76ea0a6ee9cbc, 0xdaecfae7c815aa58 }, + .tag_gcm_256 = { 0xb5bb2f76028713dd, 0xc8f3a1448b3bd050 }, + .tag_gmac_128 = { 0x019445c168c42f9b, 0xdf33e251bd9a27fe }, + .tag_gmac_256 = { 0xbbabd0cefc4d6a42, 0xb138675ca66ba54f }, + }, + { + .n_bytes = 2065, + .tag_gcm_128 = { 0x8758c5168ffc3fd7, 0x554f1df7cfa3b976 }, + .tag_gcm_256 = { 0xc9808cf0fd21aede, 0xe26921f3fd308006 }, + .tag_gmac_128 = { 0x44a57e7a32031596, 0x75476d5542faa57b }, + .tag_gmac_256 = { 0xea0e81807fa79a4a, 0x889cca80746fb8d5 }, + }, + { + .n_bytes = 4095, + .tag_gcm_128 = { 0x06db87757f541dc9, 0x823c619c6b88ef80 }, + .tag_gcm_256 = { 0xdf0861a56a7fe7b0, 0xe077a5c735cc21b2 }, + .tag_gmac_128 = { 0x43cb482bea0449e9, 0x70d668af983c9a6c }, + .tag_gmac_256 = { 0x5fc304ad7be1d19a, 0x81bf2f4111de0b06 }, + }, + { + .n_bytes = 4096, + .tag_gcm_128 = { 0xe4afdad642876152, 0xf78cfcfcb92520b6 }, + .tag_gcm_256 = { 0x7552cda8d91bdab1, 0x4bf57b7567d59e89 }, + .tag_gmac_128 = { 0xac5240f8e9c49cfc, 0x2a3c9d0999aded50 }, + .tag_gmac_256 = { 0x9fb6cd8f10f7b6c5, 0x16e442c147869222 }, + }, + { + .n_bytes = 4112, + .tag_gcm_128 = { 0x2a34db8f06bcf0ee, 0x7a4a2456fa340c33 }, + .tag_gcm_256 = { 0x4b6c0c5b5c943f5e, 0x6d1669e849ce061a }, + .tag_gmac_128 = { 0x143bfc9ab07d9bb5, 0xf0aa7510a9039349 }, + .tag_gmac_256 = { 0x8a97bdd033775ba0, 0x5901a5160739be25 }, + }, + { + .n_bytes = 4113, + .tag_gcm_128 = { 0x296acfcbcbf529af, 0xe3e2cfb1bc5855c8 }, + .tag_gcm_256 = { 0x181f6f9068ea477e, 0x1e05bfd01ee3e173 }, + .tag_gmac_128 = { 0x0d81fcb0829e3c8b, 0x68016225b5fa7745 }, + .tag_gmac_256 = { 0xa2421ac50d65c6b5, 0x84bd16fa55486af8 }, + }, + { + .n_bytes = 16382, + .tag_gcm_128 = { 0xd39fd367e00a103d, 0xf873a278b32d207f }, + .tag_gcm_256 = { 0xa8da09a851ae6c88, 0x2ef17f0da7f191f1 }, + .tag_gmac_128 = { 0xd4a22896f44c1c14, 0x69a5d02715c90ea4 }, + .tag_gmac_256 = { 0x64788ca5e11722b6, 0x63d74a4b24538762 }, + }, + { + .n_bytes = 16383, + .tag_gcm_128 = { 0x2162b91aad49eebc, 0x28c7efe93e639c75 }, + .tag_gcm_256 = { 0xc5baee5e40004087, 0xf6b26211facc66a5 }, + .tag_gmac_128 = { 0x3ec003d690d3d846, 0x204baef851d8ad7d }, + .tag_gmac_256 = { 0xdb51d6f5dddf16bb, 0x529f3825cf78dbd5 }, + }, + { + .n_bytes = 16384, + .tag_gcm_128 = { 0x2272e778c4c5c9ef, 0x84c50021e75ddbab }, + .tag_gcm_256 = { 0x6c32f1c5666b1f4c, 0x91142a86ae5241b2 }, + .tag_gmac_128 = { 0x43dadd5ecee9674b, 0xa30fea9ae8091c6c }, + .tag_gmac_256 = { 0xc360b76ac1887181, 0xcb732f29ea86edeb }, + }, + { + .n_bytes = 16385, + .tag_gcm_128 = { 0xe2a47837578b4056, 0xf96e7233cbeb1ce1 }, + .tag_gcm_256 = { 0xfa3aa4ebe36fb390, 0x6a2cf1671f4f1a01 }, + .tag_gmac_128 = { 0xfd0b7312c4975687, 0xdd3096b1c850e80a }, + .tag_gmac_256 = { 0xaf2cae4642a5536a, 0xb27aff5cc8bd354c }, + }, + { + .n_bytes = 16386, + .tag_gcm_128 = { 0xe1b4c0e5825304ae, 0x48c5dd82aa114320 }, + .tag_gcm_256 = { 0x76c3612118f47fa8, 0xdd0a47b132ecad3a }, + .tag_gmac_128 = { 0x346bc841a7f5b642, 0x6fb1b96391c66b40 }, + .tag_gmac_256 = { 0x2f1a1b6a000e18b2, 0xf7cba25e02551d43 }, + }, +}; + +#define MAX_TEST_DATA_LEN 32768 + +static const struct +{ + char *name; + const u8 *pt, *key128, *key256, *ct128, *ct256, *tag128, *tag256, *aad, *iv; + u32 data_len, tag128_len, tag256_len, aad_len; +} test_cases[] = { + /* test cases */ + { + .name = "GCM Spec. TC1", + .iv = tc1_iv, + .key128 = tc1_key128, + .key256 = tc1_key256, + .tag128 = tc1_tag128, + .tag128_len = sizeof (tc1_tag128), + .tag256 = tc1_tag256, + .tag256_len = sizeof (tc1_tag256), + }, + { + .name = "GCM Spec. TC2", + .pt = tc2_plaintext, + .data_len = sizeof (tc2_plaintext), + .iv = tc1_iv, + .key128 = tc1_key128, + .key256 = tc1_key256, + .ct128 = tc2_ciphertext128, + .ct256 = tc2_ciphertext256, + .tag128 = tc2_tag128, + .tag128_len = sizeof (tc2_tag128), + .tag256 = tc2_tag256, + .tag256_len = sizeof (tc2_tag256), + }, + { + .name = "GCM Spec. TC3", + .pt = tc3_plaintext, + .data_len = sizeof (tc3_plaintext), + .iv = tc3_iv, + .key128 = tc3_key128, + .key256 = tc3_key256, + .ct128 = tc3_ciphertext128, + .ct256 = tc3_ciphertext256, + .tag128 = tc3_tag128, + .tag128_len = sizeof (tc3_tag128), + .tag256 = tc3_tag256, + .tag256_len = sizeof (tc3_tag256), + }, + { + .name = "GCM Spec. TC4", + .pt = tc4_plaintext, + .data_len = sizeof (tc4_plaintext), + .aad = tc4_aad, + .aad_len = sizeof (tc4_aad), + .iv = tc3_iv, + .key128 = tc3_key128, + .key256 = tc3_key256, + .ct128 = tc4_ciphertext128, + .ct256 = tc4_ciphertext256, + .tag128 = tc4_tag128, + .tag128_len = sizeof (tc4_tag128), + .tag256 = tc4_tag256, + .tag256_len = sizeof (tc4_tag256), + } +}; + +#define perftest_aesXXX_enc_var_sz(a) \ + void __test_perf_fn perftest_aes##a##_enc_var_sz (test_perf_t *tp) \ + { \ + u32 n = tp->n_ops; \ + aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \ + u8 *dst = test_mem_alloc (n + 16); \ + u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \ + u8 *tag = test_mem_alloc (16); \ + u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \ + u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \ + \ + clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \ + \ + test_perf_event_enable (tp); \ + clib_aes##a##_gcm_enc (kd, src, n, 0, 0, iv, 16, dst, tag); \ + test_perf_event_disable (tp); \ + } + +#define perftest_aesXXX_dec_var_sz(a) \ + void __test_perf_fn perftest_aes##a##_dec_var_sz (test_perf_t *tp) \ + { \ + u32 n = tp->n_ops; \ + aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \ + u8 *dst = test_mem_alloc (n + 16); \ + u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \ + u8 *tag = test_mem_alloc (16); \ + u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \ + u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \ + int *rv = test_mem_alloc (16); \ + \ + clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \ + \ + test_perf_event_enable (tp); \ + rv[0] = clib_aes##a##_gcm_dec (kd, src, n, 0, 0, iv, tag, 16, dst); \ + test_perf_event_disable (tp); \ + } + +static clib_error_t * +test_clib_aes128_gcm_enc (clib_error_t *err) +{ + aes_gcm_key_data_t kd; + u8 pt[MAX_TEST_DATA_LEN]; + u8 ct[MAX_TEST_DATA_LEN]; + u8 tag[16]; + + FOREACH_ARRAY_ELT (tc, test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128); + clib_aes128_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len, + tc->iv, tc->tag128_len, ct, tag); + + if (memcmp (tc->tag128, tag, tc->tag128_len) != 0) + return clib_error_return (err, "%s: invalid tag", tc->name); + + if (tc->data_len && memcmp (tc->ct128, ct, tc->data_len) != 0) + return clib_error_return (err, "%s: invalid ciphertext", tc->name); + } + + for (int i = 0; i < sizeof (pt); i++) + pt[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128); + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + clib_aes128_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag); + + if (memcmp (tc->tag_gcm_128, tag, 16) != 0) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +perftest_aesXXX_enc_var_sz (128); + +REGISTER_TEST (clib_aes128_gcm_enc) = { + .name = "clib_aes128_gcm_enc", + .fn = test_clib_aes128_gcm_enc, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes128_enc_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 1 << 20, + .fn = perftest_aes128_enc_var_sz }), +}; + +static clib_error_t * +test_clib_aes256_gcm_enc (clib_error_t *err) +{ + aes_gcm_key_data_t kd; + u8 pt[MAX_TEST_DATA_LEN]; + u8 ct[MAX_TEST_DATA_LEN]; + u8 tag[16]; + + FOREACH_ARRAY_ELT (tc, test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256); + clib_aes256_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len, + tc->iv, tc->tag256_len, ct, tag); + + if (memcmp (tc->tag256, tag, tc->tag256_len) != 0) + return clib_error_return (err, "%s: invalid tag", tc->name); + + if (tc->data_len && memcmp (tc->ct256, ct, tc->data_len) != 0) + return clib_error_return (err, "%s: invalid ciphertext", tc->name); + } + + for (int i = 0; i < sizeof (pt); i++) + pt[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256); + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + clib_aes256_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag); + + if (memcmp (tc->tag_gcm_256, tag, 16) != 0) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +perftest_aesXXX_enc_var_sz (256); +REGISTER_TEST (clib_aes256_gcm_enc) = { + .name = "clib_aes256_gcm_enc", + .fn = test_clib_aes256_gcm_enc, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes256_enc_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 1 << 20, + .fn = perftest_aes256_enc_var_sz }), +}; + +static clib_error_t * +test_clib_aes128_gcm_dec (clib_error_t *err) +{ + aes_gcm_key_data_t kd; + u8 pt[MAX_TEST_DATA_LEN]; + u8 ct[MAX_TEST_DATA_LEN]; + u8 tag[16]; + int rv; + + FOREACH_ARRAY_ELT (tc, test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128); + rv = clib_aes128_gcm_dec (&kd, tc->ct128, tc->data_len, tc->aad, + tc->aad_len, tc->iv, tc->tag128, + tc->tag128_len, pt); + + if (!rv) + return clib_error_return (err, "%s: invalid tag", tc->name); + + if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0) + return clib_error_return (err, "%s: invalid ciphertext", tc->name); + } + + for (int i = 0; i < sizeof (pt); i++) + pt[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128); + clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag); + + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv, + (u8 *) tc->tag_gcm_128, 16, pt)) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +perftest_aesXXX_dec_var_sz (128); + +REGISTER_TEST (clib_aes128_gcm_dec) = { + .name = "clib_aes128_gcm_dec", + .fn = test_clib_aes128_gcm_dec, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes128_dec_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 1 << 20, + .fn = perftest_aes128_dec_var_sz }), +}; + +static clib_error_t * +test_clib_aes256_gcm_dec (clib_error_t *err) +{ + aes_gcm_key_data_t kd; + u8 pt[MAX_TEST_DATA_LEN]; + u8 ct[MAX_TEST_DATA_LEN]; + u8 tag[16]; + int rv; + + FOREACH_ARRAY_ELT (tc, test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256); + rv = clib_aes256_gcm_dec (&kd, tc->ct256, tc->data_len, tc->aad, + tc->aad_len, tc->iv, tc->tag256, + tc->tag256_len, pt); + + if (!rv) + return clib_error_return (err, "%s: invalid tag", tc->name); + + if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0) + return clib_error_return (err, "%s: invalid ciphertext", tc->name); + } + + for (int i = 0; i < sizeof (pt); i++) + pt[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128); + clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag); + + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv, + (u8 *) tc->tag_gcm_128, 16, pt)) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +perftest_aesXXX_dec_var_sz (256); +REGISTER_TEST (clib_aes256_gcm_dec) = { + .name = "clib_aes256_gcm_dec", + .fn = test_clib_aes256_gcm_dec, + .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)", + .n_ops = 1424, + .fn = perftest_aes256_dec_var_sz }, + { .name = "variable size (per byte)", + .n_ops = 1 << 20, + .fn = perftest_aes256_dec_var_sz }), +}; + +static const u8 gmac1_key[] = { + 0x77, 0xbe, 0x63, 0x70, 0x89, 0x71, 0xc4, 0xe2, + 0x40, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x7f, 0xeb +}; +static const u8 gmac1_iv[] = { 0xe0, 0xe0, 0x0f, 0x19, 0xfe, 0xd7, + 0xba, 0x01, 0x36, 0xa7, 0x97, 0xf3 }; +static const u8 gmac1_aad[] = { + 0x7a, 0x43, 0xec, 0x1d, 0x9c, 0x0a, 0x5a, 0x78, + 0xa0, 0xb1, 0x65, 0x33, 0xa6, 0x21, 0x3c, 0xab +}; +static const u8 gmac1_tag[] = { + 0x20, 0x9f, 0xcc, 0x8d, 0x36, 0x75, 0xed, 0x93, + 0x8e, 0x9c, 0x71, 0x66, 0x70, 0x9d, 0xd9, 0x46 +}; + +static const u8 gmac2_key[] = { + 0x20, 0xb5, 0xb6, 0xb8, 0x54, 0xe1, 0x87, 0xb0, + 0x58, 0xa8, 0x4d, 0x57, 0xbc, 0x15, 0x38, 0xb6 +}; + +static const u8 gmac2_iv[] = { 0x94, 0xc1, 0x93, 0x5a, 0xfc, 0x06, + 0x1c, 0xbf, 0x25, 0x4b, 0x93, 0x6f }; + +static const u8 gmac2_aad[] = { + 0xca, 0x41, 0x8e, 0x71, 0xdb, 0xf8, 0x10, 0x03, 0x81, 0x74, 0xea, 0xa3, 0x71, + 0x9b, 0x3f, 0xcb, 0x80, 0x53, 0x1c, 0x71, 0x10, 0xad, 0x91, 0x92, 0xd1, 0x05, + 0xee, 0xaa, 0xfa, 0x15, 0xb8, 0x19, 0xac, 0x00, 0x56, 0x68, 0x75, 0x2b, 0x34, + 0x4e, 0xd1, 0xb2, 0x2f, 0xaf, 0x77, 0x04, 0x8b, 0xaf, 0x03, 0xdb, 0xdd, 0xb3, + 0xb4, 0x7d, 0x6b, 0x00, 0xe9, 0x5c, 0x4f, 0x00, 0x5e, 0x0c, 0xc9, 0xb7, 0x62, + 0x7c, 0xca, 0xfd, 0x3f, 0x21, 0xb3, 0x31, 0x2a, 0xa8, 0xd9, 0x1d, 0x3f, 0xa0, + 0x89, 0x3f, 0xe5, 0xbf, 0xf7, 0xd4, 0x4c, 0xa4, 0x6f, 0x23, 0xaf, 0xe0 +}; + +static const u8 gmac2_tag[] = { + 0xb3, 0x72, 0x86, 0xeb, 0xaf, 0x4a, 0x54, 0xe0, + 0xff, 0xc2, 0xa1, 0xde, 0xaf, 0xc9, 0xf6, 0xdb +}; + +static const struct +{ + char *name; + const u8 *key128, *key256, *tag128, *tag256, *aad, *iv; + u32 tag128_len, tag256_len, aad_len; +} gmac_test_cases[] = { + /* test cases */ + { + .name = "GMAC1", + .iv = gmac1_iv, + .key128 = gmac1_key, + .tag128 = gmac1_tag, + .tag128_len = sizeof (gmac1_tag), + .aad = gmac1_aad, + .aad_len = sizeof (gmac1_aad), + }, + { + .name = "GMAC2", + .iv = gmac2_iv, + .key128 = gmac2_key, + .tag128 = gmac2_tag, + .tag128_len = sizeof (gmac2_tag), + .aad = gmac2_aad, + .aad_len = sizeof (gmac2_aad), + }, +}; + +static clib_error_t * +test_clib_aes128_gmac (clib_error_t *err) +{ + u8 data[MAX_TEST_DATA_LEN]; + aes_gcm_key_data_t kd; + u8 tag[16]; + + FOREACH_ARRAY_ELT (tc, gmac_test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128); + clib_aes128_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag128_len, + tag); + + if (memcmp (tc->tag128, tag, tc->tag128_len) != 0) + return clib_error_return (err, "%s: invalid tag", tc->name); + } + + for (int i = 0; i < sizeof (data); i++) + data[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128); + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + clib_aes128_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag); + + if (memcmp (tc->tag_gmac_128, tag, 16) != 0) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +void __test_perf_fn +perftest_gmac256_fixed_512byte (test_perf_t *tp) +{ + uword n = tp->n_ops; + aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (aes_gcm_key_data_t)); + u8 *ivs = test_mem_alloc_and_fill_inc_u8 (n * 12, 0, 0); + u8 *tags = test_mem_alloc_and_fill_inc_u8 (8 + n * 16, 0, 0); + u8 *data = test_mem_alloc_and_fill_inc_u8 (512, 0, 0); + + test_perf_event_enable (tp); + clib_aes_gcm_key_expand (kd, inc_key, AES_KEY_128); + + for (int i = 0; i < n; i++) + clib_aes128_gmac (kd, data, 512, ivs + n * 12, 16, tags + n * 16); + test_perf_event_disable (tp); +} + +REGISTER_TEST (clib_aes128_gmac) = { + .name = "clib_aes128_gmac", + .fn = test_clib_aes128_gmac, + .perf_tests = PERF_TESTS ({ .name = "fixed (512 byte)", + .n_ops = 256, + .fn = perftest_gmac256_fixed_512byte }), +}; + +static clib_error_t * +test_clib_aes256_gmac (clib_error_t *err) +{ + u8 data[MAX_TEST_DATA_LEN]; + aes_gcm_key_data_t kd; + u8 tag[16]; + +#if 0 + FOREACH_ARRAY_ELT (tc, gmac_test_cases) + { + clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256); + clib_aes256_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag256_len, + tag); + + if (memcmp (tc->tag256, tag, tc->tag256_len) != 0) + return clib_error_return (err, "%s: invalid tag", tc->name); + } +#endif + + for (int i = 0; i < sizeof (data); i++) + data[i] = i; + + clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256); + FOREACH_ARRAY_ELT (tc, inc_test_cases) + { + clib_aes256_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag); + + if (memcmp (tc->tag_gmac_256, tag, 16) != 0) + return clib_error_return (err, "incremental %u bytes: invalid tag", + tc->n_bytes); + } + + return err; +} + +REGISTER_TEST (clib_aes256_gmac) = { + .name = "clib_aes256_gmac", + .fn = test_clib_aes256_gmac, +}; +#endif diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index 80c2e39bdfc..17271b8fcd0 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -223,6 +223,16 @@ u8x32_xor3 (u8x32 a, u8x32 b, u8x32 c) return a ^ b ^ c; } +static_always_inline u8x32 +u8x32_reflect_u8x16 (u8x32 x) +{ + static const u8x32 mask = { + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, + }; + return (u8x32) _mm256_shuffle_epi8 ((__m256i) x, (__m256i) mask); +} + static_always_inline u16x16 u16x16_mask_last (u16x16 v, u8 n_last) { @@ -332,6 +342,11 @@ u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask) (__m256i) mask); } +#define u8x32_word_shift_left(a, n) \ + (u8x32) _mm256_bslli_epi128 ((__m256i) a, n) +#define u8x32_word_shift_right(a, n) \ + (u8x32) _mm256_bsrli_epi128 ((__m256i) a, n) + #define u32x8_permute_lanes(a, b, m) \ (u32x8) _mm256_permute2x128_si256 ((__m256i) a, (__m256i) b, m) #define u64x4_permute_lanes(a, b, m) \ @@ -407,6 +422,46 @@ u8x32_splat_u8x16 (u8x16 a) return (u8x32) _mm256_broadcastsi128_si256 ((__m128i) a); } +static_always_inline u32x8 +u32x8_splat_u32x4 (u32x4 a) +{ + return (u32x8) _mm256_broadcastsi128_si256 ((__m128i) a); +} + +static_always_inline u8x32 +u8x32_load_partial (u8 *data, uword n) +{ +#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE) + return u8x32_mask_load_zero (data, pow2_mask (n)); +#else + u8x32 r = {}; + if (n > 16) + { + r = u8x32_insert_lo (r, *(u8x16u *) data); + r = u8x32_insert_hi (r, u8x16_load_partial (data + 16, n - 16)); + } + else + r = u8x32_insert_lo (r, u8x16_load_partial (data, n)); + return r; +#endif +} + +static_always_inline void +u8x32_store_partial (u8x32 r, u8 *data, uword n) +{ +#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE) + u8x32_mask_store (r, data, pow2_mask (n)); +#else + if (n > 16) + { + *(u8x16u *) data = u8x32_extract_lo (r); + u8x16_store_partial (u8x32_extract_hi (r), data + 16, n - 16); + } + else + u8x16_store_partial (u8x32_extract_lo (r), data, n); +#endif +} + #endif /* included_vector_avx2_h */ /* diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h index eda65caed35..b745b46fd73 100644 --- a/src/vppinfra/vector_avx512.h +++ b/src/vppinfra/vector_avx512.h @@ -593,6 +593,18 @@ u64x8_transpose (u64x8 m[8]) m[7] = (u64x8) _mm512_permutex2var_epi64 (x, pm4, y); } +static_always_inline u8x64 +u8x64_load_partial (u8 *data, uword n) +{ + return u8x64_mask_load_zero (data, pow2_mask (n)); +} + +static_always_inline void +u8x64_store_partial (u8x64 r, u8 *data, uword n) +{ + u8x64_mask_store (r, data, pow2_mask (n)); +} + #endif /* included_vector_avx512_h */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h index 568b689c234..70a7bc0f11e 100644 --- a/src/vppinfra/vector_neon.h +++ b/src/vppinfra/vector_neon.h @@ -231,6 +231,61 @@ __asm__ ("eor3 %0.16b,%1.16b,%2.16b,%3.16b": "=w" (r): "0" (a), "w" (b), "w" (c) return a ^ b ^ c; } +static_always_inline u8x16 +u8x16_load_partial (u8 *data, uword n) +{ + u8x16 r = {}; + if (n > 7) + { + u64x2 r; + r[1] = *(u64u *) (data + n - 8); + r >>= (16 - n) * 8; + r[0] = *(u64u *) data; + return (u8x16) r; + } + else if (n > 3) + { + u32x4 r = {}; + r[1] = *(u32u *) (data + n - 4); + r >>= (8 - n) * 8; + r[0] = *(u32u *) data; + return (u8x16) r; + } + else if (n > 1) + { + u16x8 r = {}; + r[1] = *(u16u *) (data + n - 2); + r >>= (4 - n) * 8; + r[0] = *(u16u *) data; + return (u8x16) r; + } + else if (n > 0) + r[0] = *data; + return r; +} + +static_always_inline void +u8x16_store_partial (u8x16 r, u8 *data, uword n) +{ + if (n > 7) + { + *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8); + *(u64u *) data = ((u64x2) r)[0]; + } + else if (n > 3) + { + *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8); + *(u32u *) data = ((u32x4) r)[0]; + } + else if (n > 1) + { + *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8); + *(u16u *) data = ((u16x8) r)[0]; + } + else if (n > 0) + data[0] = r[0]; +} + #define CLIB_HAVE_VEC128_MSB_MASK #define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h index 2b8927b6920..7c8e6255385 100644 --- a/src/vppinfra/vector_sse42.h +++ b/src/vppinfra/vector_sse42.h @@ -493,6 +493,68 @@ u8x16_xor3 (u8x16 a, u8x16 b, u8x16 c) return a ^ b ^ c; } +static_always_inline u8x16 +u8x16_load_partial (u8 *data, uword n) +{ + u8x16 r = {}; +#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE) + return u8x16_mask_load_zero (data, pow2_mask (n)); +#endif + if (n > 7) + { + u64x2 r; + r[1] = *(u64u *) (data + n - 8); + r >>= (16 - n) * 8; + r[0] = *(u64u *) data; + return (u8x16) r; + } + else if (n > 3) + { + u32x4 r = {}; + r[1] = *(u32u *) (data + n - 4); + r >>= (8 - n) * 8; + r[0] = *(u32u *) data; + return (u8x16) r; + } + else if (n > 1) + { + u16x8 r = {}; + r[1] = *(u16u *) (data + n - 2); + r >>= (4 - n) * 8; + r[0] = *(u16u *) data; + return (u8x16) r; + } + else if (n > 0) + r[0] = *data; + return r; +} + +static_always_inline void +u8x16_store_partial (u8x16 r, u8 *data, uword n) +{ +#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE) + u8x16_mask_store (r, data, pow2_mask (n)); +#else + if (n > 7) + { + *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8); + *(u64u *) data = ((u64x2) r)[0]; + } + else if (n > 3) + { + *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8); + *(u32u *) data = ((u32x4) r)[0]; + } + else if (n > 1) + { + *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8); + *(u16u *) data = ((u16x8) r)[0]; + } + else if (n > 0) + data[0] = r[0]; +#endif +} + #endif /* included_vector_sse2_h */ /* |