diff options
author | Damjan Marion <damarion@cisco.com> | 2019-04-02 19:06:50 +0200 |
---|---|---|
committer | Neale Ranns <nranns@cisco.com> | 2019-04-04 09:25:51 +0000 |
commit | deb8af6eb71f9ad951137ee6e84e971c3ab23ec4 (patch) | |
tree | 3a8aeaaf2f1c5efa987ec209040842e06c0aa7c3 /src/plugins/crypto_ia32/aesni.h | |
parent | 8ea109e40a65c1d7696162fa3d4c1e386b6c9414 (diff) |
Add crypto_ia32 plugin
Currently this plugin provides AES CBC optimized code. Encryption code
supports parallel encryption of 4 buffers with different sizes and keys,
which improves performance 4x compared to the standard serialized approach.
On Skylake Server measured performance is around 0.71 clocks/byte with
256 buffers with size in range between 7000 and 8000 bytes.
Measured performance includes overhead of processing crypto ops.
Change-Id: I5ec2afee708fcdf16a4234926534dd64ff1155c3
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins/crypto_ia32/aesni.h')
-rw-r--r-- | src/plugins/crypto_ia32/aesni.h | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/src/plugins/crypto_ia32/aesni.h b/src/plugins/crypto_ia32/aesni.h new file mode 100644 index 00000000000..077889ae903 --- /dev/null +++ b/src/plugins/crypto_ia32/aesni.h @@ -0,0 +1,224 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#ifndef __aesni_h__ +#define __aesni_h__ + +typedef enum +{ + AESNI_KEY_128 = 0, + AESNI_KEY_192 = 1, + AESNI_KEY_256 = 2, +} aesni_key_size_t; + +#define AESNI_KEY_ROUNDS(x) (10 + x *2) +#define AESNI_KEY_BYTES(x) (16 + x * 8) + + +/* AES-NI based AES key expansion based on code samples from + Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper + (323641-001) */ + +static_always_inline __m128i +aes128_key_assist (__m128i r1, __m128i r2) +{ + r1 ^= _mm_slli_si128 (r1, 4); + r1 ^= _mm_slli_si128 (r1, 4); + r1 ^= _mm_slli_si128 (r1, 4); + return r1 ^ _mm_shuffle_epi32 (r2, 0xff); +} + +static_always_inline void +aes128_key_expand (__m128i * k, u8 * key) +{ + k[0] = _mm_loadu_si128 ((const __m128i *) key); + k[1] = aes128_key_assist (k[0], _mm_aeskeygenassist_si128 (k[0], 0x01)); + k[2] = aes128_key_assist (k[1], _mm_aeskeygenassist_si128 (k[1], 0x02)); + k[3] = aes128_key_assist (k[2], _mm_aeskeygenassist_si128 (k[2], 0x04)); + k[4] = aes128_key_assist (k[3], _mm_aeskeygenassist_si128 (k[3], 
0x08)); + k[5] = aes128_key_assist (k[4], _mm_aeskeygenassist_si128 (k[4], 0x10)); + k[6] = aes128_key_assist (k[5], _mm_aeskeygenassist_si128 (k[5], 0x20)); + k[7] = aes128_key_assist (k[6], _mm_aeskeygenassist_si128 (k[6], 0x40)); + k[8] = aes128_key_assist (k[7], _mm_aeskeygenassist_si128 (k[7], 0x80)); + k[9] = aes128_key_assist (k[8], _mm_aeskeygenassist_si128 (k[8], 0x1b)); + k[10] = aes128_key_assist (k[9], _mm_aeskeygenassist_si128 (k[9], 0x36)); +} + +static_always_inline void +aes192_key_assist (__m128i * r1, __m128i * r2, __m128i * r3) +{ + __m128i r; + *r1 ^= r = _mm_slli_si128 (*r1, 0x4); + *r1 ^= r = _mm_slli_si128 (r, 0x4); + *r1 ^= _mm_slli_si128 (r, 0x4); + *r1 ^= _mm_shuffle_epi32 (*r2, 0x55); + *r3 ^= _mm_slli_si128 (*r3, 0x4); + *r3 ^= *r2 = _mm_shuffle_epi32 (*r1, 0xff); +} + +static_always_inline void +aes192_key_expand (__m128i * k, u8 * key) +{ + __m128i r1, r2, r3; + + k[0] = r1 = _mm_loadu_si128 ((__m128i *) key); + r3 = _mm_loadu_si128 ((__m128i *) (key + 16)); + + k[1] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x1); + aes192_key_assist (&r1, &r2, &r3); + k[1] = (__m128i) _mm_shuffle_pd ((__m128d) k[1], (__m128d) r1, 0); + k[2] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1); + r2 = _mm_aeskeygenassist_si128 (r3, 0x2); + aes192_key_assist (&r1, &r2, &r3); + k[3] = r1; + + k[4] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x4); + aes192_key_assist (&r1, &r2, &r3); + k[4] = (__m128i) _mm_shuffle_pd ((__m128d) k[4], (__m128d) r1, 0); + k[5] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1); + r2 = _mm_aeskeygenassist_si128 (r3, 0x8); + aes192_key_assist (&r1, &r2, &r3); + k[6] = r1; + + k[7] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x10); + aes192_key_assist (&r1, &r2, &r3); + k[7] = (__m128i) _mm_shuffle_pd ((__m128d) k[7], (__m128d) r1, 0); + k[8] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1); + r2 = _mm_aeskeygenassist_si128 (r3, 0x20); + aes192_key_assist (&r1, &r2, &r3); + k[9] = r1; + + k[10] = r3; 
+ r2 = _mm_aeskeygenassist_si128 (r3, 0x40); + aes192_key_assist (&r1, &r2, &r3); + k[10] = (__m128i) _mm_shuffle_pd ((__m128d) k[10], (__m128d) r1, 0); + k[11] = (__m128i) _mm_shuffle_pd ((__m128d) r1, (__m128d) r3, 1); + r2 = _mm_aeskeygenassist_si128 (r3, 0x80); + aes192_key_assist (&r1, &r2, &r3); + k[12] = r1; +} + +static_always_inline void +aes256_key_assist1 (__m128i * r1, __m128i * r2) +{ + __m128i r; + *r1 ^= r = _mm_slli_si128 (*r1, 0x4); + *r1 ^= r = _mm_slli_si128 (r, 0x4); + *r1 ^= _mm_slli_si128 (r, 0x4); + *r1 ^= *r2 = _mm_shuffle_epi32 (*r2, 0xff); +} + +static_always_inline void +aes256_key_assist2 (__m128i r1, __m128i * r3) +{ + __m128i r; + *r3 ^= r = _mm_slli_si128 (*r3, 0x4); + *r3 ^= r = _mm_slli_si128 (r, 0x4); + *r3 ^= _mm_slli_si128 (r, 0x4); + *r3 ^= _mm_shuffle_epi32 (_mm_aeskeygenassist_si128 (r1, 0x0), 0xaa); +} + +static_always_inline void +aes256_key_expand (__m128i * k, u8 * key) +{ + __m128i r1, r2, r3; + k[0] = r1 = _mm_loadu_si128 ((__m128i *) key); + k[1] = r3 = _mm_loadu_si128 ((__m128i *) (key + 16)); + r2 = _mm_aeskeygenassist_si128 (k[1], 0x01); + aes256_key_assist1 (&r1, &r2); + k[2] = r1; + aes256_key_assist2 (r1, &r3); + k[3] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x02); + aes256_key_assist1 (&r1, &r2); + k[4] = r1; + aes256_key_assist2 (r1, &r3); + k[5] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x04); + aes256_key_assist1 (&r1, &r2); + k[6] = r1; + aes256_key_assist2 (r1, &r3); + k[7] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x08); + aes256_key_assist1 (&r1, &r2); + k[8] = r1; + aes256_key_assist2 (r1, &r3); + k[9] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x10); + aes256_key_assist1 (&r1, &r2); + k[10] = r1; + aes256_key_assist2 (r1, &r3); + k[11] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x20); + aes256_key_assist1 (&r1, &r2); + k[12] = r1; + aes256_key_assist2 (r1, &r3); + k[13] = r3; + r2 = _mm_aeskeygenassist_si128 (r3, 0x40); + aes256_key_assist1 (&r1, &r2); + k[14] = r1; +} + +static_always_inline 
void +aes_key_expand (__m128i * k, u8 * key, aesni_key_size_t ks) +{ + switch (ks) + { + case AESNI_KEY_128: + aes128_key_expand (k, key); + break; + case AESNI_KEY_192: + aes192_key_expand (k, key); + break; + case AESNI_KEY_256: + aes256_key_expand (k, key); + break; + } +} + + +static_always_inline void +aes_key_enc_to_dec (__m128i * k, aesni_key_size_t rounds) +{ + __m128i r; + + r = k[rounds]; + k[rounds] = k[0]; + k[0] = r; + + for (int i = 1; i < (rounds / 2); i++) + { + r = k[rounds - i]; + k[rounds - i] = _mm_aesimc_si128 (k[i]); + k[i] = _mm_aesimc_si128 (r); + } + + k[rounds / 2] = _mm_aesimc_si128 (k[rounds / 2]); +} + +#endif /* __aesni_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |