/*
 *------------------------------------------------------------------
 * Copyright (c) 2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
#include <crypto_native/aes.h>
#include <crypto_native/ghash.h>

#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif

#if defined(__VAES__) && defined(__AVX512F__)
#define NUM_HI 32
#else
#define NUM_HI 8
#endif

typedef struct
{
  /* pre-calculated hash key values */
  const u8x16 Hi[NUM_HI];
  /* expanded AES key (round keys) */
  const u8x16 Ke[15];
#if defined(__VAES__) && defined(__AVX512F__)
  const u8x64 Ke4[15];
#endif
} aes_gcm_key_data_t;

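/* CTR state: 'counter' keeps the block counter as a plain integer, while Y
   (or Y4 when VAES/AVX512 is available) holds the corresponding 128-bit
   counter block(s) with the 32-bit counter word in network byte order,
   ready to be XORed with round key 0. Y4 packs four consecutive counter
   blocks into a single 512-bit register. */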
typedef struct
{
  u32 counter;
  union
  {
    u32x4 Y;
    u32x16 Y4;
  };
} aes_gcm_counter_t;

typedef enum
{
  AES_GCM_F_WITH_GHASH = (1 << 0),
  AES_GCM_F_LAST_ROUND = (1 << 1),
  AES_GCM_F_ENCRYPT = (1 << 2),
  AES_GCM_F_DECRYPT = (1 << 3),
} aes_gcm_flags_t;

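/* adding this constant to Y bumps the least significant byte of the
   big-endian counter word in the last 32-bit lane; valid as long as that
   byte does not wrap */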
static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };

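/* AES round 0 (AddRoundKey) for n_blocks counter blocks: XOR each counter
   block with round key 0 and advance the counter. The fast path increments
   only the low byte of the big-endian counter word; the slow path rewrites
   the whole word from 'counter' when that byte could wrap. */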
static_always_inline void
aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
			 int n_blocks)
{
  if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
    {
      for (int i = 0; i < n_blocks; i++)
	{
	  r[i] = k ^ (u8x16) ctr->Y;
	  ctr->Y += ctr_inv_1;
	}
      ctr->counter += n_blocks;
    }
  else
    {
      for (int i = 0; i < n_blocks; i++)
	{
	  r[i] = k ^ (u8x16) ctr->Y;
	  ctr->counter++;
	  ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
	}
    }
}

static_always_inline void
aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round (r[i], k);
}

static_always_inline void
aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
			int rounds, int n_blocks)
{

  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round (r[i], k[rounds]);
}

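/* fold n_blocks 16-byte blocks into the running tag T using the
   precomputed powers of H; Hi is offset so the last block of the batch is
   multiplied by the lowest power (H^1 sits at Hi[NUM_HI - 1]) */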
static_always_inline u8x16
aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
		      u8x16u * in, int n_blocks)
{
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
  ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
  for (int i = 1; i < n_blocks; i++)
    ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}

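/* GHASH an arbitrary byte stream (used for the AAD): consume chunks of
   8, 4, 2 and 1 blocks, then handle a partial trailing block with a
   masked load */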
static_always_inline u8x16
aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
{

  while (n_left >= 128)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 8);
      n_left -= 128;
      in += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 4);
      n_left -= 64;
      in += 4;
    }

  if (n_left >= 32)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 2);
      n_left -= 32;
      in += 2;
    }

  if (n_left >= 16)
    {
      T = aes_gcm_ghash_blocks (T, kd, in, 1);
      n_left -= 16;
      in += 1;
    }

  if (n_left)
    {
      u8x16 r = aes_load_partial (in, n_left);
      T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
    }
  return T;
}

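/* encrypt/decrypt up to 4 counter blocks while interleaving AES rounds
   with GHASH. In decrypt mode the ciphertext loaded in this call is
   hashed; in encrypt mode (with AES_GCM_F_WITH_GHASH set) the 4 ciphertext
   blocks left in d[] by the previous call are hashed while the new blocks
   are being encrypted. d[] doubles as the data/output buffer. */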
static_always_inline __clib_unused u8x16
aes_gcm_calc (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d,
	      aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv, int rounds,
	      int n, int last_block_bytes, aes_gcm_flags_t f)
{
  u8x16 r[n];
  ghash_data_t _gd = { }, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;

  clib_prefetch_load (inv + 4);

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], n);
  aes_gcm_enc_round (r, rk[1], n);

  /* load data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
    }

  /* GHASH multiply block 1 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], n);
  aes_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], n);
  aes_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], n);
  aes_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 4 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], n);
  aes_gcm_enc_round (r, rk[9], n);

  /* GHASH reduce 1st step */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_reduce (gd);

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
    }

  /* GHASH reduce 2nd step */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash_reduce2 (gd);

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, n);

  /* store data */
  for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
    outv[i] = d[i];

  if (f & AES_GCM_F_LAST_ROUND)
    aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes);

  /* GHASH final step */
  if (f & AES_GCM_F_WITH_GHASH)
    T = ghash_final (gd);

  return T;
}

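/* unrolled variant of aes_gcm_calc: two batches of 4 AES-CTR blocks are
   processed per call while all 8 data blocks are folded into a single
   GHASH reduction */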
static_always_inline __clib_unused u8x16
aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d,
		     aes_gcm_counter_t *ctr, u8x16u *inv, u8x16u *outv,
		     int rounds, aes_gcm_flags_t f)
{
  u8x16 r[4];
  ghash_data_t _gd, *gd = &_gd;
  const u8x16 *rk = (u8x16 *) kd->Ke;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;

  /* AES rounds 0 and 1 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      d[0] = inv[0];
      d[1] = inv[1];
      d[2] = inv[2];
      d[3] = inv[3];
    }

  /* GHASH multiply block 0 */
  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);

  /* AES rounds 2 and 3 */
  aes_gcm_enc_round (r, rk[2], 4);
  aes_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);

  /* AES rounds 4 and 5 */
  aes_gcm_enc_round (r, rk[4], 4);
  aes_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);

  /* AES rounds 6 and 7 */
  aes_gcm_enc_round (r, rk[6], 4);
  aes_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);

  /* AES rounds 8 and 9 */
  aes_gcm_enc_round (r, rk[8], 4);
  aes_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      d[0] = inv[0];
      d[1] = inv[1];
      d[2] = inv[2];
      d[3] = inv[3];
    }

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  outv[0] = d[0];
  outv[1] = d[1];
  outv[2] = d[2];
  outv[3] = d[3];

  /* load next 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      d[0] = inv[4];
      d[1] = inv[5];
      d[2] = inv[6];
      d[3] = inv[7];
    }

  /* GHASH multiply block 4 */
  ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);

  /* AES rounds 0, 1 and 2 */
  aes_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes_gcm_enc_round (r, rk[1], 4);
  aes_gcm_enc_round (r, rk[2], 4);

  /* GHASH multiply block 5 */
  ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);

  /* AES rounds 3 and 4 */
  aes_gcm_enc_round (r, rk[3], 4);
  aes_gcm_enc_round (r, rk[4], 4);

  /* GHASH multiply block 6 */
  ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);

  /* AES rounds 5 and 6 */
  aes_gcm_enc_round (r, rk[5], 4);
  aes_gcm_enc_round (r, rk[6], 4);

  /* GHASH multiply block 7 */
  ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);

  /* AES rounds 7 and 8 */
  aes_gcm_enc_round (r, rk[7], 4);
  aes_gcm_enc_round (r, rk[8], 4);

  /* GHASH reduce 1st step */
  ghash_reduce (gd);

  /* AES round 9 */
  aes_gcm_enc_round (r, rk[9], 4);

  /* load data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      d[0] = inv[4];
      d[1] = inv[5];
      d[2] = inv[6];
      d[3] = inv[7];
    }

  /* GHASH reduce 2nd step */
  ghash_reduce2 (gd);

  /* AES last round(s) */
  aes_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store data */
  outv[4] = d[0];
  outv[5] = d[1];
  outv[6] = d[2];
  outv[7] = d[3];

  /* GHASH final step */
  return ghash_final (gd);
}

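/* GHASH the 1..4 blocks still buffered in d[] after the last encrypt
   round, masking the final block down to n_bytes when it is partial */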
static_always_inline __clib_unused u8x16
aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t *kd, u8x16 *d, int n_blocks,
		    int n_bytes)
{
  ghash_data_t _gd, *gd = &_gd;
  u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;

  if (n_bytes)
    d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);

  ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
  if (n_blocks > 1)
    ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
  if (n_blocks > 2)
    ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
  if (n_blocks > 3)
    ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}

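/* The VAES/AVX512 variants below operate on 512-bit registers, i.e. four
   128-bit AES blocks per register, so the aes4_* functions handle up to
   16 blocks (256 bytes) per call. */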
#if defined(__VAES__) && defined(__AVX512F__)
static const u32x16 ctr_inv_1234 = {
  0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
};

static const u32x16 ctr_inv_4444 = {
  0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
};

static const u32x16 ctr_1234 = {
  1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
};

static_always_inline void
aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n)
{
  u8 last_byte = (u8) ctr->counter;
  int i = 0;

  /* As the counter is stored in network byte order for performance reasons,
     we increment only its least significant byte, except when it would
     overflow. Since we process four 512-bit blocks in parallel except in
     the last round, overflow can happen only when n == 4 */

  if (n == 4)
    for (; i < 2; i++)
      {
	r[i] = k ^ (u8x64) ctr->Y4;
	ctr->Y4 += ctr_inv_4444;
      }

  if (n == 4 && PREDICT_TRUE (last_byte == 241))
    {
      u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4);

      for (; i < n; i++)
	{
	  r[i] = k ^ (u8x64) ctr->Y4;
	  Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
	  Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111);
	  ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr);
	}
    }
  else
    {
      for (; i < n; i++)
	{
	  r[i] = k ^ (u8x64) ctr->Y4;
	  ctr->Y4 += ctr_inv_4444;
	}
    }
  ctr->counter += n * 4;
}

static_always_inline void
aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
    r[i] = aes_enc_round_x4 (r[i], k);
}

static_always_inline void
aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
			 int rounds, int n_blocks)
{

  /* additional rounds for AES-192 and AES-256 */
  for (int i = 10; i < rounds; i++)
    aes4_gcm_enc_round (r, k[i], n_blocks);

  for (int i = 0; i < n_blocks; i++)
    d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
}

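/* 512-bit counterpart of aes_gcm_calc: n (1..4) u8x64 registers, each
   carrying four 128-bit blocks, are encrypted/decrypted while GHASH is
   interleaved with the AES rounds; Hi4 is adjusted when the last 512-bit
   block is partial */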
static_always_inline u8x16
aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
	       aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
	       int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f)
{
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  int i, ghash_blocks, gc = 1;
  u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
  u8x64 r[4];
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);

  if (f & AES_GCM_F_ENCRYPT)
    {
      /* during encryption we either hash four 512-bit blocks from previous
         round or we don't hash at all */
      ghash_blocks = 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
    }
  else
    {
      /* during decryption we hash 1..4 512-bit blocks from the current round */
      ghash_blocks = n;
      int n_128bit_blocks = n * 4;
      /* if this is last round of decryption, we may have less than 4
         128-bit blocks in the last 512-bit data block, so we need to adjust
         Hi4 pointer accordingly */
      if (f & AES_GCM_F_LAST_ROUND)
	n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
      Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
    }

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], n);
  aes4_gcm_enc_round (r, rk[1], n);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    {
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
    }

  /* GHASH multiply block 0 */
  if (f & AES_GCM_F_WITH_GHASH)
    ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		      u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], n);
  aes4_gcm_enc_round (r, rk[3], n);

  /* GHASH multiply block 1 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], n);
  aes4_gcm_enc_round (r, rk[5], n);

  /* GHASH multiply block 2 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], n);
  aes4_gcm_enc_round (r, rk[7], n);

  /* GHASH multiply block 3 */
  if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    {
      for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
	d[i] = inv[i];

      if (f & AES_GCM_F_LAST_ROUND)
	d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
    }

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], n);
  aes4_gcm_enc_round (r, rk[9], n);

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, n);

  /* store 4 blocks of data */
  for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
    outv[i] = d[i];

  if (f & AES_GCM_F_LAST_ROUND)
    u8x64_mask_store (d[i], outv + i, byte_mask);

  /* GHASH reduce 1st step */
  ghash4_reduce (gd);

  /* GHASH reduce 2nd step */
  ghash4_reduce2 (gd);

  /* GHASH final step */
  return ghash4_final (gd);
}

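/* 512-bit counterpart of aes_gcm_calc_double: 32 blocks (512 bytes) per
   call with a single GHASH reduction */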
static_always_inline u8x16
aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		      aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
		      int rounds, aes_gcm_flags_t f)
{
  u8x64 r[4];
  ghash4_data_t _gd, *gd = &_gd;
  const u8x64 *rk = (u8x64 *) kd->Ke4;
  u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
  u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i];

  /* GHASH multiply block 0 */
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 1 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 2 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH multiply block 3 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i];

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    outv[i] = d[i];

  /* load 4 blocks of data - decrypt round */
  if (f & AES_GCM_F_DECRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i + 4];

  /* GHASH multiply block 4 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]);

  /* AES rounds 0 and 1 */
  aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
  aes4_gcm_enc_round (r, rk[1], 4);

  /* GHASH multiply block 5 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]);

  /* AES rounds 2 and 3 */
  aes4_gcm_enc_round (r, rk[2], 4);
  aes4_gcm_enc_round (r, rk[3], 4);

  /* GHASH multiply block 6 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]);

  /* AES rounds 4 and 5 */
  aes4_gcm_enc_round (r, rk[4], 4);
  aes4_gcm_enc_round (r, rk[5], 4);

  /* GHASH multiply block 7 */
  ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]);

  /* AES rounds 6 and 7 */
  aes4_gcm_enc_round (r, rk[6], 4);
  aes4_gcm_enc_round (r, rk[7], 4);

  /* GHASH reduce 1st step */
  ghash4_reduce (gd);

  /* AES rounds 8 and 9 */
  aes4_gcm_enc_round (r, rk[8], 4);
  aes4_gcm_enc_round (r, rk[9], 4);

  /* GHASH reduce 2nd step */
  ghash4_reduce2 (gd);

  /* load 4 blocks of data - encrypt round */
  if (f & AES_GCM_F_ENCRYPT)
    for (int i = 0; i < 4; i++)
      d[i] = inv[i + 4];

  /* AES last round(s) */
  aes4_gcm_enc_last_round (r, d, rk, rounds, 4);

  /* store 4 blocks of data */
  for (int i = 0; i < 4; i++)
    outv[i + 4] = d[i];

  /* GHASH final step */
  return ghash4_final (gd);
}

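/* GHASH the 1..4 512-bit blocks left in d[] after the last encrypt round,
   masking the tail of the final 512-bit block to last_4block_bytes */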
static_always_inline u8x16
aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
		     int n, int last_4block_bytes)
{
  ghash4_data_t _gd, *gd = &_gd;
  u8x64u *Hi4;
  int n_128bit_blocks;
  u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
  n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
  Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);

  d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask);
  ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
		    u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
  if (n > 1)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
  if (n > 2)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
  if (n > 3)
    ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
  ghash4_reduce (gd);
  ghash4_reduce2 (gd);
  return ghash4_final (gd);
}
#endif

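/* encryption driver: the first batch is encrypted without GHASH (there is
   no ciphertext to hash yet), the main loop then hashes each batch of
   ciphertext while the next one is encrypted, and the blocks still
   buffered in d[]/d4[] are hashed at the end by aes*_gcm_ghash_last() */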
static_always_inline u8x16
aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
  aes_gcm_flags_t f = AES_GCM_F_ENCRYPT;

  if (n_left == 0)
    return T;

#if defined(__VAES__) && defined(__AVX512F__)
  u8x64 d4[4];
  if (n_left < 256)
    {
      f |= AES_GCM_F_LAST_ROUND;
      if (n_left > 192)
	{
	  n_left -= 192;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
	}
      else if (n_left > 128)
	{
	  n_left -= 128;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
	}
      else if (n_left > 64)
	{
	  n_left -= 64;
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
	}
      else
	{
	  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
	  return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
	}
    }

  aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

  /* next */
  n_left -= 256;
  outv += 16;
  inv += 16;

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 512)
    {
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 512;
      outv += 32;
      inv += 32;
    }

  while (n_left >= 256)
    {
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 256;
      outv += 16;
      inv += 16;
    }

  if (n_left == 0)
    return aes4_gcm_ghash_last (T, kd, d4, 4, 64);

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 192)
    {
      n_left -= 192;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
    }

  if (n_left > 128)
    {
      n_left -= 128;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
    }

  if (n_left > 64)
    {
      n_left -= 64;
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
      return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
    }

  T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
  return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
#else
  u8x16 d[4];
  if (n_left < 64)
    {
      f |= AES_GCM_F_LAST_ROUND;
      if (n_left > 48)
	{
	  n_left -= 48;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 4, n_left);
	}
      else if (n_left > 32)
	{
	  n_left -= 32;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 3, n_left);
	}
      else if (n_left > 16)
	{
	  n_left -= 16;
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 2, n_left);
	}
      else
	{
	  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
	  return aes_gcm_ghash_last (T, kd, d, 1, n_left);
	}
    }

  aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

  /* next */
  n_left -= 64;
  outv += 4;
  inv += 4;

  f |= AES_GCM_F_WITH_GHASH;

  while (n_left >= 128)
    {
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 128;
      outv += 8;
      inv += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 64;
      outv += 4;
      inv += 4;
    }

  if (n_left == 0)
    return aes_gcm_ghash_last (T, kd, d, 4, 0);

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 48)
    {
      n_left -= 48;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 4, n_left);
    }

  if (n_left > 32)
    {
      n_left -= 32;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 3, n_left);
    }

  if (n_left > 16)
    {
      n_left -= 16;
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
      return aes_gcm_ghash_last (T, kd, d, 2, n_left);
    }

  T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
  return aes_gcm_ghash_last (T, kd, d, 1, n_left);
#endif
}

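/* decryption driver: ciphertext is hashed in the same pass as it is
   decrypted, so GHASH is enabled from the start and no trailing
   ghash_last step is needed */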
static_always_inline u8x16
aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
	     u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
{
  aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;
#if defined(__VAES__) && defined(__AVX512F__)
  u8x64 d4[4] = { };

  while (n_left >= 512)
    {
      T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 512;
      outv += 32;
      inv += 32;
    }

  while (n_left >= 256)
    {
      T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 256;
      outv += 16;
      inv += 16;
    }

  if (n_left == 0)
    return T;

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 192)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
			  n_left - 192, f);
  if (n_left > 128)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
			  n_left - 128, f);
  if (n_left > 64)
    return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
			  n_left - 64, f);
  return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
#else
  u8x16 d[4] = {};
  while (n_left >= 128)
    {
      T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);

      /* next */
      n_left -= 128;
      outv += 8;
      inv += 8;
    }

  if (n_left >= 64)
    {
      T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);

      /* next */
      n_left -= 64;
      outv += 4;
      inv += 4;
    }

  if (n_left == 0)
    return T;

  f |= AES_GCM_F_LAST_ROUND;

  if (n_left > 48)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);

  if (n_left > 32)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);

  if (n_left > 16)
    return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);

  return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
#endif
}

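/* complete AES-GCM operation: GHASH the AAD, build Y0 from the 96-bit IV
   with counter value 1, encrypt or decrypt the payload starting at
   counter 2, then form the tag from the GHASH of the AAD/data bit lengths
   and E(K, Y0). On encrypt the tag is stored, on decrypt it is compared
   with the received tag; returns 0 on mismatch, 1 otherwise. */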
static_always_inline int
aes_gcm (u8x16u *in, u8x16u *out, u8x16u *addt, u8 *ivp, u8x16u *tag,
	 u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t *kd,
	 int aes_rounds, int is_encrypt)
{
  int i;
  u8x16 r, T = { };
  vec128_t Y0 = {};
  ghash_data_t _gd, *gd = &_gd;
  aes_gcm_counter_t _ctr, *ctr = &_ctr;

  clib_prefetch_load (ivp);
  clib_prefetch_load (in);
  clib_prefetch_load (in + 4);

  /* calculate ghash for AAD - optimized for ipsec common cases */
  if (aad_bytes == 8)
    T = aes_gcm_ghash (T, kd, addt, 8);
  else if (aad_bytes == 12)
    T = aes_gcm_ghash (T, kd, addt, 12);
  else
    T = aes_gcm_ghash (T, kd, addt, aad_bytes);

  /* initialize counter */
  ctr->counter = 1;
  Y0.as_u64x2[0] = *(u64u *) ivp;
  Y0.as_u32x4[2] = *(u32u *) (ivp + 8);
  Y0.as_u32x4 += ctr_inv_1;
#if defined(__VAES__) && defined(__AVX512F__)
  ctr->Y4 = u32x16_splat_u32x4 (Y0.as_u32x4) + ctr_inv_1234;
#else
  ctr->Y = Y0.as_u32x4 + ctr_inv_1;
#endif

  /* ghash and encrypt/decrypt */
  if (is_encrypt)
    T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
  else
    T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);

  clib_prefetch_load (tag);

  /* Finalize ghash - data bytes and aad bytes converted to bits */
  /* *INDENT-OFF* */
  r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);
  /* *INDENT-ON* */

  /* interleaved computation of final ghash and E(Y0, k) */
  ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
  r = kd->Ke[0] ^ Y0.as_u8x16;
  for (i = 1; i < 5; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  for (; i < 9; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  T = ghash_final (gd);
  for (; i < aes_rounds; i += 1)
    r = aes_enc_round (r, kd->Ke[i]);
  r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
  T = u8x16_reflect (T) ^ r;

  /* tag_len 16 -> 0 */
  tag_len &= 0xf;

  if (is_encrypt)
    {
      /* store tag */
      if (tag_len)
	aes_store_partial (tag, T, tag_len);
      else
	tag[0] = T;
    }
  else
    {
      /* check tag */
      u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
      if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask)
	return 0;
    }
  return 1;
}

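/* vnet_crypto ops handler: run aes_gcm() in encrypt mode over each op in
   the batch and mark it completed */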
static_always_inline u32
aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
		     u32 n_ops, aes_key_size_t ks)
{
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;
  u32 n_left = n_ops;


next:
  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
	   (u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
	   op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1);
  op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;

  if (--n_left)
    {
      op += 1;
      goto next;
    }

  return n_ops;
}

static_always_inline u32
aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
		     aes_key_size_t ks)
{
  crypto_native_main_t *cm = &crypto_native_main;
  vnet_crypto_op_t *op = ops[0];
  aes_gcm_key_data_t *kd;
  u32 n_left = n_ops;
  int rv;

next:
  kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
  rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
		(u8 *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
		op->tag_len, kd, AES_KEY_ROUNDS (ks),
		/* is_encrypt */ 0);

  if (rv)
    {
      op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
    }
  else
    {
      op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
      n_ops--;
    }

  if (--n_left)
    {
      op += 1;
      goto next;
    }

  return n_ops;
}

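/* per-key setup: expand the AES key schedule, derive H = E(K, 0^128) and
   precompute NUM_HI powers of H for GHASH; with VAES/AVX512 the round
   keys are additionally broadcast into 512-bit registers (Ke4) */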
static_always_inline void *
aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
{
  aes_gcm_key_data_t *kd;
  u8x16 H;

  kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);

  /* expand AES key */
  aes_key_expand ((u8x16 *) kd->Ke, key->data, ks);

  /* pre-calculate H */
  H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
  H = u8x16_reflect (H);
  ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
#if defined(__VAES__) && defined(__AVX512F__)
  u8x64 *Ke4 = (u8x64 *) kd->Ke4;
  for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
    Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
#endif
  return kd;
}

#define foreach_aes_gcm_handler_type _(128) _(192) _(256)

#define _(x) \
static u32 aes_ops_dec_aes_gcm_##x                                         \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)                      \
{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); }              \
static u32 aes_ops_enc_aes_gcm_##x                                         \
(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops)                      \
{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); }              \
static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key)                 \
{ return aes_gcm_key_exp (key, AES_KEY_##x); }

foreach_aes_gcm_handler_type;
#undef _

clib_error_t *
#if defined(__VAES__) && defined(__AVX512F__)
crypto_native_aes_gcm_init_icl (vlib_main_t *vm)
#elif __AVX512F__
crypto_native_aes_gcm_init_skx (vlib_main_t * vm)
#elif __AVX2__
crypto_native_aes_gcm_init_hsw (vlib_main_t * vm)
#elif __aarch64__
crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
#else
crypto_native_aes_gcm_init_slm (vlib_main_t * vm)
#endif
{
  crypto_native_main_t *cm = &crypto_native_main;

#define _(x) \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				    VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
				    aes_ops_enc_aes_gcm_##x); \
  vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
				    VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
				    aes_ops_dec_aes_gcm_##x); \
  cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
  foreach_aes_gcm_handler_type;
#undef _
  return 0;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */