/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_index_to_ptr_h
#define included_vector_index_to_ptr_h
#include <vppinfra/clib.h>

#ifdef CLIB_HAVE_VEC128
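/* Convert the 4 u32 indices starting at indices[i] into 4 pointers:
   zero-extend each index to 64 bits, shift it left by 'shift' and add the
   base offset held in 'ov'. */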
static_always_inline void
clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
{
  u32x4 iv4 = u32x4_load_unaligned (indices + i);
  u64x2 pv2;
  pv2 = u64x2_from_u32x4 (iv4);
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
#ifdef __aarch64__
  pv2 = u64x2_from_u32x4_high (iv4);
#else
  pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
#endif
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
}
#endif

/** \brief Convert array of indices to pointers with base and shift

    @param indices source array of u32 indices
    @param base base pointer
    @param shift number of bits each index is shifted left by
    @param ptrs destination array of pointers
    @param n_elts number of elements in the source array
*/

static_always_inline void
clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
		       u32 n_elts)
{
#if defined CLIB_HAVE_VEC512
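  /* 512-bit vectors: zero-extend u32 indices to u64 eight at a time (in
     blocks of 64, 32, 16 and 8), shift, add the base address and store the
     resulting pointers. */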
  if (n_elts >= 8)
    {
      u64x8 off = u64x8_splat ((u64) base);
      u64x8 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 64)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
	  b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
	  b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
	  b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
	  b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
	  b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
	  u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
	  u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
	  u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
	  u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
	  u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
	  ptrs += 64;
	  indices += 64;
	  n_elts -= 64;
	}

      if (n_elts == 0)
	return;

      if (n_elts >= 32)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
	  b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
	  u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
	  ptrs += 32;
	  indices += 32;
	  n_elts -= 32;
	}
      if (n_elts >= 16)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  ptrs += 16;
	  indices += 16;
	  n_elts -= 16;
	}
      if (n_elts >= 8)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  ptrs += 8;
	  indices += 8;
	  n_elts -= 8;
	}

      if (n_elts == 0)
	return;

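      /* 1-7 elements left: reload the last 8 indices of the array so a
         single, possibly overlapping store covers the remainder. */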
      b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
      u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
      return;
    }
  else
    {
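      /* fewer than 8 elements: a masked load/store touches only the first
         n_elts lanes */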
      u32 mask = pow2_mask (n_elts);
      u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
      u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
      return;
    }
#elif defined CLIB_HAVE_VEC256
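  /* 256-bit vectors: same scheme with 4-element chunks (blocks of 32, 16, 8
     and 4), finishing with an overlapping store over the last 4 elements. */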
  if (n_elts >= 4)
    {
      u64x4 off = u64x4_splat ((u64) base);
      u64x4 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 32)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
	  b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
	  b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
	  b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
	  b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
	  b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
	  u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
	  u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
	  u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
	  u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
	  u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
	  ptrs += 32;
	  indices += 32;
	  n_elts -= 32;
	}

      if (n_elts == 0)
	return;

      if (n_elts >= 16)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
	  b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
	  u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
	  ptrs += 16;
	  indices += 16;
	  n_elts -= 16;
	}
      if (n_elts >= 8)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  ptrs += 8;
	  indices += 8;
	  n_elts -= 8;
	}
      if (n_elts > 4)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  ptrs += 4;
	  indices += 4;
	  n_elts -= 4;
	}

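      /* overlapping store covering the last 4 elements of the array */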
      b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
      u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
      return;
    }
#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
  else
    {
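      /* fewer than 4 elements: a masked load/store touches only the first
         n_elts lanes */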
      u32 mask = pow2_mask (n_elts);
      u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
      u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
      return;
    }
#endif
#elif defined(CLIB_HAVE_VEC128)
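  /* 128-bit vectors: convert 4 indices per helper call; the original
     indices/ptrs pointers are saved in i/p so the final call can cover the
     last 4 elements with an overlapping store. */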
  if (n_elts >= 4)
    {
      u64x2 ov = u64x2_splat ((u64) base);
      u32 *i = (u32 *) indices;
      void **p = (void **) ptrs;
      u32 n = n_elts;

      while (n >= 32)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
	  indices += 32;
	  ptrs += 32;
	  n -= 32;
	}

      if (n == 0)
	return;

      if (n >= 16)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
	  indices += 16;
	  ptrs += 16;
	  n -= 16;
	}

      if (n >= 8)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  indices += 8;
	  ptrs += 8;
	  n -= 8;
	}

      if (n > 4)
	clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);

      clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
      return;
    }
#endif
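  /* scalar fallback: converts any elements not handled by the vector paths
     above */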
  while (n_elts)
    {
      ptrs[0] = base + ((u64) indices[0] << shift);
      ptrs += 1;
      indices += 1;
      n_elts -= 1;
    }
}
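
/*
 * Usage sketch (illustrative only, not part of the original header): expand
 * u32 element indices into element pointers for a hypothetical array whose
 * elements are 2^shift bytes apart. The element layout and values below are
 * assumptions chosen for the example.
 *
 *   u8 elts[16][64];                 // 16 elements, 64 bytes apart
 *   u32 indices[3] = { 0, 5, 9 };
 *   void *ptrs[3];
 *
 *   clib_index_to_ptr_u32 (indices, elts, 6, ptrs, 3);
 *   // now ptrs[k] == (void *) elts + ((u64) indices[k] << 6),
 *   // i.e. ptrs[1] points at elts[5]
 */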

#endif