aboutsummaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector/array_mask.h
blob: 3d4a82ac01b170734b50c13c59b53058f120e1d9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_array_mask_h
#define included_vector_array_mask_h
#include <vppinfra/clib.h>

/** \brief Mask array of 32-bit elemments

    @param src source array of u32 elements
    @param mask use to mask the values of source array
    @param n_elts number of elements in the source array
    @return masked values are return in source array
*/

static_always_inline void
clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16 mask16 = u32x16_splat (mask);
  if (n_elts <= 16)
    {
      u32 m = pow2_mask (n_elts);
      u32x16 r = u32x16_mask_load_zero (src, m);
      u32x16_mask_store (r & mask16, src, m);
      return;
    }
  for (; n_elts >= 16; n_elts -= 16, src += 16)
    *((u32x16u *) src) &= mask16;
  *((u32x16u *) (src + n_elts - 16)) &= mask16;
#elif defined(CLIB_HAVE_VEC256)
  u32x8 mask8 = u32x8_splat (mask);
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  if (n_elts <= 8)
    {
      u32 m = pow2_mask (n_elts);
      u32x8 r = u32x8_mask_load_zero (src, m);
      u32x8_mask_store (r & mask8, src, m);
      return;
    }
#else
  if (PREDICT_FALSE (n_elts < 4))
    {
      if (n_elts & 2)
	{
	  src[0] &= mask;
	  src[1] &= mask;
	  src += 2;
	}
      if (n_elts & 1)
	src[0] &= mask;
      return;
    }
  if (n_elts <= 8)
    {
      u32x4 mask4 = u32x4_splat (mask);
      *(u32x4u *) src &= mask4;
      *(u32x4u *) (src + n_elts - 4) &= mask4;
      return;
    }
#endif

  for (; n_elts >= 8; n_elts -= 8, src += 8)
    *((u32x8u *) src) &= mask8;
  *((u32x8u *) (src + n_elts - 8)) &= mask8;
#elif defined(CLIB_HAVE_VEC128)
  u32x4 mask4 = u32x4_splat (mask);

  if (PREDICT_FALSE (n_elts < 4))
    {
      if (n_elts & 2)
	{
	  src[0] &= mask;
	  src[1] &= mask;
	  src += 2;
	}
      if (n_elts & 1)
	src[0] &= mask;
      return;
    }

  for (; n_elts >= 4; n_elts -= 4, src += 4)
    *((u32x4u *) src) &= mask4;
  *((u32x4u *) (src + n_elts - 4)) &= mask4;
  return;
#else
  while (n_elts > 0)
    {
      src[0] &= mask;
      src++;
      n_elts--;
    }
#endif
}

static_always_inline void
clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
{
#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u32x16 r = u32x16_splat (v);
  for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
    u32x16_mask_store (r, a, bmp);
#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u32x8 r = u32x8_splat (v);
  for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
    u32x8_mask_store (r, a, bmp);
#else
  while (bmp)
    {
      a[get_lowest_set_bit_index (bmp)] = v;
      bmp = clear_lowest_set_bit (bmp);
    }
#endif
}

static_always_inline void
clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
{
  while (n_elts >= uword_bits)
    {
      clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
      a += uword_bits;
      n_elts -= uword_bits;
    }

  clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
}

#endif