summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/cache.h
AgeCommit message (Collapse)AuthorFilesLines
2021-07-27vppinfra: introduce CLIB_CACHE_PREFETCH_BYTESDamjan Marion1-31/+29
Type: improvement Change-Id: Ic07010f11ef303f5213a33b0faf24aaedb62f110 Signed-off-by: Damjan Marion <damarion@cisco.com>
2020-02-12vppinfra: add clib_prefetch_load and clib_prefetch_storeDamjan Marion1-0/+12
For people tired of typing CLIB_CACHE_LINE_BYTES.... Type: improvement Change-Id: I7658a8525ff6e3edc81a29b05a6fda33e537806e Signed-off-by: Damjan Marion <dmarion@me.com>
2019-01-23vppinfra: add CLIB_CACHE_LINE_ROUND() macroDamjan Marion1-0/+1
Change-Id: Ic141162acaf39878ab978a997e3f6757d4f7c240 Signed-off-by: Damjan Marion <damarion@cisco.com>
2018-10-01Support dynamic dual/quad loop selection on aarch64Lijian Zhang1-0/+5
Currently, there are three variants available on aarch64, qdf24xx, thunderx2t99, and cortex-a72. -DCLIB_N_PREFETCHES is passed to source code to select dual/quad implementation. Besides, different compiler options are applied on these critical functions. gcc-7.3.0 reports ICE (internal compiler error) with -mtune=thunderx2t99, so -mtune=thunderx2t99 is enabled only when gcc version is greater than 7.3.0 Cavium ThunderX2, Implementer 0x43, Part 0x0af -march=armv8-a+crc+crypto -mtune=thunderx2t99 Qualcomm Centriq 2400, Implementer 0x51, Part 0xc00 -march=armv8.1-a+crc+crypto -mtune=qdf24xx Cortex-A72, Implementer 0x41, Part 0xd08 -march=armv8-a+crc+crypto -mtune=cortex-a72 Change-Id: Id5649c6325c1e642d0fd42535e3908793b13e02a Signed-off-by: Lijian Zhang <Lijian.Zhang@arm.com> Reviewed-by: Sirshak Das <sirshak.das@arm.com> Reviewed-by: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
2018-08-01Store USE_DLMALLOC in vppinfra/config.hDamjan Marion1-3/+0
Change-Id: Ib596e7f525b83dc7e830bcf6a126cd210216ce86 Signed-off-by: Damjan Marion <damarion@cisco.com>
2018-06-04Configure or deduce CLIB_LOG2_CACHE_LINE_BYTES (VPP-1064)Dave Barach1-1/+4
Added configure argument "--with-log2-cache-line-bytes=5|6|7|auto" AKA 32, 64, or 128 bytes, or use the inferred value from the build host. produces build-xxx/vpp/vppinfra/config.h, which .../src/vppinfra/cache.h Kernels which implement the following pseudo-file (aka x86_64) are easy: /sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size Otherwise, extract the cpuid from /proc/cpuinfo and map it to the cache line size. Change-Id: I7ff861e042faf82c3901fa1db98864fbdea95b74 Signed-off-by: Dave Barach <dave@barachs.net> Signed-off-by: Nitin Saxena <nitin.saxena@cavium.com>
2018-02-21vppinfra: change default cache line size 64 bytesDamjan Marion1-10/+2
This change only affects Aarch64 where previously we were using 128 bytes. Change-Id: I52a3f2f3ff8c06abe8ae3933bc0d7a2a7749dd8a Signed-off-by: Damjan Marion <damarion@cisco.com>
2017-04-25Define cache line size for x86 32-bitDamjan Marion1-1/+1
Change-Id: Ie70e805f342bda69207b9df9543f1eccb5e69612 Signed-off-by: Damjan Marion <damarion@cisco.com>
2016-12-28Reorganize source tree to use single autotools instanceDamjan Marion1-0/+104
Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion <damarion@cisco.com>
11' href='#n311'>311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  ------------------------------------------------------------------------------
  By Bob Jenkins, 1996, Public Domain
  MODIFIED:
  960327: Creation (addition of randinit, really)
  970719: use context, not global variables, for internal state
  980324: renamed seed to flag
  980605: recommend ISAAC_LOG2_SIZE=4 for noncryptography.
  010626: note this is public domain
  ------------------------------------------------------------------------------

  Modified for CLIB by Eliot Dresselhaus.
  Dear Bob, Thanks for all the great work. - Eliot

  modifications copyright (c) 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/* ISAAC is Bob Jenkins' random number generator.
   http://burtleburtle.net/bob/rand/isaacafa.html */

#include <vppinfra/random_isaac.h>

#if uword_bits != 32 && uword_bits != 64
#error "isaac only works for 32 or 64 bit words"
#endif

#if uword_bits == 32

/*
 * ind32: read the u32 located at byte offset (x & ((ISAAC_SIZE-1)<<2))
 * from the start of table mm.  The <<2 turns a word index into a byte
 * offset for 4-byte words, so the low two bits of x are ignored.
 * NOTE(review): the mask only selects a valid slot if ISAAC_SIZE is a
 * power of two -- confirm against the header.
 */
#define ind32(mm,x)  (*(u32 *)((u8 *)(mm) + ((x) & ((ISAAC_SIZE-1)<<2))))
/*
 * rngstep32: one generator step.
 *   x  <- current table word *m
 *   a  <- (a ^ mix) + *m2, then m2 advances
 *   *m <- y = table word selected by x, plus a and b; then m advances
 *   *r <- b = table word selected by (y >> ISAAC_LOG2_SIZE), plus x;
 *         then r advances
 * The arguments a, b, m, m2, r, x and y are assigned to, so they must be
 * plain lvalues.  Expands to a bare { } block.
 */
#define rngstep32(mix,a,b,mm,m,m2,r,x,y)		\
{							\
  x = *m;						\
  a = (a^(mix)) + *(m2++);				\
  *(m++) = y = ind32(mm,x) + a + b;			\
  *(r++) = b = ind32(mm,y>>ISAAC_LOG2_SIZE) + x;	\
}

/*
 * Run the generator once over the whole state table (32-bit words).
 *
 * ctx      generator context; its table and its a/b/c words are read
 *          and updated.
 * results  output buffer; one word is stored per table slot rewritten,
 *          so it needs room for ARRAY_LEN (ctx->memory) words.
 */
void
isaac (isaac_t * ctx, uword * results)
{
  u32 *table = ctx->memory;
  u32 *half = table + ARRAY_LEN (ctx->memory) / 2;
  u32 *out = results;
  u32 *slot, *peer;
  u32 acc = ctx->a;
  u32 prev = ctx->b;
  u32 count = ctx->c;
  u32 old, fresh;

  /* Advance the run counter and fold it into the previous result. */
  count += 1;
  prev += count;

  /* Lower half of the table, paired with words from the upper half. */
  for (slot = table, peer = half; slot < half;)
    {
      rngstep32 (acc << 13, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc >> 6, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc << 2, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc >> 16, acc, prev, table, slot, peer, out, old, fresh);
    }

  /* Upper half: slot keeps going, peer restarts at the table base. */
  for (peer = table; peer < half;)
    {
      rngstep32 (acc << 13, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc >> 6, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc << 2, acc, prev, table, slot, peer, out, old, fresh);
      rngstep32 (acc >> 16, acc, prev, table, slot, peer, out, old, fresh);
    }

  /* Save the running words for the next call. */
  ctx->a = acc;
  ctx->b = prev;
  ctx->c = count;
}

/*
 * Perform 2 isaac runs with different contexts simultaneously.
 *
 * ctx      array of two contexts; ctx[0] and ctx[1] are both read and
 *          updated.
 * results  output buffer; lane 0 writes starting at results[0], lane 1
 *          starting at results[ISAAC_SIZE].
 *
 * The steps of the two lanes are interleaved.  Both loops are bounded by
 * lane 0's pointers only; lane 1 takes exactly the same number of steps.
 */
void
isaac2 (isaac_t * ctx, uword * results)
{
  /* Declare one full set of working variables per lane (suffix 0 / 1). */
#define _(n) \
  u32 a##n, b##n, c##n, x##n, y##n, * m##n, * mm##n, * m2##n, * r##n, * mend##n

  _(0);
  _(1);
#undef _

  /* Load lane n from ctx[n] and point its output at its own slice. */
#define _(n)							\
do {								\
  mm##n = ctx[(n)].memory;					\
  r##n = results + (n) * ISAAC_SIZE;				\
  a##n = ctx[(n)].a;						\
  b##n = ctx[(n)].b;						\
  c##n = ctx[(n)].c;						\
  b##n += ++c##n;						\
  mend##n = m2##n = mm##n + ARRAY_LEN (ctx[(n)].memory) / 2;	\
  m##n = mm##n;							\
} while (0)

  _(0);
  _(1);

#undef _

  /*
   * mend1 is assigned but never read ("set but unused variable" error
   * with gcc 4.9).  Reference it only after it has been initialized so
   * that no indeterminate value is evaluated.
   */
  (void) mend1;

  /* First half of each table, second-half words as the m2 stream. */
  while (m0 < mend0)
    {
      rngstep32 (a0 << 13, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 << 13, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 >> 6, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 >> 6, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 << 2, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 << 2, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 >> 16, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 >> 16, a1, b1, mm1, m1, m21, r1, x1, y1);
    }

  /* Second half: m keeps advancing, m2 restarts at the table base. */
  m20 = mm0;
  m21 = mm1;
  while (m20 < mend0)
    {
      rngstep32 (a0 << 13, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 << 13, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 >> 6, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 >> 6, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 << 2, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 << 2, a1, b1, mm1, m1, m21, r1, x1, y1);
      rngstep32 (a0 >> 16, a0, b0, mm0, m0, m20, r0, x0, y0);
      rngstep32 (a1 >> 16, a1, b1, mm1, m1, m21, r1, x1, y1);
    }

  /* Save both lanes' running words for the next call. */
  ctx[0].a = a0;
  ctx[0].b = b0;
  ctx[0].c = c0;
  ctx[1].a = a1;
  ctx[1].b = b1;
  ctx[1].c = c1;
}

/*
 * mix32: scramble eight words in place.  Each row xors one word with a
 * shifted copy of the next one, then performs two additions that feed
 * the result into later rows.  All eight arguments are assigned to and
 * evaluated several times, so they must be side-effect-free lvalues.
 * Expands to a bare { } block.
 */
#define mix32(a,b,c,d,e,f,g,h)			\
{						\
   a^=b<<11; d+=a; b+=c;			\
   b^=c>>2;  e+=b; c+=d;			\
   c^=d<<8;  f+=c; d+=e;			\
   d^=e>>16; g+=d; e+=f;			\
   e^=f<<10; h+=e; f+=g;			\
   f^=g>>4;  a+=f; g+=h;			\
   g^=h<<8;  b+=g; h+=a;			\
   h^=a>>9;  c+=h; a+=b;			\
}

/*
 * Seed a context (32-bit words).
 *
 * ctx    context to initialize; its a/b/c words are zeroed and its
 *        table is overwritten.
 * seeds  seed words, read in groups of eight for indices below
 *        ISAAC_SIZE.
 */
void
isaac_init (isaac_t * ctx, uword * seeds)
{
  u32 v[8];
  u32 *table = ctx->memory;
  u32 *seed = seeds;
  word base, k;

  ctx->a = ctx->b = ctx->c = 0;

  /* Start every mixing word at the golden ratio. */
  for (k = 0; k < 8; k++)
    v[k] = 0x9e3779b9;

  /* Scramble the starting words. */
  for (k = 0; k < 4; k++)
    mix32 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

  /* Pass 1: absorb the seed eight words at a time into the table. */
  for (base = 0; base < ISAAC_SIZE; base += 8)
    {
      for (k = 0; k < 8; k++)
        v[k] += seed[base + k];

      mix32 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

      for (k = 0; k < 8; k++)
        table[base + k] = v[k];
    }

  /* Pass 2: re-absorb the table so every seed word affects every slot. */
  for (base = 0; base < ISAAC_SIZE; base += 8)
    {
      for (k = 0; k < 8; k++)
        v[k] += table[base + k];

      mix32 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

      for (k = 0; k < 8; k++)
        table[base + k] = v[k];
    }
}
#endif /* uword_bits == 32 */

#if uword_bits == 64

/*
 * ind64: read the u64 located at byte offset (x & ((ISAAC_SIZE-1)<<3))
 * from the start of table mm.  The <<3 turns a word index into a byte
 * offset for 8-byte words, so the low three bits of x are ignored.
 * NOTE(review): the mask only selects a valid slot if ISAAC_SIZE is a
 * power of two -- confirm against the header.
 */
#define ind64(mm,x)  (*(u64 *)((u8 *)(mm) + ((x) & ((ISAAC_SIZE-1)<<3))))
/*
 * rngstep64: one generator step.
 *   x  <- current table word *m
 *   a  <- mix + *m2, then m2 advances (unlike the 32-bit step, a is not
 *         xored in here; the callers build it into the mix expression)
 *   *m <- y = table word selected by x, plus a and b; then m advances
 *   *r <- b = table word selected by (y >> ISAAC_LOG2_SIZE), plus x;
 *         then r advances
 * The arguments a, b, m, m2, r, x and y are assigned to, so they must be
 * plain lvalues.  Expands to a bare { } block.
 */
#define rngstep64(mix,a,b,mm,m,m2,r,x,y)		\
{							\
  x = *m;						\
  a = (mix) + *(m2++);					\
  *(m++) = y = ind64(mm,x) + a + b;			\
  *(r++) = b = ind64(mm,y>>ISAAC_LOG2_SIZE) + x;	\
}

/*
 * Run the generator once over the whole state table (64-bit words).
 *
 * ctx      generator context; its table and its a/b/c words are read
 *          and updated.
 * results  output buffer; one word is stored per table slot rewritten,
 *          so it needs room for ARRAY_LEN (ctx->memory) words.
 */
void
isaac (isaac_t * ctx, uword * results)
{
  u64 *table = ctx->memory;
  u64 *half = table + ARRAY_LEN (ctx->memory) / 2;
  u64 *out = results;
  u64 *slot, *peer;
  u64 acc = ctx->a;
  u64 prev = ctx->b;
  u64 count = ctx->c;
  u64 old, fresh;

  /* Advance the run counter and fold it into the previous result. */
  count += 1;
  prev += count;

  /* Lower half of the table, paired with words from the upper half. */
  for (slot = table, peer = half; slot < half;)
    {
      rngstep64 (~(acc ^ (acc << 21)), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc >> 5), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc << 12), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc >> 33), acc, prev, table, slot, peer, out,
                 old, fresh);
    }

  /* Upper half: slot keeps going, peer restarts at the table base. */
  for (peer = table; peer < half;)
    {
      rngstep64 (~(acc ^ (acc << 21)), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc >> 5), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc << 12), acc, prev, table, slot, peer, out,
                 old, fresh);
      rngstep64 (acc ^ (acc >> 33), acc, prev, table, slot, peer, out,
                 old, fresh);
    }

  /* Save the running words for the next call. */
  ctx->a = acc;
  ctx->b = prev;
  ctx->c = count;
}

/*
 * Run two independent generator contexts in lock step (64-bit words).
 *
 * ctx      array of two contexts; ctx[0] and ctx[1] are both read and
 *          updated.
 * results  output buffer; lane 0 writes starting at results[0], lane 1
 *          starting at results[ISAAC_SIZE].
 *
 * The steps of the two lanes are interleaved.  Both loops are bounded by
 * lane 0's pointers only; lane 1 takes exactly the same number of steps.
 */
void
isaac2 (isaac_t * ctx, uword * results)
{
  /* Lane 0 working set. */
  u64 *tbl0 = ctx[0].memory;
  u64 *half0 = tbl0 + ARRAY_LEN (ctx[0].memory) / 2;
  u64 *slot0 = tbl0;
  u64 *peer0 = half0;
  u64 *out0 = results + 0 * ISAAC_SIZE;
  u64 acc0 = ctx[0].a;
  u64 prev0 = ctx[0].b;
  u64 count0 = ctx[0].c;
  u64 old0, fresh0;

  /* Lane 1 working set. */
  u64 *tbl1 = ctx[1].memory;
  u64 *half1 = tbl1 + ARRAY_LEN (ctx[1].memory) / 2;
  u64 *slot1 = tbl1;
  u64 *peer1 = half1;
  u64 *out1 = results + 1 * ISAAC_SIZE;
  u64 acc1 = ctx[1].a;
  u64 prev1 = ctx[1].b;
  u64 count1 = ctx[1].c;
  u64 old1, fresh1;

  /* Advance each run counter and fold it into that lane's last result. */
  count0 += 1;
  prev0 += count0;
  count1 += 1;
  prev1 += count1;

  /* half1 is never read again; reference it to silence the compiler. */
  (void) half1;

  /* Lower half of each table, upper-half words as the second stream. */
  for (; slot0 < half0;)
    {
      rngstep64 (~(acc0 ^ (acc0 << 21)), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (~(acc1 ^ (acc1 << 21)), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 >> 5), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 >> 5), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 << 12), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 << 12), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 >> 33), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 >> 33), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
    }

  /* Upper half: slots keep going, second streams restart at the base. */
  for (peer0 = tbl0, peer1 = tbl1; peer0 < half0;)
    {
      rngstep64 (~(acc0 ^ (acc0 << 21)), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (~(acc1 ^ (acc1 << 21)), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 >> 5), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 >> 5), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 << 12), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 << 12), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
      rngstep64 (acc0 ^ (acc0 >> 33), acc0, prev0, tbl0, slot0, peer0,
                 out0, old0, fresh0);
      rngstep64 (acc1 ^ (acc1 >> 33), acc1, prev1, tbl1, slot1, peer1,
                 out1, old1, fresh1);
    }

  /* Save both lanes' running words for the next call. */
  ctx[0].a = acc0;
  ctx[0].b = prev0;
  ctx[0].c = count0;
  ctx[1].a = acc1;
  ctx[1].b = prev1;
  ctx[1].c = count1;
}

/*
 * mix64: scramble eight words in place.  Each row subtracts one word
 * from another, xors a third with a shifted fourth, then adds two words
 * together, feeding the result into later rows.  All eight arguments are
 * assigned to and evaluated several times, so they must be
 * side-effect-free lvalues.  Expands to a bare { } block.
 */
#define mix64(a,b,c,d,e,f,g,h)			\
{						\
   a-=e; f^=h>>9;  h+=a;			\
   b-=f; g^=a<<9;  a+=b;			\
   c-=g; h^=b>>23; b+=c;			\
   d-=h; a^=c<<15; c+=d;			\
   e-=a; b^=d>>14; d+=e;			\
   f-=b; c^=e<<20; e+=f;			\
   g-=c; d^=f>>17; f+=g;			\
   h-=d; e^=g<<14; g+=h;			\
}

/*
 * Seed a context (64-bit words).
 *
 * ctx    context to initialize; its a/b/c words are zeroed and its
 *        table is overwritten.
 * seeds  seed words, read in groups of eight for indices below
 *        ISAAC_SIZE.
 */
void
isaac_init (isaac_t * ctx, uword * seeds)
{
  u64 v[8];
  u64 *table = ctx->memory;
  u64 *seed = seeds;
  word base, k;

  ctx->a = ctx->b = ctx->c = 0;

  /* Start every mixing word at the golden ratio. */
  for (k = 0; k < 8; k++)
    v[k] = 0x9e3779b97f4a7c13LL;

  /* Scramble the starting words. */
  for (k = 0; k < 4; k++)
    mix64 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

  /* Pass 1: absorb the seed eight words at a time into the table. */
  for (base = 0; base < ISAAC_SIZE; base += 8)
    {
      for (k = 0; k < 8; k++)
        v[k] += seed[base + k];

      mix64 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

      for (k = 0; k < 8; k++)
        table[base + k] = v[k];
    }

  /* Pass 2: re-absorb the table so every seed word affects every slot. */
  for (base = 0; base < ISAAC_SIZE; base += 8)
    {
      for (k = 0; k < 8; k++)
        v[k] += table[base + k];

      mix64 (v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);

      for (k = 0; k < 8; k++)
        table[base + k] = v[k];
    }
}
#endif /* uword_bits == 64 */


/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */