summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/crypto/poly1305.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vppinfra/crypto/poly1305.h')
-rw-r--r--src/vppinfra/crypto/poly1305.h234
1 files changed, 234 insertions, 0 deletions
diff --git a/src/vppinfra/crypto/poly1305.h b/src/vppinfra/crypto/poly1305.h
new file mode 100644
index 00000000000..cd6ea60cdf7
--- /dev/null
+++ b/src/vppinfra/crypto/poly1305.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __clib_poly1305_h__
+#define __clib_poly1305_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+
+/* implementation of DJB's poly1305 using 64-bit arithmetrics */
+
+typedef struct
+{
+ const u64 r[3], s[2];
+ u64 h[3];
+
+ /* partial data */
+ union
+ {
+ u8 as_u8[16];
+ u64 as_u64[2];
+ } partial;
+
+ size_t n_partial_bytes;
+} clib_poly1305_ctx;
+
+static_always_inline void
+clib_poly1305_init (clib_poly1305_ctx *ctx, const u8 key[32])
+{
+ u64u *k = (u64u *) key;
+ u64 *h = (u64 *) ctx->h;
+ u64 *r = (u64 *) ctx->r;
+ u64 *s = (u64 *) ctx->s;
+
+ /* initialize accumulator */
+ h[0] = h[1] = h[2] = 0;
+
+ /* clamp 1st half of the key and store it into r[] */
+ r[0] = k[0] & 0x0ffffffc0fffffff;
+ r[1] = k[1] & 0x0ffffffc0ffffffc;
+ s[0] = k[2];
+ s[1] = k[3];
+
+ /* precompute (r[1] >> 2) * 5 */
+ r[2] = r[1] + (r[1] >> 2);
+
+ ctx->n_partial_bytes = 0;
+}
+
+static_always_inline void
+_clib_poly1305_multiply_and_reduce (u64 h[3], const u64 r[3])
+{
+ union
+ {
+ struct
+ {
+ u64 lo, hi;
+ };
+ u128 n;
+ } l0, l1, l2;
+ u64 c;
+
+ /*
+ h2 h1 h0
+ x r1 r0
+ ---------------------------------------
+ r0 x h2 r0 x h1 r0 × h0
+ + r1 x h2 r1 x h1 r1 x h0
+ ---------------------------------------
+
+ for p = 2^130-5, following applies:
+ (r * 2^130) mod p == (r * 5) mod p
+
+ bits above 130 can be shifted right (divided by 2^130)
+ and multiplied by 5 per equation above
+
+ h2 h1 h0
+ x r1 r0
+ ----------------------------------------------
+ r0 x h2 r0 x h1 r0 × h0
+ + r1 x h0
+ + 5x (r1 >>2) x h2 5x (r1 >>2) x h1
+ ----------------------------------------------
+ [0:l2.lo] [l1.hi:l1.lo] [l0.hi:l0.lo]
+ */
+
+ l0.n = l1.n = l2.n = 0;
+ /* u64 x u64 = u128 multiplications */
+ l0.n += (u128) h[0] * r[0];
+ l0.n += (u128) h[1] * r[2]; /* r[2] holds precomputed (r[1] >> 2) * 5 */
+ l1.n += (u128) h[0] * r[1];
+ l1.n += (u128) h[1] * r[0];
+
+ /* u64 x u64 = u64 multiplications, as h[2] may have only lower 2 bits set
+ * and r[1] have clamped bits 60-63 */
+ l1.n += (u128) (h[2] * r[2]);
+ l2.n += (u128) (h[2] * r[0]);
+
+ /* propagate upper 64 bits to higher limb */
+ c = 0;
+ l1.lo = u64_add_with_carry (&c, l1.lo, l0.hi);
+ l2.lo = u64_add_with_carry (&c, l2.lo, l1.hi);
+
+ l2.hi = l2.lo;
+ /* keep bits [128:129] */
+ l2.lo &= 3;
+
+ /* bits 130 and above multiply with 5 and store to l2.hi */
+ l2.hi -= l2.lo;
+ l2.hi += l2.hi >> 2;
+
+ /* add l2.hi to l0.lo with carry propagation and store result to h2:h1:h0 */
+ c = 0;
+ h[0] = u64_add_with_carry (&c, l0.lo, l2.hi);
+ h[1] = u64_add_with_carry (&c, l1.lo, 0);
+ h[2] = u64_add_with_carry (&c, l2.lo, 0);
+}
+
+static_always_inline u32
+_clib_poly1305_add_blocks (clib_poly1305_ctx *ctx, const u8 *msg,
+ uword n_bytes, const u32 bit17)
+{
+ u64 r[3], h[3];
+
+ for (int i = 0; i < 3; i++)
+ {
+ h[i] = ctx->h[i];
+ r[i] = ctx->r[i];
+ }
+
+ for (const u64u *m = (u64u *) msg; n_bytes >= 16; n_bytes -= 16, m += 2)
+ {
+ u64 c = 0;
+
+ /* h += m */
+ h[0] = u64_add_with_carry (&c, h[0], m[0]);
+ h[1] = u64_add_with_carry (&c, h[1], m[1]);
+ h[2] = u64_add_with_carry (&c, h[2], bit17 ? 1 : 0);
+
+ /* h = (h * r) mod p */
+ _clib_poly1305_multiply_and_reduce (h, r);
+ }
+
+ for (int i = 0; i < 3; i++)
+ ctx->h[i] = h[i];
+
+ return n_bytes;
+}
+
+static_always_inline void
+clib_poly1305_update (clib_poly1305_ctx *ctx, const u8 *msg, uword len)
+{
+ uword n_left = len;
+
+ if (n_left == 0)
+ return;
+
+ if (ctx->n_partial_bytes)
+ {
+ u16 missing_bytes = 16 - ctx->n_partial_bytes;
+ if (PREDICT_FALSE (n_left < missing_bytes))
+ {
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ n_left);
+ ctx->n_partial_bytes += n_left;
+ return;
+ }
+
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ missing_bytes);
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 1);
+ ctx->n_partial_bytes = 0;
+ n_left -= missing_bytes;
+ msg += missing_bytes;
+ }
+
+ n_left = _clib_poly1305_add_blocks (ctx, msg, n_left, 1);
+
+ if (n_left)
+ {
+ ctx->partial.as_u64[0] = ctx->partial.as_u64[1] = 0;
+ clib_memcpy_fast (ctx->partial.as_u8, msg + len - n_left, n_left);
+ ctx->n_partial_bytes = n_left;
+ }
+}
+
+static_always_inline void
+clib_poly1305_final (clib_poly1305_ctx *ctx, u8 *out)
+{
+ const u64 p[] = { 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 3 }; /* 2^128-5 */
+ const u64 *s = ctx->s;
+ u64u *t = (u64u *) out;
+ u64 h0, h1, t0, t1;
+ u64 c;
+
+ if (ctx->n_partial_bytes)
+ {
+ ctx->partial.as_u8[ctx->n_partial_bytes] = 1;
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 0);
+ }
+
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+
+ /* h may not be fully reduced, try to subtract 2^128-5 */
+ c = 0;
+ t0 = u64_sub_with_borrow (&c, h0, p[0]);
+ t1 = u64_sub_with_borrow (&c, h1, p[1]);
+ u64_sub_with_borrow (&c, ctx->h[2], p[2]);
+
+ if (!c)
+ {
+ h0 = t0;
+ h1 = t1;
+ }
+
+ c = 0;
+ t[0] = u64_add_with_carry (&c, h0, s[0]);
+ t[1] = u64_add_with_carry (&c, h1, s[1]);
+}
+
+static_always_inline void
+clib_poly1305 (const u8 *key, const u8 *msg, uword len, u8 *out)
+{
+ clib_poly1305_ctx ctx;
+ clib_poly1305_init (&ctx, key);
+ clib_poly1305_update (&ctx, msg, len);
+ clib_poly1305_final (&ctx, out);
+}
+
+#endif /* __clib_poly1305_h__ */