aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/common/include/rte_reciprocal.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/common/include/rte_reciprocal.h')
-rw-r--r--lib/librte_eal/common/include/rte_reciprocal.h90
1 files changed, 90 insertions, 0 deletions
diff --git a/lib/librte_eal/common/include/rte_reciprocal.h b/lib/librte_eal/common/include/rte_reciprocal.h
new file mode 100644
index 00000000..3492c73b
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_reciprocal.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+/*
+ * Reciprocal divide
+ *
+ * Used with permission from original authors
+ * Hannes Frederic Sowa and Daniel Borkmann
+ *
+ * This algorithm is based on the paper "Division by Invariant
+ * Integers Using Multiplication" by Torbjörn Granlund and Peter
+ * L. Montgomery.
+ *
+ * The assembler implementation from Agner Fog, which this code is
+ * based on, can be found here:
+ * http://www.agner.org/optimize/asmlib.zip
+ *
+ * This optimization for A/B is helpful if the divisor B is mostly
+ * runtime invariant. The reciprocal of B is calculated in the
+ * slow-path with reciprocal_value(). The fast-path can then just use
+ * a much faster multiplication operation with a variable dividend A
+ * to calculate the division A/B.
+ */
+
+#ifndef _RTE_RECIPROCAL_H_
+#define _RTE_RECIPROCAL_H_
+
+#include <stdint.h>
+
+struct rte_reciprocal {
+ uint32_t m;
+ uint8_t sh1, sh2;
+};
+
+struct rte_reciprocal_u64 {
+ uint64_t m;
+ uint8_t sh1, sh2;
+};
+
+static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
+{
+ uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
+
+ return (t + ((a - t) >> R.sh1)) >> R.sh2;
+}
+
+static __rte_always_inline uint64_t
+mullhi_u64(uint64_t x, uint64_t y)
+{
+#ifdef __SIZEOF_INT128__
+ __uint128_t xl = x;
+ __uint128_t rl = xl * y;
+
+ return (rl >> 64);
+#else
+ uint64_t u0, u1, v0, v1, k, t;
+ uint64_t w1, w2;
+ uint64_t whi;
+
+ u1 = x >> 32; u0 = x & 0xFFFFFFFF;
+ v1 = y >> 32; v0 = y & 0xFFFFFFFF;
+
+ t = u0*v0;
+ k = t >> 32;
+
+ t = u1*v0 + k;
+ w1 = t & 0xFFFFFFFF;
+ w2 = t >> 32;
+
+ t = u0*v1 + w1;
+ k = t >> 32;
+
+ whi = u1*v1 + w2 + k;
+
+ return whi;
+#endif
+}
+
+static __rte_always_inline uint64_t
+rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
+{
+ uint64_t t = mullhi_u64(a, R->m);
+
+ return (t + ((a - t) >> R->sh1)) >> R->sh2;
+}
+
+struct rte_reciprocal rte_reciprocal_value(uint32_t d);
+struct rte_reciprocal_u64 rte_reciprocal_value_u64(uint64_t d);
+
+#endif /* _RTE_RECIPROCAL_H_ */