summaryrefslogtreecommitdiffstats
path: root/src/dpdk22/lib/librte_acl
diff options
context:
space:
mode:
authorIdo Barnea <ibarnea@cisco.com>2015-12-09 05:07:44 +0200
committerIdo Barnea <ibarnea@cisco.com>2015-12-27 08:50:12 +0200
commit509648b87434b9032d38b8ca5ad470ba3edcc036 (patch)
tree920548ce9e2e5aeed4c88c1b288290505e7d7987 /src/dpdk22/lib/librte_acl
parentb161dc672544a913f7f1ddf3a086dd75f2f1134a (diff)
Adding dpdk 2.2 instead of dpdk 1.8 and making changes to make compilation work.
40G and 10G filters do not work yet.
Diffstat (limited to 'src/dpdk22/lib/librte_acl')
-rw-r--r--src/dpdk22/lib/librte_acl/acl.h241
-rw-r--r--src/dpdk22/lib/librte_acl/acl_run.h263
-rw-r--r--src/dpdk22/lib/librte_acl/acl_run_avx2.h284
-rw-r--r--src/dpdk22/lib/librte_acl/acl_run_neon.h289
-rw-r--r--src/dpdk22/lib/librte_acl/acl_run_sse.h357
-rw-r--r--src/dpdk22/lib/librte_acl/acl_vect.h116
-rw-r--r--src/dpdk22/lib/librte_acl/rte_acl.h388
-rw-r--r--src/dpdk22/lib/librte_acl/rte_acl_osdep.h80
-rw-r--r--src/dpdk22/lib/librte_acl/tb_mem.h76
9 files changed, 2094 insertions, 0 deletions
diff --git a/src/dpdk22/lib/librte_acl/acl.h b/src/dpdk22/lib/librte_acl/acl.h
new file mode 100644
index 00000000..09d67841
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl.h
@@ -0,0 +1,241 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ACL_H_
+#define _ACL_H_
+
+#ifdef __cplusplus
+extern"C" {
+#endif /* __cplusplus */
+
+#define RTE_ACL_QUAD_MAX 5
+#define RTE_ACL_QUAD_SIZE 4
+#define RTE_ACL_QUAD_SINGLE UINT64_C(0x7f7f7f7f00000000)
+
+#define RTE_ACL_SINGLE_TRIE_SIZE 2000
+
+#define RTE_ACL_DFA_MAX UINT8_MAX
+#define RTE_ACL_DFA_SIZE (UINT8_MAX + 1)
+
+#define RTE_ACL_DFA_GR64_SIZE 64
+#define RTE_ACL_DFA_GR64_NUM (RTE_ACL_DFA_SIZE / RTE_ACL_DFA_GR64_SIZE)
+#define RTE_ACL_DFA_GR64_BIT \
+ (CHAR_BIT * sizeof(uint32_t) / RTE_ACL_DFA_GR64_NUM)
+
+typedef int bits_t;
+
+#define RTE_ACL_BIT_SET_SIZE ((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT))
+
+struct rte_acl_bitset {
+ bits_t bits[RTE_ACL_BIT_SET_SIZE];
+};
+
+#define RTE_ACL_NODE_DFA (0 << RTE_ACL_TYPE_SHIFT)
+#define RTE_ACL_NODE_SINGLE (1U << RTE_ACL_TYPE_SHIFT)
+#define RTE_ACL_NODE_QRANGE (3U << RTE_ACL_TYPE_SHIFT)
+#define RTE_ACL_NODE_MATCH (4U << RTE_ACL_TYPE_SHIFT)
+#define RTE_ACL_NODE_TYPE (7U << RTE_ACL_TYPE_SHIFT)
+#define RTE_ACL_NODE_UNDEFINED UINT32_MAX
+
+/*
+ * ACL RT structure is a set of multibit tries (with stride == 8)
+ * represented by an array of transitions. The next position is calculated
+ * based on the current position and the input byte.
+ * Each transition is 64 bit value with the following format:
+ * | node_type_specific : 32 | node_type : 3 | node_addr : 29 |
+ * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index
+ * to the start of the node in the transtions array.
+ * Few different node types are used:
+ * RTE_ACL_NODE_MATCH:
+ * node_addr value is and index into an array that contains the return value
+ * and its priority for each category.
+ * Upper 32 bits of the transition value are not used for that node type.
+ * RTE_ACL_NODE_QRANGE:
+ * that node consist of up to 5 transitions.
+ * Upper 32 bits are interpreted as 4 signed character values which
+ * are ordered from smallest(INT8_MIN) to largest (INT8_MAX).
+ * These values define 5 ranges:
+ * INT8_MIN <= range[0] <= ((int8_t *)&transition)[4]
+ * ((int8_t *)&transition)[4] < range[1] <= ((int8_t *)&transition)[5]
+ * ((int8_t *)&transition)[5] < range[2] <= ((int8_t *)&transition)[6]
+ * ((int8_t *)&transition)[6] < range[3] <= ((int8_t *)&transition)[7]
+ * ((int8_t *)&transition)[7] < range[4] <= INT8_MAX
+ * So for input byte value within range[i] i-th transition within that node
+ * will be used.
+ * RTE_ACL_NODE_SINGLE:
+ * always transitions to the same node regardless of the input value.
+ * RTE_ACL_NODE_DFA:
+ * that node consits of up to 256 transitions.
+ * In attempt to conserve space all transitions are divided into 4 consecutive
+ * groups, by 64 transitions per group:
+ * group64[i] contains transitions[i * 64, .. i * 64 + 63].
+ * Upper 32 bits are interpreted as 4 unsigned character values one per group,
+ * which contain index to the start of the given group within the node.
+ * So to calculate transition index within the node for given input byte value:
+ * input_byte - ((uint8_t *)&transition)[4 + input_byte / 64].
+ */
+
+/*
+ * Structure of a node is a set of ptrs and each ptr has a bit map
+ * of values associated with this transition.
+ */
+struct rte_acl_ptr_set {
+ struct rte_acl_bitset values; /* input values associated with ptr */
+ struct rte_acl_node *ptr; /* transition to next node */
+};
+
+struct rte_acl_classifier_results {
+ int results[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_match_results {
+ uint32_t results[RTE_ACL_MAX_CATEGORIES];
+ int32_t priority[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_node {
+ uint64_t node_index; /* index for this node */
+ uint32_t level; /* level 0-n in the trie */
+ uint32_t ref_count; /* ref count for this node */
+ struct rte_acl_bitset values;
+ /* set of all values that map to another node
+ * (union of bits in each transition.
+ */
+ uint32_t num_ptrs; /* number of ptr_set in use */
+ uint32_t max_ptrs; /* number of allocated ptr_set */
+ uint32_t min_add; /* number of ptr_set per allocation */
+ struct rte_acl_ptr_set *ptrs; /* transitions array for this node */
+ int32_t match_flag;
+ int32_t match_index; /* index to match data */
+ uint32_t node_type;
+ int32_t fanout;
+ /* number of ranges (transitions w/ consecutive bits) */
+ int32_t id;
+ struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */
+ union {
+ char transitions[RTE_ACL_QUAD_SIZE];
+ /* boundaries for ranged node */
+ uint8_t dfa_gr64[RTE_ACL_DFA_GR64_NUM];
+ };
+ struct rte_acl_node *next;
+ /* free list link or pointer to duplicate node during merge */
+ struct rte_acl_node *prev;
+ /* points to node from which this node was duplicated */
+};
+
+/*
+ * Types of tries used to generate runtime structure(s)
+ */
+enum {
+ RTE_ACL_FULL_TRIE = 0,
+ RTE_ACL_NOSRC_TRIE = 1,
+ RTE_ACL_NODST_TRIE = 2,
+ RTE_ACL_NOPORTS_TRIE = 4,
+ RTE_ACL_NOVLAN_TRIE = 8,
+ RTE_ACL_UNUSED_TRIE = 0x80000000
+};
+
+
+/** MAX number of tries per one ACL context.*/
+#define RTE_ACL_MAX_TRIES 8
+
+/** Max number of characters in PM name.*/
+#define RTE_ACL_NAMESIZE 32
+
+
+struct rte_acl_trie {
+ uint32_t type;
+ uint32_t count;
+ uint32_t root_index;
+ const uint32_t *data_index;
+ uint32_t num_data_indexes;
+};
+
+struct rte_acl_bld_trie {
+ struct rte_acl_node *trie;
+};
+
+struct rte_acl_ctx {
+ char name[RTE_ACL_NAMESIZE];
+ /** Name of the ACL context. */
+ int32_t socket_id;
+ /** Socket ID to allocate memory from. */
+ enum rte_acl_classify_alg alg;
+ void *rules;
+ uint32_t max_rules;
+ uint32_t rule_sz;
+ uint32_t num_rules;
+ uint32_t num_categories;
+ uint32_t num_tries;
+ uint32_t match_index;
+ uint64_t no_match;
+ uint64_t idle;
+ uint64_t *trans_table;
+ uint32_t *data_indexes;
+ struct rte_acl_trie trie[RTE_ACL_MAX_TRIES];
+ void *mem;
+ size_t mem_sz;
+ struct rte_acl_config config; /* copy of build config. */
+};
+
+int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+ struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+ uint32_t num_categories, uint32_t data_index_sz, size_t max_size);
+
+typedef int (*rte_acl_classify_t)
+(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t);
+
+/*
+ * Different implementations of ACL classify.
+ */
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t num, uint32_t categories);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ACL_H_ */
diff --git a/src/dpdk22/lib/librte_acl/acl_run.h b/src/dpdk22/lib/librte_acl/acl_run.h
new file mode 100644
index 00000000..b2fc42c6
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run.h
@@ -0,0 +1,263 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ACL_RUN_H_
+#define _ACL_RUN_H_
+
+#include <rte_acl.h>
+#include "acl.h"
+
+#define MAX_SEARCHES_AVX16 16
+#define MAX_SEARCHES_SSE8 8
+#define MAX_SEARCHES_SSE4 4
+#define MAX_SEARCHES_SCALAR 2
+
+#define GET_NEXT_4BYTES(prm, idx) \
+ (*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++)))
+
+
+#define RTE_ACL_NODE_INDEX ((uint32_t)~RTE_ACL_NODE_TYPE)
+
+#define SCALAR_QRANGE_MULT 0x01010101
+#define SCALAR_QRANGE_MASK 0x7f7f7f7f
+#define SCALAR_QRANGE_MIN 0x80808080
+
+/*
+ * Structure to manage N parallel trie traversals.
+ * The runtime trie traversal routines can process 8, 4, or 2 tries
+ * in parallel. Each packet may require multiple trie traversals (up to 4).
+ * This structure is used to fill the slots (0 to n-1) for parallel processing
+ * with the trie traversals needed for each packet.
+ */
+struct acl_flow_data {
+ uint32_t num_packets;
+ /* number of packets processed */
+ uint32_t started;
+ /* number of trie traversals in progress */
+ uint32_t trie;
+ /* current trie index (0 to N-1) */
+ uint32_t cmplt_size;
+ uint32_t total_packets;
+ uint32_t categories;
+ /* number of result categories per packet. */
+ /* maximum number of packets to process */
+ const uint64_t *trans;
+ const uint8_t **data;
+ uint32_t *results;
+ struct completion *last_cmplt;
+ struct completion *cmplt_array;
+};
+
+/*
+ * Structure to maintain running results for
+ * a single packet (up to 4 tries).
+ */
+struct completion {
+ uint32_t *results; /* running results. */
+ int32_t priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */
+ uint32_t count; /* num of remaining tries */
+ /* true for allocated struct */
+} __attribute__((aligned(XMM_SIZE)));
+
+/*
+ * One parms structure for each slot in the search engine.
+ */
+struct parms {
+ const uint8_t *data;
+ /* input data for this packet */
+ const uint32_t *data_index;
+ /* data indirection for this trie */
+ struct completion *cmplt;
+ /* completion data for this packet */
+};
+
+/*
+ * Define an global idle node for unused engine slots
+ */
+static const uint32_t idle[UINT8_MAX + 1];
+
+/*
+ * Allocate a completion structure to manage the tries for a packet.
+ */
+static inline struct completion *
+alloc_completion(struct completion *p, uint32_t size, uint32_t tries,
+ uint32_t *results)
+{
+ uint32_t n;
+
+ for (n = 0; n < size; n++) {
+
+ if (p[n].count == 0) {
+
+ /* mark as allocated and set number of tries. */
+ p[n].count = tries;
+ p[n].results = results;
+ return &(p[n]);
+ }
+ }
+
+ /* should never get here */
+ return NULL;
+}
+
+/*
+ * Resolve priority for a single result trie.
+ */
+static inline void
+resolve_single_priority(uint64_t transition, int n,
+ const struct rte_acl_ctx *ctx, struct parms *parms,
+ const struct rte_acl_match_results *p)
+{
+ if (parms[n].cmplt->count == ctx->num_tries ||
+ parms[n].cmplt->priority[0] <=
+ p[transition].priority[0]) {
+
+ parms[n].cmplt->priority[0] = p[transition].priority[0];
+ parms[n].cmplt->results[0] = p[transition].results[0];
+ }
+}
+
+/*
+ * Routine to fill a slot in the parallel trie traversal array (parms) from
+ * the list of packets (flows).
+ */
+static inline uint64_t
+acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n,
+ const struct rte_acl_ctx *ctx)
+{
+ uint64_t transition;
+
+ /* if there are any more packets to process */
+ if (flows->num_packets < flows->total_packets) {
+ parms[n].data = flows->data[flows->num_packets];
+ parms[n].data_index = ctx->trie[flows->trie].data_index;
+
+ /* if this is the first trie for this packet */
+ if (flows->trie == 0) {
+ flows->last_cmplt = alloc_completion(flows->cmplt_array,
+ flows->cmplt_size, ctx->num_tries,
+ flows->results +
+ flows->num_packets * flows->categories);
+ }
+
+ /* set completion parameters and starting index for this slot */
+ parms[n].cmplt = flows->last_cmplt;
+ transition =
+ flows->trans[parms[n].data[*parms[n].data_index++] +
+ ctx->trie[flows->trie].root_index];
+
+ /*
+ * if this is the last trie for this packet,
+ * then setup next packet.
+ */
+ flows->trie++;
+ if (flows->trie >= ctx->num_tries) {
+ flows->trie = 0;
+ flows->num_packets++;
+ }
+
+ /* keep track of number of active trie traversals */
+ flows->started++;
+
+ /* no more tries to process, set slot to an idle position */
+ } else {
+ transition = ctx->idle;
+ parms[n].data = (const uint8_t *)idle;
+ parms[n].data_index = idle;
+ }
+ return transition;
+}
+
+static inline void
+acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt,
+ uint32_t cmplt_size, const uint8_t **data, uint32_t *results,
+ uint32_t data_num, uint32_t categories, const uint64_t *trans)
+{
+ flows->num_packets = 0;
+ flows->started = 0;
+ flows->trie = 0;
+ flows->last_cmplt = NULL;
+ flows->cmplt_array = cmplt;
+ flows->total_packets = data_num;
+ flows->categories = categories;
+ flows->cmplt_size = cmplt_size;
+ flows->data = data;
+ flows->results = results;
+ flows->trans = trans;
+}
+
+typedef void (*resolve_priority_t)
+(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+ struct parms *parms, const struct rte_acl_match_results *p,
+ uint32_t categories);
+
+/*
+ * Detect matches. If a match node transition is found, then this trie
+ * traversal is complete and fill the slot with the next trie
+ * to be processed.
+ */
+static inline uint64_t
+acl_match_check(uint64_t transition, int slot,
+ const struct rte_acl_ctx *ctx, struct parms *parms,
+ struct acl_flow_data *flows, resolve_priority_t resolve_priority)
+{
+ const struct rte_acl_match_results *p;
+
+ p = (const struct rte_acl_match_results *)
+ (flows->trans + ctx->match_index);
+
+ if (transition & RTE_ACL_NODE_MATCH) {
+
+ /* Remove flags from index and decrement active traversals */
+ transition &= RTE_ACL_NODE_INDEX;
+ flows->started--;
+
+ /* Resolve priorities for this trie and running results */
+ if (flows->categories == 1)
+ resolve_single_priority(transition, slot, ctx,
+ parms, p);
+ else
+ resolve_priority(transition, slot, ctx, parms,
+ p, flows->categories);
+
+ /* Count down completed tries for this search request */
+ parms[slot].cmplt->count--;
+
+ /* Fill the slot with the next trie or idle trie */
+ transition = acl_start_next_trie(flows, parms, slot, ctx);
+ }
+
+ return transition;
+}
+
+#endif /* _ACL_RUN_H_ */
diff --git a/src/dpdk22/lib/librte_acl/acl_run_avx2.h b/src/dpdk22/lib/librte_acl/acl_run_avx2.h
new file mode 100644
index 00000000..b01a46a5
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_avx2.h
@@ -0,0 +1,284 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run_sse.h"
+
+static const rte_ymm_t ymm_match_mask = {
+ .u32 = {
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ },
+};
+
+static const rte_ymm_t ymm_index_mask = {
+ .u32 = {
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ },
+};
+
+static const rte_ymm_t ymm_shuffle_input = {
+ .u32 = {
+ 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+ 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+ },
+};
+
+static const rte_ymm_t ymm_ones_16 = {
+ .u16 = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+};
+
+static const rte_ymm_t ymm_range_base = {
+ .u32 = {
+ 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+ 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+ },
+};
+
+/*
+ * Process 8 transitions in parallel.
+ * tr_lo contains low 32 bits for 8 transition.
+ * tr_hi contains high 32 bits for 8 transition.
+ * next_input contains up to 4 input bytes for 8 flows.
+ */
+static inline __attribute__((always_inline)) ymm_t
+transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+ const int32_t *tr;
+ ymm_t addr;
+
+ tr = (const int32_t *)(uintptr_t)trans;
+
+ /* Calculate the address (array index) for all 8 transitions. */
+ ACL_TR_CALC_ADDR(mm256, 256, addr, ymm_index_mask.y, next_input,
+ ymm_shuffle_input.y, ymm_ones_16.y, ymm_range_base.y,
+ *tr_lo, *tr_hi);
+
+ /* load lower 32 bits of 8 transactions at once. */
+ *tr_lo = _mm256_i32gather_epi32(tr, addr, sizeof(trans[0]));
+
+ next_input = _mm256_srli_epi32(next_input, CHAR_BIT);
+
+ /* load high 32 bits of 8 transactions at once. */
+ *tr_hi = _mm256_i32gather_epi32(tr + 1, addr, sizeof(trans[0]));
+
+ return next_input;
+}
+
+/*
+ * Process matches for 8 flows.
+ * tr_lo contains low 32 bits for 8 transition.
+ * tr_hi contains high 32 bits for 8 transition.
+ */
+static inline void
+acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx,
+ struct parms *parms, struct acl_flow_data *flows, uint32_t slot,
+ ymm_t matches, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+ ymm_t t0, t1;
+ ymm_t lo, hi;
+ xmm_t l0, l1;
+ uint32_t i;
+ uint64_t tr[MAX_SEARCHES_SSE8];
+
+ l1 = _mm256_extracti128_si256(*tr_lo, 1);
+ l0 = _mm256_castsi256_si128(*tr_lo);
+
+ for (i = 0; i != RTE_DIM(tr) / 2; i++) {
+
+ /*
+ * Extract low 32bits of each transition.
+ * That's enough to process the match.
+ */
+ tr[i] = (uint32_t)_mm_cvtsi128_si32(l0);
+ tr[i + 4] = (uint32_t)_mm_cvtsi128_si32(l1);
+
+ l0 = _mm_srli_si128(l0, sizeof(uint32_t));
+ l1 = _mm_srli_si128(l1, sizeof(uint32_t));
+
+ tr[i] = acl_match_check(tr[i], slot + i,
+ ctx, parms, flows, resolve_priority_sse);
+ tr[i + 4] = acl_match_check(tr[i + 4], slot + i + 4,
+ ctx, parms, flows, resolve_priority_sse);
+ }
+
+ /* Collect new transitions into 2 YMM registers. */
+ t0 = _mm256_set_epi64x(tr[5], tr[4], tr[1], tr[0]);
+ t1 = _mm256_set_epi64x(tr[7], tr[6], tr[3], tr[2]);
+
+ /* For each transition: put low 32 into tr_lo and high 32 into tr_hi */
+ ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi);
+
+ /* Keep transitions wth NOMATCH intact. */
+ *tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches);
+ *tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches);
+}
+
+static inline void
+acl_match_check_avx2x8(const struct rte_acl_ctx *ctx, struct parms *parms,
+ struct acl_flow_data *flows, uint32_t slot,
+ ymm_t *tr_lo, ymm_t *tr_hi, ymm_t match_mask)
+{
+ uint32_t msk;
+ ymm_t matches, temp;
+
+ /* test for match node */
+ temp = _mm256_and_si256(match_mask, *tr_lo);
+ matches = _mm256_cmpeq_epi32(temp, match_mask);
+ msk = _mm256_movemask_epi8(matches);
+
+ while (msk != 0) {
+
+ acl_process_matches_avx2x8(ctx, parms, flows, slot,
+ matches, tr_lo, tr_hi);
+ temp = _mm256_and_si256(match_mask, *tr_lo);
+ matches = _mm256_cmpeq_epi32(temp, match_mask);
+ msk = _mm256_movemask_epi8(matches);
+ }
+}
+
+/*
+ * Execute trie traversal for up to 16 flows in parallel.
+ */
+static inline int
+search_avx2x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+ uint32_t n;
+ struct acl_flow_data flows;
+ uint64_t index_array[MAX_SEARCHES_AVX16];
+ struct completion cmplt[MAX_SEARCHES_AVX16];
+ struct parms parms[MAX_SEARCHES_AVX16];
+ ymm_t input[2], tr_lo[2], tr_hi[2];
+ ymm_t t0, t1;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < RTE_DIM(cmplt); n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ t0 = _mm256_set_epi64x(index_array[5], index_array[4],
+ index_array[1], index_array[0]);
+ t1 = _mm256_set_epi64x(index_array[7], index_array[6],
+ index_array[3], index_array[2]);
+
+ ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[0], tr_hi[0]);
+
+ t0 = _mm256_set_epi64x(index_array[13], index_array[12],
+ index_array[9], index_array[8]);
+ t1 = _mm256_set_epi64x(index_array[15], index_array[14],
+ index_array[11], index_array[10]);
+
+ ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[1], tr_hi[1]);
+
+ /* Check for any matches. */
+ acl_match_check_avx2x8(ctx, parms, &flows, 0, &tr_lo[0], &tr_hi[0],
+ ymm_match_mask.y);
+ acl_match_check_avx2x8(ctx, parms, &flows, 8, &tr_lo[1], &tr_hi[1],
+ ymm_match_mask.y);
+
+ while (flows.started > 0) {
+
+ uint32_t in[MAX_SEARCHES_SSE8];
+
+ /* Gather 4 bytes of input data for first 8 flows. */
+ in[0] = GET_NEXT_4BYTES(parms, 0);
+ in[4] = GET_NEXT_4BYTES(parms, 4);
+ in[1] = GET_NEXT_4BYTES(parms, 1);
+ in[5] = GET_NEXT_4BYTES(parms, 5);
+ in[2] = GET_NEXT_4BYTES(parms, 2);
+ in[6] = GET_NEXT_4BYTES(parms, 6);
+ in[3] = GET_NEXT_4BYTES(parms, 3);
+ in[7] = GET_NEXT_4BYTES(parms, 7);
+ input[0] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+ in[3], in[2], in[1], in[0]);
+
+ /* Gather 4 bytes of input data for last 8 flows. */
+ in[0] = GET_NEXT_4BYTES(parms, 8);
+ in[4] = GET_NEXT_4BYTES(parms, 12);
+ in[1] = GET_NEXT_4BYTES(parms, 9);
+ in[5] = GET_NEXT_4BYTES(parms, 13);
+ in[2] = GET_NEXT_4BYTES(parms, 10);
+ in[6] = GET_NEXT_4BYTES(parms, 14);
+ in[3] = GET_NEXT_4BYTES(parms, 11);
+ in[7] = GET_NEXT_4BYTES(parms, 15);
+ input[1] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+ in[3], in[2], in[1], in[0]);
+
+ input[0] = transition8(input[0], flows.trans,
+ &tr_lo[0], &tr_hi[0]);
+ input[1] = transition8(input[1], flows.trans,
+ &tr_lo[1], &tr_hi[1]);
+
+ input[0] = transition8(input[0], flows.trans,
+ &tr_lo[0], &tr_hi[0]);
+ input[1] = transition8(input[1], flows.trans,
+ &tr_lo[1], &tr_hi[1]);
+
+ input[0] = transition8(input[0], flows.trans,
+ &tr_lo[0], &tr_hi[0]);
+ input[1] = transition8(input[1], flows.trans,
+ &tr_lo[1], &tr_hi[1]);
+
+ input[0] = transition8(input[0], flows.trans,
+ &tr_lo[0], &tr_hi[0]);
+ input[1] = transition8(input[1], flows.trans,
+ &tr_lo[1], &tr_hi[1]);
+
+ /* Check for any matches. */
+ acl_match_check_avx2x8(ctx, parms, &flows, 0,
+ &tr_lo[0], &tr_hi[0], ymm_match_mask.y);
+ acl_match_check_avx2x8(ctx, parms, &flows, 8,
+ &tr_lo[1], &tr_hi[1], ymm_match_mask.y);
+ }
+
+ return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_run_neon.h b/src/dpdk22/lib/librte_acl/acl_run_neon.h
new file mode 100644
index 00000000..cf7c57fb
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_neon.h
@@ -0,0 +1,289 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (C) Cavium networks Ltd. 2015.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Cavium networks nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+struct _neon_acl_const {
+ rte_xmm_t xmm_shuffle_input;
+ rte_xmm_t xmm_index_mask;
+ rte_xmm_t range_base;
+} neon_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+ {
+ .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
+ },
+ {
+ .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+ },
+ {
+ .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
+ },
+};
+
+/*
+ * Resolve priority for multiple results (neon version).
+ * This consists comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+ struct parms *parms,
+ const struct rte_acl_match_results *p,
+ uint32_t categories)
+{
+ uint32_t x;
+ int32x4_t results, priority, results1, priority1;
+ uint32x4_t selector;
+ int32_t *saved_results, *saved_priority;
+
+ for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+ saved_results = (int32_t *)(&parms[n].cmplt->results[x]);
+ saved_priority = (int32_t *)(&parms[n].cmplt->priority[x]);
+
+ /* get results and priorities for completed trie */
+ results = vld1q_s32(
+ (const int32_t *)&p[transition].results[x]);
+ priority = vld1q_s32(
+ (const int32_t *)&p[transition].priority[x]);
+
+ /* if this is not the first completed trie */
+ if (parms[n].cmplt->count != ctx->num_tries) {
+ /* get running best results and their priorities */
+ results1 = vld1q_s32(saved_results);
+ priority1 = vld1q_s32(saved_priority);
+
+ /* select results that are highest priority */
+ selector = vcgtq_s32(priority1, priority);
+ results = vbslq_s32(selector, results1, results);
+ priority = vbslq_s32(selector, priority1, priority);
+ }
+
+ /* save running best results and their priorities */
+ vst1q_s32(saved_results, results);
+ vst1q_s32(saved_priority, priority);
+ }
+}
+
+/*
+ * Check for any match in 4 transitions
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+ return ((val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH);
+}
+
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+ struct acl_flow_data *flows, uint64_t transitions[])
+{
+ while (check_any_match_x4(transitions)) {
+ transitions[0] = acl_match_check(transitions[0], slot, ctx,
+ parms, flows, resolve_priority_neon);
+ transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+ parms, flows, resolve_priority_neon);
+ transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+ parms, flows, resolve_priority_neon);
+ transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+ parms, flows, resolve_priority_neon);
+ }
+}
+
+/*
+ * Process 4 transitions (in 2 NEON Q registers) in parallel
+ */
+static inline __attribute__((always_inline)) int32x4_t
+transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
+{
+ int32x4x2_t tr_hi_lo;
+ int32x4_t t, in, r;
+ uint32x4_t index_msk, node_type, addr;
+ uint32x4_t dfa_msk, mask, quad_ofs, dfa_ofs;
+
+ /* Move low 32 into tr_hi_lo.val[0] and high 32 into tr_hi_lo.val[1] */
+ tr_hi_lo = vld2q_s32((const int32_t *)transitions);
+
+ /* Calculate the address (array index) for all 4 transitions. */
+
+ index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask);
+
+ /* Calc node type and node addr */
+ node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
+ addr = vandq_s32(tr_hi_lo.val[0], index_msk);
+
+ /* t = 0 */
+ t = veorq_s32(node_type, node_type);
+
+ /* mask for DFA type(0) nodes */
+ dfa_msk = vceqq_u32(node_type, t);
+
+ mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
+ in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
+
+ /* DFA calculations. */
+ r = vshrq_n_u32(in, 30); /* div by 64 */
+ mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base);
+ r = vaddq_u8(r, mask);
+ t = vshrq_n_u32(in, 24);
+ r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
+ dfa_ofs = vsubq_s32(t, r);
+
+ /* QUAD/SINGLE calculations. */
+ t = vcgtq_s8(in, tr_hi_lo.val[1]);
+ t = vabsq_s8(t);
+ t = vpaddlq_u8(t);
+ quad_ofs = vpaddlq_u16(t);
+
+ /* blend DFA and QUAD/SINGLE. */
+ t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs);
+
+ /* calculate address for next transitions */
+ addr = vaddq_u32(addr, t);
+
+ /* Fill next transitions */
+ transitions[0] = trans[vgetq_lane_u32(addr, 0)];
+ transitions[1] = trans[vgetq_lane_u32(addr, 1)];
+ transitions[2] = trans[vgetq_lane_u32(addr, 2)];
+ transitions[3] = trans[vgetq_lane_u32(addr, 3)];
+
+ return vshrq_n_u32(next_input, CHAR_BIT);
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[8];
+ struct completion cmplt[8];
+ struct parms parms[8];
+ int32x4_t input0, input1;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < 8; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+ acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+
+ while (flows.started > 0) {
+ /* Gather 4 bytes of input data for each stream. */
+ input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0);
+ input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), input1, 0);
+
+ input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input0, 1);
+ input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), input1, 1);
+
+ input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input0, 2);
+ input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), input1, 2);
+
+ input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input0, 3);
+ input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), input1, 3);
+
+ /* Process the 4 bytes of input on each stream. */
+
+ input0 = transition4(input0, flows.trans, &index_array[0]);
+ input1 = transition4(input1, flows.trans, &index_array[4]);
+
+ input0 = transition4(input0, flows.trans, &index_array[0]);
+ input1 = transition4(input1, flows.trans, &index_array[4]);
+
+ input0 = transition4(input0, flows.trans, &index_array[0]);
+ input1 = transition4(input1, flows.trans, &index_array[4]);
+
+ input0 = transition4(input0, flows.trans, &index_array[0]);
+ input1 = transition4(input1, flows.trans, &index_array[4]);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+ acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+ }
+
+ return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, int total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[4];
+ struct completion cmplt[4];
+ struct parms parms[4];
+ int32x4_t input;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < 4; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+ while (flows.started > 0) {
+ /* Gather 4 bytes of input data for each stream. */
+ input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0);
+ input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, 1);
+ input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, 2);
+ input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, 3);
+
+ /* Process the 4 bytes of input on each stream. */
+ input = transition4(input, flows.trans, index_array);
+ input = transition4(input, flows.trans, index_array);
+ input = transition4(input, flows.trans, index_array);
+ input = transition4(input, flows.trans, index_array);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows, index_array);
+ }
+
+ return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_run_sse.h b/src/dpdk22/lib/librte_acl/acl_run_sse.h
new file mode 100644
index 00000000..ad40a674
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_sse.h
@@ -0,0 +1,357 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+enum {
+ SHUFFLE32_SLOT1 = 0xe5,
+ SHUFFLE32_SLOT2 = 0xe6,
+ SHUFFLE32_SLOT3 = 0xe7,
+ SHUFFLE32_SWAP64 = 0x4e,
+};
+
+static const rte_xmm_t xmm_shuffle_input = {
+ .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c},
+};
+
+static const rte_xmm_t xmm_ones_16 = {
+ .u16 = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static const rte_xmm_t xmm_match_mask = {
+ .u32 = {
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ RTE_ACL_NODE_MATCH,
+ },
+};
+
+static const rte_xmm_t xmm_index_mask = {
+ .u32 = {
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ RTE_ACL_NODE_INDEX,
+ },
+};
+
+static const rte_xmm_t xmm_range_base = {
+ .u32 = {
+ 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+ },
+};
+
+/*
+ * Resolve priority for multiple results (sse version).
+ * This consists comparing the priority of the current traversal with the
+ * running set of results for the packet.
+ * For each result, keep a running array of the result (rule number) and
+ * its priority for each category.
+ */
+static inline void
+resolve_priority_sse(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+ struct parms *parms, const struct rte_acl_match_results *p,
+ uint32_t categories)
+{
+ uint32_t x;
+ xmm_t results, priority, results1, priority1, selector;
+ xmm_t *saved_results, *saved_priority;
+
+ for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+ saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+ saved_priority =
+ (xmm_t *)(&parms[n].cmplt->priority[x]);
+
+ /* get results and priorities for completed trie */
+ results = _mm_loadu_si128(
+ (const xmm_t *)&p[transition].results[x]);
+ priority = _mm_loadu_si128(
+ (const xmm_t *)&p[transition].priority[x]);
+
+ /* if this is not the first completed trie */
+ if (parms[n].cmplt->count != ctx->num_tries) {
+
+ /* get running best results and their priorities */
+ results1 = _mm_loadu_si128(saved_results);
+ priority1 = _mm_loadu_si128(saved_priority);
+
+ /* select results that are highest priority */
+ selector = _mm_cmpgt_epi32(priority1, priority);
+ results = _mm_blendv_epi8(results, results1, selector);
+ priority = _mm_blendv_epi8(priority, priority1,
+ selector);
+ }
+
+ /* save running best results and their priorities */
+ _mm_storeu_si128(saved_results, results);
+ _mm_storeu_si128(saved_priority, priority);
+ }
+}
+
+/*
+ * Extract transitions from an XMM register and check for any matches
+ */
+static void
+acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx,
+ struct parms *parms, struct acl_flow_data *flows)
+{
+ uint64_t transition1, transition2;
+
+ /* extract transition from low 64 bits. */
+ transition1 = _mm_cvtsi128_si64(*indices);
+
+ /* extract transition from high 64 bits. */
+ *indices = _mm_shuffle_epi32(*indices, SHUFFLE32_SWAP64);
+ transition2 = _mm_cvtsi128_si64(*indices);
+
+ transition1 = acl_match_check(transition1, slot, ctx,
+ parms, flows, resolve_priority_sse);
+ transition2 = acl_match_check(transition2, slot + 1, ctx,
+ parms, flows, resolve_priority_sse);
+
+ /* update indices with new transitions. */
+ *indices = _mm_set_epi64x(transition2, transition1);
+}
+
+/*
+ * Check for any match in 4 transitions (contained in 2 SSE registers)
+ */
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+ struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2,
+ xmm_t match_mask)
+{
+ xmm_t temp;
+
+ /* put low 32 bits of each transition into one register */
+ temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, (__m128)*indices2,
+ 0x88);
+ /* test for match node */
+ temp = _mm_and_si128(match_mask, temp);
+
+ while (!_mm_testz_si128(temp, temp)) {
+ acl_process_matches(indices1, slot, ctx, parms, flows);
+ acl_process_matches(indices2, slot + 2, ctx, parms, flows);
+
+ temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1,
+ (__m128)*indices2,
+ 0x88);
+ temp = _mm_and_si128(match_mask, temp);
+ }
+}
+
+/*
+ * Process 4 transitions (in 2 XMM registers) in parallel
+ */
+static inline __attribute__((always_inline)) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+ xmm_t *indices1, xmm_t *indices2)
+{
+ xmm_t addr, tr_lo, tr_hi;
+ uint64_t trans0, trans2;
+
+ /* Shuffle low 32 into tr_lo and high 32 into tr_hi */
+ ACL_TR_HILO(mm, __m128, *indices1, *indices2, tr_lo, tr_hi);
+
+ /* Calculate the address (array index) for all 4 transitions. */
+ ACL_TR_CALC_ADDR(mm, 128, addr, xmm_index_mask.x, next_input,
+ xmm_shuffle_input.x, xmm_ones_16.x, xmm_range_base.x,
+ tr_lo, tr_hi);
+
+ /* Gather 64 bit transitions and pack back into 2 registers. */
+
+ trans0 = trans[_mm_cvtsi128_si32(addr)];
+
+ /* get slot 2 */
+
+ /* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */
+ addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT2);
+ trans2 = trans[_mm_cvtsi128_si32(addr)];
+
+ /* get slot 1 */
+
+ /* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */
+ addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT1);
+ *indices1 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans0);
+
+ /* get slot 3 */
+
+ /* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */
+ addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT3);
+ *indices2 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans2);
+
+ return _mm_srli_epi32(next_input, CHAR_BIT);
+}
+
+/*
+ * Execute trie traversal with 8 traversals in parallel
+ */
+static inline int
+search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[MAX_SEARCHES_SSE8];
+ struct completion cmplt[MAX_SEARCHES_SSE8];
+ struct parms parms[MAX_SEARCHES_SSE8];
+ xmm_t input0, input1;
+ xmm_t indices1, indices2, indices3, indices4;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < MAX_SEARCHES_SSE8; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ /*
+ * indices1 contains index_array[0,1]
+ * indices2 contains index_array[2,3]
+ * indices3 contains index_array[4,5]
+ * indices4 contains index_array[6,7]
+ */
+
+ indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+ indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+ indices3 = _mm_loadu_si128((xmm_t *) &index_array[4]);
+ indices4 = _mm_loadu_si128((xmm_t *) &index_array[6]);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows,
+ &indices1, &indices2, xmm_match_mask.x);
+ acl_match_check_x4(4, ctx, parms, &flows,
+ &indices3, &indices4, xmm_match_mask.x);
+
+ while (flows.started > 0) {
+
+ /* Gather 4 bytes of input data for each stream. */
+ input0 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+ input1 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 4));
+
+ input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 1), 1);
+ input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 5), 1);
+
+ input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 2), 2);
+ input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 6), 2);
+
+ input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 3), 3);
+ input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 7), 3);
+
+ /* Process the 4 bytes of input on each stream. */
+
+ input0 = transition4(input0, flows.trans,
+ &indices1, &indices2);
+ input1 = transition4(input1, flows.trans,
+ &indices3, &indices4);
+
+ input0 = transition4(input0, flows.trans,
+ &indices1, &indices2);
+ input1 = transition4(input1, flows.trans,
+ &indices3, &indices4);
+
+ input0 = transition4(input0, flows.trans,
+ &indices1, &indices2);
+ input1 = transition4(input1, flows.trans,
+ &indices3, &indices4);
+
+ input0 = transition4(input0, flows.trans,
+ &indices1, &indices2);
+ input1 = transition4(input1, flows.trans,
+ &indices3, &indices4);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows,
+ &indices1, &indices2, xmm_match_mask.x);
+ acl_match_check_x4(4, ctx, parms, &flows,
+ &indices3, &indices4, xmm_match_mask.x);
+ }
+
+ return 0;
+}
+
+/*
+ * Execute trie traversal with 4 traversals in parallel
+ */
+static inline int
+search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+ uint32_t *results, int total_packets, uint32_t categories)
+{
+ int n;
+ struct acl_flow_data flows;
+ uint64_t index_array[MAX_SEARCHES_SSE4];
+ struct completion cmplt[MAX_SEARCHES_SSE4];
+ struct parms parms[MAX_SEARCHES_SSE4];
+ xmm_t input, indices1, indices2;
+
+ acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+ total_packets, categories, ctx->trans_table);
+
+ for (n = 0; n < MAX_SEARCHES_SSE4; n++) {
+ cmplt[n].count = 0;
+ index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+ }
+
+ indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+ indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows,
+ &indices1, &indices2, xmm_match_mask.x);
+
+ while (flows.started > 0) {
+
+ /* Gather 4 bytes of input data for each stream. */
+ input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+ input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 1), 1);
+ input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 2), 2);
+ input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 3), 3);
+
+ /* Process the 4 bytes of input on each stream. */
+ input = transition4(input, flows.trans, &indices1, &indices2);
+ input = transition4(input, flows.trans, &indices1, &indices2);
+ input = transition4(input, flows.trans, &indices1, &indices2);
+ input = transition4(input, flows.trans, &indices1, &indices2);
+
+ /* Check for any matches. */
+ acl_match_check_x4(0, ctx, parms, &flows,
+ &indices1, &indices2, xmm_match_mask.x);
+ }
+
+ return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_vect.h b/src/dpdk22/lib/librte_acl/acl_vect.h
new file mode 100644
index 00000000..6cc19997
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_vect.h
@@ -0,0 +1,116 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_VECT_H_
+#define _RTE_ACL_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE ACL SSE/AVX related header.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Takes 2 SIMD registers containing N transitions eachi (tr0, tr1).
+ * Shuffles it into different representation:
+ * lo - contains low 32 bits of given N transitions.
+ * hi - contains high 32 bits of given N transitions.
+ */
+#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \
+ lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \
+ hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \
+} while (0)
+
+
+/*
+ * Calculate the address of the next transition for
+ * all types of nodes. Note that only DFA nodes and range
+ * nodes actually transition to another node. Match
+ * nodes not supposed to be encountered here.
+ * For quad range nodes:
+ * Calculate number of range boundaries that are less than the
+ * input value. Range boundaries for each node are in signed 8 bit,
+ * ordered from -128 to 127.
+ * This is effectively a popcnt of bytes that are greater than the
+ * input byte.
+ * Single nodes are processed in the same ways as quad range nodes.
+*/
+#define ACL_TR_CALC_ADDR(P, S, \
+ addr, index_mask, next_input, shuffle_input, \
+ ones_16, range_base, tr_lo, tr_hi) do { \
+ \
+ typeof(addr) in, node_type, r, t; \
+ typeof(addr) dfa_msk, dfa_ofs, quad_ofs; \
+ \
+ t = _##P##_xor_si##S(index_mask, index_mask); \
+ in = _##P##_shuffle_epi8(next_input, shuffle_input); \
+ \
+ /* Calc node type and node addr */ \
+ node_type = _##P##_andnot_si##S(index_mask, tr_lo); \
+ addr = _##P##_and_si##S(index_mask, tr_lo); \
+ \
+ /* mask for DFA type(0) nodes */ \
+ dfa_msk = _##P##_cmpeq_epi32(node_type, t); \
+ \
+ /* DFA calculations. */ \
+ r = _##P##_srli_epi32(in, 30); \
+ r = _##P##_add_epi8(r, range_base); \
+ t = _##P##_srli_epi32(in, 24); \
+ r = _##P##_shuffle_epi8(tr_hi, r); \
+ \
+ dfa_ofs = _##P##_sub_epi32(t, r); \
+ \
+ /* QUAD/SINGLE caluclations. */ \
+ t = _##P##_cmpgt_epi8(in, tr_hi); \
+ t = _##P##_sign_epi8(t, t); \
+ t = _##P##_maddubs_epi16(t, t); \
+ quad_ofs = _##P##_madd_epi16(t, ones_16); \
+ \
+ /* blend DFA and QUAD/SINGLE. */ \
+ t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \
+ \
+ /* calculate address for next transitions. */ \
+ addr = _##P##_add_epi32(addr, t); \
+} while (0)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_VECT_H_ */
diff --git a/src/dpdk22/lib/librte_acl/rte_acl.h b/src/dpdk22/lib/librte_acl/rte_acl.h
new file mode 100644
index 00000000..0979a098
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/rte_acl.h
@@ -0,0 +1,388 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_H_
+#define _RTE_ACL_H_
+
+/**
+ * @file
+ *
+ * RTE Classifier.
+ */
+
+#include <rte_acl_osdep.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RTE_ACL_MAX_CATEGORIES 16
+
+#define RTE_ACL_RESULTS_MULTIPLIER (XMM_SIZE / sizeof(uint32_t))
+
+#define RTE_ACL_MAX_LEVELS 64
+#define RTE_ACL_MAX_FIELDS 64
+
+union rte_acl_field_types {
+ uint8_t u8;
+ uint16_t u16;
+ uint32_t u32;
+ uint64_t u64;
+};
+
+enum {
+ RTE_ACL_FIELD_TYPE_MASK = 0,
+ RTE_ACL_FIELD_TYPE_RANGE,
+ RTE_ACL_FIELD_TYPE_BITMASK
+};
+
+/**
+ * ACL Field definition.
+ * Each field in the ACL rule has an associate definition.
+ * It defines the type of field, its size, its offset in the input buffer,
+ * the field index, and the input index.
+ * For performance reasons, the inner loop of the search function is unrolled
+ * to process four input bytes at a time. This requires the input to be grouped
+ * into sets of 4 consecutive bytes. The loop processes the first input byte as
+ * part of the setup and then subsequent bytes must be in groups of 4
+ * consecutive bytes.
+ */
+struct rte_acl_field_def {
+ uint8_t type; /**< type - RTE_ACL_FIELD_TYPE_*. */
+ uint8_t size; /**< size of field 1,2,4, or 8. */
+ uint8_t field_index; /**< index of field inside the rule. */
+ uint8_t input_index; /**< 0-N input index. */
+ uint32_t offset; /**< offset to start of field. */
+};
+
+/**
+ * ACL build configuration.
+ * Defines the fields of an ACL trie and number of categories to build with.
+ */
+struct rte_acl_config {
+ uint32_t num_categories; /**< Number of categories to build with. */
+ uint32_t num_fields; /**< Number of field definitions. */
+ struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
+ /**< array of field definitions. */
+ size_t max_size;
+ /**< max memory limit for internal run-time structures. */
+};
+
+/**
+ * Defines the value of a field for a rule.
+ */
+struct rte_acl_field {
+ union rte_acl_field_types value;
+ /**< a 1,2,4, or 8 byte value of the field. */
+ union rte_acl_field_types mask_range;
+ /**<
+ * depending on field type:
+ * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32,
+ * range -> 0 : 65535 value=0, mask_range=65535,
+ * bitmask -> 0x06/0xff value=6, mask_range=0xff.
+ */
+};
+
+enum {
+ RTE_ACL_TYPE_SHIFT = 29,
+ RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t),
+ RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX,
+ RTE_ACL_MIN_PRIORITY = 0,
+};
+
+#define RTE_ACL_INVALID_USERDATA 0
+
+#define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \
+((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v))))
+
+/**
+ * Miscellaneous data for ACL rule.
+ */
+struct rte_acl_rule_data {
+ uint32_t category_mask; /**< Mask of categories for that rule. */
+ int32_t priority; /**< Priority for that rule. */
+ uint32_t userdata; /**< Associated with the rule user data. */
+};
+
+/**
+ * Defines single ACL rule.
+ * data - miscellaneous data for the rule.
+ * field[] - value and mask or range for each field.
+ */
+#define RTE_ACL_RULE_DEF(name, fld_num) struct name {\
+ struct rte_acl_rule_data data; \
+ struct rte_acl_field field[fld_num]; \
+}
+
+RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+
+#define RTE_ACL_RULE_SZ(fld_num) \
+ (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
+
+
+/** Max number of characters in name.*/
+#define RTE_ACL_NAMESIZE 32
+
+/**
+ * Parameters used when creating the ACL context.
+ */
+struct rte_acl_param {
+ const char *name; /**< Name of the ACL context. */
+ int socket_id; /**< Socket ID to allocate memory for. */
+ uint32_t rule_size; /**< Size of each rule. */
+ uint32_t max_rule_num; /**< Maximum number of rules. */
+};
+
+
+/**
+ * Create a new ACL context.
+ *
+ * @param param
+ * Parameters used to create and initialise the ACL context.
+ * @return
+ * Pointer to ACL context structure that is used in future ACL
+ * operations, or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ */
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param);
+
+/**
+ * Find an existing ACL context object and return a pointer to it.
+ *
+ * @param name
+ * Name of the ACL context as passed to rte_acl_create()
+ * @return
+ * Pointer to ACL context or NULL if object not found
+ * with rte_errno set appropriately. Possible rte_errno values include:
+ * - ENOENT - value not available for return
+ */
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name);
+
+/**
+ * De-allocate all memory used by ACL context.
+ *
+ * @param ctx
+ * ACL context to free
+ */
+void
+rte_acl_free(struct rte_acl_ctx *ctx);
+
+/**
+ * Add rules to an existing ACL context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ * ACL context to add patterns to.
+ * @param rules
+ * Array of rules to add to the ACL context.
+ * Note that all fields in rte_acl_rule structures are expected
+ * to be in host byte order.
+ * Each rule expected to be in the same format and not exceed size
+ * specified at ACL context creation time.
+ * @param num
+ * Number of elements in the input array of rules.
+ * @return
+ * - -ENOMEM if there is no space in the ACL context for these rules.
+ * - -EINVAL if the parameters are invalid.
+ * - Zero if operation completed successfully.
+ */
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+ uint32_t num);
+
+/**
+ * Delete all rules from the ACL context.
+ * This function is not multi-thread safe.
+ * Note that internal run-time structures are not affected.
+ *
+ * @param ctx
+ * ACL context to delete rules from.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx);
+
+/**
+ * Analyze set of rules and build required internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ * ACL context to build.
+ * @param cfg
+ * Pointer to struct rte_acl_config - defines build parameters.
+ * @return
+ * - -ENOMEM if couldn't allocate enough memory.
+ * - -EINVAL if the parameters are invalid.
+ * - Negative error code if operation failed.
+ * - Zero if operation completed successfully.
+ */
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg);
+
+/**
+ * Delete all rules from the ACL context and
+ * destroy all internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ * ACL context to reset.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx);
+
+/**
+ * Available implementations of ACL classify.
+ */
+enum rte_acl_classify_alg {
+ RTE_ACL_CLASSIFY_DEFAULT = 0,
+ RTE_ACL_CLASSIFY_SCALAR = 1, /**< generic implementation. */
+ RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */
+ RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */
+ RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */
+ RTE_ACL_CLASSIFY_NUM /* should always be the last one. */
+};
+
+/**
+ * Perform search for a matching ACL rule for each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for given input buffer and
+ * given category, then rule with highest priority will be returned as a match.
+ * Note, that it is a caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ * ACL context to search with.
+ * @param data
+ * Array of pointers to input data buffers to perform search.
+ * Note that all fields in input data buffers supposed to be in network
+ * byte order (MSB).
+ * @param results
+ * Array of search results, *categories* results per each input data buffer.
+ * @param num
+ * Number of elements in the input data buffers array.
+ * @param categories
+ * Number of maximum possible matches for each input buffer, one possible
+ * match per category.
+ * @return
+ * zero on successful completion.
+ * -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify(const struct rte_acl_ctx *ctx,
+ const uint8_t **data,
+ uint32_t *results, uint32_t num,
+ uint32_t categories);
+
+/**
+ * Perform search using specified algorithm for a matching ACL rule for
+ * each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for given input buffer and
+ * given category, then rule with highest priority will be returned as a match.
+ * Note, that it is a caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ * ACL context to search with.
+ * @param data
+ * Array of pointers to input data buffers to perform search.
+ * Note that all fields in input data buffers supposed to be in network
+ * byte order (MSB).
+ * @param results
+ * Array of search results, *categories* results per each input data buffer.
+ * @param num
+ * Number of elements in the input data buffers array.
+ * @param categories
+ * Number of maximum possible matches for each input buffer, one possible
+ * match per category.
+ * @param alg
+ * Algorithm to be used for the search.
+ * It is the caller responsibility to ensure that the value refers to the
+ * existing algorithm, and that it could be run on the given CPU.
+ * @return
+ * zero on successful completion.
+ * -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify_alg(const struct rte_acl_ctx *ctx,
+ const uint8_t **data,
+ uint32_t *results, uint32_t num,
+ uint32_t categories,
+ enum rte_acl_classify_alg alg);
+
+/*
+ * Override the default classifier function for a given ACL context.
+ * @param ctx
+ * ACL context to change classify function for.
+ * @param alg
+ * New default classify algorithm for given ACL context.
+ * It is the caller responsibility to ensure that the value refers to the
+ * existing algorithm, and that it could be run on the given CPU.
+ * @return
+ * - -EINVAL if the parameters are invalid.
+ * - Zero if operation completed successfully.
+ */
+extern int
+rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx,
+ enum rte_acl_classify_alg alg);
+
+/**
+ * Dump an ACL context structure to the console.
+ *
+ * @param ctx
+ * ACL context to dump.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx);
+
+/**
+ * Dump all ACL context structures to the console.
+ */
+void
+rte_acl_list_dump(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_H_ */
diff --git a/src/dpdk22/lib/librte_acl/rte_acl_osdep.h b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h
new file mode 100644
index 00000000..41f7e3d4
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h
@@ -0,0 +1,80 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_OSDEP_H_
+#define _RTE_ACL_OSDEP_H_
+
+/**
+ * @file
+ *
+ * RTE ACL DPDK/OS dependent file.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/queue.h>
+
+/*
+ * Common defines.
+ */
+
+#define DIM(x) RTE_DIM(x)
+
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+#endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/src/dpdk22/lib/librte_acl/tb_mem.h b/src/dpdk22/lib/librte_acl/tb_mem.h
new file mode 100644
index 00000000..ca7af966
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/tb_mem.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TB_MEM_H_
+#define _TB_MEM_H_
+
+/**
+ * @file
+ *
+ * RTE ACL temporary (build phase) memory management.
+ * Contains structures and functions to manage temporary (used by build only)
+ * memory. Memory allocated in large blocks to speed 'free' when trie is
+ * destructed (finish of build phase).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_acl_osdep.h>
+#include <setjmp.h>
+
+struct tb_mem_block {
+ struct tb_mem_block *next;
+ struct tb_mem_pool *pool;
+ size_t size;
+ uint8_t *mem;
+};
+
+struct tb_mem_pool {
+ struct tb_mem_block *block;
+ size_t alignment;
+ size_t min_alloc;
+ size_t alloc;
+ /* jump target in case of memory allocation failure. */
+ sigjmp_buf fail;
+};
+
+void *tb_alloc(struct tb_mem_pool *pool, size_t size);
+void tb_free_pool(struct tb_mem_pool *pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TB_MEM_H_ */