From 509648b87434b9032d38b8ca5ad470ba3edcc036 Mon Sep 17 00:00:00 2001
From: Ido Barnea <ibarnea@cisco.com>
Date: Wed, 9 Dec 2015 05:07:44 +0200
Subject: Adding dpdk 2.2 instead of dpdk 1.8 and making changes to make
 compilation work. 40G and 10G filters do not work yet.

---
 src/dpdk22/lib/librte_acl/acl.h           | 241 +++++++++++++++++++
 src/dpdk22/lib/librte_acl/acl_run.h       | 263 ++++++++++++++++++++
 src/dpdk22/lib/librte_acl/acl_run_avx2.h  | 284 ++++++++++++++++++++++
 src/dpdk22/lib/librte_acl/acl_run_neon.h  | 289 ++++++++++++++++++++++
 src/dpdk22/lib/librte_acl/acl_run_sse.h   | 357 +++++++++++++++++++++++++++
 src/dpdk22/lib/librte_acl/acl_vect.h      | 116 +++++++++
 src/dpdk22/lib/librte_acl/rte_acl.h       | 388 ++++++++++++++++++++++++++++++
 src/dpdk22/lib/librte_acl/rte_acl_osdep.h |  80 ++++++
 src/dpdk22/lib/librte_acl/tb_mem.h        |  76 ++++++
 9 files changed, 2094 insertions(+)
 create mode 100644 src/dpdk22/lib/librte_acl/acl.h
 create mode 100644 src/dpdk22/lib/librte_acl/acl_run.h
 create mode 100644 src/dpdk22/lib/librte_acl/acl_run_avx2.h
 create mode 100644 src/dpdk22/lib/librte_acl/acl_run_neon.h
 create mode 100644 src/dpdk22/lib/librte_acl/acl_run_sse.h
 create mode 100644 src/dpdk22/lib/librte_acl/acl_vect.h
 create mode 100644 src/dpdk22/lib/librte_acl/rte_acl.h
 create mode 100644 src/dpdk22/lib/librte_acl/rte_acl_osdep.h
 create mode 100644 src/dpdk22/lib/librte_acl/tb_mem.h

(limited to 'src/dpdk22/lib/librte_acl')

diff --git a/src/dpdk22/lib/librte_acl/acl.h b/src/dpdk22/lib/librte_acl/acl.h
new file mode 100644
index 00000000..09d67841
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl.h
@@ -0,0 +1,241 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_ACL_H_
+#define	_ACL_H_
+
+#ifdef __cplusplus
+extern"C" {
+#endif /* __cplusplus */
+
+#define RTE_ACL_QUAD_MAX	5
+#define RTE_ACL_QUAD_SIZE	4
+#define RTE_ACL_QUAD_SINGLE	UINT64_C(0x7f7f7f7f00000000)
+
+#define RTE_ACL_SINGLE_TRIE_SIZE	2000
+
+#define RTE_ACL_DFA_MAX		UINT8_MAX
+#define RTE_ACL_DFA_SIZE	(UINT8_MAX + 1)
+
+#define	RTE_ACL_DFA_GR64_SIZE	64
+#define	RTE_ACL_DFA_GR64_NUM	(RTE_ACL_DFA_SIZE / RTE_ACL_DFA_GR64_SIZE)
+#define	RTE_ACL_DFA_GR64_BIT	\
+	(CHAR_BIT * sizeof(uint32_t) / RTE_ACL_DFA_GR64_NUM)
+
+typedef int bits_t;
+
+#define	RTE_ACL_BIT_SET_SIZE	((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT))
+
+struct rte_acl_bitset {
+	bits_t             bits[RTE_ACL_BIT_SET_SIZE];
+};
+
+#define	RTE_ACL_NODE_DFA	(0 << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_SINGLE	(1U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_QRANGE	(3U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_MATCH	(4U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_TYPE	(7U << RTE_ACL_TYPE_SHIFT)
+#define	RTE_ACL_NODE_UNDEFINED	UINT32_MAX
+
+/*
+ * ACL RT structure is a set of multibit tries (with stride == 8)
+ * represented by an array of transitions. The next position is calculated
+ * based on the current position and the input byte.
+ * Each transition is 64 bit value with the following format:
+ * | node_type_specific : 32 | node_type : 3 | node_addr : 29 |
+ * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index
+ * to the start of the node in the transitions array.
+ * Few different node types are used:
+ * RTE_ACL_NODE_MATCH:
+ * node_addr value is an index into an array that contains the return value
+ * and its priority for each category.
+ * Upper 32 bits of the transition value are not used for that node type.
+ * RTE_ACL_NODE_QRANGE:
+ * that node consists of up to 5 transitions.
+ * Upper 32 bits are interpreted as 4 signed character values which
+ * are ordered from smallest(INT8_MIN) to largest (INT8_MAX).
+ * These values define 5 ranges:
+ * INT8_MIN <= range[0]  <= ((int8_t *)&transition)[4]
+ * ((int8_t *)&transition)[4] < range[1] <= ((int8_t *)&transition)[5]
+ * ((int8_t *)&transition)[5] < range[2] <= ((int8_t *)&transition)[6]
+ * ((int8_t *)&transition)[6] < range[3] <= ((int8_t *)&transition)[7]
+ * ((int8_t *)&transition)[7] < range[4] <= INT8_MAX
+ * So for input byte value within range[i] i-th transition within that node
+ * will be used.
+ * RTE_ACL_NODE_SINGLE:
+ * always transitions to the same node regardless of the input value.
+ * RTE_ACL_NODE_DFA:
+ * that node consists of up to 256 transitions.
+ * In attempt to conserve space all transitions are divided into 4 consecutive
+ * groups, by 64 transitions per group:
+ * group64[i] contains transitions[i * 64, .. i * 64 + 63].
+ * Upper 32 bits are interpreted as 4 unsigned character values one per group,
+ * which contain index to the start of the given group within the node.
+ * So to calculate transition index within the node for given input byte value:
+ * input_byte - ((uint8_t *)&transition)[4 + input_byte / 64].
+ */
+
+/*
+ * Structure of a node is a set of ptrs and each ptr has a bit map
+ * of values associated with this transition.
+ */
+struct rte_acl_ptr_set {
+	struct rte_acl_bitset values;	/* input values associated with ptr */
+	struct rte_acl_node  *ptr;	/* transition to next node */
+};
+
+struct rte_acl_classifier_results {
+	int results[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_match_results {
+	uint32_t results[RTE_ACL_MAX_CATEGORIES];
+	int32_t priority[RTE_ACL_MAX_CATEGORIES];
+};
+
+struct rte_acl_node {
+	uint64_t node_index;  /* index for this node */
+	uint32_t level;       /* level 0-n in the trie */
+	uint32_t ref_count;   /* ref count for this node */
+	struct rte_acl_bitset  values;
+	/* values: set of all values that map to another node
+	 * (union of bits in each transition).
+	 */
+	uint32_t                num_ptrs; /* number of ptr_set in use */
+	uint32_t                max_ptrs; /* number of allocated ptr_set */
+	uint32_t                min_add;  /* number of ptr_set per allocation */
+	struct rte_acl_ptr_set *ptrs;     /* transitions array for this node */
+	int32_t                 match_flag;
+	int32_t                 match_index; /* index to match data */
+	uint32_t                node_type;
+	int32_t                 fanout;
+	/* fanout: number of ranges (transitions w/ consecutive bits) */
+	int32_t                 id;
+	struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */
+	union {
+		char            transitions[RTE_ACL_QUAD_SIZE];
+		/* transitions: boundaries for ranged node */
+		uint8_t         dfa_gr64[RTE_ACL_DFA_GR64_NUM];
+	};
+	struct rte_acl_node     *next;
+	/* next: free list link or pointer to duplicate node during merge */
+	struct rte_acl_node     *prev;
+	/* prev: points to node from which this node was duplicated */
+};
+
+/*
+ * Types of tries used to generate runtime structure(s)
+ */
+enum {
+	RTE_ACL_FULL_TRIE = 0,
+	RTE_ACL_NOSRC_TRIE = 1,
+	RTE_ACL_NODST_TRIE = 2,
+	RTE_ACL_NOPORTS_TRIE = 4,
+	RTE_ACL_NOVLAN_TRIE = 8,
+	RTE_ACL_UNUSED_TRIE = 0x80000000
+};
+
+
+/** MAX number of tries per one ACL context.*/
+#define RTE_ACL_MAX_TRIES	8
+
+/** Max number of characters in ACL context name.*/
+#define RTE_ACL_NAMESIZE	32
+
+
+struct rte_acl_trie {
+	uint32_t        type;
+	uint32_t        count;
+	uint32_t        root_index;
+	const uint32_t *data_index;
+	uint32_t        num_data_indexes;
+};
+
+struct rte_acl_bld_trie {
+	struct rte_acl_node *trie;
+};
+
+struct rte_acl_ctx {
+	char                name[RTE_ACL_NAMESIZE];
+	/**< Name of the ACL context. */
+	int32_t             socket_id;
+	/**< Socket ID to allocate memory from. */
+	enum rte_acl_classify_alg alg;
+	void               *rules;
+	uint32_t            max_rules;
+	uint32_t            rule_sz;
+	uint32_t            num_rules;
+	uint32_t            num_categories;
+	uint32_t            num_tries; /* tries walked per packet */
+	uint32_t            match_index; /* match results offset in trans_table */
+	uint64_t            no_match;
+	uint64_t            idle; /* transition for unused search slots */
+	uint64_t           *trans_table; /* array of 64-bit transitions */
+	uint32_t           *data_indexes;
+	struct rte_acl_trie trie[RTE_ACL_MAX_TRIES];
+	void               *mem;
+	size_t              mem_sz;
+	struct rte_acl_config config; /* copy of build config. */
+};
+
+int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie,
+	struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries,
+	uint32_t num_categories, uint32_t data_index_sz, size_t max_size);
+
+typedef int (*rte_acl_classify_t)
+(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t);
+
+/*
+ * Different implementations of ACL classify.
+ */
+int
+rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+int
+rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t num, uint32_t categories);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ACL_H_ */
diff --git a/src/dpdk22/lib/librte_acl/acl_run.h b/src/dpdk22/lib/librte_acl/acl_run.h
new file mode 100644
index 00000000..b2fc42c6
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run.h
@@ -0,0 +1,263 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_ACL_RUN_H_
+#define	_ACL_RUN_H_
+
+#include <rte_acl.h>
+#include "acl.h"
+
+#define MAX_SEARCHES_AVX16	16
+#define MAX_SEARCHES_SSE8	8
+#define MAX_SEARCHES_SSE4	4
+#define MAX_SEARCHES_SCALAR	2
+
+#define GET_NEXT_4BYTES(prm, idx)	\
+	(*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++)))
+
+
+#define RTE_ACL_NODE_INDEX	((uint32_t)~RTE_ACL_NODE_TYPE)
+
+#define	SCALAR_QRANGE_MULT	0x01010101
+#define	SCALAR_QRANGE_MASK	0x7f7f7f7f
+#define	SCALAR_QRANGE_MIN	0x80808080
+
+/*
+ * Structure to manage N parallel trie traversals.
+ * The runtime trie traversal routines can process 16, 8, 4, or 2 tries
+ * in parallel. Each packet may require multiple trie traversals (up to 4).
+ * This structure is used to fill the slots (0 to n-1) for parallel processing
+ * with the trie traversals needed for each packet.
+ */
+struct acl_flow_data {
+	uint32_t            num_packets;
+	/* number of packets fully handed to slots (index of next packet) */
+	uint32_t            started;
+	/* number of trie traversals in progress */
+	uint32_t            trie;
+	/* current trie index (0 to N-1) */
+	uint32_t            cmplt_size; /* entries in cmplt_array */
+	uint32_t            total_packets;
+	uint32_t            categories;
+	/* categories: number of result categories per packet. */
+	/* total_packets: maximum number of packets to process. */
+	const uint64_t     *trans; /* transition table */
+	const uint8_t     **data; /* one input buffer per packet */
+	uint32_t           *results; /* 'categories' entries per packet */
+	struct completion  *last_cmplt; /* completion of packet being started */
+	struct completion  *cmplt_array;
+};
+
+/*
+ * Structure to maintain running results for
+ * a single packet (up to 4 tries).
+ */
+struct completion {
+	uint32_t *results;                          /* running results. */
+	int32_t   priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */
+	uint32_t  count;                            /* num of remaining tries */
+	/* count != 0 also marks the struct as allocated (0 == free) */
+} __attribute__((aligned(XMM_SIZE)));
+
+/*
+ * One parms structure for each slot in the search engine.
+ */
+struct parms {
+	const uint8_t              *data;
+	/* input data for this packet */
+	const uint32_t             *data_index;
+	/* data indirection for this trie */
+	struct completion          *cmplt;
+	/* completion data for this packet */
+};
+
+/*
+ * Define a global, zero-filled idle node for unused engine slots
+ */
+static const uint32_t idle[UINT8_MAX + 1];
+
+/*
+ * Claim the first free completion (count == 0) in p[0..size-1] for a packet.
+ */
+static inline struct completion *
+alloc_completion(struct completion *p, uint32_t size, uint32_t tries,
+	uint32_t *results)
+{
+	uint32_t n;
+
+	for (n = 0; n < size; n++) {
+		/* count == 0 identifies an entry that is not in use */
+		if (p[n].count == 0) {
+
+			/* mark as allocated and set number of tries. */
+			p[n].count = tries;
+			p[n].results = results;
+			return &(p[n]);
+		}
+	}
+
+	/* no free entry; should never get here (callers don't check NULL) */
+	return NULL;
+}
+
+/*
+ * Resolve priority when a single category (index 0) is requested.
+ */
+static inline void
+resolve_single_priority(uint64_t transition, int n,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	const struct rte_acl_match_results *p)
+{
+	if (parms[n].cmplt->count == ctx->num_tries ||
+			parms[n].cmplt->priority[0] <=
+			p[transition].priority[0]) {
+		/* first trie done for this packet, or priority not lower */
+		parms[n].cmplt->priority[0] = p[transition].priority[0];
+		parms[n].cmplt->results[0] = p[transition].results[0];
+	}
+}
+
+/*
+ * Fill slot n of the parallel trie traversal array (parms) from the list of
+ * packets (flows). Returns the slot's first transition, or ctx->idle.
+ */
+static inline uint64_t
+acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n,
+	const struct rte_acl_ctx *ctx)
+{
+	uint64_t transition;
+
+	/* if there are any more packets to process */
+	if (flows->num_packets < flows->total_packets) {
+		parms[n].data = flows->data[flows->num_packets];
+		parms[n].data_index = ctx->trie[flows->trie].data_index;
+
+		/* if this is the first trie for this packet */
+		if (flows->trie == 0) {
+			flows->last_cmplt = alloc_completion(flows->cmplt_array,
+				flows->cmplt_size, ctx->num_tries,
+				flows->results +
+				flows->num_packets * flows->categories);
+		}
+
+		/* set completion; first input byte picks the start transition */
+		parms[n].cmplt = flows->last_cmplt;
+		transition =
+			flows->trans[parms[n].data[*parms[n].data_index++] +
+			ctx->trie[flows->trie].root_index];
+
+		/*
+		 * if this is the last trie for this packet,
+		 * then setup next packet.
+		 */
+		flows->trie++;
+		if (flows->trie >= ctx->num_tries) {
+			flows->trie = 0;
+			flows->num_packets++;
+		}
+
+		/* keep track of number of active trie traversals */
+		flows->started++;
+
+	/* no more tries to process, set slot to an idle position */
+	} else {
+		transition = ctx->idle;
+		parms[n].data = (const uint8_t *)idle;
+		parms[n].data_index = idle;
+	}
+	return transition;
+}
+
+static inline void
+acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt,
+	uint32_t cmplt_size, const uint8_t **data, uint32_t *results,
+	uint32_t data_num, uint32_t categories, const uint64_t *trans)
+{
+	flows->num_packets = 0;		/* no packet consumed yet */
+	flows->started = 0;		/* no traversal in flight */
+	flows->trie = 0;		/* begin with trie 0 of packet 0 */
+	flows->last_cmplt = NULL;	/* set by acl_start_next_trie() */
+	flows->cmplt_array = cmplt;
+	flows->total_packets = data_num;
+	flows->categories = categories;
+	flows->cmplt_size = cmplt_size;
+	flows->data = data;
+	flows->results = results;
+	flows->trans = trans;
+}
+
+typedef void (*resolve_priority_t)
+(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+	struct parms *parms, const struct rte_acl_match_results *p,
+	uint32_t categories);
+
+/*
+ * Detect matches. If a match node transition is found, this trie traversal
+ * is complete: resolve priorities, then refill the slot with the next trie
+ * (or the idle transition). Returns the transition the slot continues with.
+ */
+static inline uint64_t
+acl_match_check(uint64_t transition, int slot,
+	const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, resolve_priority_t resolve_priority)
+{
+	const struct rte_acl_match_results *p;
+
+	p = (const struct rte_acl_match_results *)
+		(flows->trans + ctx->match_index);
+
+	if (transition & RTE_ACL_NODE_MATCH) {
+
+		/* Remove flags from index and decrement active traversals */
+		transition &= RTE_ACL_NODE_INDEX;
+		flows->started--;
+
+		/* Resolve priorities for this trie and running results */
+		if (flows->categories == 1)
+			resolve_single_priority(transition, slot, ctx,
+				parms, p);
+		else
+			resolve_priority(transition, slot, ctx, parms,
+				p, flows->categories);
+
+		/* Count down completed tries for this search request */
+		parms[slot].cmplt->count--;
+
+		/* Fill the slot with the next trie or idle trie */
+		transition = acl_start_next_trie(flows, parms, slot, ctx);
+	}
+
+	return transition;
+}
+
+#endif /* _ACL_RUN_H_ */
diff --git a/src/dpdk22/lib/librte_acl/acl_run_avx2.h b/src/dpdk22/lib/librte_acl/acl_run_avx2.h
new file mode 100644
index 00000000..b01a46a5
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_avx2.h
@@ -0,0 +1,284 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run_sse.h"
+
+static const rte_ymm_t ymm_match_mask = { /* MATCH flag in all 8 lanes */
+	.u32 = {
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+	},
+};
+
+static const rte_ymm_t ymm_index_mask = { /* index mask in all 8 lanes */
+	.u32 = {
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+	},
+};
+
+static const rte_ymm_t ymm_shuffle_input = { /* fed to ACL_TR_CALC_ADDR */
+	.u32 = {
+		0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+		0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c,
+	},
+};
+
+static const rte_ymm_t ymm_ones_16 = { /* 1 in each of the 16 u16 lanes */
+	.u16 = {
+		1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1,
+	},
+};
+
+static const rte_ymm_t ymm_range_base = { /* fed to ACL_TR_CALC_ADDR */
+	.u32 = {
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+	},
+};
+
+/*
+ * Process 8 transitions in parallel and return the remaining input.
+ * tr_lo contains low 32 bits for 8 transitions (updated in place).
+ * tr_hi contains high 32 bits for 8 transitions (updated in place).
+ * next_input contains up to 4 input bytes for 8 flows.
+ */
+static inline __attribute__((always_inline)) ymm_t
+transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+	const int32_t *tr;
+	ymm_t addr;
+
+	tr = (const int32_t *)(uintptr_t)trans;
+
+	/* Calculate the address (array index) for all 8 transitions. */
+	ACL_TR_CALC_ADDR(mm256, 256, addr, ymm_index_mask.y, next_input,
+		ymm_shuffle_input.y, ymm_ones_16.y, ymm_range_base.y,
+		*tr_lo, *tr_hi);
+
+	/* load lower 32 bits of 8 transitions at once. */
+	*tr_lo = _mm256_i32gather_epi32(tr, addr, sizeof(trans[0]));
+	/* shift each 32-bit lane right by one byte for the next call */
+	next_input = _mm256_srli_epi32(next_input, CHAR_BIT);
+
+	/* load high 32 bits of 8 transitions at once. */
+	*tr_hi = _mm256_i32gather_epi32(tr + 1, addr, sizeof(trans[0]));
+
+	return next_input;
+}
+
+/*
+ * Process matches for 8 flows.
+ * tr_lo contains low 32 bits for 8 transitions.
+ * tr_hi contains high 32 bits for 8 transitions.
+ */
+static inline void
+acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx,
+	struct parms *parms, struct acl_flow_data *flows, uint32_t slot,
+	ymm_t matches, ymm_t *tr_lo, ymm_t *tr_hi)
+{
+	ymm_t t0, t1;
+	ymm_t lo, hi;
+	xmm_t l0, l1;
+	uint32_t i;
+	uint64_t tr[MAX_SEARCHES_SSE8];
+
+	l1 = _mm256_extracti128_si256(*tr_lo, 1);
+	l0 = _mm256_castsi256_si128(*tr_lo);
+
+	for (i = 0; i != RTE_DIM(tr) / 2; i++) {
+
+		/*
+		 * Extract low 32bits of each transition.
+		 * That's enough to process the match.
+		 */
+		tr[i] = (uint32_t)_mm_cvtsi128_si32(l0);
+		tr[i + 4] = (uint32_t)_mm_cvtsi128_si32(l1);
+
+		l0 = _mm_srli_si128(l0, sizeof(uint32_t));
+		l1 = _mm_srli_si128(l1, sizeof(uint32_t));
+
+		tr[i] = acl_match_check(tr[i], slot + i,
+			ctx, parms, flows, resolve_priority_sse);
+		tr[i + 4] = acl_match_check(tr[i + 4], slot + i + 4,
+			ctx, parms, flows, resolve_priority_sse);
+	}
+
+	/* Collect new transitions into 2 YMM registers. */
+	t0 = _mm256_set_epi64x(tr[5], tr[4], tr[1], tr[0]);
+	t1 = _mm256_set_epi64x(tr[7], tr[6], tr[3], tr[2]);
+
+	/* For each transition: put low 32 into tr_lo and high 32 into tr_hi */
+	ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi);
+
+	/* Keep transitions with NOMATCH intact. */
+	*tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches);
+	*tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches);
+}
+
+static inline void
+acl_match_check_avx2x8(const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, uint32_t slot,
+	ymm_t *tr_lo, ymm_t *tr_hi, ymm_t match_mask)
+{
+	uint32_t msk;
+	ymm_t matches, temp;
+
+	/* test for match node */
+	temp = _mm256_and_si256(match_mask, *tr_lo);
+	matches = _mm256_cmpeq_epi32(temp, match_mask);
+	msk = _mm256_movemask_epi8(matches);
+	/* repeat until no lane holds a match: refilled lanes are re-tested */
+	while (msk != 0) {
+
+		acl_process_matches_avx2x8(ctx, parms, flows, slot,
+			matches, tr_lo, tr_hi);
+		temp = _mm256_and_si256(match_mask, *tr_lo);
+		matches = _mm256_cmpeq_epi32(temp, match_mask);
+		msk = _mm256_movemask_epi8(matches);
+	}
+}
+
+/*
+ * Execute trie traversal for up to 16 flows in parallel. Always returns 0.
+ */
+static inline int
+search_avx2x16(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	uint32_t n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_AVX16];
+	struct completion cmplt[MAX_SEARCHES_AVX16];
+	struct parms parms[MAX_SEARCHES_AVX16];
+	ymm_t input[2], tr_lo[2], tr_hi[2];
+	ymm_t t0, t1;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < RTE_DIM(cmplt); n++) {
+		cmplt[n].count = 0;	/* mark completion as free */
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	t0 = _mm256_set_epi64x(index_array[5], index_array[4],
+		index_array[1], index_array[0]);
+	t1 = _mm256_set_epi64x(index_array[7], index_array[6],
+		index_array[3], index_array[2]);
+
+	ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[0], tr_hi[0]);
+
+	t0 = _mm256_set_epi64x(index_array[13], index_array[12],
+		index_array[9], index_array[8]);
+	t1 = _mm256_set_epi64x(index_array[15], index_array[14],
+		index_array[11], index_array[10]);
+
+	ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[1], tr_hi[1]);
+
+	 /* Check for any matches. */
+	acl_match_check_avx2x8(ctx, parms, &flows, 0, &tr_lo[0], &tr_hi[0],
+		ymm_match_mask.y);
+	acl_match_check_avx2x8(ctx, parms, &flows, 8, &tr_lo[1], &tr_hi[1],
+		ymm_match_mask.y);
+
+	while (flows.started > 0) {
+
+		uint32_t in[MAX_SEARCHES_SSE8];
+
+		/* Gather 4 bytes of input data for first 8 flows. */
+		in[0] = GET_NEXT_4BYTES(parms, 0);
+		in[4] = GET_NEXT_4BYTES(parms, 4);
+		in[1] = GET_NEXT_4BYTES(parms, 1);
+		in[5] = GET_NEXT_4BYTES(parms, 5);
+		in[2] = GET_NEXT_4BYTES(parms, 2);
+		in[6] = GET_NEXT_4BYTES(parms, 6);
+		in[3] = GET_NEXT_4BYTES(parms, 3);
+		in[7] = GET_NEXT_4BYTES(parms, 7);
+		input[0] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+			in[3], in[2], in[1], in[0]);
+
+		/* Gather 4 bytes of input data for last 8 flows. */
+		in[0] = GET_NEXT_4BYTES(parms, 8);
+		in[4] = GET_NEXT_4BYTES(parms, 12);
+		in[1] = GET_NEXT_4BYTES(parms, 9);
+		in[5] = GET_NEXT_4BYTES(parms, 13);
+		in[2] = GET_NEXT_4BYTES(parms, 10);
+		in[6] = GET_NEXT_4BYTES(parms, 14);
+		in[3] = GET_NEXT_4BYTES(parms, 11);
+		in[7] = GET_NEXT_4BYTES(parms, 15);
+		input[1] = _mm256_set_epi32(in[7], in[6], in[5], in[4],
+			in[3], in[2], in[1], in[0]);
+		/* Four rounds: each transition8() shifts the input by a byte. */
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		input[0] = transition8(input[0], flows.trans,
+			&tr_lo[0], &tr_hi[0]);
+		input[1] = transition8(input[1], flows.trans,
+			&tr_lo[1], &tr_hi[1]);
+
+		 /* Check for any matches. */
+		acl_match_check_avx2x8(ctx, parms, &flows, 0,
+			&tr_lo[0], &tr_hi[0], ymm_match_mask.y);
+		acl_match_check_avx2x8(ctx, parms, &flows, 8,
+			&tr_lo[1], &tr_hi[1], ymm_match_mask.y);
+	}
+
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_run_neon.h b/src/dpdk22/lib/librte_acl/acl_run_neon.h
new file mode 100644
index 00000000..cf7c57fb
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_neon.h
@@ -0,0 +1,289 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium networks Ltd. 2015.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium networks nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+struct _neon_acl_const {
+	rte_xmm_t xmm_shuffle_input; /* vqtbl1q_u8 idx: byte 0 of each lane x4 */
+	rte_xmm_t xmm_index_mask;    /* RTE_ACL_NODE_INDEX in all four lanes */
+	rte_xmm_t range_base;        /* added to the per-lane lookup index */
+} neon_acl_const  __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = {
+	{ /* xmm_shuffle_input */
+		.u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}
+	},
+	{ /* xmm_index_mask */
+		.u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX}
+	},
+	{ /* range_base */
+		.u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c}
+	},
+}; /* NOTE(review): non-static object defined in a header; confirm one user */
+
+/*
+ * Resolve priority for multiple results (neon version).
+ * Compares the priorities of match entry p[transition] with the running
+ * best kept in parms[n].cmplt and keeps, for each category, the result
+ * (rule number) and priority of the higher-priority one; on equal
+ * priority the new entry replaces the saved one.
+ */
+static inline void
+resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+		      struct parms *parms,
+		      const struct rte_acl_match_results *p,
+		      uint32_t categories)
+{
+	uint32_t x;
+	int32x4_t results, priority, results1, priority1;
+	uint32x4_t selector;
+	int32_t *saved_results, *saved_priority;
+
+	for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+		saved_results = (int32_t *)(&parms[n].cmplt->results[x]);
+		saved_priority = (int32_t *)(&parms[n].cmplt->priority[x]);
+
+		/* get results and priorities for completed trie */
+		results = vld1q_s32(
+			(const int32_t *)&p[transition].results[x]);
+		priority = vld1q_s32(
+			(const int32_t *)&p[transition].priority[x]);
+
+		/* if this is not the first completed trie */
+		if (parms[n].cmplt->count != ctx->num_tries) {
+			/* get running best results and their priorities */
+			results1 = vld1q_s32(saved_results);
+			priority1 = vld1q_s32(saved_priority);
+
+			/* keep saved values in lanes where saved > new */
+			selector = vcgtq_s32(priority1, priority);
+			results = vbslq_s32(selector, results1, results);
+			priority = vbslq_s32(selector, priority1, priority);
+		}
+
+		/* save running best results and their priorities */
+		vst1q_s32(saved_results, results);
+		vst1q_s32(saved_priority, priority);
+	}
+}
+
+/*
+ * Non-zero when any of val[0..3] has an RTE_ACL_NODE_MATCH bit set.
+ */
+static inline __attribute__((always_inline)) uint32_t
+check_any_match_x4(uint64_t val[])
+{
+	return ((val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH);
+}
+
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+		   struct acl_flow_data *flows, uint64_t transitions[])
+{ /* Run acl_match_check() on flows slot..slot+3 while any one matches. */
+	while (check_any_match_x4(transitions)) { /* re-test returned values */
+		transitions[0] = acl_match_check(transitions[0], slot, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[1] = acl_match_check(transitions[1], slot + 1, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[2] = acl_match_check(transitions[2], slot + 2, ctx,
+			parms, flows, resolve_priority_neon);
+		transitions[3] = acl_match_check(transitions[3], slot + 3, ctx,
+			parms, flows, resolve_priority_neon);
+	}
+}
+
+/* Advance 4 flows by one input byte: update transitions[0..3] from trans[]
+ * and return next_input with each 32-bit lane shifted right by CHAR_BIT.
+ */
+static inline __attribute__((always_inline)) int32x4_t
+transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
+{
+	int32x4x2_t tr_hi_lo;
+	int32x4_t t, in, r;
+	uint32x4_t index_msk, node_type, addr;
+	uint32x4_t dfa_msk, mask, quad_ofs, dfa_ofs;
+
+	/* Move low 32 into tr_hi_lo.val[0] and high 32 into tr_hi_lo.val[1] */
+	tr_hi_lo = vld2q_s32((const int32_t *)transitions);
+
+	/* Calculate the address (array index) for all 4 transitions. */
+
+	index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask);
+
+	/* Calc node type and node addr */
+	node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
+	addr = vandq_s32(tr_hi_lo.val[0], index_msk);
+
+	/* t = 0 */
+	t = veorq_s32(node_type, node_type);
+
+	/* mask for DFA type(0) nodes */
+	dfa_msk = vceqq_u32(node_type, t);
+
+	mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
+	in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
+
+	/* DFA calculations. */
+	r = vshrq_n_u32(in, 30); /* input byte / 64 */
+	mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base);
+	r = vaddq_u8(r, mask);
+	t = vshrq_n_u32(in, 24); /* input byte */
+	r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
+	dfa_ofs = vsubq_s32(t, r);
+
+	/* QUAD/SINGLE calculations. */
+	t = vcgtq_s8(in, tr_hi_lo.val[1]); /* all ones where in > tr_hi byte */
+	t = vabsq_s8(t);                   /* -1 -> 1 in each such byte */
+	t = vpaddlq_u8(t);                 /* sum adjacent byte pairs */
+	quad_ofs = vpaddlq_u16(t);         /* count per 32-bit lane */
+
+	/* blend DFA and QUAD/SINGLE. */
+	t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs);
+
+	/* calculate address for next transitions */
+	addr = vaddq_u32(addr, t);
+
+	/* Fill next transitions */
+	transitions[0] = trans[vgetq_lane_u32(addr, 0)];
+	transitions[1] = trans[vgetq_lane_u32(addr, 1)];
+	transitions[2] = trans[vgetq_lane_u32(addr, 2)];
+	transitions[3] = trans[vgetq_lane_u32(addr, 3)];
+
+	return vshrq_n_u32(next_input, CHAR_BIT);
+}
+
+/* Execute trie traversal with 8 traversals in parallel (slots 0-3 and 4-7).
+ * input0/input1 start zeroed because vsetq_lane_s32() reads its vector arg.
+ */
+static inline int
+search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	      uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[8];
+	struct completion cmplt[8];
+	struct parms parms[8];
+	int32x4_t input0 = vdupq_n_s32(0), input1 = vdupq_n_s32(0);
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		     total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < 8; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/* Check the starting transitions for matches before reading input. */
+	acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+	acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+
+	while (flows.started > 0) {
+		/* Gather 4 input bytes per stream; every lane is overwritten. */
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), input1, 0);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input0, 1);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), input1, 1);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input0, 2);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), input1, 2);
+
+		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input0, 3);
+		input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), input1, 3);
+
+		/* Process the 4 bytes of input on each stream. */
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		input0 = transition4(input0, flows.trans, &index_array[0]);
+		input1 = transition4(input1, flows.trans, &index_array[4]);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
+		acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
+	}
+
+	return 0;
+}
+
+/* Execute trie traversal with 4 traversals in parallel (slots 0-3).
+ * input starts zeroed because vsetq_lane_s32() reads its vector argument.
+ */
+static inline int
+search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	      uint32_t *results, int total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[4];
+	struct completion cmplt[4];
+	struct parms parms[4];
+	int32x4_t input = vdupq_n_s32(0);
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		     total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < 4; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/* Check the starting transitions for matches before reading input. */
+	acl_match_check_x4(0, ctx, parms, &flows, index_array);
+
+	while (flows.started > 0) {
+		/* Gather 4 input bytes per stream; every lane is overwritten. */
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, 1);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, 2);
+		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, 3);
+
+		/* Process the 4 bytes of input on each stream. */
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+		input = transition4(input, flows.trans, index_array);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows, index_array);
+	}
+
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_run_sse.h b/src/dpdk22/lib/librte_acl/acl_run_sse.h
new file mode 100644
index 00000000..ad40a674
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_run_sse.h
@@ -0,0 +1,357 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "acl_run.h"
+#include "acl_vect.h"
+
+enum { /* immediates for _mm_shuffle_epi32(); source lane per result lane */
+	SHUFFLE32_SLOT1 = 0xe5,  /* lanes {1, 1, 2, 3}: lane 1 -> lane 0 */
+	SHUFFLE32_SLOT2 = 0xe6,  /* lanes {2, 1, 2, 3}: lane 2 -> lane 0 */
+	SHUFFLE32_SLOT3 = 0xe7,  /* lanes {3, 1, 2, 3}: lane 3 -> lane 0 */
+	SHUFFLE32_SWAP64 = 0x4e, /* lanes {2, 3, 0, 1}: swap 64-bit halves */
+};
+
+static const rte_xmm_t xmm_shuffle_input = { /* shuffle_epi8 idx: byte 0 x4 */
+	.u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c},
+};
+
+static const rte_xmm_t xmm_ones_16 = { /* multiplier for the madd_epi16 sum */
+	.u16 = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static const rte_xmm_t xmm_match_mask = { /* match flag in every 32-bit lane */
+	.u32 = {
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+		RTE_ACL_NODE_MATCH,
+	},
+};
+
+static const rte_xmm_t xmm_index_mask = { /* node index bits in every lane */
+	.u32 = {
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+		RTE_ACL_NODE_INDEX,
+	},
+};
+
+static const rte_xmm_t xmm_range_base = { /* base of the tr_hi byte lookup */
+	.u32 = {
+		0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c,
+	},
+};
+
+/*
+ * Resolve priority for multiple results (sse version).
+ * Compares the priorities of match entry p[transition] with the running
+ * best kept in parms[n].cmplt and keeps, for each category, the result
+ * (rule number) and priority of the higher-priority one; on equal
+ * priority the new entry replaces the saved one.
+ */
+static inline void
+resolve_priority_sse(uint64_t transition, int n, const struct rte_acl_ctx *ctx,
+	struct parms *parms, const struct rte_acl_match_results *p,
+	uint32_t categories)
+{
+	uint32_t x;
+	xmm_t results, priority, results1, priority1, selector;
+	xmm_t *saved_results, *saved_priority;
+
+	for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) {
+
+		saved_results = (xmm_t *)(&parms[n].cmplt->results[x]);
+		saved_priority =
+			(xmm_t *)(&parms[n].cmplt->priority[x]);
+
+		/* get results and priorities for completed trie */
+		results = _mm_loadu_si128(
+			(const xmm_t *)&p[transition].results[x]);
+		priority = _mm_loadu_si128(
+			(const xmm_t *)&p[transition].priority[x]);
+
+		/* if this is not the first completed trie */
+		if (parms[n].cmplt->count != ctx->num_tries) {
+
+			/* get running best results and their priorities */
+			results1 = _mm_loadu_si128(saved_results);
+			priority1 = _mm_loadu_si128(saved_priority);
+
+			/* keep saved values in lanes where saved > new */
+			selector = _mm_cmpgt_epi32(priority1, priority);
+			results = _mm_blendv_epi8(results, results1, selector);
+			priority = _mm_blendv_epi8(priority, priority1,
+				selector);
+		}
+
+		/* save running best results and their priorities */
+		_mm_storeu_si128(saved_results, results);
+		_mm_storeu_si128(saved_priority, priority);
+	}
+}
+
+/* Run acl_match_check() on the two 64-bit transitions held in *indices
+ * (flows slot and slot + 1) and store the returned transitions back.
+ */
+static void
+acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx,
+	struct parms *parms, struct acl_flow_data *flows)
+{
+	uint64_t transition1, transition2;
+
+	/* extract transition from low 64 bits. */
+	transition1 = _mm_cvtsi128_si64(*indices);
+
+	/* swap halves, then extract what was the high 64 bits. */
+	*indices = _mm_shuffle_epi32(*indices, SHUFFLE32_SWAP64);
+	transition2 = _mm_cvtsi128_si64(*indices);
+
+	transition1 = acl_match_check(transition1, slot, ctx,
+		parms, flows, resolve_priority_sse);
+	transition2 = acl_match_check(transition2, slot + 1, ctx,
+		parms, flows, resolve_priority_sse);
+
+	/* repack: transition1 in the low half, transition2 in the high. */
+	*indices = _mm_set_epi64x(transition2, transition1);
+}
+
+/* While any of the 4 transitions in *indices1 / *indices2 has a match_mask
+ * bit set in its low 32 bits, process all 4 flows (slot .. slot + 3).
+ */
+static inline __attribute__((always_inline)) void
+acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
+	struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2,
+	xmm_t match_mask)
+{
+	xmm_t temp;
+
+	/* put low 32 bits of each transition into one register */
+	temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, (__m128)*indices2,
+		0x88);
+	/* test for match node */
+	temp = _mm_and_si128(match_mask, temp);
+
+	while (!_mm_testz_si128(temp, temp)) {
+		acl_process_matches(indices1, slot, ctx, parms, flows);
+		acl_process_matches(indices2, slot + 2, ctx, parms, flows);
+
+		temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1,
+					(__m128)*indices2,
+					0x88);
+		temp = _mm_and_si128(match_mask, temp);
+	}
+}
+
+/* Advance 4 flows by one input byte: refill *indices1 / *indices2 from
+ * trans[] and return next_input with each lane shifted right by CHAR_BIT.
+ */
+static inline __attribute__((always_inline)) xmm_t
+transition4(xmm_t next_input, const uint64_t *trans,
+	xmm_t *indices1, xmm_t *indices2)
+{
+	xmm_t addr, tr_lo, tr_hi;
+	uint64_t trans0, trans2;
+
+	/* Shuffle low 32 into tr_lo and high 32 into tr_hi */
+	ACL_TR_HILO(mm, __m128, *indices1, *indices2, tr_lo, tr_hi);
+
+	 /* Calculate the address (array index) for all 4 transitions. */
+	ACL_TR_CALC_ADDR(mm, 128, addr, xmm_index_mask.x, next_input,
+		xmm_shuffle_input.x, xmm_ones_16.x, xmm_range_base.x,
+		tr_lo, tr_hi);
+
+	 /* Gather 64 bit transitions and pack back into 2 registers. */
+
+	trans0 = trans[_mm_cvtsi128_si32(addr)];
+
+	/* get slot 2 */
+
+	/* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT2);
+	trans2 = trans[_mm_cvtsi128_si32(addr)];
+
+	/* get slot 1 */
+
+	/* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT1);
+	*indices1 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans0);
+
+	/* get slot 3 */
+
+	/* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */
+	addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT3);
+	*indices2 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans2);
+
+	return _mm_srli_epi32(next_input, CHAR_BIT);
+}
+
+/* Execute trie traversal with 8 traversals in parallel: flows 0-3 live in
+ * indices1/indices2, flows 4-7 in indices3/indices4. Always returns 0.
+ */
+static inline int
+search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	uint32_t *results, uint32_t total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_SSE8];
+	struct completion cmplt[MAX_SEARCHES_SSE8];
+	struct parms parms[MAX_SEARCHES_SSE8];
+	xmm_t input0, input1;
+	xmm_t indices1, indices2, indices3, indices4;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_SSE8; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	/*
+	 * indices1 contains index_array[0,1]
+	 * indices2 contains index_array[2,3]
+	 * indices3 contains index_array[4,5]
+	 * indices4 contains index_array[6,7]
+	 */
+
+	indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+	indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+	indices3 = _mm_loadu_si128((xmm_t *) &index_array[4]);
+	indices4 = _mm_loadu_si128((xmm_t *) &index_array[6]);
+
+	 /* Check the starting transitions for matches before reading input. */
+	acl_match_check_x4(0, ctx, parms, &flows,
+		&indices1, &indices2, xmm_match_mask.x);
+	acl_match_check_x4(4, ctx, parms, &flows,
+		&indices3, &indices4, xmm_match_mask.x);
+
+	while (flows.started > 0) {
+
+		/* Gather 4 bytes of input data for each stream. */
+		input0 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+		input1 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 4));
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 1), 1);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 5), 1);
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 2), 2);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 6), 2);
+
+		input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 3), 3);
+		input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 7), 3);
+
+		 /* Process the 4 bytes of input on each stream. */
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		input0 = transition4(input0, flows.trans,
+			&indices1, &indices2);
+		input1 = transition4(input1, flows.trans,
+			&indices3, &indices4);
+
+		 /* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows,
+			&indices1, &indices2, xmm_match_mask.x);
+		acl_match_check_x4(4, ctx, parms, &flows,
+			&indices3, &indices4, xmm_match_mask.x);
+	}
+
+	return 0;
+}
+
+/* Execute trie traversal with 4 traversals in parallel: flows 0-1 live in
+ * indices1, flows 2-3 in indices2. Always returns 0.
+ */
+static inline int
+search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
+	 uint32_t *results, int total_packets, uint32_t categories)
+{
+	int n;
+	struct acl_flow_data flows;
+	uint64_t index_array[MAX_SEARCHES_SSE4];
+	struct completion cmplt[MAX_SEARCHES_SSE4];
+	struct parms parms[MAX_SEARCHES_SSE4];
+	xmm_t input, indices1, indices2;
+
+	acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results,
+		total_packets, categories, ctx->trans_table);
+
+	for (n = 0; n < MAX_SEARCHES_SSE4; n++) {
+		cmplt[n].count = 0;
+		index_array[n] = acl_start_next_trie(&flows, parms, n, ctx);
+	}
+
+	indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]);
+	indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]);
+
+	/* Check the starting transitions for matches before reading input. */
+	acl_match_check_x4(0, ctx, parms, &flows,
+		&indices1, &indices2, xmm_match_mask.x);
+
+	while (flows.started > 0) {
+
+		/* Gather 4 bytes of input data for each stream. */
+		input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0));
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 1), 1);
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 2), 2);
+		input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 3), 3);
+
+		/* Process the 4 bytes of input on each stream. */
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+		input = transition4(input, flows.trans, &indices1, &indices2);
+
+		/* Check for any matches. */
+		acl_match_check_x4(0, ctx, parms, &flows,
+			&indices1, &indices2, xmm_match_mask.x);
+	}
+
+	return 0;
+}
diff --git a/src/dpdk22/lib/librte_acl/acl_vect.h b/src/dpdk22/lib/librte_acl/acl_vect.h
new file mode 100644
index 00000000..6cc19997
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/acl_vect.h
@@ -0,0 +1,116 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_VECT_H_
+#define _RTE_ACL_VECT_H_
+
+/**
+ * @file
+ *
+ * RTE ACL SSE/AVX related header.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Takes 2 SIMD registers containing N transitions each (tr0, tr1).
+ * Shuffles them into a different representation:
+ * lo - contains low 32 bits of the given transitions.
+ * hi - contains high 32 bits of the given transitions.
+ */
+#define	ACL_TR_HILO(P, TC, tr0, tr1, lo, hi)                        do { \
+	lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88);  \
+	hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd);  \
+} while (0)
+
+
+/*
+ * Calculate the address of the next transition for
+ * all types of nodes. Note that only DFA nodes and range
+ * nodes actually transition to another node. Match
+ * nodes are not supposed to be encountered here.
+ * For quad range nodes:
+ * Calculate number of range boundaries that are less than the
+ * input value. Range boundaries for each node are in signed 8 bit,
+ * ordered from -128 to 127.
+ * This is effectively a popcnt of the boundary bytes that the
+ * input byte is greater than.
+ * Single nodes are processed in the same way as quad range nodes.
+*/
+#define ACL_TR_CALC_ADDR(P, S,					\
+	addr, index_mask, next_input, shuffle_input,		\
+	ones_16, range_base, tr_lo, tr_hi)               do {	\
+								\
+	typeof(addr) in, node_type, r, t;			\
+	typeof(addr) dfa_msk, dfa_ofs, quad_ofs;		\
+								\
+	t = _##P##_xor_si##S(index_mask, index_mask);		\
+	in = _##P##_shuffle_epi8(next_input, shuffle_input);	\
+								\
+	/* Calc node type and node addr */			\
+	node_type = _##P##_andnot_si##S(index_mask, tr_lo);	\
+	addr = _##P##_and_si##S(index_mask, tr_lo);		\
+								\
+	/* mask for DFA type(0) nodes */			\
+	dfa_msk = _##P##_cmpeq_epi32(node_type, t);		\
+								\
+	/* DFA calculations. */					\
+	r = _##P##_srli_epi32(in, 30);				\
+	r = _##P##_add_epi8(r, range_base);			\
+	t = _##P##_srli_epi32(in, 24);				\
+	r = _##P##_shuffle_epi8(tr_hi, r);			\
+								\
+	dfa_ofs = _##P##_sub_epi32(t, r);			\
+								\
+	/* QUAD/SINGLE calculations. */				\
+	t = _##P##_cmpgt_epi8(in, tr_hi);			\
+	t = _##P##_sign_epi8(t, t);				\
+	t = _##P##_maddubs_epi16(t, t);				\
+	quad_ofs = _##P##_madd_epi16(t, ones_16);		\
+								\
+	/* blend DFA and QUAD/SINGLE. */			\
+	t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk);	\
+								\
+	/* calculate address for next transitions. */		\
+	addr = _##P##_add_epi32(addr, t);			\
+} while (0)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_VECT_H_ */
diff --git a/src/dpdk22/lib/librte_acl/rte_acl.h b/src/dpdk22/lib/librte_acl/rte_acl.h
new file mode 100644
index 00000000..0979a098
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/rte_acl.h
@@ -0,0 +1,388 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_H_
+#define _RTE_ACL_H_
+
+/**
+ * @file
+ *
+ * RTE Classifier.
+ */
+
+#include <rte_acl_osdep.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define	RTE_ACL_MAX_CATEGORIES	16
+
+#define	RTE_ACL_RESULTS_MULTIPLIER	(XMM_SIZE / sizeof(uint32_t))
+
+#define RTE_ACL_MAX_LEVELS 64
+#define RTE_ACL_MAX_FIELDS 64
+
+union rte_acl_field_types {
+	uint8_t  u8;
+	uint16_t u16;
+	uint32_t u32;
+	uint64_t u64;
+};
+
+enum {
+	RTE_ACL_FIELD_TYPE_MASK = 0, /**< value/prefix length, e.g. 1.2.3.4/32. */
+	RTE_ACL_FIELD_TYPE_RANGE,    /**< low : high range, e.g. 0 : 65535. */
+	RTE_ACL_FIELD_TYPE_BITMASK   /**< value/bit mask, e.g. 0x06/0xff. */
+};
+
+/**
+ * ACL Field definition.
+ * Each field in the ACL rule has an associated definition.
+ * It defines the type of field, its size, its offset in the input buffer,
+ * the field index, and the input index.
+ * For performance reasons, the inner loop of the search function is unrolled
+ * to process four input bytes at a time. This requires the input to be grouped
+ * into sets of 4 consecutive bytes. The loop processes the first input byte as
+ * part of the setup and then subsequent bytes must be in groups of 4
+ * consecutive bytes.
+ */
+struct rte_acl_field_def {
+	uint8_t  type;        /**< type - RTE_ACL_FIELD_TYPE_*. */
+	uint8_t	 size;        /**< size of field 1,2,4, or 8. */
+	uint8_t	 field_index; /**< index of field inside the rule. */
+	uint8_t  input_index; /**< 0-N input index. */
+	uint32_t offset;      /**< offset to start of field. */
+};
+
+/**
+ * ACL build configuration.
+ * Defines the fields of an ACL trie and number of categories to build with.
+ */
+struct rte_acl_config {
+	uint32_t num_categories; /**< Number of categories to build with. */
+	uint32_t num_fields;     /**< Number of field definitions. */
+	struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS];
+	/**< array of field definitions. */
+	size_t max_size;
+	/**< max memory limit for internal run-time structures. */
+};
+
+/**
+ * Defines the value of a field for a rule.
+ */
+struct rte_acl_field {
+	union rte_acl_field_types value;
+	/**< a 1,2,4, or 8 byte value of the field. */
+	union rte_acl_field_types mask_range;
+	/**<
+	 * depending on field type:
+	 * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32,
+	 * range -> 0 : 65535 value=0, mask_range=65535,
+	 * bitmask -> 0x06/0xff value=6, mask_range=0xff.
+	 */
+};
+
+enum {
+	RTE_ACL_TYPE_SHIFT = 29,
+	RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t),
+	RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX,
+	RTE_ACL_MIN_PRIORITY = 0,
+};
+
+#define	RTE_ACL_INVALID_USERDATA	0
+
+#define	RTE_ACL_MASKLEN_TO_BITMASK(v, s)	\
+((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v))))
+
+/**
+ * Miscellaneous data for ACL rule.
+ */
+struct rte_acl_rule_data {
+	uint32_t category_mask; /**< Mask of categories for that rule. */
+	int32_t  priority;      /**< Priority for that rule. */
+	uint32_t userdata;      /**< User data associated with the rule. */
+};
+
+/**
+ * Defines single ACL rule.
+ * data - miscellaneous data for the rule.
+ * field[] - value and mask or range for each field.
+ */
+#define	RTE_ACL_RULE_DEF(name, fld_num)	struct name {\
+	struct rte_acl_rule_data data;               \
+	struct rte_acl_field field[fld_num];         \
+}
+
+RTE_ACL_RULE_DEF(rte_acl_rule, 0);
+
+#define	RTE_ACL_RULE_SZ(fld_num)	\
+	(sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num))
+
+
+/** Max number of characters in name.*/
+#define	RTE_ACL_NAMESIZE		32
+
+/**
+ * Parameters used when creating the ACL context.
+ */
+struct rte_acl_param {
+	const char *name;         /**< Name of the ACL context. */
+	int         socket_id;    /**< Socket ID to allocate memory for. */
+	uint32_t    rule_size;    /**< Size of each rule. */
+	uint32_t    max_rule_num; /**< Maximum number of rules. */
+};
+
+
+/**
+ * Create a new ACL context.
+ *
+ * @param param
+ *   Parameters used to create and initialise the ACL context.
+ * @return
+ *   Pointer to ACL context structure that is used in future ACL
+ *   operations, or NULL on error, with error code set in rte_errno.
+ *   Possible rte_errno errors include:
+ *   - EINVAL - invalid parameter passed to function
+ */
+struct rte_acl_ctx *
+rte_acl_create(const struct rte_acl_param *param);
+
+/**
+ * Find an existing ACL context object and return a pointer to it.
+ *
+ * @param name
+ *   Name of the ACL context as passed to rte_acl_create()
+ * @return
+ *   Pointer to ACL context or NULL if object not found
+ *   with rte_errno set appropriately. Possible rte_errno values include:
+ *    - ENOENT - value not available for return
+ */
+struct rte_acl_ctx *
+rte_acl_find_existing(const char *name);
+
+/**
+ * De-allocate all memory used by ACL context.
+ *
+ * @param ctx
+ *   ACL context to free
+ */
+void
+rte_acl_free(struct rte_acl_ctx *ctx);
+
+/**
+ * Add rules to an existing ACL context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to add patterns to.
+ * @param rules
+ *   Array of rules to add to the ACL context.
+ *   Note that all fields in rte_acl_rule structures are expected
+ *   to be in host byte order.
+ *   Each rule is expected to be in the same format and not to exceed the
+ *   size specified at ACL context creation time.
+ * @param num
+ *   Number of elements in the input array of rules.
+ * @return
+ *   - -ENOMEM if there is no space in the ACL context for these rules.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules,
+	uint32_t num);
+
+/**
+ * Delete all rules from the ACL context.
+ * This function is not multi-thread safe.
+ * Note that internal run-time structures are not affected.
+ *
+ * @param ctx
+ *   ACL context to delete rules from.
+ */
+void
+rte_acl_reset_rules(struct rte_acl_ctx *ctx);
+
+/**
+ * Analyze set of rules and build required internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to build.
+ * @param cfg
+ *   Pointer to struct rte_acl_config - defines build parameters.
+ * @return
+ *   - -ENOMEM if couldn't allocate enough memory.
+ *   - -EINVAL if the parameters are invalid.
+ *   - Negative error code if operation failed.
+ *   - Zero if operation completed successfully.
+ */
+int
+rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg);
+
+/**
+ * Delete all rules from the ACL context and
+ * destroy all internal run-time structures.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ *   ACL context to reset.
+ */
+void
+rte_acl_reset(struct rte_acl_ctx *ctx);
+
+/**
+ *  Available implementations of ACL classify.
+ */
+enum rte_acl_classify_alg {
+	RTE_ACL_CLASSIFY_DEFAULT = 0,
+	RTE_ACL_CLASSIFY_SCALAR = 1,  /**< generic implementation. */
+	RTE_ACL_CLASSIFY_SSE = 2,     /**< requires SSE4.1 support. */
+	RTE_ACL_CLASSIFY_AVX2 = 3,    /**< requires AVX2 support. */
+	RTE_ACL_CLASSIFY_NEON = 4,    /**< requires NEON support. */
+	RTE_ACL_CLASSIFY_NUM          /* should always be the last one. */
+};
+
+/**
+ * Perform search for a matching ACL rule for each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or a multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for a given input buffer and category,
+ * then the rule with the highest priority will be returned as a match.
+ * Note that it is the caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in input data buffers are supposed to be in
+ *   network byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify(const struct rte_acl_ctx *ctx,
+		 const uint8_t **data,
+		 uint32_t *results, uint32_t num,
+		 uint32_t categories);
+
+/**
+ * Perform search using specified algorithm for a matching ACL rule for
+ * each input data buffer.
+ * Each input data buffer can have up to *categories* matches.
+ * That implies that results array should be big enough to hold
+ * (categories * num) elements.
+ * Also categories parameter should be either one or a multiple of
+ * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES.
+ * If more than one rule is applicable for a given input buffer and category,
+ * then the rule with the highest priority will be returned as a match.
+ * Note that it is the caller's responsibility to ensure that input parameters
+ * are valid and point to correct memory locations.
+ *
+ * @param ctx
+ *   ACL context to search with.
+ * @param data
+ *   Array of pointers to input data buffers to perform search.
+ *   Note that all fields in input data buffers are supposed to be in
+ *   network byte order (MSB).
+ * @param results
+ *   Array of search results, *categories* results per each input data buffer.
+ * @param num
+ *   Number of elements in the input data buffers array.
+ * @param categories
+ *   Number of maximum possible matches for each input buffer, one possible
+ *   match per category.
+ * @param alg
+ *   Algorithm to be used for the search.
+ *   It is the caller's responsibility to ensure that the value refers to the
+ *   existing algorithm, and that it could be run on the given CPU.
+ * @return
+ *   zero on successful completion.
+ *   -EINVAL for incorrect arguments.
+ */
+extern int
+rte_acl_classify_alg(const struct rte_acl_ctx *ctx,
+		 const uint8_t **data,
+		 uint32_t *results, uint32_t num,
+		 uint32_t categories,
+		 enum rte_acl_classify_alg alg);
+
+/**
+ * Override the default classifier function for a given ACL context.
+ * @param ctx
+ *   ACL context to change classify function for.
+ * @param alg
+ *   New default classify algorithm for given ACL context.
+ *   It is the caller's responsibility to ensure that the value refers to the
+ *   existing algorithm, and that it could be run on the given CPU.
+ * @return
+ *   - -EINVAL if the parameters are invalid.
+ *   - Zero if operation completed successfully.
+ */
+extern int
+rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx,
+	enum rte_acl_classify_alg alg);
+
+/**
+ * Dump an ACL context structure to the console.
+ *
+ * @param ctx
+ *   ACL context to dump.
+ */
+void
+rte_acl_dump(const struct rte_acl_ctx *ctx);
+
+/**
+ * Dump all ACL context structures to the console.
+ */
+void
+rte_acl_list_dump(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ACL_H_ */
diff --git a/src/dpdk22/lib/librte_acl/rte_acl_osdep.h b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h
new file mode 100644
index 00000000..41f7e3d4
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/rte_acl_osdep.h
@@ -0,0 +1,80 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ACL_OSDEP_H_
+#define _RTE_ACL_OSDEP_H_
+
+/**
+ * @file
+ *
+ * RTE ACL DPDK/OS dependent file.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <sys/queue.h>
+
+/*
+ * Common defines.
+ */
+
+#define DIM(x) RTE_DIM(x)
+
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_memory.h>
+#include <rte_log.h>
+#include <rte_memcpy.h>
+#include <rte_prefetch.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+#endif /* _RTE_ACL_OSDEP_H_ */
diff --git a/src/dpdk22/lib/librte_acl/tb_mem.h b/src/dpdk22/lib/librte_acl/tb_mem.h
new file mode 100644
index 00000000..ca7af966
--- /dev/null
+++ b/src/dpdk22/lib/librte_acl/tb_mem.h
@@ -0,0 +1,76 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TB_MEM_H_
+#define _TB_MEM_H_
+
+/**
+ * @file
+ *
+ * RTE ACL temporary (build phase) memory management.
+ * Contains structures and functions to manage temporary (used by build only)
+ * memory. Memory is allocated in large blocks to speed up 'free' when the
+ * trie is destroyed (at the end of the build phase).
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_acl_osdep.h>
+#include <setjmp.h>
+
+struct tb_mem_block {
+	struct tb_mem_block *next;
+	struct tb_mem_pool  *pool;
+	size_t               size;
+	uint8_t             *mem;
+};
+
+struct tb_mem_pool {
+	struct tb_mem_block *block;
+	size_t               alignment;
+	size_t               min_alloc;
+	size_t               alloc;
+	/* jump target in case of memory allocation failure. */
+	sigjmp_buf           fail;
+};
+
+void *tb_alloc(struct tb_mem_pool *pool, size_t size);
+void tb_free_pool(struct tb_mem_pool *pool);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TB_MEM_H_ */
-- 
cgit 1.2.3-korg