summaryrefslogtreecommitdiffstats
path: root/vnet/vnet/classify
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/classify')
-rw-r--r--vnet/vnet/classify/README180
-rw-r--r--vnet/vnet/classify/input_acl.c293
-rw-r--r--vnet/vnet/classify/input_acl.h54
-rw-r--r--vnet/vnet/classify/ip_classify.c384
-rw-r--r--vnet/vnet/classify/vnet_classify.c1895
-rw-r--r--vnet/vnet/classify/vnet_classify.h414
6 files changed, 3220 insertions, 0 deletions
diff --git a/vnet/vnet/classify/README b/vnet/vnet/classify/README
new file mode 100644
index 00000000000..1ef5ab5ac34
--- /dev/null
+++ b/vnet/vnet/classify/README
@@ -0,0 +1,180 @@
+=== vnet classifier theory of operation ===
+
+The vnet classifier trades off simplicity and perf / scale
+characteristics. At a certain level, it's a dumb robot. Given an
+incoming packet, search an ordered list of (mask, match) tables. If
+the classifier finds a matching entry, take the indicated action. If
+not, take a last-resort action.
+
+We use the MMX-unit to match or hash 16 octets at a time. For hardware
+backward compatibility, the code does not [currently] use 256-bit
+(32-octet) vector instructions.
+
+Effective use of the classifier centers around building table lists
+which "hit" as soon as practicable. In many cases, established
+sessions hit in the first table. In this mode of operation, the
+classifier easily processes multiple MPPS / core - even with millions
+of sessions in the data base. Searching 357 tables on a regular basis
+will neatly solve the halting problem.
+
+==== Basic operation ====
+
+The classifier mask-and-match operation proceeds as follows. Given a
+starting classifier table index, lay hands on the indicated mask
+vector. When building tables, we arrange for the mask to obey
+mmx-unit (16-octet) alignment.
+
+We know that the first octet of packet data starts on a cache-line
+boundary. Further, it's reasonably likely that folks won't want to use
+the generalized classifier on the L2 header; preferring to decode the
+Ethertype manually. That scheme makes it easy to select among ip4 /
+ip6 / MPLS, etc. classifier table sets.
+
+A no-vlan-tag L2 header is 14 octets long. A typical ipv4 header
+begins with the octets 0x4500: version=4, header_length=5, DSCP=0,
+ECN=0. If one doesn't intend to classify on (DSCP, ECN) - the typical
+case - we program the classifier to skip the first 16-octet vector.
+
+To classify untagged ipv4 packets on source address, we program the
+classifier to skip one vector, and mask-and-match one vector.
+
+The basic match-and-match operation looks like this:
+
+ switch (t->match_n_vectors)
+ {
+ case 1:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ break;
+
+ case 2:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ break;
+
+ <etc>
+ }
+
+ result_mask = u32x4_zero_byte_mask (result);
+ if (result_mask == 0xffff)
+ return (v);
+
+Net of setup, it costs a couple of clock cycles to mask-and-match 16
+octets.
+
+At the risk of belaboring an obvious point, the control-plane
+'''must''' pay attention to detail. When skipping one (or more)
+vectors, masks and matches must reflect that decision. See
+.../vnet/vnet/classify/vnet_classify.c:unformat_classify_[mask|match]. Note
+that vec_validate (xxx, 13) creates a 14-element vector.
+
+==== Creating a classifier table ====
+
+To create a new classifier table via the control-plane API, send a
+"classify_add_del_table" message. The underlying action routine,
+vnet_classify_add_del_table(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+ int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Mask,
+skip(_n_vectors) and match(_n_vectors) are as described above. Mask
+need not be aligned, but it must be match*16 octets in length. To
+avoid having your head explode, be absolutely certain that '''only'''
+the bits you intend to match on are set.
+
+The classifier uses thread-safe, no-reader-locking-required
+bounded-index extensible hashing. Nbuckets is the [fixed] size of the
+hash bucket vector. The algorithm works in constant time regardless of
+hash collisions, but wastes space when the bucket array is too
+small. A good rule of thumb: let nbuckets = approximate number of
+entries expected.
+
+At a signficant cost in complexity, it would be possible to resize the
+bucket array dynamically. We have no plans to implement that function.
+
+Each classifier table has its own clib mheap memory allocation
+arena. To pick the memory_size parameter, note that each classifier
+table entry needs 16*(1 + match_n_vectors) bytes. Within reason, aim a
+bit high. Clib mheap memory uses o/s level virtual memory - not wired
+or hugetlb memory - so it's best not to scrimp on size.
+
+The "next_table_index" parameter is as described: the pool index in
+vnet_classify_main.tables of the next table to search. Code ~0 to
+indicate the end of the table list. 0 is a valid table index!
+
+We often create classification tables in reverse order -
+last-table-searched to first-table-searched - so we can easily set
+this parameter. Of course, one can manually adjust the data structure
+after-the-fact.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the "miss_next_index"
+parameter as a vpp graph-node next index. When packet classification
+fails to produce a match, ip_classify_inline sends packets to the
+indicated disposition. A classifier application might program this
+parameter to send packets which don't match an existing session to a
+"first-sign-of-life, create-new-session" node.
+
+Finally, the is_add parameter indicates whether to add or delete the
+indicated table. The delete case implicitly terminates all sessions
+with extreme prejudice, by freeing the specified clib mheap.
+
+==== Creating a classifier session ====
+
+To create a new classifier session via the control-plane API, send a
+"classify_add_del_session" message. The underlying action routine,
+vnet_classify_add_del_session(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Table
+index specifies the table which receives the new session / contains
+the session to delete depending on is_add.
+
+Match is the key for the indicated session. It need not be aligned,
+but it must be table->match_n_vectors*16 octets in length. As a
+courtesy, vnet_classify_add_del_session applies the table's mask to
+the stored key-value. In this way, one can create a session by passing
+unmasked (packet_data + offset) as the "match" parameter, and end up
+with unconfusing session keys.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the per-session
+hit_next_index parameter as a vpp graph-node next index. When packet
+classification produces a match, ip_classify_inline sends packets to
+the indicated disposition.
+
+ip4/6_classify place the per-session opaque_index parameter into
+vnet_buffer(b)->l2_classify.opaque_index; a slight misnomer, but
+anyhow classifier applications can send session-hit packets to
+specific graph nodes, with useful values in buffer metadata. Depending
+on the required semantics, we send known-session traffic to a certain
+node, with e.g. a session pool index in buffer metadata. It's totally
+up to the control-plane and the specific use-case.
+
+Finally, nodes such as ip4/6-classify apply the advance parameter as a
+[signed!] argument to vlib_buffer_advance(...); to "consume" a
+networking layer. Example: if we classify incoming tunneled IP packets
+by (inner) source/dest address and source/dest port, we might choose
+to decapsulate and reencapsulate the inner packet. In such a case,
+program the advance parameter to perform the tunnel decapsulation, and
+program next_index to send traffic to a node which uses
+e.g. opaque_index to output traffic on a specific tunnel interface.
diff --git a/vnet/vnet/classify/input_acl.c b/vnet/vnet/classify/input_acl.c
new file mode 100644
index 00000000000..2c533d1170c
--- /dev/null
+++ b/vnet/vnet/classify/input_acl.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+input_acl_main_t input_acl_main;
+
+static int
+vnet_inacl_ip_feature_enable (vlib_main_t * vnm,
+ input_acl_main_t *am,
+ u32 sw_if_index,
+ input_acl_table_id_t tid,
+ int feature_enable)
+{
+
+ if (tid == INPUT_ACL_TABLE_L2)
+ {
+ l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ACL,
+ feature_enable);
+ }
+ else
+ { /* IP[46] */
+ ip_lookup_main_t * lm;
+ ip_config_main_t * ipcm;
+ ip4_rx_feature_type_t ftype;
+ u32 ci;
+
+ if (tid == INPUT_ACL_TABLE_IP4)
+ {
+ lm = &ip4_main.lookup_main;
+ ftype = IP4_RX_FEATURE_CHECK_ACCESS;
+ }
+ else
+ {
+ lm = &ip6_main.lookup_main;
+ ftype = IP6_RX_FEATURE_CHECK_ACCESS;
+ }
+
+ ipcm = &lm->rx_config_mains[VNET_UNICAST];
+
+ ci = ipcm->config_index_by_sw_if_index[sw_if_index];
+ ci = ((feature_enable)
+ ? vnet_config_add_feature
+ : vnet_config_del_feature)
+ (vnm, &ipcm->config_main, ci, ftype,
+ /* config data */ 0,
+ /* # bytes of config data */ 0);
+
+ ipcm->config_index_by_sw_if_index[sw_if_index] = ci;
+ am->vnet_config_main[tid] = &ipcm->config_main;
+ }
+
+ return 0;
+}
+
+int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add)
+{
+ input_acl_main_t * am = &input_acl_main;
+ vnet_classify_main_t * vcm = am->vnet_classify_main;
+ u32 acl[INPUT_ACL_N_TABLES] = {ip4_table_index, ip6_table_index,
+ l2_table_index};
+ u32 ti;
+
+ /* Assume that we've validated sw_if_index in the API layer */
+
+ for (ti = 0; ti < INPUT_ACL_N_TABLES; ti++)
+ {
+ if (acl[ti] == ~0)
+ continue;
+
+ if (pool_is_free_index (vcm->tables, acl[ti]))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ vec_validate_init_empty
+ (am->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+ /* Reject any DEL operation with wrong sw_if_index */
+ if (!is_add &&
+ (acl[ti] != am->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+ {
+ clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+ sw_if_index, acl[ti]);
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ /* Return ok on ADD operaton if feature is already enabled */
+ if (is_add &&
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+ return 0;
+
+ vnet_inacl_ip_feature_enable (vm, am, sw_if_index, ti, is_add);
+
+ if (is_add)
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] = acl[ti];
+ else
+ am->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_input_acl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ u32 sw_if_index = ~0;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ u32 l2_table_index = ~0;
+ u32 is_add = 1;
+ u32 idx_cnt = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ ;
+ else if (unformat (input, "ip4-table %d", &ip4_table_index))
+ idx_cnt++;
+ else if (unformat (input, "ip6-table %d", &ip6_table_index))
+ idx_cnt++;
+ else if (unformat (input, "l2-table %d", &l2_table_index))
+ idx_cnt++;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Interface must be specified.");
+
+ if (!idx_cnt)
+ return clib_error_return (0, "Table index should be specified.");
+
+ if (idx_cnt > 1)
+ return clib_error_return (0, "Only one table index per API is allowed.");
+
+ rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index,
+ ip6_table_index, l2_table_index, is_add);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+ return clib_error_return (0, "No such interface");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "No such classifier table");
+ }
+ return 0;
+}
+
+/*
+ * Configure interface to enable/disble input ACL feature:
+ * intfc - interface name to be configured as input ACL
+ * Ip4-table <index> [del] - enable/disable IP4 input ACL
+ * Ip6-table <index> [del] - enable/disable IP6 input ACL
+ * l2-table <index> [del] - enable/disable Layer2 input ACL
+ *
+ * Note: Only one table index per API call is allowed.
+ *
+ */
+VLIB_CLI_COMMAND (set_input_acl_command, static) = {
+ .path = "set interface input acl",
+ .short_help =
+ "set interface input acl intfc <int> [ip4-table <index>]\n"
+ " [ip6-table <index>] [l2-table <index>] [del]",
+ .function = set_input_acl_command_fn,
+};
+
+clib_error_t *input_acl_init (vlib_main_t *vm)
+{
+ input_acl_main_t * am = &input_acl_main;
+ clib_error_t * error = 0;
+
+ if ((error = vlib_call_init_function (vm, ip_inacl_init)))
+ return error;
+
+ am->vlib_main = vm;
+ am->vnet_main = vnet_get_main();
+ am->vnet_classify_main = &vnet_classify_main;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (input_acl_init);
+
+uword unformat_acl_type (unformat_input_t * input, va_list * args)
+{
+ u32 * acl_type = va_arg (*args, u32 *);
+ u32 tid = INPUT_ACL_N_TABLES;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4"))
+ tid = INPUT_ACL_TABLE_IP4;
+ else if (unformat (input, "ip6"))
+ tid = INPUT_ACL_TABLE_IP6;
+ else if (unformat (input, "l2"))
+ tid = INPUT_ACL_TABLE_L2;
+ else
+ break;
+ }
+
+ *acl_type = tid;
+ return 1;
+}
+
+u8 * format_vnet_inacl_info (u8 * s, va_list * va)
+{
+ input_acl_main_t * am = va_arg (*va, input_acl_main_t *);
+ int sw_if_idx = va_arg (*va, int);
+ u32 tid = va_arg (*va, u32);
+
+ if (tid == ~0)
+ {
+ s = format (s, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+ "Interface name");
+ return s;
+ }
+
+ s = format (s, "%10d%20d\t\t%U", sw_if_idx, tid,
+ format_vnet_sw_if_index_name, am->vnet_main, sw_if_idx);
+
+ return s;
+}
+
+static clib_error_t *
+show_inacl_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ input_acl_main_t * am = &input_acl_main;
+ u32 type = INPUT_ACL_N_TABLES;
+ int i;
+ u32 * vec_tbl;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "type %U", unformat_acl_type, &type))
+ ;
+ else
+ break;
+ }
+
+ if (type == INPUT_ACL_N_TABLES)
+ return clib_error_return (0, "Invalid input ACL table type.");
+
+ vec_tbl = am->classify_table_index_by_sw_if_index[type];
+
+ if (vec_len(vec_tbl))
+ vlib_cli_output (vm, "%U", format_vnet_inacl_info, am, ~0 /* hdr */, ~0);
+ else
+ vlib_cli_output (vm, "No input ACL tables configured");
+
+ for (i = 0; i < vec_len (vec_tbl); i++)
+ {
+ if (vec_elt(vec_tbl, i) == ~0)
+ continue;
+
+ vlib_cli_output (vm, "%U", format_vnet_inacl_info,
+ am, i, vec_elt(vec_tbl, i));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_inacl_command, static) = {
+ .path = "show inacl",
+ .short_help = "show inacl type [ip4|ip6|l2]",
+ .function = show_inacl_command_fn,
+};
diff --git a/vnet/vnet/classify/input_acl.h b/vnet/vnet/classify/input_acl.h
new file mode 100644
index 00000000000..7ffc189f053
--- /dev/null
+++ b/vnet/vnet/classify/input_acl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_input_acl_h__
+#define __included_vnet_input_acl_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+ INPUT_ACL_TABLE_IP4,
+ INPUT_ACL_TABLE_IP6,
+ INPUT_ACL_TABLE_L2,
+ INPUT_ACL_N_TABLES,
+} input_acl_table_id_t;
+
+typedef enum {
+ ACL_NEXT_INDEX_DENY,
+ ACL_NEXT_INDEX_N_NEXT,
+} acl_next_index_t;
+
+typedef struct {
+
+ /* classifier table vectors */
+ u32 * classify_table_index_by_sw_if_index [INPUT_ACL_N_TABLES];
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ vnet_classify_main_t * vnet_classify_main;
+ vnet_config_main_t * vnet_config_main [INPUT_ACL_N_TABLES];
+} input_acl_main_t;
+
+extern input_acl_main_t input_acl_main;
+
+int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 ip4_table_index,
+ u32 ip6_table_index,
+ u32 l2_table_index, u32 is_add);
+
+#endif /* __included_vnet_input_acl_h__ */
diff --git a/vnet/vnet/classify/ip_classify.c b/vnet/vnet/classify/ip_classify.c
new file mode 100644
index 00000000000..c922608547c
--- /dev/null
+++ b/vnet/vnet/classify/ip_classify.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/classify/vnet_classify.h>
+
+typedef struct {
+ u32 next_index;
+ u32 table_index;
+ u32 entry_index;
+} ip_classify_trace_t;
+
+/* packet trace format function */
+static u8 * format_ip_classify_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip_classify_trace_t * t = va_arg (*args, ip_classify_trace_t *);
+
+ s = format (s, "IP_CLASSIFY: next_index %d, table %d, entry %d",
+ t->next_index, t->table_index, t->entry_index);
+ return s;
+}
+
+vlib_node_registration_t ip4_classify_node;
+vlib_node_registration_t ip6_classify_node;
+
+#define foreach_ip_classify_error \
+_(MISS, "Classify misses") \
+_(HIT, "Classify hits") \
+_(CHAIN_HIT, "Classify hits after chain walk")
+
+typedef enum {
+#define _(sym,str) IP_CLASSIFY_ERROR_##sym,
+ foreach_ip_classify_error
+#undef _
+ IP_CLASSIFY_N_ERROR,
+} ip_classify_error_t;
+
+static char * ip_classify_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip_classify_error
+#undef _
+};
+
+static inline uword
+ip_classify_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, * from, * to_next;
+ ip_lookup_next_t next_index;
+ vnet_classify_main_t * vcm = &vnet_classify_main;
+ ip_lookup_main_t * lm;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+
+ if (is_ip4)
+ lm = &ip4_main.lookup_main;
+ else
+ lm = &ip6_main.lookup_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t * b0, * b1;
+ u32 bi0, bi1;
+ u8 * h0, * h1;
+ u32 adj_index0, adj_index1;
+ ip_adjacency_t * adj0, * adj1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t * t0, * t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t * p1, * p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+ h1 = b1->data;
+
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ table_index0 = adj0->classify_table_index;
+
+ adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ adj1 = ip_get_adjacency (lm, adj_index1);
+ table_index1 = adj1->classify_table_index;
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ vnet_buffer(b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer(b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+ u8 * h0;
+ u32 adj_index0;
+ ip_adjacency_t * adj0;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = ip_get_adjacency (lm, adj_index0);
+ table_index0 = adj0->classify_table_index;
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+ vnet_buffer(b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer(b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = IP_LOOKUP_NEXT_MISS;
+ u32 table_index0;
+ vnet_classify_table_t * t0;
+ vnet_classify_entry_t * e0;
+ u64 hash0;
+ u8 * h0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]);
+ vnet_classify_table_t * tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer(p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer(p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = b0->data;
+ table_index0 = vnet_buffer(b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+ vnet_buffer(b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE(table_index0 != ~0))
+ {
+ hash0 = vnet_buffer(b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0,
+ now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < IP_LOOKUP_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ }
+ else
+ {
+ while (1)
+ {
+ if (t0->next_table_index != ~0)
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ next0 = (t0->miss_next_index < IP_LOOKUP_N_NEXT)?
+ t0->miss_next_index:next0;
+ misses++;
+ break;
+ }
+
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer(b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < IP_LOOKUP_N_NEXT)?
+ e0->next_index:next0;
+ hits++;
+ chain_hits++;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_classify_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->entry_index = e0 ? e0 - t0->entries : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_MISS,
+ misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_CLASSIFY_ERROR_CHAIN_HIT,
+ chain_hits);
+ return frame->n_vectors;
+}
+
+static uword
+ip4_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_classify_inline (vm, node, frame, 1 /* is_ip4 */);
+}
+
+
+VLIB_REGISTER_NODE (ip4_classify_node) = {
+ .function = ip4_classify,
+ .name = "ip4-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_classify_trace,
+ .n_errors = ARRAY_LEN(ip_classify_error_strings),
+ .error_strings = ip_classify_error_strings,
+
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip4-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip4-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip4-arp",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", /* probably not... */
+ [IP_LOOKUP_NEXT_MAP] = "ip4-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip4-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip4-sixrd",
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip4-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip4-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip4-pop-hop-by-hop",
+ },
+};
+
+static uword
+ip6_classify (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_classify_inline (vm, node, frame, 0 /* is_ip4 */);
+}
+
+
+VLIB_REGISTER_NODE (ip6_classify_node) = {
+ .function = ip6_classify,
+ .name = "ip6-classify",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_classify_trace,
+ .n_errors = ARRAY_LEN(ip_classify_error_strings),
+ .error_strings = ip_classify_error_strings,
+
+ .n_next_nodes = IP_LOOKUP_N_NEXT,
+ .next_nodes = {
+ [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
+ [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
+ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
+ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
+ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
+ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
+ [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", /* probably not... */
+ [IP_LOOKUP_NEXT_MAP] = "ip6-map",
+ [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
+ [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
+ [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
+ [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",
+ [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",
+ },
+};
+
+static clib_error_t *
+ip_classify_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_classify_init);
diff --git a/vnet/vnet/classify/vnet_classify.c b/vnet/vnet/classify/vnet_classify.c
new file mode 100644
index 00000000000..43acb024033
--- /dev/null
+++ b/vnet/vnet/classify/vnet_classify.c
@@ -0,0 +1,1895 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+#include <vnet/ip/ip.h>
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/l2/l2_classify.h> /* for L2_CLASSIFY_NEXT_xxx */
+
+#if VALIDATION_SCAFFOLDING
+/* Validation scaffolding */
+void mv (vnet_classify_table_t * t)
+{
+ void * oldheap;
+
+ oldheap = clib_mem_set_heap (t->mheap);
+ clib_mem_validate();
+ clib_mem_set_heap (oldheap);
+}
+
+void rogue (vnet_classify_table_t * t)
+{
+ int i, j, k;
+ vnet_classify_entry_t * v, * save_v;
+ u32 active_elements = 0;
+ vnet_classify_bucket_t * b;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ continue;
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+ v = vnet_classify_entry_at_index
+ (t, save_v, j*t->entries_per_page + k);
+
+ if (vnet_classify_entry_is_busy (v))
+ active_elements++;
+ }
+ }
+ }
+
+ if (active_elements != t->active_elements)
+ clib_warning ("found %u expected %u elts", active_elements,
+ t->active_elements);
+}
+#else
+void mv (vnet_classify_table_t * t) { }
+void rogue (vnet_classify_table_t * t) { }
+#endif
+
+vnet_classify_table_t *
+vnet_classify_new_table (vnet_classify_main_t *cm,
+ u8 * mask, u32 nbuckets, u32 memory_size,
+ u32 skip_n_vectors,
+ u32 match_n_vectors)
+{
+ vnet_classify_table_t * t;
+ void * oldheap;
+
+ nbuckets = 1 << (max_log2 (nbuckets));
+
+ pool_get_aligned (cm->tables, t, CLIB_CACHE_LINE_BYTES);
+ memset(t, 0, sizeof (*t));
+
+ vec_validate_aligned (t->mask, match_n_vectors - 1, sizeof(u32x4));
+ memcpy (t->mask, mask, match_n_vectors * sizeof (u32x4));
+
+ t->next_table_index = ~0;
+ t->nbuckets = nbuckets;
+ t->log2_nbuckets = max_log2 (nbuckets);
+ t->match_n_vectors = match_n_vectors;
+ t->skip_n_vectors = skip_n_vectors;
+ t->entries_per_page = 2;
+
+ t->mheap = mheap_alloc (0 /* use VM */, memory_size);
+
+ vec_validate_aligned (t->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES);
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ t->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+
+ clib_mem_set_heap (oldheap);
+ return (t);
+}
+
+void vnet_classify_delete_table_index (vnet_classify_main_t *cm,
+ u32 table_index)
+{
+ vnet_classify_table_t * t;
+
+ /* Tolerate multiple frees, up to a point */
+ if (pool_is_free_index (cm->tables, table_index))
+ return;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+ if (t->next_table_index != ~0)
+ vnet_classify_delete_table_index (cm, t->next_table_index);
+
+ vec_free (t->mask);
+ vec_free (t->buckets);
+ mheap_free (t->mheap);
+
+ pool_put (cm->tables, t);
+}
+
+static vnet_classify_entry_t *
+vnet_classify_entry_alloc (vnet_classify_table_t * t, u32 log2_pages)
+{
+ vnet_classify_entry_t * rv = 0;
+#define _(size) \
+ vnet_classify_entry_##size##_t * rv##size = 0;
+ foreach_size_in_u32x4;
+#undef _
+
+ void * oldheap;
+
+ ASSERT (t->writer_lock[0]);
+ if (log2_pages >= vec_len (t->freelists) || t->freelists [log2_pages] == 0)
+ {
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ vec_validate (t->freelists, log2_pages);
+
+ switch(t->match_n_vectors)
+ {
+ /* Euchre the vector allocator into allocating the right sizes */
+#define _(size) \
+ case size: \
+ vec_validate_aligned \
+ (rv##size, ((1<<log2_pages)*t->entries_per_page) - 1, \
+ CLIB_CACHE_LINE_BYTES); \
+ rv = (vnet_classify_entry_t *) rv##size; \
+ break;
+ foreach_size_in_u32x4;
+#undef _
+
+ default:
+ abort();
+ }
+
+ clib_mem_set_heap (oldheap);
+ goto initialize;
+ }
+ rv = t->freelists[log2_pages];
+ t->freelists[log2_pages] = rv->next_free;
+
+initialize:
+ ASSERT(rv);
+ ASSERT (vec_len(rv) == (1<<log2_pages)*t->entries_per_page);
+
+ switch (t->match_n_vectors)
+ {
+#define _(size) \
+ case size: \
+ if(vec_len(rv)) \
+ memset (rv, 0xff, sizeof (*rv##size) * vec_len(rv)); \
+ break;
+ foreach_size_in_u32x4;
+#undef _
+
+ default:
+ abort();
+ }
+
+ return rv;
+}
+
+static void
+vnet_classify_entry_free (vnet_classify_table_t * t,
+ vnet_classify_entry_t * v)
+{
+ u32 free_list_index;
+
+ ASSERT (t->writer_lock[0]);
+
+ free_list_index = min_log2(vec_len(v)/t->entries_per_page);
+
+ ASSERT(vec_len (t->freelists) > free_list_index);
+
+ v->next_free = t->freelists[free_list_index];
+ t->freelists[free_list_index] = v;
+}
+
+static inline void make_working_copy
+(vnet_classify_table_t * t, vnet_classify_bucket_t * b)
+{
+ vnet_classify_entry_t * v;
+ vnet_classify_bucket_t working_bucket __attribute__((aligned (8)));
+ void * oldheap;
+ vnet_classify_entry_t * working_copy;
+#define _(size) \
+ vnet_classify_entry_##size##_t * working_copy##size = 0;
+ foreach_size_in_u32x4;
+#undef _
+ u32 cpu_number = os_get_cpu_number();
+
+ if (cpu_number >= vec_len (t->working_copies))
+ {
+ oldheap = clib_mem_set_heap (t->mheap);
+ vec_validate (t->working_copies, cpu_number);
+ clib_mem_set_heap (oldheap);
+ }
+
+ /*
+ * working_copies are per-cpu so that near-simultaneous
+ * updates from multiple threads will not result in sporadic, spurious
+ * lookup failures.
+ */
+ working_copy = t->working_copies[cpu_number];
+
+ t->saved_bucket.as_u64 = b->as_u64;
+ oldheap = clib_mem_set_heap (t->mheap);
+
+ if ((1<<b->log2_pages)*t->entries_per_page > vec_len (working_copy))
+ {
+ switch(t->match_n_vectors)
+ {
+ /* Euchre the vector allocator into allocating the right sizes */
+#define _(size) \
+ case size: \
+ working_copy##size = (void *) working_copy; \
+ vec_validate_aligned \
+ (working_copy##size, \
+ ((1<<b->log2_pages)*t->entries_per_page) - 1, \
+ CLIB_CACHE_LINE_BYTES); \
+ working_copy = (void *) working_copy##size; \
+ break;
+ foreach_size_in_u32x4;
+#undef _
+
+ default:
+ abort();
+ }
+ t->working_copies[cpu_number] = working_copy;
+ }
+
+ _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page;
+ clib_mem_set_heap (oldheap);
+
+ v = vnet_classify_get_entry (t, b->offset);
+
+ switch(t->match_n_vectors)
+ {
+#define _(size) \
+ case size: \
+ memcpy (working_copy, v, \
+ sizeof (vnet_classify_entry_##size##_t) \
+ * (1<<b->log2_pages) \
+ * (t->entries_per_page)); \
+ break;
+ foreach_size_in_u32x4 ;
+#undef _
+
+ default:
+ abort();
+ }
+
+ working_bucket.as_u64 = b->as_u64;
+ working_bucket.offset = vnet_classify_get_offset (t, working_copy);
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = working_bucket.as_u64;
+ t->working_copies[cpu_number] = working_copy;
+}
+
+static vnet_classify_entry_t *
+split_and_rehash (vnet_classify_table_t * t,
+ vnet_classify_entry_t * old_values,
+ u32 new_log2_pages)
+{
+ vnet_classify_entry_t * new_values, * v, * new_v;
+ int i, j, k;
+
+ new_values = vnet_classify_entry_alloc (t, new_log2_pages);
+
+ for (i = 0; i < (vec_len (old_values)/t->entries_per_page); i++)
+ {
+ u64 new_hash;
+
+ for (j = 0; j < t->entries_per_page; j++)
+ {
+ v = vnet_classify_entry_at_index
+ (t, old_values, i * t->entries_per_page + j);
+
+ if (vnet_classify_entry_is_busy (v))
+ {
+ /* Hack so we can use the packet hash routine */
+ u8 * key_minus_skip;
+ key_minus_skip = (u8 *) v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ new_hash = vnet_classify_hash_packet (t, key_minus_skip);
+ new_hash >>= t->log2_nbuckets;
+ new_hash &= (1<<new_log2_pages) - 1;
+
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+ new_v = vnet_classify_entry_at_index (t, new_values,
+ new_hash + k);
+
+ if (vnet_classify_entry_is_free (new_v))
+ {
+ memcpy (new_v, v, sizeof (vnet_classify_entry_t)
+ + (t->match_n_vectors * sizeof (u32x4)));
+ new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ goto doublebreak;
+ }
+ }
+ /* Crap. Tell caller to try again */
+ vnet_classify_entry_free (t, new_values);
+ return 0;
+ }
+ doublebreak:
+ ;
+ }
+ }
+ return new_values;
+}
+
+int vnet_classify_add_del (vnet_classify_table_t * t,
+ vnet_classify_entry_t * add_v,
+ int is_add)
+{
+ u32 bucket_index;
+ vnet_classify_bucket_t * b, tmp_b;
+ vnet_classify_entry_t * v, * new_v, * save_new_v, * working_copy, * save_v;
+ u32 value_index;
+ int rv = 0;
+ int i;
+ u64 hash, new_hash;
+ u32 new_log2_pages;
+ u32 cpu_number = os_get_cpu_number();
+ u8 * key_minus_skip;
+
+ ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
+
+ key_minus_skip = (u8 *) add_v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ hash = vnet_classify_hash_packet (t, key_minus_skip);
+
+ bucket_index = hash & (t->nbuckets-1);
+ b = &t->buckets[bucket_index];
+
+ hash >>= t->log2_nbuckets;
+
+ while (__sync_lock_test_and_set (t->writer_lock, 1))
+ ;
+
+ /* First elt in the bucket? */
+ if (b->offset == 0)
+ {
+ if (is_add == 0)
+ {
+ rv = -1;
+ goto unlock;
+ }
+
+ v = vnet_classify_entry_alloc (t, 0 /* new_log2_pages */);
+ memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof (u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+
+ tmp_b.as_u64 = 0;
+ tmp_b.offset = vnet_classify_get_offset (t, v);
+
+ b->as_u64 = tmp_b.as_u64;
+ t->active_elements ++;
+
+ goto unlock;
+ }
+
+ make_working_copy (t, b);
+
+ save_v = vnet_classify_get_entry (t, t->saved_bucket.offset);
+ value_index = hash & ((1<<t->saved_bucket.log2_pages)-1);
+
+ if (is_add)
+ {
+ /*
+ * For obvious (in hindsight) reasons, see if we're supposed to
+ * replace an existing key, then look for an empty slot.
+ */
+
+ for (i = 0; i < t->entries_per_page; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4)))
+ {
+ memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+
+ CLIB_MEMORY_BARRIER();
+ /* Restore the previous (k,v) pairs */
+ b->as_u64 = t->saved_bucket.as_u64;
+ goto unlock;
+ }
+ }
+ for (i = 0; i < t->entries_per_page; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (vnet_classify_entry_is_free (v))
+ {
+ memcpy (v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = t->saved_bucket.as_u64;
+ t->active_elements ++;
+ goto unlock;
+ }
+ }
+ /* no room at the inn... split case... */
+ }
+ else
+ {
+ for (i = 0; i < t->entries_per_page; i++)
+ {
+ v = vnet_classify_entry_at_index (t, save_v, value_index + i);
+
+ if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4)))
+ {
+ memset (v, 0xff, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ v->flags |= VNET_CLASSIFY_ENTRY_FREE;
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = t->saved_bucket.as_u64;
+ t->active_elements --;
+ goto unlock;
+ }
+ }
+ rv = -3;
+ b->as_u64 = t->saved_bucket.as_u64;
+ goto unlock;
+ }
+
+ new_log2_pages = t->saved_bucket.log2_pages + 1;
+
+ expand_again:
+ working_copy = t->working_copies[cpu_number];
+ new_v = split_and_rehash (t, working_copy, new_log2_pages);
+
+ if (new_v == 0)
+ {
+ new_log2_pages++;
+ goto expand_again;
+ }
+
+ /* Try to add the new entry */
+ save_new_v = new_v;
+
+ key_minus_skip = (u8 *) add_v->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ new_hash = vnet_classify_hash_packet_inline (t, key_minus_skip);
+ new_hash >>= t->log2_nbuckets;
+ new_hash &= (1<<min_log2((vec_len(new_v)/t->entries_per_page))) - 1;
+
+ for (i = 0; i < t->entries_per_page; i++)
+ {
+ new_v = vnet_classify_entry_at_index (t, save_new_v, new_hash + i);
+
+ if (vnet_classify_entry_is_free (new_v))
+ {
+ memcpy (new_v, add_v, sizeof (vnet_classify_entry_t) +
+ t->match_n_vectors * sizeof(u32x4));
+ new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE);
+ goto expand_ok;
+ }
+ }
+ /* Crap. Try again */
+ new_log2_pages++;
+ vnet_classify_entry_free (t, save_new_v);
+ goto expand_again;
+
+ expand_ok:
+ tmp_b.log2_pages = min_log2 (vec_len (save_new_v)/t->entries_per_page);
+ tmp_b.offset = vnet_classify_get_offset (t, save_new_v);
+ CLIB_MEMORY_BARRIER();
+ b->as_u64 = tmp_b.as_u64;
+ t->active_elements ++;
+ v = vnet_classify_get_entry (t, t->saved_bucket.offset);
+ vnet_classify_entry_free (t, v);
+
+ unlock:
+ CLIB_MEMORY_BARRIER();
+ t->writer_lock[0] = 0;
+
+ return rv;
+}
+
+typedef CLIB_PACKED(struct {
+ ethernet_header_t eh;
+ ip4_header_t ip;
+}) classify_data_or_mask_t;
+
+u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h)
+{
+ return vnet_classify_hash_packet_inline (t, h);
+}
+
+vnet_classify_entry_t *
+vnet_classify_find_entry (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now)
+{
+ return vnet_classify_find_entry_inline (t, h, hash, now);
+}
+
+static u8 * format_classify_entry (u8 * s, va_list * args)
+ {
+ vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *);
+ vnet_classify_entry_t * e = va_arg (*args, vnet_classify_entry_t *);
+
+ s = format
+ (s, "[%u]: next_index %d advance %d opaque %d\n",
+ vnet_classify_get_offset (t, e), e->next_index, e->advance,
+ e->opaque_index);
+
+
+ s = format (s, " k: %U\n", format_hex_bytes, e->key,
+ t->match_n_vectors * sizeof(u32x4));
+
+ if (vnet_classify_entry_is_busy (e))
+ s = format (s, " hits %lld, last_heard %.2f\n",
+ e->hits, e->last_heard);
+ else
+ s = format (s, " entry is free\n");
+ return s;
+ }
+
+u8 * format_classify_table (u8 * s, va_list * args)
+{
+ vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *);
+ int verbose = va_arg (*args, int);
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * v, * save_v;
+ int i, j, k;
+ u64 active_elements = 0;
+
+ for (i = 0; i < t->nbuckets; i++)
+ {
+ b = &t->buckets [i];
+ if (b->offset == 0)
+ {
+ if (verbose > 1)
+ s = format (s, "[%d]: empty\n", i);
+ continue;
+ }
+
+ if (verbose)
+ {
+ s = format (s, "[%d]: heap offset %d, len %d\n", i,
+ b->offset, (1<<b->log2_pages));
+ }
+
+ save_v = vnet_classify_get_entry (t, b->offset);
+ for (j = 0; j < (1<<b->log2_pages); j++)
+ {
+ for (k = 0; k < t->entries_per_page; k++)
+ {
+
+ v = vnet_classify_entry_at_index (t, save_v,
+ j*t->entries_per_page + k);
+
+ if (vnet_classify_entry_is_free (v))
+ {
+ if (verbose > 1)
+ s = format (s, " %d: empty\n",
+ j * t->entries_per_page + k);
+ continue;
+ }
+ if (verbose)
+ {
+ s = format (s, " %d: %U\n",
+ j * t->entries_per_page + k,
+ format_classify_entry, t, v);
+ }
+ active_elements++;
+ }
+ }
+ }
+
+ s = format (s, " %lld active elements\n", active_elements);
+ s = format (s, " %d free lists\n", vec_len (t->freelists));
+ return s;
+}
+
+int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ int is_add)
+{
+ vnet_classify_table_t * t;
+
+ if (is_add)
+ {
+ *table_index = ~0;
+ if (memory_size == 0)
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+
+ if (nbuckets == 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ t = vnet_classify_new_table (cm, mask, nbuckets, memory_size,
+ skip, match);
+ t->next_table_index = next_table_index;
+ t->miss_next_index = miss_next_index;
+ *table_index = t - cm->tables;
+ return 0;
+ }
+
+ vnet_classify_delete_table_index (cm, *table_index);
+ return 0;
+}
+
+#define foreach_ip4_proto_field \
+_(src_address) \
+_(dst_address) \
+_(tos) \
+_(length) \
+_(fragment_id) \
+_(ttl) \
+_(protocol) \
+_(checksum)
+
+uword unformat_ip4_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ ip4_header_t * ip;
+
+#define _(a) u8 a=0;
+ foreach_ip4_proto_field;
+#undef _
+ u8 version = 0;
+ u8 hdr_length = 0;
+
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "hdr_length"))
+ hdr_length = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip4_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip4_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip4_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip4_proto_field;
+#undef _
+
+ ip->ip_version_and_header_length = 0;
+
+ if (version)
+ ip->ip_version_and_header_length |= 0xF0;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= 0x0F;
+
+ *maskp = mask;
+ return 1;
+}
+
+#define foreach_ip6_proto_field \
+_(src_address) \
+_(dst_address) \
+_(payload_length) \
+_(hop_limit) \
+_(protocol)
+
+uword unformat_ip6_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 found_something = 0;
+ ip6_header_t * ip;
+ u32 ip_version_traffic_class_and_flow_label;
+
+#define _(a) u8 a=0;
+ foreach_ip6_proto_field;
+#undef _
+ u8 version = 0;
+ u8 traffic_class = 0;
+ u8 flow_label = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version"))
+ version = 1;
+ else if (unformat (input, "traffic-class"))
+ traffic_class = 1;
+ else if (unformat (input, "flow-label"))
+ flow_label = 1;
+ else if (unformat (input, "src"))
+ src_address = 1;
+ else if (unformat (input, "dst"))
+ dst_address = 1;
+ else if (unformat (input, "proto"))
+ protocol = 1;
+
+#define _(a) else if (unformat (input, #a)) a=1;
+ foreach_ip6_proto_field
+#undef _
+ else
+ break;
+ }
+
+#define _(a) found_something += a;
+ foreach_ip6_proto_field;
+#undef _
+
+ if (found_something == 0)
+ return 0;
+
+ vec_validate (mask, sizeof (*ip) - 1);
+
+ ip = (ip6_header_t *) mask;
+
+#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a));
+ foreach_ip6_proto_field;
+#undef _
+
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= 0xF0000000;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |= 0x0FF00000;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= 0x000FFFFF;
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ *maskp = mask;
+ return 1;
+}
+
+uword unformat_l3_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4 %U", unformat_ip4_mask, maskp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_mask, maskp))
+ return 1;
+ else
+ break;
+ }
+ return 0;
+}
+
+uword unformat_l2_mask (unformat_input_t * input, va_list * args)
+{
+ u8 ** maskp = va_arg (*args, u8 **);
+ u8 * mask = 0;
+ u8 src = 0;
+ u8 dst = 0;
+ u8 proto = 0;
+ u8 tag1 = 0;
+ u8 tag2 = 0;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u8 dot1q = 0;
+ u8 dot1ad = 0;
+ int len = 14;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "src"))
+ src = 1;
+ else if (unformat (input, "dst"))
+ dst = 1;
+ else if (unformat (input, "proto"))
+ proto = 1;
+ else if (unformat (input, "tag1"))
+ tag1 = 1;
+ else if (unformat (input, "tag2"))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1"))
+ cos1 = 1;
+ else if (unformat (input, "cos2"))
+ cos2 = 1;
+ else if (unformat (input, "dot1q"))
+ dot1q = 1;
+ else if (unformat (input, "dot1ad"))
+ dot1ad = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 + dot1q + dot1ad +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1 || dot1q)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2 || dot1ad)
+ len = 22;
+
+ vec_validate (mask, len-1);
+
+ if (dst)
+ memset (mask, 0xff, 6);
+
+ if (src)
+ memset (mask + 6, 0xff, 6);
+
+ if (tag2 || dot1ad)
+ {
+ /* inner vlan tag */
+ if (tag2)
+ {
+ mask[19] = 0xff;
+ mask[18] = 0x0f;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (proto)
+ mask[21] = mask [20] = 0xff;
+ if (tag1)
+ {
+ mask [15] = 0xff;
+ mask [14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ *maskp = mask;
+ return 1;
+ }
+ if (tag1 | dot1q)
+ {
+ if (tag1)
+ {
+ mask [15] = 0xff;
+ mask [14] = 0x0f;
+ }
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[16] = mask [17] = 0xff;
+ *maskp = mask;
+ return 1;
+ }
+ if (cos2)
+ mask[18] |= 0xe0;
+ if (cos1)
+ mask[14] |= 0xe0;
+ if (proto)
+ mask[12] = mask [13] = 0xff;
+
+ *maskp = mask;
+ return 1;
+}
+
+uword unformat_classify_mask (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * CLIB_UNUSED(cm)
+ = va_arg (*args, vnet_classify_main_t *);
+ u8 ** maskp = va_arg (*args, u8 **);
+ u32 * skipp = va_arg (*args, u32 *);
+ u32 * matchp = va_arg (*args, u32 *);
+ u32 match;
+ u8 * mask = 0;
+ u8 * l2 = 0;
+ u8 * l3 = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "hex %U", unformat_hex_string, &mask))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_mask, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_mask, &l3))
+ ;
+ else
+ break;
+ }
+
+ if (mask || l2 || l3)
+ {
+ if (l2 || l3)
+ {
+ /* "With a free Ethernet header in every package" */
+ if (l2 == 0)
+ vec_validate (l2, 13);
+ mask = l2;
+ vec_append (mask, l3);
+ vec_free (l3);
+ }
+
+ /* Scan forward looking for the first significant mask octet */
+ for (i = 0; i < vec_len (mask); i++)
+ if (mask[i])
+ break;
+
+ /* compute (skip, match) params */
+ *skipp = i / sizeof(u32x4);
+ vec_delete (mask, *skipp * sizeof(u32x4), 0);
+
+ /* Pad mask to an even multiple of the vector size */
+ while (vec_len (mask) % sizeof (u32x4))
+ vec_add1 (mask, 0);
+
+ match = vec_len (mask) / sizeof (u32x4);
+
+ for (i = match*sizeof(u32x4); i > 0; i-= sizeof(u32x4))
+ {
+ u64 *tmp = (u64 *)(mask + (i-sizeof(u32x4)));
+ if (*tmp || *(tmp+1))
+ break;
+ match--;
+ }
+ if (match == 0)
+ clib_warning ("BUG: match 0");
+
+ _vec_len (mask) = match * sizeof(u32x4);
+
+ *matchp = match;
+ *maskp = mask;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+#define foreach_l2_next \
+_(drop, DROP) \
+_(ethernet, ETHERNET_INPUT) \
+_(ip4, IP4_INPUT) \
+_(ip6, IP6_INPUT) \
+_(li, LI)
+
+uword unformat_l2_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = L2_CLASSIFY_NEXT_##N; goto out;}
+ foreach_l2_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_ip_next \
+_(miss, MISS) \
+_(drop, DROP) \
+_(local, LOCAL) \
+_(rewrite, REWRITE)
+
+uword unformat_ip_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = IP_LOOKUP_NEXT_##N; goto out;}
+ foreach_ip_next;
+#undef _
+
+ if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+#define foreach_acl_next \
+_(deny, DENY)
+
+uword unformat_acl_next_index (unformat_input_t * input, va_list * args)
+{
+ u32 * miss_next_indexp = va_arg (*args, u32 *);
+ u32 next_index = 0;
+ u32 tmp;
+
+#define _(n,N) \
+ if (unformat (input, #n)) { next_index = ACL_NEXT_INDEX_##N; goto out;}
+ foreach_acl_next;
+#undef _
+
+ if (unformat (input, "permit"))
+ {
+ next_index = ~0;
+ goto out;
+ }
+ else if (unformat (input, "%d", &tmp))
+ {
+ next_index = tmp;
+ goto out;
+ }
+
+ return 0;
+
+ out:
+ *miss_next_indexp = next_index;
+ return 1;
+}
+
+static clib_error_t *
+classify_table_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 nbuckets = 2;
+ u32 skip = ~0;
+ u32 match = ~0;
+ int is_add = 1;
+ u32 table_index = ~0;
+ u32 next_table_index = ~0;
+ u32 miss_next_index = ~0;
+ u32 memory_size = 2<<20;
+ u32 tmp;
+
+ u8 * mask = 0;
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "buckets %d", &nbuckets))
+ ;
+ else if (unformat (input, "skip %d", &skip))
+ ;
+ else if (unformat (input, "match %d", &match))
+ ;
+ else if (unformat (input, "table %d", &table_index))
+ ;
+ else if (unformat (input, "mask %U", unformat_classify_mask,
+ cm, &mask, &skip, &match))
+ ;
+ else if (unformat (input, "memory-size %uM", &tmp))
+ memory_size = tmp<<20;
+ else if (unformat (input, "memory-size %uG", &tmp))
+ memory_size = tmp<<30;
+ else if (unformat (input, "next-table %d", &next_table_index))
+ ;
+ else if (unformat (input, "miss-next %U", unformat_ip_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "l2-miss-next %U", unformat_l2_next_index,
+ &miss_next_index))
+ ;
+ else if (unformat (input, "acl-miss-next %U", unformat_acl_next_index,
+ &miss_next_index))
+ ;
+
+ else
+ break;
+ }
+
+ if (is_add && mask == 0)
+ return clib_error_return (0, "Mask required");
+
+ if (is_add && skip == ~0)
+ return clib_error_return (0, "skip count required");
+
+ if (is_add && match == ~0)
+ return clib_error_return (0, "match count required");
+
+ if (!is_add && table_index == ~0)
+ return clib_error_return (0, "table index required for delete");
+
+ rv = vnet_classify_add_del_table (cm, mask, nbuckets, memory_size,
+ skip, match, next_table_index, miss_next_index,
+ &table_index, is_add);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_classify_add_del_table returned %d",
+ rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (classify_table, static) = {
+ .path = "classify table",
+ .short_help =
+ "classify table [miss-next|l2-miss_next|acl-miss-next <next_index>]"
+ "\n mask <mask-value> buckets <nn> [skip <n>] [match <n>] [del]",
+ .function = classify_table_command_fn,
+};
+
+static u8 * format_vnet_classify_table (u8 * s, va_list * args)
+{
+ vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *);
+ int verbose = va_arg (*args, int);
+ u32 index = va_arg (*args, u32);
+ vnet_classify_table_t * t;
+
+ if (index == ~0)
+ {
+ s = format (s, "%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl",
+ "NextNode", verbose ? "Details" : "");
+ return s;
+ }
+
+ t = pool_elt_at_index (cm->tables, index);
+ s = format (s, "%10u%10d%10d%10d", index, t->active_elements,
+ t->next_table_index, t->miss_next_index);
+
+ s = format (s, "\n Heap: %U", format_mheap, t->mheap, 0 /*verbose*/);
+
+ s = format (s, "\n nbuckets %d, skip %d match %d",
+ t->nbuckets, t->skip_n_vectors, t->match_n_vectors);
+ s = format (s, "\n mask %U", format_hex_bytes, t->mask,
+ t->match_n_vectors * sizeof (u32x4));
+
+ if (verbose == 0)
+ return s;
+
+ s = format (s, "\n%U", format_classify_table, t, verbose);
+
+ return s;
+}
+
+static clib_error_t *
+show_classify_tables_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ vnet_classify_table_t * t;
+ u32 match_index = ~0;
+ u32 * indices = 0;
+ int verbose = 0;
+ int i;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "index %d", &match_index))
+ ;
+ else if (unformat (input, "verbose %d", &verbose))
+ ;
+ else if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ break;
+ }
+
+ pool_foreach (t, cm->tables,
+ ({
+ if (match_index == ~0 || (match_index == t - cm->tables))
+ vec_add1 (indices, t - cm->tables);
+ }));
+
+ if (vec_len(indices))
+ {
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose,
+ ~0 /* hdr */);
+ for (i = 0; i < vec_len (indices); i++)
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm,
+ verbose, indices[i]);
+ }
+ else
+ vlib_cli_output (vm, "No classifier tables configured");
+
+ vec_free (indices);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_classify_table_command, static) = {
+ .path = "show classify tables",
+ .short_help = "show classify tables [index <nn>]",
+ .function = show_classify_tables_command_fn,
+};
+
+uword unformat_ip4_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ ip4_header_t * ip;
+ int version = 0;
+ u32 version_val;
+ int hdr_length = 0;
+ u32 hdr_length_val;
+ int src = 0, dst = 0;
+ ip4_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int tos = 0;
+ u32 tos_val;
+ int length = 0;
+ u32 length_val;
+ int fragment_id = 0;
+ u32 fragment_id_val;
+ int ttl = 0;
+ int ttl_val;
+ int checksum = 0;
+ u32 checksum_val;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "hdr_length %d", &hdr_length_val))
+ hdr_length = 1;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip4_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "tos %d", &tos_val))
+ tos = 1;
+ else if (unformat (input, "length %d", &length_val))
+ length = 1;
+ else if (unformat (input, "fragment_id %d", &fragment_id_val))
+ fragment_id = 1;
+ else if (unformat (input, "ttl %d", &ttl_val))
+ ttl = 1;
+ else if (unformat (input, "checksum %d", &checksum_val))
+ checksum = 1;
+ else
+ break;
+ }
+
+ if (version + hdr_length + src + dst + proto + tos + length + fragment_id
+ + ttl + checksum == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+ ip = (ip4_header_t *) match;
+
+ /* These are realistically matched in practice */
+ if (src)
+ ip->src_address.as_u32 = src_val.as_u32;
+
+ if (dst)
+ ip->dst_address.as_u32 = dst_val.as_u32;
+
+ if (proto)
+ ip->protocol = proto_val;
+
+
+ /* These are not, but they're included for completeness */
+ if (version)
+ ip->ip_version_and_header_length |= (version_val & 0xF)<<4;
+
+ if (hdr_length)
+ ip->ip_version_and_header_length |= (hdr_length_val & 0xF);
+
+ if (tos)
+ ip->tos = tos_val;
+
+ if (length)
+ ip->length = length_val;
+
+ if (ttl)
+ ip->ttl = ttl_val;
+
+ if (checksum)
+ ip->checksum = checksum_val;
+
+ *matchp = match;
+ return 1;
+}
+
+uword unformat_ip6_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ ip6_header_t * ip;
+ int version = 0;
+ u32 version_val;
+ u8 traffic_class = 0;
+ u32 traffic_class_val;
+ u8 flow_label = 0;
+ u8 flow_label_val;
+ int src = 0, dst = 0;
+ ip6_address_t src_val, dst_val;
+ int proto = 0;
+ u32 proto_val;
+ int payload_length = 0;
+ u32 payload_length_val;
+ int hop_limit = 0;
+ int hop_limit_val;
+ u32 ip_version_traffic_class_and_flow_label;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "version %d", &version_val))
+ version = 1;
+ else if (unformat (input, "traffic_class %d", &traffic_class_val))
+ traffic_class = 1;
+ else if (unformat (input, "flow_label %d", &flow_label_val))
+ flow_label = 1;
+ else if (unformat (input, "src %U", unformat_ip6_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ip6_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %d", &proto_val))
+ proto = 1;
+ else if (unformat (input, "payload_length %d", &payload_length_val))
+ payload_length = 1;
+ else if (unformat (input, "hop_limit %d", &hop_limit_val))
+ hop_limit = 1;
+ else
+ break;
+ }
+
+ if (version + traffic_class + flow_label + src + dst + proto +
+ payload_length + hop_limit == 0)
+ return 0;
+
+ /*
+ * Aligned because we use the real comparison functions
+ */
+ vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+ ip = (ip6_header_t *) match;
+
+ if (src)
+ memcpy (&ip->src_address, &src_val, sizeof (ip->src_address));
+
+ if (dst)
+ memcpy (&ip->dst_address, &dst_val, sizeof (ip->dst_address));
+
+ if (proto)
+ ip->protocol = proto_val;
+
+ ip_version_traffic_class_and_flow_label = 0;
+
+ if (version)
+ ip_version_traffic_class_and_flow_label |= (version_val & 0xF) << 28;
+
+ if (traffic_class)
+ ip_version_traffic_class_and_flow_label |= (traffic_class_val & 0xFF) << 20;
+
+ if (flow_label)
+ ip_version_traffic_class_and_flow_label |= (flow_label_val & 0xFFFFF);
+
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+ if (payload_length)
+ ip->payload_length = clib_host_to_net_u16 (payload_length_val);
+
+ if (hop_limit)
+ ip->hop_limit = hop_limit_val;
+
+ *matchp = match;
+ return 1;
+}
+
+uword unformat_l3_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "ip4 %U", unformat_ip4_match, matchp))
+ return 1;
+ else if (unformat (input, "ip6 %U", unformat_ip6_match, matchp))
+ return 1;
+ /* $$$$ add mpls */
+ else
+ break;
+ }
+ return 0;
+}
+
+uword unformat_vlan_tag (unformat_input_t * input, va_list * args)
+{
+ u8 * tagp = va_arg (*args, u8 *);
+ u32 tag;
+
+ if (unformat(input, "%d", &tag))
+ {
+ tagp[0] = (tag>>8) & 0x0F;
+ tagp[1] = tag & 0xFF;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword unformat_l2_match (unformat_input_t * input, va_list * args)
+{
+ u8 ** matchp = va_arg (*args, u8 **);
+ u8 * match = 0;
+ u8 src = 0;
+ u8 src_val[6];
+ u8 dst = 0;
+ u8 dst_val[6];
+ u8 proto = 0;
+ u16 proto_val;
+ u8 tag1 = 0;
+ u8 tag1_val [2];
+ u8 tag2 = 0;
+ u8 tag2_val [2];
+ int len = 14;
+ u8 ignore_tag1 = 0;
+ u8 ignore_tag2 = 0;
+ u8 cos1 = 0;
+ u8 cos2 = 0;
+ u32 cos1_val = 0;
+ u32 cos2_val = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "src %U", unformat_ethernet_address, &src_val))
+ src = 1;
+ else if (unformat (input, "dst %U", unformat_ethernet_address, &dst_val))
+ dst = 1;
+ else if (unformat (input, "proto %U",
+ unformat_ethernet_type_host_byte_order, &proto_val))
+ proto = 1;
+ else if (unformat (input, "tag1 %U", unformat_vlan_tag, tag1_val))
+ tag1 = 1;
+ else if (unformat (input, "tag2 %U", unformat_vlan_tag, tag2_val))
+ tag2 = 1;
+ else if (unformat (input, "ignore-tag1"))
+ ignore_tag1 = 1;
+ else if (unformat (input, "ignore-tag2"))
+ ignore_tag2 = 1;
+ else if (unformat (input, "cos1 %d", &cos1_val))
+ cos1 = 1;
+ else if (unformat (input, "cos2 %d", &cos2_val))
+ cos2 = 1;
+ else
+ break;
+ }
+ if ((src + dst + proto + tag1 + tag2 +
+ ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0)
+ return 0;
+
+ if (tag1 || ignore_tag1 || cos1)
+ len = 18;
+ if (tag2 || ignore_tag2 || cos2)
+ len = 22;
+
+ vec_validate_aligned (match, len-1, sizeof(u32x4));
+
+ if (dst)
+ memcpy (match, dst_val, 6);
+
+ if (src)
+ memcpy (match + 6, src_val, 6);
+
+ if (tag2)
+ {
+ /* inner vlan tag */
+ match[19] = tag2_val[1];
+ match[18] = tag2_val[0];
+ if (cos2)
+ match [18] |= (cos2_val & 0x7) << 5;
+ if (proto)
+ {
+ match[21] = proto_val & 0xff;
+ match[20] = proto_val >> 8;
+ }
+ if (tag1)
+ {
+ match [15] = tag1_val[1];
+ match [14] = tag1_val[0];
+ }
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+ *matchp = match;
+ return 1;
+ }
+ if (tag1)
+ {
+ match [15] = tag1_val[1];
+ match [14] = tag1_val[0];
+ if (proto)
+ {
+ match[17] = proto_val & 0xff;
+ match[16] = proto_val >> 8;
+ }
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+
+ *matchp = match;
+ return 1;
+ }
+ if (cos2)
+ match [18] |= (cos2_val & 0x7) << 5;
+ if (cos1)
+ match [14] |= (cos1_val & 0x7) << 5;
+ if (proto)
+ {
+ match[13] = proto_val & 0xff;
+ match[12] = proto_val >> 8;
+ }
+
+ *matchp = match;
+ return 1;
+}
+
+
+uword unformat_classify_match (unformat_input_t * input, va_list * args)
+{
+ vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *);
+ u8 ** matchp = va_arg (*args, u8 **);
+ u32 table_index = va_arg (*args, u32);
+ vnet_classify_table_t * t;
+
+ u8 * match = 0;
+ u8 * l2 = 0;
+ u8 * l3 = 0;
+
+ if (pool_is_free_index (cm->tables, table_index))
+ return 0;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "hex %U", unformat_hex_string, &match))
+ ;
+ else if (unformat (input, "l2 %U", unformat_l2_match, &l2))
+ ;
+ else if (unformat (input, "l3 %U", unformat_l3_match, &l3))
+ ;
+ else
+ break;
+ }
+
+ if (match || l2 || l3)
+ {
+ if (l2 || l3)
+ {
+ /* "Win a free Ethernet header in every packet" */
+ if (l2 == 0)
+ vec_validate_aligned (l2, 13, sizeof(u32x4));
+ match = l2;
+ vec_append_aligned (match, l3, sizeof(u32x4));
+ vec_free (l3);
+ }
+
+ /* Make sure the vector is big enough even if key is all 0's */
+ vec_validate_aligned
+ (match, ((t->match_n_vectors + t->skip_n_vectors) * sizeof(u32x4)) - 1,
+ sizeof(u32x4));
+
+ /* Set size, include skipped vectors*/
+ _vec_len (match) = (t->match_n_vectors+t->skip_n_vectors) * sizeof(u32x4);
+
+ *matchp = match;
+
+ return 1;
+ }
+
+ return 0;
+}
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ int is_add)
+{
+ vnet_classify_table_t * t;
+ vnet_classify_entry_5_t _max_e __attribute__((aligned (16)));
+ vnet_classify_entry_t * e;
+ int i, rv;
+
+ if (pool_is_free_index (cm->tables, table_index))
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+
+ t = pool_elt_at_index (cm->tables, table_index);
+
+ e = (vnet_classify_entry_t *)&_max_e;
+ e->next_index = hit_next_index;
+ e->opaque_index = opaque_index;
+ e->advance = advance;
+ e->hits = 0;
+ e->last_heard = 0;
+ e->flags = 0;
+
+ /* Copy key data, honoring skip_n_vectors */
+ memcpy (&e->key, match + t->skip_n_vectors * sizeof (u32x4),
+ t->match_n_vectors * sizeof (u32x4));
+
+ /* Clear don't-care bits; likely when dynamically creating sessions */
+ for (i = 0; i < t->match_n_vectors; i++)
+ e->key[i] &= t->mask[i];
+
+ rv = vnet_classify_add_del (t, e, is_add);
+ if (rv)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return 0;
+}
+
+static clib_error_t *
+classify_session_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ int is_add = 1;
+ u32 table_index = ~0;
+ u32 hit_next_index = ~0;
+ u32 opaque_index = ~0;
+ u8 * match = 0;
+ i32 advance = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "hit-next %U", unformat_ip_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "l2-hit-next %U", unformat_l2_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "acl-hit-next %U", unformat_acl_next_index,
+ &hit_next_index))
+ ;
+ else if (unformat (input, "opaque-index %d", &opaque_index))
+ ;
+ else if (unformat (input, "match %U", unformat_classify_match,
+ cm, &match, table_index))
+ ;
+ else if (unformat (input, "advance %d", &advance))
+ ;
+ else if (unformat (input, "table-index %d", &table_index))
+ ;
+ else
+ break;
+ }
+
+ if (table_index == ~0)
+ return clib_error_return (0, "Table index required");
+
+ if (is_add && match == 0)
+ return clib_error_return (0, "Match value required");
+
+ rv = vnet_classify_add_del_session (cm, table_index, match,
+ hit_next_index,
+ opaque_index, advance, is_add);
+
+ switch(rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "vnet_classify_add_del_session returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (classify_session_command, static) = {
+ .path = "classify session",
+ .short_help =
+ "classify session [hit-next|l2-hit-next|acl-hit-next <next_index>]"
+ "\n table-index <nn> match [hex] [l2] [l3 ip4]",
+ .function = classify_session_command_fn,
+};
+
+#define TEST_CODE 1
+
+#if TEST_CODE > 0
+static clib_error_t *
+test_classify_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 buckets = 2;
+ u32 sessions = 10;
+ int i, rv;
+ vnet_classify_table_t * t = 0;
+ classify_data_or_mask_t * mask;
+ classify_data_or_mask_t * data;
+ u8 *mp = 0, *dp = 0;
+ vnet_classify_main_t * cm = &vnet_classify_main;
+ vnet_classify_entry_t * e;
+ int is_add = 1;
+ u32 tmp;
+ u32 table_index = ~0;
+ ip4_address_t src;
+ u32 deleted = 0;
+ u32 memory_size = 64<<20;
+
+ /* Default starting address 1.0.0.10 */
+ src.as_u32 = clib_net_to_host_u32 (0x0100000A);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "sessions %d", &sessions))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "buckets %d", &buckets))
+ ;
+ else if (unformat (input, "memory-size %uM", &tmp))
+ memory_size = tmp<<20;
+ else if (unformat (input, "memory-size %uG", &tmp))
+ memory_size = tmp<<30;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "table %d", &table_index))
+ ;
+ else
+ break;
+ }
+
+ vec_validate_aligned (mp, 3 * sizeof(u32x4), sizeof(u32x4));
+ vec_validate_aligned (dp, 3 * sizeof(u32x4), sizeof(u32x4));
+
+ mask = (classify_data_or_mask_t *) mp;
+ data = (classify_data_or_mask_t *) dp;
+
+ data->ip.src_address.as_u32 = src.as_u32;
+
+ /* Mask on src address */
+ memset (&mask->ip.src_address, 0xff, 4);
+
+ buckets = 1<<max_log2(buckets);
+
+ if (table_index != ~0)
+ {
+ if (pool_is_free_index (cm->tables, table_index))
+ {
+ vlib_cli_output (vm, "No such table %d", table_index);
+ goto out;
+ }
+ t = pool_elt_at_index (cm->tables, table_index);
+ }
+
+ if (is_add)
+ {
+ if (t == 0)
+ {
+ t = vnet_classify_new_table (cm, (u8 *)mask, buckets,
+ memory_size,
+ 0 /* skip */,
+ 3 /* vectors to match */);
+ t->miss_next_index = IP_LOOKUP_NEXT_LOCAL;
+ vlib_cli_output (vm, "Create table %d", t - cm->tables);
+ }
+
+ vlib_cli_output (vm, "Add %d sessions to %d buckets...",
+ sessions, buckets);
+
+ for (i = 0; i < sessions; i++)
+ {
+ rv = vnet_classify_add_del_session (cm, t - cm->tables, (u8 *) data,
+ IP_LOOKUP_NEXT_DROP,
+ i+100 /* opaque_index */,
+ 0 /* advance */,
+ 1 /* is_add */);
+
+ if (rv != 0)
+ clib_warning ("add: returned %d", rv);
+
+ tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1;
+ data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp);
+ }
+ goto out;
+ }
+
+ if (t == 0)
+ {
+ vlib_cli_output (vm, "Must specify table index to delete sessions");
+ goto out;
+ }
+
+ vlib_cli_output (vm, "Try to delete %d sessions...", sessions);
+
+ for (i = 0; i < sessions; i++)
+ {
+ u8 * key_minus_skip;
+ u64 hash;
+
+ hash = vnet_classify_hash_packet (t, (u8 *) data);
+
+ e = vnet_classify_find_entry (t, (u8 *) data, hash, 0 /* time_now */);
+ /* Previous delete, perhaps... */
+ if (e == 0)
+ continue;
+ ASSERT (e->opaque_index == (i+100));
+
+ key_minus_skip = (u8 *)e->key;
+ key_minus_skip -= t->skip_n_vectors * sizeof (u32x4);
+
+ rv = vnet_classify_add_del_session (cm, t - cm->tables, key_minus_skip,
+ IP_LOOKUP_NEXT_DROP,
+ i+100 /* opaque_index */,
+ 0 /* advance */,
+ 0 /* is_add */);
+ if (rv != 0)
+ clib_warning ("del: returned %d", rv);
+
+ tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1;
+ data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp);
+ deleted++;
+ }
+
+ vlib_cli_output (vm, "Deleted %d sessions...", deleted);
+
+ out:
+ vec_free (mp);
+ vec_free (dp);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_classify_command, static) = {
+ .path = "test classify",
+ .short_help =
+ "test classify [src <ip>] [sessions <nn>] [buckets <nn>] [table <nn>] [del]",
+ .function = test_classify_command_fn,
+};
+#endif /* TEST_CODE */
diff --git a/vnet/vnet/classify/vnet_classify.h b/vnet/vnet/classify/vnet_classify.h
new file mode 100644
index 00000000000..03271ad2e06
--- /dev/null
+++ b/vnet/vnet/classify/vnet_classify.h
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vnet_classify_h__
+#define __included_vnet_classify_h__
+
+#include <stdarg.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vlib/cli.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/api_errno.h> /* for API error numbers */
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/xxhash.h>
+
+vlib_node_registration_t ip4_classify_node;
+vlib_node_registration_t ip6_classify_node;
+
+#define CLASSIFY_TRACE 0
+
+struct _vnet_classify_main;
+typedef struct _vnet_classify_main vnet_classify_main_t;
+
+#define foreach_size_in_u32x4 \
+_(1) \
+_(2) \
+_(3) \
+_(4) \
+_(5)
+
+typedef CLIB_PACKED(struct _vnet_classify_entry {
+ /* Graph node next index */
+ u32 next_index;
+
+ /* put into vnet_buffer(b)->l2_classfy.opaque_index */
+ union {
+ struct {
+ u32 opaque_index;
+ /* advance on hit, note it's a signed quantity... */
+ i32 advance;
+ };
+ u64 opaque_count;
+ };
+
+ /* Really only need 1 bit */
+ u32 flags;
+#define VNET_CLASSIFY_ENTRY_FREE (1<<0)
+
+ /* Hit counter, last heard time */
+ union {
+ u64 hits;
+ struct _vnet_classify_entry * next_free;
+ };
+
+ f64 last_heard;
+
+ /* Must be aligned to a 16-octet boundary */
+ u32x4 key[0];
+}) vnet_classify_entry_t;
+
+static inline int vnet_classify_entry_is_free (vnet_classify_entry_t * e)
+{
+ return e->flags & VNET_CLASSIFY_ENTRY_FREE;
+}
+
+static inline int vnet_classify_entry_is_busy (vnet_classify_entry_t * e)
+{
+ return ((e->flags & VNET_CLASSIFY_ENTRY_FREE) == 0);
+}
+
+/* Need these to con the vector allocator */
+#define _(size) \
+typedef CLIB_PACKED(struct { \
+ u32 pad0[4]; \
+ u64 pad1[2]; \
+ u32x4 key[size]; \
+}) vnet_classify_entry_##size##_t;
+foreach_size_in_u32x4;
+#undef _
+
+typedef struct {
+ union {
+ struct {
+ u32 offset;
+ u8 pad[3];
+ u8 log2_pages;
+ };
+ u64 as_u64;
+ };
+} vnet_classify_bucket_t;
+
+typedef struct {
+ /* Mask to apply after skipping N vectors */
+ u32x4 *mask;
+ /* Buckets and entries */
+ vnet_classify_bucket_t * buckets;
+ vnet_classify_entry_t * entries;
+
+ /* Config parameters */
+ u32 match_n_vectors;
+ u32 skip_n_vectors;
+ u32 nbuckets;
+ u32 log2_nbuckets;
+ int entries_per_page;
+ u32 active_elements;
+ /* Index of next table to try */
+ u32 next_table_index;
+
+ /* Miss next index, return if next_table_index = 0 */
+ u32 miss_next_index;
+
+ /* Per-bucket working copies, one per thread */
+ vnet_classify_entry_t ** working_copies;
+ vnet_classify_bucket_t saved_bucket;
+
+ /* Free entry freelists */
+ vnet_classify_entry_t **freelists;
+
+ u8 * name;
+
+ /* Private allocation arena, protected by the writer lock */
+ void * mheap;
+
+ /* Writer (only) lock for this table */
+ volatile u32 * writer_lock;
+
+} vnet_classify_table_t;
+
+struct _vnet_classify_main {
+ /* Table pool */
+ vnet_classify_table_t * tables;
+
+ /* convenience variables */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+};
+
+vnet_classify_main_t vnet_classify_main;
+
+u8 * format_classify_table (u8 * s, va_list * args);
+
+u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h);
+
+static inline u64
+vnet_classify_hash_packet_inline (vnet_classify_table_t * t,
+ u8 * h)
+{
+ u32x4 *data, *mask;
+
+ union {
+ u32x4 as_u32x4;
+ u64 as_u64[2];
+ } xor_sum __attribute__((aligned(sizeof(u32x4))));
+
+ ASSERT(t);
+
+ data = (u32x4 *)h;
+ mask = t->mask;
+
+ ASSERT ((((u64)h) & 0xf) == 0);
+
+ xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
+
+ switch (t->match_n_vectors)
+ {
+ case 5:
+ xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+ /* FALLTHROUGH */
+ case 4:
+ xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+ /* FALLTHROUGH */
+ case 3:
+ xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+ /* FALLTHROUGH */
+ case 2:
+ xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+
+ default:
+ abort();
+ }
+
+ return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
+}
+
+static inline void
+vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash)
+{
+ u32 bucket_index;
+
+ ASSERT (is_pow2(t->nbuckets));
+
+ bucket_index = hash & (t->nbuckets - 1);
+
+ CLIB_PREFETCH(&t->buckets[bucket_index], CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_get_entry (vnet_classify_table_t * t, uword offset)
+{
+ u8 * hp = t->mheap;
+ u8 * vp = hp + offset;
+
+ return (void *) vp;
+}
+
+static inline uword vnet_classify_get_offset (vnet_classify_table_t * t,
+ vnet_classify_entry_t * v)
+{
+ u8 * hp, * vp;
+
+ hp = (u8 *) t->mheap;
+ vp = (u8 *) v;
+
+ ASSERT((vp - hp) < 0x100000000ULL);
+ return vp - hp;
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_entry_at_index (vnet_classify_table_t * t,
+ vnet_classify_entry_t * e,
+ u32 index)
+{
+ u8 * eu8;
+
+ eu8 = (u8 *)e;
+
+ eu8 += index * (sizeof (vnet_classify_entry_t) +
+ (t->match_n_vectors * sizeof (u32x4)));
+
+ return (vnet_classify_entry_t *) eu8;
+}
+
+static inline void
+vnet_classify_prefetch_entry (vnet_classify_table_t * t,
+ u64 hash)
+{
+ u32 bucket_index;
+ u32 value_index;
+ vnet_classify_bucket_t * b;
+ vnet_classify_entry_t * e;
+
+ bucket_index = hash & (t->nbuckets - 1);
+
+ b = &t->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return;
+
+ hash >>= t->log2_nbuckets;
+
+ e = vnet_classify_get_entry (t, b->offset);
+ value_index = hash & ((1<<b->log2_pages)-1);
+
+ e = vnet_classify_entry_at_index (t, e, value_index);
+
+ CLIB_PREFETCH(e, CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+vnet_classify_entry_t *
+vnet_classify_find_entry (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now);
+
+static inline vnet_classify_entry_t *
+vnet_classify_find_entry_inline (vnet_classify_table_t * t,
+ u8 * h, u64 hash, f64 now)
+ {
+ vnet_classify_entry_t * v;
+ u32x4 * mask, * data, *data_start, * key;
+ u32x4 result __attribute__((aligned(sizeof(u32x4))));
+ vnet_classify_bucket_t * b;
+ u32 value_index;
+ u32 result_mask;
+ u32 bucket_index;
+ int i;
+
+ ASSERT ((((u64)h) & 0xf) == 0);
+
+ data_start = (u32x4 *) h;
+
+ bucket_index = hash & (t->nbuckets-1);
+ b = &t->buckets[bucket_index];
+
+ if (b->offset == 0)
+ return 0;
+
+ hash >>= t->log2_nbuckets;
+
+ v = vnet_classify_get_entry (t, b->offset);
+ value_index = hash & ((1<<b->log2_pages)-1);
+
+ v = vnet_classify_entry_at_index (t, v, value_index);
+
+ for (i = 0; i < t->entries_per_page; i++)
+ {
+ mask = t->mask;
+ data = data_start;
+ key = v->key;
+
+ switch (t->match_n_vectors)
+ {
+ case 1:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ break;
+
+ case 2:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ break;
+
+ case 3:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+ break;
+
+ case 4:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+ result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+ break;
+
+ case 5:
+ result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+ result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+ result |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+ result |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+ result |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
+ break;
+
+ default:
+ abort();
+ }
+
+ result_mask = u32x4_zero_byte_mask (result);
+ if (result_mask == 0xffff)
+ {
+ if (PREDICT_TRUE(now))
+ {
+ v->hits++;
+ v->last_heard = now;
+ }
+ return (v);
+ }
+ v = vnet_classify_entry_at_index (t, v, 1);
+ }
+ return 0;
+}
+
+vnet_classify_table_t *
+vnet_classify_new_table (vnet_classify_main_t *cm,
+ u8 * mask, u32 nbuckets, u32 memory_size,
+ u32 skip_n_vectors,
+ u32 match_n_vectors);
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+ u32 table_index,
+ u8 * match,
+ u32 hit_next_index,
+ u32 opaque_index,
+ i32 advance,
+ int is_add);
+
+int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+ u8 * mask,
+ u32 nbuckets,
+ u32 memory_size,
+ u32 skip,
+ u32 match,
+ u32 next_table_index,
+ u32 miss_next_index,
+ u32 * table_index,
+ int is_add);
+
+unformat_function_t unformat_ip4_mask;
+unformat_function_t unformat_ip6_mask;
+unformat_function_t unformat_l3_mask;
+unformat_function_t unformat_l2_mask;
+unformat_function_t unformat_classify_mask;
+unformat_function_t unformat_l2_next_index;
+unformat_function_t unformat_ip_next_index;
+unformat_function_t unformat_ip4_match;
+unformat_function_t unformat_ip6_match;
+unformat_function_t unformat_l3_match;
+unformat_function_t unformat_vlan_tag;
+unformat_function_t unformat_l2_match;
+unformat_function_t unformat_classify_match;
+
+#endif /* __included_vnet_classify_h__ */