Diffstat (limited to 'src/vnet/classify')
-rw-r--r--  src/vnet/classify/README               |  180
-rw-r--r--  src/vnet/classify/flow_classify.c      |  212
-rw-r--r--  src/vnet/classify/flow_classify.h      |   51
-rw-r--r--  src/vnet/classify/flow_classify_node.c |  338
-rw-r--r--  src/vnet/classify/input_acl.c          |  283
-rw-r--r--  src/vnet/classify/input_acl.h          |   54
-rw-r--r--  src/vnet/classify/ip_classify.c        |  365
-rw-r--r--  src/vnet/classify/policer_classify.c   |  227
-rw-r--r--  src/vnet/classify/policer_classify.h   |   55
-rw-r--r--  src/vnet/classify/vnet_classify.c      | 2436
-rw-r--r--  src/vnet/classify/vnet_classify.h      |  523
11 files changed, 4724 insertions, 0 deletions
diff --git a/src/vnet/classify/README b/src/vnet/classify/README
new file mode 100644
index 00000000000..1ef5ab5ac34
--- /dev/null
+++ b/src/vnet/classify/README
@@ -0,0 +1,180 @@
+=== vnet classifier theory of operation ===
+
+The vnet classifier trades simplicity off against performance and
+scale characteristics. At a certain level, it's a dumb robot: given an
+incoming packet, search an ordered list of (mask, match) tables. If
+the classifier finds a matching entry, take the indicated action. If
+not, take a last-resort action.
+
+We use the MMX-unit to match or hash 16 octets at a time. For hardware
+backward compatibility, the code does not [currently] use 256-bit
+(32-octet) vector instructions.
+
+Effective use of the classifier centers around building table lists
+which "hit" as soon as practicable. In many cases, established
+sessions hit in the first table. In this mode of operation, the
+classifier easily processes multiple MPPS / core - even with millions
+of sessions in the database. Searching 357 tables on a regular basis
+will neatly solve the halting problem.
+
+==== Basic operation ====
+
+The classifier mask-and-match operation proceeds as follows. Given a
+starting classifier table index, lay hands on the indicated mask
+vector. When building tables, we arrange for the mask to obey
+mmx-unit (16-octet) alignment.
+
+We know that the first octet of packet data starts on a cache-line
+boundary. Further, it's reasonably likely that folks won't want to use
+the generalized classifier on the L2 header, preferring to decode the
+Ethertype manually. That scheme makes it easy to select among ip4 /
+ip6 / MPLS, etc. classifier table sets.
+
+A no-vlan-tag L2 header is 14 octets long. A typical ipv4 header
+begins with the octets 0x4500: version=4, header_length=5, DSCP=0,
+ECN=0. If one doesn't intend to classify on (DSCP, ECN) - the typical
+case - we program the classifier to skip the first 16-octet vector.
+
+To classify untagged ipv4 packets on source address, we program the
+classifier to skip one vector, and mask-and-match one vector.
+
+The basic mask-and-match operation looks like this:
+
+   switch (t->match_n_vectors)
+     {
+     case 1:
+       result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+       break;
+
+     case 2:
+       result = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+       result |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+       break;
+
+     <etc>
+     }
+
+   result_mask = u32x4_zero_byte_mask (result);
+   if (result_mask == 0xffff)
+       return (v);
+
+Net of setup, it costs a couple of clock cycles to mask-and-match 16
+octets.
+
+At the risk of belaboring an obvious point, the control-plane
+'''must''' pay attention to detail. When skipping one (or more)
+vectors, masks and matches must reflect that decision. See
+.../vnet/vnet/classify/vnet_classify.c:unformat_classify_[mask|match]. Note
+that vec_validate (xxx, 13) creates a 14-element vector.
+
+==== Creating a classifier table ====
+
+To create a new classifier table via the control-plane API, send a
+"classify_add_del_table" message. The underlying action routine,
+vnet_classify_add_del_table(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+ int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+                                  u8 * mask,
+                                  u32 nbuckets,
+                                  u32 memory_size,
+                                  u32 skip,
+                                  u32 match,
+                                  u32 next_table_index,
+                                  u32 miss_next_index,
+                                  u32 * table_index,
+                                  int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Mask,
+skip(_n_vectors) and match(_n_vectors) are as described above. Mask
+need not be aligned, but it must be match*16 octets in length. To
+avoid having your head explode, be absolutely certain that '''only'''
+the bits you intend to match on are set.
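+
+By way of example, here is one way to build the untagged-ipv4-
+source-address table described above. This is a sketch, not
+cut-and-paste material: the nbuckets, memory_size and miss_next_index
+values are merely illustrative:
+
+ u8 *mask = 0;
+ u32 table_index = ~0;
+ int rv;
+
+ /* skip=1, match=1: with a 14-octet L2 header, the ip4 source
+    address occupies octets 26-29 of the packet, i.e. octets 10-13
+    of the single match vector. Set only those mask octets. */
+ vec_validate (mask, 15);     /* 16 zeroed octets, as noted above */
+ memset (mask + 10, 0xff, 4);
+
+ rv = vnet_classify_add_del_table
+   (&vnet_classify_main, mask,
+    1024 /* nbuckets ~= expected number of sessions */,
+    1<<20 /* memory_size: 1 Mb mheap arena */,
+    1 /* skip */, 1 /* match */,
+    ~0 /* next_table_index: end of the table list */,
+    0 /* miss_next_index: interpreted by the client node, see below */,
+    &table_index, 1 /* is_add */);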
+
+The classifier uses thread-safe, no-reader-locking-required
+bounded-index extensible hashing. Nbuckets is the [fixed] size of the
+hash bucket vector. The algorithm works in constant time regardless of
+hash collisions, but wastes space when the bucket array is too
+small. A good rule of thumb: let nbuckets = approximate number of
+entries expected.
+
+At a significant cost in complexity, it would be possible to resize
+the bucket array dynamically. We have no plans to implement that
+function.
+
+Each classifier table has its own clib mheap memory allocation
+arena. To pick the memory_size parameter, note that each classifier
+table entry needs 16*(1 + match_n_vectors) bytes. Within reason, aim a
+bit high. Clib mheap memory uses o/s level virtual memory - not wired
+or hugetlb memory - so it's best not to scrimp on size.
+
+The "next_table_index" parameter is as described: the pool index in
+vnet_classify_main.tables of the next table to search. Code ~0 to
+indicate the end of the table list. 0 is a valid table index!
+
+We often create classification tables in reverse order -
+last-table-searched to first-table-searched - so we can easily set
+this parameter. Of course, one can manually adjust the data structure
+after-the-fact.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the "miss_next_index"
+parameter as a vpp graph-node next index. When packet classification
+fails to produce a match, ip_classify_inline sends packets to the
+indicated disposition. A classifier application might program this
+parameter to send packets which don't match an existing session to a
+"first-sign-of-life, create-new-session" node.
+
+Finally, the is_add parameter indicates whether to add or delete the
+indicated table. The delete case implicitly terminates all sessions
+with extreme prejudice, by freeing the specified clib mheap.
+
+==== Creating a classifier session ====
+
+To create a new classifier session via the control-plane API, send a
+"classify_add_del_session" message. The underlying action routine,
+vnet_classify_add_del_session(...), is located in
+.../vnet/vnet/classify/vnet_classify.c, and has the following
+prototype:
+
+ int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+                                    u32 table_index,
+                                    u8 * match,
+                                    u32 hit_next_index,
+                                    u32 opaque_index,
+                                    i32 advance,
+                                    u8 action,
+                                    u32 metadata,
+                                    int is_add)
+
+Pass cm = &vnet_classify_main if calling this routine directly. Table
+index specifies the table which receives the new session / contains
+the session to delete depending on is_add. Pass action = 0 and
+metadata = 0 unless a specific client node documents otherwise.
+
+Match is the key for the indicated session. It need not be aligned,
+but it must be table->match_n_vectors*16 octets in length. As a
+courtesy, vnet_classify_add_del_session applies the table's mask to
+the stored key-value. In this way, one can create a session by passing
+unmasked (packet_data + offset) as the "match" parameter, and end up
+with unconfusing session keys.
+
+Specific classifier client nodes - for example,
+.../vnet/vnet/classify/ip_classify.c - interpret the per-session
+hit_next_index parameter as a vpp graph-node next index. When packet
+classification produces a match, ip_classify_inline sends packets to
+the indicated disposition.
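+
+To continue the sketch above, here is how one might add a session
+keyed on ip4 source address 10.0.0.1. Again illustrative: the
+hit_next_index value and session_index are placeholders, and - as in
+the flow-classify node - the key is presented from the start of the
+L2 header, skip vectors included:
+
+ u8 match[2*16];   /* skip vector + match vector */
+ ip4_header_t *ip = (ip4_header_t *)(match + 14);
+
+ memset (match, 0, sizeof (match));
+ ip->src_address.as_u32 = clib_host_to_net_u32 (0x0a000001);
+
+ rv = vnet_classify_add_del_session
+   (&vnet_classify_main, table_index, match,
+    42 /* hit_next_index: client-node specific */,
+    session_index /* opaque_index: ends up in buffer metadata */,
+    0 /* advance */, 0 /* action */, 0 /* metadata */,
+    1 /* is_add */);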
+
+The ip4/6_classify nodes place the per-session opaque_index parameter
+into vnet_buffer(b)->l2_classify.opaque_index; a slight misnomer, but
+the upshot is that classifier applications can send session-hit
+packets to specific graph nodes, with useful values in buffer
+metadata. Depending on the required semantics, we send known-session
+traffic to a certain node, with e.g. a session pool index in buffer
+metadata. It's totally up to the control-plane and the specific
+use-case.
+
+Finally, nodes such as ip4/6-classify apply the advance parameter as a
+[signed!] argument to vlib_buffer_advance(...), to "consume" a
+networking layer. Example: if we classify incoming tunneled IP packets
+by (inner) source/dest address and source/dest port, we might choose
+to decapsulate and reencapsulate the inner packet. In such a case,
+program the advance parameter to perform the tunnel decapsulation, and
+program hit_next_index to send traffic to a node which uses
+e.g. opaque_index to output traffic on a specific tunnel interface.
diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c
new file mode 100644
index 00000000000..0a624204e34
--- /dev/null
+++ b/src/vnet/classify/flow_classify.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/classify/flow_classify.h>
+
+flow_classify_main_t flow_classify_main;
+
+static void
+vnet_flow_classify_feature_enable (vlib_main_t * vnm,
+                                   flow_classify_main_t * fcm,
+                                   u32 sw_if_index,
+                                   flow_classify_table_id_t tid,
+                                   int feature_enable)
+{
+  vnet_feature_config_main_t *vfcm;
+  u8 arc;
+
+  if (tid == FLOW_CLASSIFY_TABLE_IP4)
+    {
+      vnet_feature_enable_disable ("ip4-unicast", "ip4-flow-classify",
+                                   sw_if_index, feature_enable, 0, 0);
+      arc = vnet_get_feature_arc_index ("ip4-unicast");
+    }
+  else
+    {
+      vnet_feature_enable_disable ("ip6-unicast", "ip6-flow-classify",
+                                   sw_if_index, feature_enable, 0, 0);
+      arc = vnet_get_feature_arc_index ("ip6-unicast");
+    }
+
+  vfcm = vnet_get_feature_arc_config_main (arc);
+  fcm->vnet_config_main[tid] = &vfcm->config_main;
+}
+
+int vnet_set_flow_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                  u32 ip4_table_index, u32 ip6_table_index,
+                                  u32 is_add)
+{
+  flow_classify_main_t * fcm = &flow_classify_main;
+  vnet_classify_main_t * vcm = fcm->vnet_classify_main;
+  u32 pct[FLOW_CLASSIFY_N_TABLES] = {ip4_table_index, ip6_table_index};
+  u32 ti;
+
+  /* Assume that we've validated sw_if_index in the API layer */
+
+  for (ti = 0; ti < FLOW_CLASSIFY_N_TABLES; ti++)
+    {
+      if (pct[ti] == ~0)
+        continue;
+
+      if (pool_is_free_index (vcm->tables, pct[ti]))
+        return VNET_API_ERROR_NO_SUCH_TABLE;
+
+      vec_validate_init_empty
+        (fcm->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+      /* Reject any DEL operation with wrong sw_if_index */
+      if (!is_add &&
+          (pct[ti] != fcm->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+        {
+          clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+                        sw_if_index, pct[ti]);
+          return VNET_API_ERROR_NO_SUCH_TABLE;
+        }
+
+      /* Return ok on ADD operation if feature is already enabled */
+      if (is_add &&
+          fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+        return 0;
+
+      vnet_flow_classify_feature_enable (vm, fcm, sw_if_index, ti, is_add);
+
+      if (is_add)
+        fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = pct[ti];
+      else
+        fcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+    }
+
+  return 0;
+}
+
+static clib_error_t *
+set_flow_classify_command_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  u32 is_add = 1;
+  u32 idx_cnt = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "interface %U", unformat_vnet_sw_interface,
+                    vnm, &sw_if_index))
+        ;
+      else if (unformat (input, "ip4-table %d", &ip4_table_index))
+        idx_cnt++;
+      else if (unformat (input, "ip6-table %d", &ip6_table_index))
+        idx_cnt++;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "Interface must be specified.");
+
+  if (!idx_cnt)
+    return clib_error_return (0, "Table index should be specified.");
+
+  if (idx_cnt > 1)
+    return clib_error_return (0, "Only one table index per API call is allowed.");
+
+  rv = vnet_set_flow_classify_intfc(vm, sw_if_index, ip4_table_index,
+                                    ip6_table_index, is_add);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+      return clib_error_return (0, "No such interface");
+
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      return clib_error_return (0, "No such classifier table");
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_flow_classify_command, static) = {
+    .path = "set flow classify",
+    .short_help =
+    "set flow classify interface <int> [ip4-table <index>]\n"
+    "  [ip6-table <index>] [del]",
+    .function = set_flow_classify_command_fn,
+};
+
+static uword
+unformat_table_type (unformat_input_t * input, va_list * va)
+{
+  u32 * r = va_arg (*va, u32 *);
+  u32 tid;
+
+  if (unformat (input, "ip4"))
+    tid = FLOW_CLASSIFY_TABLE_IP4;
+  else if (unformat (input, "ip6"))
+    tid = FLOW_CLASSIFY_TABLE_IP6;
+  else
+    return 0;
+
+  *r = tid;
+  return 1;
+}
+
+static clib_error_t *
+show_flow_classify_command_fn (vlib_main_t * vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  flow_classify_main_t * fcm = &flow_classify_main;
+  u32 type = FLOW_CLASSIFY_N_TABLES;
+  u32 * vec_tbl;
+  int i;
+
+  if (unformat (input, "type %U", unformat_table_type, &type))
+    ;
+  else
+    return clib_error_return (0, "Type must be specified.");
+
+  if (type == FLOW_CLASSIFY_N_TABLES)
+    return clib_error_return (0, "Invalid table type.");
+
+  vec_tbl = fcm->classify_table_index_by_sw_if_index[type];
+
+  if (vec_len(vec_tbl))
+    vlib_cli_output (vm, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+                     "Interface name");
+  else
+    vlib_cli_output (vm, "No tables configured.");
+
+  for (i = 0; i < vec_len (vec_tbl); i++)
+    {
+      if (vec_elt(vec_tbl, i) == ~0)
+        continue;
+
+      vlib_cli_output (vm, "%10d%20d\t\t%U", i, vec_elt(vec_tbl, i),
+                       format_vnet_sw_if_index_name, fcm->vnet_main, i);
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_flow_classify_command, static) = {
+    .path = "show classify flow",
+    .short_help = "show classify flow type [ip4|ip6]",
+    .function = show_flow_classify_command_fn,
+};
diff --git a/src/vnet/classify/flow_classify.h b/src/vnet/classify/flow_classify.h
new file mode 100644
index 00000000000..3ae04cd7b21
--- /dev/null
+++ b/src/vnet/classify/flow_classify.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_flow_classify_h__
+#define __included_vnet_flow_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+  FLOW_CLASSIFY_TABLE_IP4,
+  FLOW_CLASSIFY_TABLE_IP6,
+  FLOW_CLASSIFY_N_TABLES,
+} flow_classify_table_id_t;
+
+typedef enum {
+  FLOW_CLASSIFY_NEXT_INDEX_DROP,
+  FLOW_CLASSIFY_NEXT_INDEX_N_NEXT,
+} flow_classify_next_index_t;
+
+typedef struct {
+  /* Classifier table vectors */
+  u32 * classify_table_index_by_sw_if_index [FLOW_CLASSIFY_N_TABLES];
+
+  /* Convenience variables */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+  vnet_classify_main_t * vnet_classify_main;
+  vnet_config_main_t * vnet_config_main [FLOW_CLASSIFY_N_TABLES];
+} flow_classify_main_t;
+
+extern flow_classify_main_t flow_classify_main;
+
+int vnet_set_flow_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                  u32 ip4_table_index, u32 ip6_table_index,
+                                  u32 is_add);
+
+#endif /* __included_vnet_flow_classify_h__ */
diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c
new file mode 100644
index 00000000000..d3261d33bca
--- /dev/null
+++ b/src/vnet/classify/flow_classify_node.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include <stdint.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/classify/flow_classify.h> +#include <vnet/classify/vnet_classify.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 table_index; + u32 offset; +} flow_classify_trace_t; + +static u8 * +format_flow_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + flow_classify_trace_t * t = va_arg (*args, flow_classify_trace_t *); + + s = format (s, "FLOW_CLASSIFY: sw_if_index %d next %d table %d offset %d", + t->sw_if_index, t->next_index, t->table_index, t->offset); + return s; +} + +#define foreach_flow_classify_error \ +_(MISS, "Flow classify misses") \ +_(HIT, "Flow classify hits") \ +_(CHAIN_HIT, "Flow classify hits after chain walk") \ +_(DROP, "Flow classify action drop") + +typedef enum { +#define _(sym,str) FLOW_CLASSIFY_ERROR_##sym, + foreach_flow_classify_error +#undef _ + FLOW_CLASSIFY_N_ERROR, +} flow_classify_error_t; + +static char * flow_classify_error_strings[] = { +#define _(sym,string) string, + foreach_flow_classify_error +#undef _ +}; + +static inline uword +flow_classify_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + flow_classify_table_id_t tid) +{ + u32 n_left_from, * from, * to_next; + flow_classify_next_index_t next_index; + flow_classify_main_t * fcm = &flow_classify_main; + vnet_classify_main_t * vcm = fcm->vnet_classify_main; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + u32 drop = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + /* First pass: compute hashes */ + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + u8 * h0, * h1; + u32 sw_if_index0, sw_if_index1; + u32 table_index0, table_index1; + vnet_classify_table_t * t0, * t1; + + /* Prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = b1->data; + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + table_index1 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index1]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + vnet_buffer(b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + + vnet_buffer(b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + u8 * h0; + u32 sw_if_index0; + u32 table_index0; + vnet_classify_table_t * t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + + 
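+      /* Map the RX interface to its classify table, hash the packet,
+       * and prefetch the bucket - same work as the dual loop above. */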
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + table_index0 = fcm->classify_table_index_by_sw_if_index[tid][sw_if_index0]; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = FLOW_CLASSIFY_NEXT_INDEX_DROP; + u32 table_index0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + u64 hash0; + u8 * h0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]); + vnet_classify_table_t * tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer(p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer(p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* Speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + + vnet_get_config_data (fcm->vnet_config_main[tid], + &b0->current_config_index, + &next0, + /* # bytes of config data */ 0); + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + if (e0) + { + hits++; + } + else + { + misses++; + vnet_classify_add_del_session (vcm, table_index0, + h0, ~0, 0, 0, 0, 0, 1); + /* increment counter */ + vnet_classify_find_entry (t0, (u8 *) h0, hash0, now); + } + } + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + flow_classify_trace_t * t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->offset = (t0 && e0) ? 
vnet_classify_get_offset (t0, e0): ~0; + } + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + FLOW_CLASSIFY_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + FLOW_CLASSIFY_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + FLOW_CLASSIFY_ERROR_CHAIN_HIT, + chain_hits); + vlib_node_increment_counter (vm, node->node_index, + FLOW_CLASSIFY_ERROR_DROP, + drop); + + return frame->n_vectors; +} + +static uword +ip4_flow_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return flow_classify_inline(vm, node, frame, FLOW_CLASSIFY_TABLE_IP4); +} + +VLIB_REGISTER_NODE (ip4_flow_classify_node) = { + .function = ip4_flow_classify, + .name = "ip4-flow-classify", + .vector_size = sizeof (u32), + .format_trace = format_flow_classify_trace, + .n_errors = ARRAY_LEN(flow_classify_error_strings), + .error_strings = flow_classify_error_strings, + .n_next_nodes = FLOW_CLASSIFY_NEXT_INDEX_N_NEXT, + .next_nodes = { + [FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_flow_classify_node, ip4_flow_classify); + +static uword +ip6_flow_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return flow_classify_inline(vm, node, frame, FLOW_CLASSIFY_TABLE_IP6); +} + +VLIB_REGISTER_NODE (ip6_flow_classify_node) = { + .function = ip6_flow_classify, + .name = "ip6-flow-classify", + .vector_size = sizeof (u32), + .format_trace = format_flow_classify_trace, + .n_errors = ARRAY_LEN(flow_classify_error_strings), + .error_strings = flow_classify_error_strings, + .n_next_nodes = FLOW_CLASSIFY_NEXT_INDEX_N_NEXT, + .next_nodes = { + [FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_flow_classify_node, ip6_flow_classify); + + +static clib_error_t * +flow_classify_init (vlib_main_t *vm) +{ + flow_classify_main_t * fcm = &flow_classify_main; + + fcm->vlib_main = vm; + fcm->vnet_main = vnet_get_main(); + fcm->vnet_classify_main = &vnet_classify_main; + + return 0; +} + +VLIB_INIT_FUNCTION (flow_classify_init); diff --git a/src/vnet/classify/input_acl.c b/src/vnet/classify/input_acl.c new file mode 100644 index 00000000000..c446f2d687c --- /dev/null +++ b/src/vnet/classify/input_acl.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+input_acl_main_t input_acl_main;
+
+static int
+vnet_inacl_ip_feature_enable (vlib_main_t * vnm,
+                              input_acl_main_t *am,
+                              u32 sw_if_index,
+                              input_acl_table_id_t tid,
+                              int feature_enable)
+{
+
+  if (tid == INPUT_ACL_TABLE_L2)
+    {
+      l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ACL,
+                                  feature_enable);
+    }
+  else
+    { /* IP[46] */
+      vnet_feature_config_main_t *fcm;
+      u8 arc;
+
+      if (tid == INPUT_ACL_TABLE_IP4)
+        {
+          vnet_feature_enable_disable ("ip4-unicast", "ip4-inacl",
+                                       sw_if_index, feature_enable, 0, 0);
+          arc = vnet_get_feature_arc_index ("ip4-unicast");
+        }
+      else
+        {
+          vnet_feature_enable_disable ("ip6-unicast", "ip6-inacl",
+                                       sw_if_index, feature_enable, 0, 0);
+          arc = vnet_get_feature_arc_index ("ip6-unicast");
+        }
+
+      fcm = vnet_get_feature_arc_config_main (arc);
+      am->vnet_config_main[tid] = &fcm->config_main;
+    }
+
+  return 0;
+}
+
+int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
+                              u32 ip4_table_index,
+                              u32 ip6_table_index,
+                              u32 l2_table_index, u32 is_add)
+{
+  input_acl_main_t * am = &input_acl_main;
+  vnet_classify_main_t * vcm = am->vnet_classify_main;
+  u32 acl[INPUT_ACL_N_TABLES] = {ip4_table_index, ip6_table_index,
+                                 l2_table_index};
+  u32 ti;
+
+  /* Assume that we've validated sw_if_index in the API layer */
+
+  for (ti = 0; ti < INPUT_ACL_N_TABLES; ti++)
+    {
+      if (acl[ti] == ~0)
+        continue;
+
+      if (pool_is_free_index (vcm->tables, acl[ti]))
+        return VNET_API_ERROR_NO_SUCH_TABLE;
+
+      vec_validate_init_empty
+        (am->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+      /* Reject any DEL operation with wrong sw_if_index */
+      if (!is_add &&
+          (acl[ti] != am->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+        {
+          clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+                        sw_if_index, acl[ti]);
+          return VNET_API_ERROR_NO_SUCH_TABLE;
+        }
+
+      /* Return ok on ADD operation if feature is already enabled */
+      if (is_add &&
+          am->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+        return 0;
+
+      vnet_inacl_ip_feature_enable (vm, am, sw_if_index, ti, is_add);
+
+      if (is_add)
+        am->classify_table_index_by_sw_if_index[ti][sw_if_index] = acl[ti];
+      else
+        am->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+    }
+
+  return 0;
+}
+
+static clib_error_t *
+set_input_acl_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input,
+                          vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  u32 l2_table_index = ~0;
+  u32 is_add = 1;
+  u32 idx_cnt = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+                    vnm, &sw_if_index))
+        ;
+      else if (unformat (input, "ip4-table %d", &ip4_table_index))
+        idx_cnt++;
+      else if (unformat (input, "ip6-table %d", &ip6_table_index))
+        idx_cnt++;
+      else if (unformat (input, "l2-table %d", &l2_table_index))
+        idx_cnt++;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "Interface must be specified.");
+
+  if (!idx_cnt)
+    return clib_error_return (0, "Table index should be specified.");
+
+  if (idx_cnt > 1)
+    return clib_error_return (0, "Only one table index per API call is allowed.");
+
+  rv = vnet_set_input_acl_intfc (vm, sw_if_index, ip4_table_index,
+                                 ip6_table_index, l2_table_index, is_add);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+      return clib_error_return (0, "No such interface");
+
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      return clib_error_return (0, "No such classifier table");
+    }
+  return 0;
+}
+
+/*
+ * Configure an interface to enable/disable the input ACL feature:
+ *  intfc - interface name to be configured as input ACL
+ *  ip4-table <index> [del] - enable/disable IP4 input ACL
+ *  ip6-table <index> [del] - enable/disable IP6 input ACL
+ *  l2-table <index> [del] - enable/disable Layer2 input ACL
+ *
+ * Note: Only one table index per API call is allowed.
+ *
+ */
+VLIB_CLI_COMMAND (set_input_acl_command, static) = {
+    .path = "set interface input acl",
+    .short_help =
+    "set interface input acl intfc <int> [ip4-table <index>]\n"
+    "  [ip6-table <index>] [l2-table <index>] [del]",
+    .function = set_input_acl_command_fn,
+};
+
+clib_error_t *input_acl_init (vlib_main_t *vm)
+{
+  input_acl_main_t * am = &input_acl_main;
+  clib_error_t * error = 0;
+
+  if ((error = vlib_call_init_function (vm, ip_inacl_init)))
+    return error;
+
+  am->vlib_main = vm;
+  am->vnet_main = vnet_get_main();
+  am->vnet_classify_main = &vnet_classify_main;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (input_acl_init);
+
+uword unformat_acl_type (unformat_input_t * input, va_list * args)
+{
+  u32 * acl_type = va_arg (*args, u32 *);
+  u32 tid = INPUT_ACL_N_TABLES;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+    if (unformat (input, "ip4"))
+      tid = INPUT_ACL_TABLE_IP4;
+    else if (unformat (input, "ip6"))
+      tid = INPUT_ACL_TABLE_IP6;
+    else if (unformat (input, "l2"))
+      tid = INPUT_ACL_TABLE_L2;
+    else
+      break;
+  }
+
+  *acl_type = tid;
+  return 1;
+}
+
+u8 * format_vnet_inacl_info (u8 * s, va_list * va)
+{
+  input_acl_main_t * am = va_arg (*va, input_acl_main_t *);
+  int sw_if_idx = va_arg (*va, int);
+  u32 tid = va_arg (*va, u32);
+
+  if (tid == ~0)
+    {
+      s = format (s, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+                  "Interface name");
+      return s;
+    }
+
+  s = format (s, "%10d%20d\t\t%U", sw_if_idx, tid,
+              format_vnet_sw_if_index_name, am->vnet_main, sw_if_idx);
+
+  return s;
+}
+
+static clib_error_t *
+show_inacl_command_fn (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  input_acl_main_t * am = &input_acl_main;
+  u32 type = INPUT_ACL_N_TABLES;
+  int i;
+  u32 * vec_tbl;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "type %U", unformat_acl_type, &type))
+        ;
+      else
+        break;
+    }
+
+  if (type == INPUT_ACL_N_TABLES)
+    return clib_error_return (0, "Invalid input ACL table type.");
+
+  vec_tbl = am->classify_table_index_by_sw_if_index[type];
+
+  if (vec_len(vec_tbl))
+    vlib_cli_output (vm, "%U", format_vnet_inacl_info, am, ~0 /* hdr */, ~0);
+  else
+    vlib_cli_output (vm, "No input ACL tables configured");
+
+  for (i = 0; i < vec_len (vec_tbl); i++)
+    {
+      if (vec_elt(vec_tbl, i) == ~0)
+        continue;
+
+      vlib_cli_output (vm, "%U", format_vnet_inacl_info,
+                       am, i, vec_elt(vec_tbl, i));
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_inacl_command, static) = {
+    .path = "show inacl",
+    .short_help = "show inacl type [ip4|ip6|l2]",
+    .function = show_inacl_command_fn,
+};
diff --git a/src/vnet/classify/input_acl.h b/src/vnet/classify/input_acl.h
new file mode 100644
index 00000000000..7ffc189f053
--- /dev/null
+++ b/src/vnet/classify/input_acl.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_vnet_input_acl_h__ +#define __included_vnet_input_acl_h__ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/classify/vnet_classify.h> + +typedef enum { + INPUT_ACL_TABLE_IP4, + INPUT_ACL_TABLE_IP6, + INPUT_ACL_TABLE_L2, + INPUT_ACL_N_TABLES, +} input_acl_table_id_t; + +typedef enum { + ACL_NEXT_INDEX_DENY, + ACL_NEXT_INDEX_N_NEXT, +} acl_next_index_t; + +typedef struct { + + /* classifier table vectors */ + u32 * classify_table_index_by_sw_if_index [INPUT_ACL_N_TABLES]; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + vnet_classify_main_t * vnet_classify_main; + vnet_config_main_t * vnet_config_main [INPUT_ACL_N_TABLES]; +} input_acl_main_t; + +extern input_acl_main_t input_acl_main; + +int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 ip4_table_index, + u32 ip6_table_index, + u32 l2_table_index, u32 is_add); + +#endif /* __included_vnet_input_acl_h__ */ diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c new file mode 100644 index 00000000000..44973ae5e99 --- /dev/null +++ b/src/vnet/classify/ip_classify.c @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/classify/vnet_classify.h> +#include <vnet/dpo/classify_dpo.h> + +typedef struct { + u32 next_index; + u32 table_index; + u32 entry_index; +} ip_classify_trace_t; + +/* packet trace format function */ +static u8 * format_ip_classify_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip_classify_trace_t * t = va_arg (*args, ip_classify_trace_t *); + + s = format (s, "IP_CLASSIFY: next_index %d, table %d, entry %d", + t->next_index, t->table_index, t->entry_index); + return s; +} + +vlib_node_registration_t ip4_classify_node; +vlib_node_registration_t ip6_classify_node; + +#define foreach_ip_classify_error \ +_(MISS, "Classify misses") \ +_(HIT, "Classify hits") \ +_(CHAIN_HIT, "Classify hits after chain walk") + +typedef enum { +#define _(sym,str) IP_CLASSIFY_ERROR_##sym, + foreach_ip_classify_error +#undef _ + IP_CLASSIFY_N_ERROR, +} ip_classify_error_t; + +static char * ip_classify_error_strings[] = { +#define _(sym,string) string, + foreach_ip_classify_error +#undef _ +}; + +static inline uword +ip_classify_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_ip4) +{ + u32 n_left_from, * from, * to_next; + ip_lookup_next_t next_index; + vnet_classify_main_t * vcm = &vnet_classify_main; + f64 now = vlib_time_now (vm); + u32 hits = 0; + u32 misses = 0; + u32 chain_hits = 0; + u32 n_next; + + if (is_ip4) { + n_next = IP4_LOOKUP_N_NEXT; + } else { + n_next = IP6_LOOKUP_N_NEXT; + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + /* First pass: compute hashes */ + + while (n_left_from > 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + u8 * h0, * h1; + u32 cd_index0, cd_index1; + classify_dpo_t *cd0, * cd1; + u32 table_index0, table_index1; + vnet_classify_table_t * t0, * t1; + + /* prefetch next iteration */ + { + vlib_buffer_t * p1, * p2; + + p1 = vlib_get_buffer (vm, from[1]); + p2 = vlib_get_buffer (vm, from[2]); + + vlib_prefetch_buffer_header (p1, STORE); + CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE); + vlib_prefetch_buffer_header (p2, STORE); + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = (void *)vlib_buffer_get_current(b0) - + ethernet_buffer_header_size(b0); + + bi1 = from[1]; + b1 = vlib_get_buffer (vm, bi1); + h1 = (void *)vlib_buffer_get_current(b1) - + ethernet_buffer_header_size(b1); + + cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + cd0 = classify_dpo_get(cd_index0); + table_index0 = cd0->cd_table_index; + + cd_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + cd1 = classify_dpo_get(cd_index1); + table_index1 = cd1->cd_table_index; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + + t1 = pool_elt_at_index (vcm->tables, table_index1); + + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + vnet_buffer(b1)->l2_classify.hash = + vnet_classify_hash_packet (t1, (u8 *) h1); + + vnet_classify_prefetch_bucket (t1, vnet_buffer(b1)->l2_classify.hash); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + + vnet_buffer(b1)->l2_classify.table_index = table_index1; + + from += 2; + n_left_from -= 2; + } + + while (n_left_from > 0) + { + vlib_buffer_t * b0; + u32 bi0; + u8 * h0; + 
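+      /* The classify DPO, found via ip.adj_index[VLIB_TX], supplies
+       * the table index to search. */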
u32 cd_index0; + classify_dpo_t *cd0; + u32 table_index0; + vnet_classify_table_t * t0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + h0 = (void *)vlib_buffer_get_current(b0) - + ethernet_buffer_header_size(b0); + + cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + cd0 = classify_dpo_get(cd_index0); + table_index0 = cd0->cd_table_index; + + t0 = pool_elt_at_index (vcm->tables, table_index0); + vnet_buffer(b0)->l2_classify.hash = + vnet_classify_hash_packet (t0, (u8 *) h0); + + vnet_buffer(b0)->l2_classify.table_index = table_index0; + vnet_classify_prefetch_bucket (t0, vnet_buffer(b0)->l2_classify.hash); + + from++; + n_left_from--; + } + + next_index = node->cached_next_index; + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* Not enough load/store slots to dual loop... */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = IP_LOOKUP_NEXT_DROP; + u32 table_index0; + vnet_classify_table_t * t0; + vnet_classify_entry_t * e0; + u64 hash0; + u8 * h0; + + /* Stride 3 seems to work best */ + if (PREDICT_TRUE (n_left_from > 3)) + { + vlib_buffer_t * p1 = vlib_get_buffer(vm, from[3]); + vnet_classify_table_t * tp1; + u32 table_index1; + u64 phash1; + + table_index1 = vnet_buffer(p1)->l2_classify.table_index; + + if (PREDICT_TRUE (table_index1 != ~0)) + { + tp1 = pool_elt_at_index (vcm->tables, table_index1); + phash1 = vnet_buffer(p1)->l2_classify.hash; + vnet_classify_prefetch_entry (tp1, phash1); + } + } + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = b0->data; + table_index0 = vnet_buffer(b0)->l2_classify.table_index; + e0 = 0; + t0 = 0; + vnet_buffer(b0)->l2_classify.opaque_index = ~0; + + if (PREDICT_TRUE(table_index0 != ~0)) + { + hash0 = vnet_buffer(b0)->l2_classify.hash; + t0 = pool_elt_at_index (vcm->tables, table_index0); + + e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, + now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < node->n_next_nodes)? + e0->next_index:next0; + hits++; + } + else + { + while (1) + { + if (t0->next_table_index != ~0) + t0 = pool_elt_at_index (vcm->tables, + t0->next_table_index); + else + { + next0 = (t0->miss_next_index < n_next) ? + t0->miss_next_index : next0; + misses++; + break; + } + + hash0 = vnet_classify_hash_packet (t0, (u8 *) h0); + e0 = vnet_classify_find_entry + (t0, (u8 *) h0, hash0, now); + if (e0) + { + vnet_buffer(b0)->l2_classify.opaque_index + = e0->opaque_index; + vlib_buffer_advance (b0, e0->advance); + next0 = (e0->next_index < node->n_next_nodes)? + e0->next_index:next0; + hits++; + chain_hits++; + break; + } + } + } + } + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip_classify_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + t->table_index = t0 ? t0 - vcm->tables : ~0; + t->entry_index = e0 ? 
e0 - t0->entries : ~0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_MISS, + misses); + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_HIT, + hits); + vlib_node_increment_counter (vm, node->node_index, + IP_CLASSIFY_ERROR_CHAIN_HIT, + chain_hits); + return frame->n_vectors; +} + +static uword +ip4_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_classify_inline (vm, node, frame, 1 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip4_classify_node) = { + .function = ip4_classify, + .name = "ip4-classify", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_ip_classify_trace, + .n_errors = ARRAY_LEN(ip_classify_error_strings), + .error_strings = ip_classify_error_strings, + + .n_next_nodes = 0, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_classify_node, ip4_classify) + +static uword +ip6_classify (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip_classify_inline (vm, node, frame, 0 /* is_ip4 */); +} + + +VLIB_REGISTER_NODE (ip6_classify_node) = { + .function = ip6_classify, + .name = "ip6-classify", + .vector_size = sizeof (u32), + .sibling_of = "ip6-lookup", + .format_trace = format_ip_classify_trace, + .n_errors = ARRAY_LEN(ip_classify_error_strings), + .error_strings = ip_classify_error_strings, + + .n_next_nodes = 0, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_classify_node, ip6_classify) + +static clib_error_t * +ip_classify_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ip_classify_init); diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c new file mode 100644 index 00000000000..569234fba3b --- /dev/null +++ b/src/vnet/classify/policer_classify.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include <vnet/classify/policer_classify.h>
+
+policer_classify_main_t policer_classify_main;
+
+static void
+vnet_policer_classify_feature_enable (vlib_main_t * vnm,
+                                      policer_classify_main_t * pcm,
+                                      u32 sw_if_index,
+                                      policer_classify_table_id_t tid,
+                                      int feature_enable)
+{
+  if (tid == POLICER_CLASSIFY_TABLE_L2)
+    {
+      l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_POLICER_CLAS,
+                                  feature_enable);
+    }
+  else
+    {
+      vnet_feature_config_main_t * fcm;
+      u8 arc;
+
+      if (tid == POLICER_CLASSIFY_TABLE_IP4)
+        {
+          vnet_feature_enable_disable ("ip4-unicast", "ip4-policer-classify",
+                                       sw_if_index, feature_enable, 0, 0);
+          arc = vnet_get_feature_arc_index ("ip4-unicast");
+        }
+      else
+        {
+          vnet_feature_enable_disable ("ip6-unicast", "ip6-policer-classify",
+                                       sw_if_index, feature_enable, 0, 0);
+          arc = vnet_get_feature_arc_index ("ip6-unicast");
+        }
+
+      fcm = vnet_get_feature_arc_config_main (arc);
+      pcm->vnet_config_main[tid] = &fcm->config_main;
+    }
+}
+
+int vnet_set_policer_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                     u32 ip4_table_index, u32 ip6_table_index,
+                                     u32 l2_table_index, u32 is_add)
+{
+  policer_classify_main_t * pcm = &policer_classify_main;
+  vnet_classify_main_t * vcm = pcm->vnet_classify_main;
+  u32 pct[POLICER_CLASSIFY_N_TABLES] = {ip4_table_index, ip6_table_index,
+                                        l2_table_index};
+  u32 ti;
+
+  /* Assume that we've validated sw_if_index in the API layer */
+
+  for (ti = 0; ti < POLICER_CLASSIFY_N_TABLES; ti++)
+    {
+      if (pct[ti] == ~0)
+        continue;
+
+      if (pool_is_free_index (vcm->tables, pct[ti]))
+        return VNET_API_ERROR_NO_SUCH_TABLE;
+
+      vec_validate_init_empty
+        (pcm->classify_table_index_by_sw_if_index[ti], sw_if_index, ~0);
+
+      /* Reject any DEL operation with wrong sw_if_index */
+      if (!is_add &&
+          (pct[ti] != pcm->classify_table_index_by_sw_if_index[ti][sw_if_index]))
+        {
+          clib_warning ("Non-existent intf_idx=%d with table_index=%d for delete",
+                        sw_if_index, pct[ti]);
+          return VNET_API_ERROR_NO_SUCH_TABLE;
+        }
+
+      /* Return ok on ADD operation if feature is already enabled */
+      if (is_add &&
+          pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] != ~0)
+        return 0;
+
+      vnet_policer_classify_feature_enable (vm, pcm, sw_if_index, ti, is_add);
+
+      if (is_add)
+        pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = pct[ti];
+      else
+        pcm->classify_table_index_by_sw_if_index[ti][sw_if_index] = ~0;
+    }
+
+  return 0;
+}
+
+static clib_error_t *
+set_policer_classify_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  u32 sw_if_index = ~0;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  u32 l2_table_index = ~0;
+  u32 is_add = 1;
+  u32 idx_cnt = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "interface %U", unformat_vnet_sw_interface,
+                    vnm, &sw_if_index))
+        ;
+      else if (unformat (input, "ip4-table %d", &ip4_table_index))
+        idx_cnt++;
+      else if (unformat (input, "ip6-table %d", &ip6_table_index))
+        idx_cnt++;
+      else if (unformat (input, "l2-table %d", &l2_table_index))
+        idx_cnt++;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else
+        break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "Interface must be specified.");
+
+  if (!idx_cnt)
+    return clib_error_return (0, "Table index should be specified.");
+
+  if (idx_cnt > 1)
+    return clib_error_return (0, "Only one table index per API call is allowed.");
+
+  rv = vnet_set_policer_classify_intfc(vm, sw_if_index, ip4_table_index,
+                                       ip6_table_index, l2_table_index, is_add);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+      return clib_error_return (0, "No such interface");
+
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      return clib_error_return (0, "No such classifier table");
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
+    .path = "set policer classify",
+    .short_help =
+    "set policer classify interface <int> [ip4-table <index>]\n"
+    "  [ip6-table <index>] [l2-table <index>] [del]",
+    .function = set_policer_classify_command_fn,
+};
+
+static uword
+unformat_table_type (unformat_input_t * input, va_list * va)
+{
+  u32 * r = va_arg (*va, u32 *);
+  u32 tid;
+
+  if (unformat (input, "ip4"))
+    tid = POLICER_CLASSIFY_TABLE_IP4;
+  else if (unformat (input, "ip6"))
+    tid = POLICER_CLASSIFY_TABLE_IP6;
+  else if (unformat (input, "l2"))
+    tid = POLICER_CLASSIFY_TABLE_L2;
+  else
+    return 0;
+
+  *r = tid;
+  return 1;
+}
+
+static clib_error_t *
+show_policer_classify_command_fn (vlib_main_t * vm,
+                                  unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  policer_classify_main_t * pcm = &policer_classify_main;
+  u32 type = POLICER_CLASSIFY_N_TABLES;
+  u32 * vec_tbl;
+  int i;
+
+  if (unformat (input, "type %U", unformat_table_type, &type))
+    ;
+  else
+    return clib_error_return (0, "Type must be specified.");
+
+  if (type == POLICER_CLASSIFY_N_TABLES)
+    return clib_error_return (0, "Invalid table type.");
+
+  vec_tbl = pcm->classify_table_index_by_sw_if_index[type];
+
+  if (vec_len(vec_tbl))
+    vlib_cli_output (vm, "%10s%20s\t\t%s", "Intfc idx", "Classify table",
+                     "Interface name");
+  else
+    vlib_cli_output (vm, "No tables configured.");
+
+  for (i = 0; i < vec_len (vec_tbl); i++)
+    {
+      if (vec_elt(vec_tbl, i) == ~0)
+        continue;
+
+      vlib_cli_output (vm, "%10d%20d\t\t%U", i, vec_elt(vec_tbl, i),
+                       format_vnet_sw_if_index_name, pcm->vnet_main, i);
+    }
+
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_policer_classify_command, static) = {
+    .path = "show classify policer",
+    .short_help = "show classify policer type [ip4|ip6|l2]",
+    .function = show_policer_classify_command_fn,
+};
diff --git a/src/vnet/classify/policer_classify.h b/src/vnet/classify/policer_classify.h
new file mode 100644
index 00000000000..3065644438d
--- /dev/null
+++ b/src/vnet/classify/policer_classify.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_vnet_policer_classify_h__
+#define __included_vnet_policer_classify_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/classify/vnet_classify.h>
+
+typedef enum {
+  POLICER_CLASSIFY_TABLE_IP4,
+  POLICER_CLASSIFY_TABLE_IP6,
+  POLICER_CLASSIFY_TABLE_L2,
+  POLICER_CLASSIFY_N_TABLES,
+} policer_classify_table_id_t;
+
+typedef enum {
+  POLICER_CLASSIFY_NEXT_INDEX_DROP,
+  POLICER_CLASSIFY_NEXT_INDEX_N_NEXT,
+} policer_classify_next_index_t;
+
+typedef struct {
+  /* Classifier table vectors */
+  u32 * classify_table_index_by_sw_if_index [POLICER_CLASSIFY_N_TABLES];
+
+  /* L2 next nodes for each feature */
+  u32 feat_next_node_index[32];
+
+  /* Convenience variables */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+  vnet_classify_main_t * vnet_classify_main;
+  vnet_config_main_t * vnet_config_main [POLICER_CLASSIFY_N_TABLES];
+} policer_classify_main_t;
+
+extern policer_classify_main_t policer_classify_main;
+
+int vnet_set_policer_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                                     u32 ip4_table_index, u32 ip6_table_index,
+                                     u32 l2_table_index, u32 is_add);
+
+#endif /* __included_vnet_policer_classify_h__ */
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
new file mode 100644
index 00000000000..ce38f9f173e
--- /dev/null
+++ b/src/vnet/classify/vnet_classify.c
@@ -0,0 +1,2436 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include <vnet/classify/vnet_classify.h> +#include <vnet/classify/input_acl.h> +#include <vnet/ip/ip.h> +#include <vnet/api_errno.h> /* for API error numbers */ +#include <vnet/l2/l2_classify.h> /* for L2_INPUT_CLASSIFY_NEXT_xxx */ +#include <vnet/fib/fib_table.h> + +vnet_classify_main_t vnet_classify_main; + +#if VALIDATION_SCAFFOLDING +/* Validation scaffolding */ +void mv (vnet_classify_table_t * t) +{ + void * oldheap; + + oldheap = clib_mem_set_heap (t->mheap); + clib_mem_validate(); + clib_mem_set_heap (oldheap); +} + +void rogue (vnet_classify_table_t * t) +{ + int i, j, k; + vnet_classify_entry_t * v, * save_v; + u32 active_elements = 0; + vnet_classify_bucket_t * b; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + continue; + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + v = vnet_classify_entry_at_index + (t, save_v, j*t->entries_per_page + k); + + if (vnet_classify_entry_is_busy (v)) + active_elements++; + } + } + } + + if (active_elements != t->active_elements) + clib_warning ("found %u expected %u elts", active_elements, + t->active_elements); +} +#else +void mv (vnet_classify_table_t * t) { } +void rogue (vnet_classify_table_t * t) { } +#endif + +void vnet_classify_register_unformat_l2_next_index_fn (unformat_function_t * fn) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + vec_add1 (cm->unformat_l2_next_index_fns, fn); +} + +void vnet_classify_register_unformat_ip_next_index_fn (unformat_function_t * fn) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + vec_add1 (cm->unformat_ip_next_index_fns, fn); +} + +void +vnet_classify_register_unformat_acl_next_index_fn (unformat_function_t * fn) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + vec_add1 (cm->unformat_acl_next_index_fns, fn); +} + +void +vnet_classify_register_unformat_policer_next_index_fn (unformat_function_t * fn) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + vec_add1 (cm->unformat_policer_next_index_fns, fn); +} + +void vnet_classify_register_unformat_opaque_index_fn (unformat_function_t * fn) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + vec_add1 (cm->unformat_opaque_index_fns, fn); +} + +vnet_classify_table_t * +vnet_classify_new_table (vnet_classify_main_t *cm, + u8 * mask, u32 nbuckets, u32 memory_size, + u32 skip_n_vectors, + u32 match_n_vectors) +{ + vnet_classify_table_t * t; + void * oldheap; + + nbuckets = 1 << (max_log2 (nbuckets)); + + pool_get_aligned (cm->tables, t, CLIB_CACHE_LINE_BYTES); + memset(t, 0, sizeof (*t)); + + vec_validate_aligned (t->mask, match_n_vectors - 1, sizeof(u32x4)); + clib_memcpy (t->mask, mask, match_n_vectors * sizeof (u32x4)); + + t->next_table_index = ~0; + t->nbuckets = nbuckets; + t->log2_nbuckets = max_log2 (nbuckets); + t->match_n_vectors = match_n_vectors; + t->skip_n_vectors = skip_n_vectors; + t->entries_per_page = 2; + + t->mheap = mheap_alloc (0 /* use VM */, memory_size); + + vec_validate_aligned (t->buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES); + oldheap = clib_mem_set_heap (t->mheap); + + t->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + t->writer_lock[0] = 0; + + clib_mem_set_heap (oldheap); + return (t); +} + +void vnet_classify_delete_table_index (vnet_classify_main_t *cm, + u32 table_index, int del_chain) +{ + vnet_classify_table_t * t; + + /* Tolerate multiple frees, up to a point */ + if (pool_is_free_index (cm->tables, 
table_index)) + return; + + t = pool_elt_at_index (cm->tables, table_index); + if (del_chain && t->next_table_index != ~0) + /* Recursively delete the entire chain */ + vnet_classify_delete_table_index (cm, t->next_table_index, del_chain); + + vec_free (t->mask); + vec_free (t->buckets); + mheap_free (t->mheap); + + pool_put (cm->tables, t); +} + +static vnet_classify_entry_t * +vnet_classify_entry_alloc (vnet_classify_table_t * t, u32 log2_pages) +{ + vnet_classify_entry_t * rv = 0; +#define _(size) \ + vnet_classify_entry_##size##_t * rv##size = 0; + foreach_size_in_u32x4; +#undef _ + + void * oldheap; + + ASSERT (t->writer_lock[0]); + if (log2_pages >= vec_len (t->freelists) || t->freelists [log2_pages] == 0) + { + oldheap = clib_mem_set_heap (t->mheap); + + vec_validate (t->freelists, log2_pages); + + switch(t->match_n_vectors) + { + /* Euchre the vector allocator into allocating the right sizes */ +#define _(size) \ + case size: \ + vec_validate_aligned \ + (rv##size, ((1<<log2_pages)*t->entries_per_page) - 1, \ + CLIB_CACHE_LINE_BYTES); \ + rv = (vnet_classify_entry_t *) rv##size; \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + + clib_mem_set_heap (oldheap); + goto initialize; + } + rv = t->freelists[log2_pages]; + t->freelists[log2_pages] = rv->next_free; + +initialize: + ASSERT(rv); + ASSERT (vec_len(rv) == (1<<log2_pages)*t->entries_per_page); + + switch (t->match_n_vectors) + { +#define _(size) \ + case size: \ + if(vec_len(rv)) \ + memset (rv, 0xff, sizeof (*rv##size) * vec_len(rv)); \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + + return rv; +} + +static void +vnet_classify_entry_free (vnet_classify_table_t * t, + vnet_classify_entry_t * v) +{ + u32 free_list_index; + + ASSERT (t->writer_lock[0]); + + free_list_index = min_log2(vec_len(v)/t->entries_per_page); + + ASSERT(vec_len (t->freelists) > free_list_index); + + v->next_free = t->freelists[free_list_index]; + t->freelists[free_list_index] = v; +} + +static inline void make_working_copy +(vnet_classify_table_t * t, vnet_classify_bucket_t * b) +{ + vnet_classify_entry_t * v; + vnet_classify_bucket_t working_bucket __attribute__((aligned (8))); + void * oldheap; + vnet_classify_entry_t * working_copy; +#define _(size) \ + vnet_classify_entry_##size##_t * working_copy##size = 0; + foreach_size_in_u32x4; +#undef _ + u32 cpu_number = os_get_cpu_number(); + + if (cpu_number >= vec_len (t->working_copies)) + { + oldheap = clib_mem_set_heap (t->mheap); + vec_validate (t->working_copies, cpu_number); + clib_mem_set_heap (oldheap); + } + + /* + * working_copies are per-cpu so that near-simultaneous + * updates from multiple threads will not result in sporadic, spurious + * lookup failures. 
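+   * While an update is in flight, the bucket header points at this
+   * frozen per-thread copy; the writer edits the original (or
+   * replacement) pages and republishes the bucket header with a
+   * single atomic 64-bit store.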
+ */ + working_copy = t->working_copies[cpu_number]; + + t->saved_bucket.as_u64 = b->as_u64; + oldheap = clib_mem_set_heap (t->mheap); + + if ((1<<b->log2_pages)*t->entries_per_page > vec_len (working_copy)) + { + switch(t->match_n_vectors) + { + /* Euchre the vector allocator into allocating the right sizes */ +#define _(size) \ + case size: \ + working_copy##size = (void *) working_copy; \ + vec_validate_aligned \ + (working_copy##size, \ + ((1<<b->log2_pages)*t->entries_per_page) - 1, \ + CLIB_CACHE_LINE_BYTES); \ + working_copy = (void *) working_copy##size; \ + break; + foreach_size_in_u32x4; +#undef _ + + default: + abort(); + } + t->working_copies[cpu_number] = working_copy; + } + + _vec_len(working_copy) = (1<<b->log2_pages)*t->entries_per_page; + clib_mem_set_heap (oldheap); + + v = vnet_classify_get_entry (t, b->offset); + + switch(t->match_n_vectors) + { +#define _(size) \ + case size: \ + clib_memcpy (working_copy, v, \ + sizeof (vnet_classify_entry_##size##_t) \ + * (1<<b->log2_pages) \ + * (t->entries_per_page)); \ + break; + foreach_size_in_u32x4 ; +#undef _ + + default: + abort(); + } + + working_bucket.as_u64 = b->as_u64; + working_bucket.offset = vnet_classify_get_offset (t, working_copy); + CLIB_MEMORY_BARRIER(); + b->as_u64 = working_bucket.as_u64; + t->working_copies[cpu_number] = working_copy; +} + +static vnet_classify_entry_t * +split_and_rehash (vnet_classify_table_t * t, + vnet_classify_entry_t * old_values, + u32 new_log2_pages) +{ + vnet_classify_entry_t * new_values, * v, * new_v; + int i, j, k; + + new_values = vnet_classify_entry_alloc (t, new_log2_pages); + + for (i = 0; i < (vec_len (old_values)/t->entries_per_page); i++) + { + u64 new_hash; + + for (j = 0; j < t->entries_per_page; j++) + { + v = vnet_classify_entry_at_index + (t, old_values, i * t->entries_per_page + j); + + if (vnet_classify_entry_is_busy (v)) + { + /* Hack so we can use the packet hash routine */ + u8 * key_minus_skip; + key_minus_skip = (u8 *) v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + new_hash = vnet_classify_hash_packet (t, key_minus_skip); + new_hash >>= t->log2_nbuckets; + new_hash &= (1<<new_log2_pages) - 1; + + for (k = 0; k < t->entries_per_page; k++) + { + new_v = vnet_classify_entry_at_index (t, new_values, + new_hash + k); + + if (vnet_classify_entry_is_free (new_v)) + { + clib_memcpy (new_v, v, sizeof (vnet_classify_entry_t) + + (t->match_n_vectors * sizeof (u32x4))); + new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + goto doublebreak; + } + } + /* Crap. Tell caller to try again */ + vnet_classify_entry_free (t, new_values); + return 0; + } + doublebreak: + ; + } + } + return new_values; +} + +int vnet_classify_add_del (vnet_classify_table_t * t, + vnet_classify_entry_t * add_v, + int is_add) +{ + u32 bucket_index; + vnet_classify_bucket_t * b, tmp_b; + vnet_classify_entry_t * v, * new_v, * save_new_v, * working_copy, * save_v; + u32 value_index; + int rv = 0; + int i; + u64 hash, new_hash; + u32 new_log2_pages; + u32 cpu_number = os_get_cpu_number(); + u8 * key_minus_skip; + + ASSERT ((add_v->flags & VNET_CLASSIFY_ENTRY_FREE) == 0); + + key_minus_skip = (u8 *) add_v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + hash = vnet_classify_hash_packet (t, key_minus_skip); + + bucket_index = hash & (t->nbuckets-1); + b = &t->buckets[bucket_index]; + + hash >>= t->log2_nbuckets; + + while (__sync_lock_test_and_set (t->writer_lock, 1)) + ; + + /* First elt in the bucket? 
*/ + if (b->offset == 0) + { + if (is_add == 0) + { + rv = -1; + goto unlock; + } + + v = vnet_classify_entry_alloc (t, 0 /* new_log2_pages */); + clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof (u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + + tmp_b.as_u64 = 0; + tmp_b.offset = vnet_classify_get_offset (t, v); + + b->as_u64 = tmp_b.as_u64; + t->active_elements ++; + + goto unlock; + } + + make_working_copy (t, b); + + save_v = vnet_classify_get_entry (t, t->saved_bucket.offset); + value_index = hash & ((1<<t->saved_bucket.log2_pages)-1); + + if (is_add) + { + /* + * For obvious (in hindsight) reasons, see if we're supposed to + * replace an existing key, then look for an empty slot. + */ + + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4))) + { + clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + + CLIB_MEMORY_BARRIER(); + /* Restore the previous (k,v) pairs */ + b->as_u64 = t->saved_bucket.as_u64; + goto unlock; + } + } + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (vnet_classify_entry_is_free (v)) + { + clib_memcpy (v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + CLIB_MEMORY_BARRIER(); + b->as_u64 = t->saved_bucket.as_u64; + t->active_elements ++; + goto unlock; + } + } + /* no room at the inn... split case... */ + } + else + { + for (i = 0; i < t->entries_per_page; i++) + { + v = vnet_classify_entry_at_index (t, save_v, value_index + i); + + if (!memcmp (v->key, add_v->key, t->match_n_vectors * sizeof (u32x4))) + { + memset (v, 0xff, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + v->flags |= VNET_CLASSIFY_ENTRY_FREE; + CLIB_MEMORY_BARRIER(); + b->as_u64 = t->saved_bucket.as_u64; + t->active_elements --; + goto unlock; + } + } + rv = -3; + b->as_u64 = t->saved_bucket.as_u64; + goto unlock; + } + + new_log2_pages = t->saved_bucket.log2_pages + 1; + + expand_again: + working_copy = t->working_copies[cpu_number]; + new_v = split_and_rehash (t, working_copy, new_log2_pages); + + if (new_v == 0) + { + new_log2_pages++; + goto expand_again; + } + + /* Try to add the new entry */ + save_new_v = new_v; + + key_minus_skip = (u8 *) add_v->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + new_hash = vnet_classify_hash_packet_inline (t, key_minus_skip); + new_hash >>= t->log2_nbuckets; + new_hash &= (1<<min_log2((vec_len(new_v)/t->entries_per_page))) - 1; + + for (i = 0; i < t->entries_per_page; i++) + { + new_v = vnet_classify_entry_at_index (t, save_new_v, new_hash + i); + + if (vnet_classify_entry_is_free (new_v)) + { + clib_memcpy (new_v, add_v, sizeof (vnet_classify_entry_t) + + t->match_n_vectors * sizeof(u32x4)); + new_v->flags &= ~(VNET_CLASSIFY_ENTRY_FREE); + goto expand_ok; + } + } + /* Crap. 
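+   * (The split succeeded, but this key's slot run is already full
+   * in the new page set.)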
Try again */ + new_log2_pages++; + vnet_classify_entry_free (t, save_new_v); + goto expand_again; + + expand_ok: + tmp_b.log2_pages = min_log2 (vec_len (save_new_v)/t->entries_per_page); + tmp_b.offset = vnet_classify_get_offset (t, save_new_v); + CLIB_MEMORY_BARRIER(); + b->as_u64 = tmp_b.as_u64; + t->active_elements ++; + v = vnet_classify_get_entry (t, t->saved_bucket.offset); + vnet_classify_entry_free (t, v); + + unlock: + CLIB_MEMORY_BARRIER(); + t->writer_lock[0] = 0; + + return rv; +} + +typedef CLIB_PACKED(struct { + ethernet_header_t eh; + ip4_header_t ip; +}) classify_data_or_mask_t; + +u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h) +{ + return vnet_classify_hash_packet_inline (t, h); +} + +vnet_classify_entry_t * +vnet_classify_find_entry (vnet_classify_table_t * t, + u8 * h, u64 hash, f64 now) +{ + return vnet_classify_find_entry_inline (t, h, hash, now); +} + +static u8 * format_classify_entry (u8 * s, va_list * args) + { + vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *); + vnet_classify_entry_t * e = va_arg (*args, vnet_classify_entry_t *); + + s = format + (s, "[%u]: next_index %d advance %d opaque %d action %d metadata %d\n", + vnet_classify_get_offset (t, e), e->next_index, e->advance, + e->opaque_index, e->action, e->metadata); + + + s = format (s, " k: %U\n", format_hex_bytes, e->key, + t->match_n_vectors * sizeof(u32x4)); + + if (vnet_classify_entry_is_busy (e)) + s = format (s, " hits %lld, last_heard %.2f\n", + e->hits, e->last_heard); + else + s = format (s, " entry is free\n"); + return s; + } + +u8 * format_classify_table (u8 * s, va_list * args) +{ + vnet_classify_table_t * t = va_arg (*args, vnet_classify_table_t *); + int verbose = va_arg (*args, int); + vnet_classify_bucket_t * b; + vnet_classify_entry_t * v, * save_v; + int i, j, k; + u64 active_elements = 0; + + for (i = 0; i < t->nbuckets; i++) + { + b = &t->buckets [i]; + if (b->offset == 0) + { + if (verbose > 1) + s = format (s, "[%d]: empty\n", i); + continue; + } + + if (verbose) + { + s = format (s, "[%d]: heap offset %d, len %d\n", i, + b->offset, (1<<b->log2_pages)); + } + + save_v = vnet_classify_get_entry (t, b->offset); + for (j = 0; j < (1<<b->log2_pages); j++) + { + for (k = 0; k < t->entries_per_page; k++) + { + + v = vnet_classify_entry_at_index (t, save_v, + j*t->entries_per_page + k); + + if (vnet_classify_entry_is_free (v)) + { + if (verbose > 1) + s = format (s, " %d: empty\n", + j * t->entries_per_page + k); + continue; + } + if (verbose) + { + s = format (s, " %d: %U\n", + j * t->entries_per_page + k, + format_classify_entry, t, v); + } + active_elements++; + } + } + } + + s = format (s, " %lld active elements\n", active_elements); + s = format (s, " %d free lists\n", vec_len (t->freelists)); + return s; +} + +int vnet_classify_add_del_table (vnet_classify_main_t * cm, + u8 * mask, + u32 nbuckets, + u32 memory_size, + u32 skip, + u32 match, + u32 next_table_index, + u32 miss_next_index, + u32 * table_index, + u8 current_data_flag, + i16 current_data_offset, + int is_add, + int del_chain) +{ + vnet_classify_table_t * t; + + if (is_add) + { + if (*table_index == ~0) /* add */ + { + if (memory_size == 0) + return VNET_API_ERROR_INVALID_MEMORY_SIZE; + + if (nbuckets == 0) + return VNET_API_ERROR_INVALID_VALUE; + + t = vnet_classify_new_table (cm, mask, nbuckets, memory_size, + skip, match); + t->next_table_index = next_table_index; + t->miss_next_index = miss_next_index; + t->current_data_flag = current_data_flag; + t->current_data_offset = 
current_data_offset; + *table_index = t - cm->tables; + } + else /* update */ + { + vnet_classify_main_t *cm = &vnet_classify_main; + t = pool_elt_at_index (cm->tables, *table_index); + + t->next_table_index = next_table_index; + } + return 0; + } + + vnet_classify_delete_table_index (cm, *table_index, del_chain); + return 0; +} + +#define foreach_tcp_proto_field \ +_(src_port) \ +_(dst_port) + +#define foreach_udp_proto_field \ +_(src_port) \ +_(dst_port) + +#define foreach_ip4_proto_field \ +_(src_address) \ +_(dst_address) \ +_(tos) \ +_(length) \ +_(fragment_id) \ +_(ttl) \ +_(protocol) \ +_(checksum) + +uword unformat_tcp_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + tcp_header_t * tcp; + +#define _(a) u8 a=0; + foreach_tcp_proto_field; +#undef _ + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) ; +#define _(a) else if (unformat (input, #a)) a=1; + foreach_tcp_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_tcp_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*tcp) - 1); + + tcp = (tcp_header_t *) mask; + +#define _(a) if (a) memset (&tcp->a, 0xff, sizeof (tcp->a)); + foreach_tcp_proto_field; +#undef _ + + *maskp = mask; + return 1; +} + +uword unformat_udp_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + udp_header_t * udp; + +#define _(a) u8 a=0; + foreach_udp_proto_field; +#undef _ + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0) ; +#define _(a) else if (unformat (input, #a)) a=1; + foreach_udp_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_udp_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*udp) - 1); + + udp = (udp_header_t *) mask; + +#define _(a) if (a) memset (&udp->a, 0xff, sizeof (udp->a)); + foreach_udp_proto_field; +#undef _ + + *maskp = mask; + return 1; +} + +typedef struct { + u16 src_port, dst_port; +} tcpudp_header_t; + +uword unformat_l4_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u16 src_port = 0, dst_port = 0; + tcpudp_header_t * tcpudp; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "tcp %U", unformat_tcp_mask, maskp)) + return 1; + else if (unformat (input, "udp %U", unformat_udp_mask, maskp)) + return 1; + else if (unformat (input, "src_port")) + src_port = 0xFFFF; + else if (unformat (input, "dst_port")) + dst_port = 0xFFFF; + else + return 0; + } + + if (!src_port && !dst_port) + return 0; + + u8 * mask = 0; + vec_validate (mask, sizeof (tcpudp_header_t) - 1); + + tcpudp = (tcpudp_header_t *) mask; + tcpudp->src_port = src_port; + tcpudp->dst_port = dst_port; + + *maskp = mask; + + return 1; +} + +uword unformat_ip4_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + ip4_header_t * ip; + +#define _(a) u8 a=0; + foreach_ip4_proto_field; +#undef _ + u8 version = 0; + u8 hdr_length = 0; + + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version")) + version = 1; + else if (unformat (input, "hdr_length")) + hdr_length = 1; + else if (unformat (input, "src")) + src_address = 1; + else if (unformat (input, "dst")) + dst_address = 1; + else 
if (unformat (input, "proto")) + protocol = 1; + +#define _(a) else if (unformat (input, #a)) a=1; + foreach_ip4_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_ip4_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*ip) - 1); + + ip = (ip4_header_t *) mask; + +#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a)); + foreach_ip4_proto_field; +#undef _ + + ip->ip_version_and_header_length = 0; + + if (version) + ip->ip_version_and_header_length |= 0xF0; + + if (hdr_length) + ip->ip_version_and_header_length |= 0x0F; + + *maskp = mask; + return 1; +} + +#define foreach_ip6_proto_field \ +_(src_address) \ +_(dst_address) \ +_(payload_length) \ +_(hop_limit) \ +_(protocol) + +uword unformat_ip6_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 found_something = 0; + ip6_header_t * ip; + u32 ip_version_traffic_class_and_flow_label; + +#define _(a) u8 a=0; + foreach_ip6_proto_field; +#undef _ + u8 version = 0; + u8 traffic_class = 0; + u8 flow_label = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version")) + version = 1; + else if (unformat (input, "traffic-class")) + traffic_class = 1; + else if (unformat (input, "flow-label")) + flow_label = 1; + else if (unformat (input, "src")) + src_address = 1; + else if (unformat (input, "dst")) + dst_address = 1; + else if (unformat (input, "proto")) + protocol = 1; + +#define _(a) else if (unformat (input, #a)) a=1; + foreach_ip6_proto_field +#undef _ + else + break; + } + +#define _(a) found_something += a; + foreach_ip6_proto_field; +#undef _ + + if (found_something == 0) + return 0; + + vec_validate (mask, sizeof (*ip) - 1); + + ip = (ip6_header_t *) mask; + +#define _(a) if (a) memset (&ip->a, 0xff, sizeof (ip->a)); + foreach_ip6_proto_field; +#undef _ + + ip_version_traffic_class_and_flow_label = 0; + + if (version) + ip_version_traffic_class_and_flow_label |= 0xF0000000; + + if (traffic_class) + ip_version_traffic_class_and_flow_label |= 0x0FF00000; + + if (flow_label) + ip_version_traffic_class_and_flow_label |= 0x000FFFFF; + + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label); + + *maskp = mask; + return 1; +} + +uword unformat_l3_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "ip4 %U", unformat_ip4_mask, maskp)) + return 1; + else if (unformat (input, "ip6 %U", unformat_ip6_mask, maskp)) + return 1; + else + break; + } + return 0; +} + +uword unformat_l2_mask (unformat_input_t * input, va_list * args) +{ + u8 ** maskp = va_arg (*args, u8 **); + u8 * mask = 0; + u8 src = 0; + u8 dst = 0; + u8 proto = 0; + u8 tag1 = 0; + u8 tag2 = 0; + u8 ignore_tag1 = 0; + u8 ignore_tag2 = 0; + u8 cos1 = 0; + u8 cos2 = 0; + u8 dot1q = 0; + u8 dot1ad = 0; + int len = 14; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "src")) + src = 1; + else if (unformat (input, "dst")) + dst = 1; + else if (unformat (input, "proto")) + proto = 1; + else if (unformat (input, "tag1")) + tag1 = 1; + else if (unformat (input, "tag2")) + tag2 = 1; + else if (unformat (input, "ignore-tag1")) + ignore_tag1 = 1; + else if (unformat (input, "ignore-tag2")) + ignore_tag2 = 1; + else if (unformat (input, "cos1")) + cos1 = 1; + else if (unformat (input, 
"cos2")) + cos2 = 1; + else if (unformat (input, "dot1q")) + dot1q = 1; + else if (unformat (input, "dot1ad")) + dot1ad = 1; + else + break; + } + if ((src + dst + proto + tag1 + tag2 + dot1q + dot1ad + + ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0) + return 0; + + if (tag1 || ignore_tag1 || cos1 || dot1q) + len = 18; + if (tag2 || ignore_tag2 || cos2 || dot1ad) + len = 22; + + vec_validate (mask, len-1); + + if (dst) + memset (mask, 0xff, 6); + + if (src) + memset (mask + 6, 0xff, 6); + + if (tag2 || dot1ad) + { + /* inner vlan tag */ + if (tag2) + { + mask[19] = 0xff; + mask[18] = 0x0f; + } + if (cos2) + mask[18] |= 0xe0; + if (proto) + mask[21] = mask [20] = 0xff; + if (tag1) + { + mask [15] = 0xff; + mask [14] = 0x0f; + } + if (cos1) + mask[14] |= 0xe0; + *maskp = mask; + return 1; + } + if (tag1 | dot1q) + { + if (tag1) + { + mask [15] = 0xff; + mask [14] = 0x0f; + } + if (cos1) + mask[14] |= 0xe0; + if (proto) + mask[16] = mask [17] = 0xff; + *maskp = mask; + return 1; + } + if (cos2) + mask[18] |= 0xe0; + if (cos1) + mask[14] |= 0xe0; + if (proto) + mask[12] = mask [13] = 0xff; + + *maskp = mask; + return 1; +} + +uword unformat_classify_mask (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * CLIB_UNUSED(cm) + = va_arg (*args, vnet_classify_main_t *); + u8 ** maskp = va_arg (*args, u8 **); + u32 * skipp = va_arg (*args, u32 *); + u32 * matchp = va_arg (*args, u32 *); + u32 match; + u8 * mask = 0; + u8 * l2 = 0; + u8 * l3 = 0; + u8 * l4 = 0; + int i; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "hex %U", unformat_hex_string, &mask)) + ; + else if (unformat (input, "l2 %U", unformat_l2_mask, &l2)) + ; + else if (unformat (input, "l3 %U", unformat_l3_mask, &l3)) + ; + else if (unformat (input, "l4 %U", unformat_l4_mask, &l4)) + ; + else + break; + } + + if (l4 && !l3) { + vec_free (mask); + vec_free (l2); + vec_free (l4); + return 0; + } + + if (mask || l2 || l3 || l4) + { + if (l2 || l3 || l4) + { + /* "With a free Ethernet header in every package" */ + if (l2 == 0) + vec_validate (l2, 13); + mask = l2; + if (l3) + { + vec_append (mask, l3); + vec_free (l3); + } + if (l4) + { + vec_append (mask, l4); + vec_free (l4); + } + } + + /* Scan forward looking for the first significant mask octet */ + for (i = 0; i < vec_len (mask); i++) + if (mask[i]) + break; + + /* compute (skip, match) params */ + *skipp = i / sizeof(u32x4); + vec_delete (mask, *skipp * sizeof(u32x4), 0); + + /* Pad mask to an even multiple of the vector size */ + while (vec_len (mask) % sizeof (u32x4)) + vec_add1 (mask, 0); + + match = vec_len (mask) / sizeof (u32x4); + + for (i = match*sizeof(u32x4); i > 0; i-= sizeof(u32x4)) + { + u64 *tmp = (u64 *)(mask + (i-sizeof(u32x4))); + if (*tmp || *(tmp+1)) + break; + match--; + } + if (match == 0) + clib_warning ("BUG: match 0"); + + _vec_len (mask) = match * sizeof(u32x4); + + *matchp = match; + *maskp = mask; + + return 1; + } + + return 0; +} + +#define foreach_l2_input_next \ +_(drop, DROP) \ +_(ethernet, ETHERNET_INPUT) \ +_(ip4, IP4_INPUT) \ +_(ip6, IP6_INPUT) \ +_(li, LI) + +uword unformat_l2_input_next_index (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * miss_next_indexp = va_arg (*args, u32 *); + u32 next_index = 0; + u32 tmp; + int i; + + /* First try registered unformat fns, allowing override... 
*/ + for (i = 0; i < vec_len (cm->unformat_l2_next_index_fns); i++) + { + if (unformat (input, "%U", cm->unformat_l2_next_index_fns[i], &tmp)) + { + next_index = tmp; + goto out; + } + } + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = L2_INPUT_CLASSIFY_NEXT_##N; goto out;} + foreach_l2_input_next; +#undef _ + + if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +#define foreach_l2_output_next \ +_(drop, DROP) + +uword unformat_l2_output_next_index (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * miss_next_indexp = va_arg (*args, u32 *); + u32 next_index = 0; + u32 tmp; + int i; + + /* First try registered unformat fns, allowing override... */ + for (i = 0; i < vec_len (cm->unformat_l2_next_index_fns); i++) + { + if (unformat (input, "%U", cm->unformat_l2_next_index_fns[i], &tmp)) + { + next_index = tmp; + goto out; + } + } + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = L2_OUTPUT_CLASSIFY_NEXT_##N; goto out;} + foreach_l2_output_next; +#undef _ + + if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +#define foreach_ip_next \ +_(drop, DROP) \ +_(rewrite, REWRITE) + +uword unformat_ip_next_index (unformat_input_t * input, va_list * args) +{ + u32 * miss_next_indexp = va_arg (*args, u32 *); + vnet_classify_main_t * cm = &vnet_classify_main; + u32 next_index = 0; + u32 tmp; + int i; + + /* First try registered unformat fns, allowing override... */ + for (i = 0; i < vec_len (cm->unformat_ip_next_index_fns); i++) + { + if (unformat (input, "%U", cm->unformat_ip_next_index_fns[i], &tmp)) + { + next_index = tmp; + goto out; + } + } + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = IP_LOOKUP_NEXT_##N; goto out;} + foreach_ip_next; +#undef _ + + if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *miss_next_indexp = next_index; + return 1; +} + +#define foreach_acl_next \ +_(deny, DENY) + +uword unformat_acl_next_index (unformat_input_t * input, va_list * args) +{ + u32 * next_indexp = va_arg (*args, u32 *); + vnet_classify_main_t * cm = &vnet_classify_main; + u32 next_index = 0; + u32 tmp; + int i; + + /* First try registered unformat fns, allowing override... */ + for (i = 0; i < vec_len (cm->unformat_acl_next_index_fns); i++) + { + if (unformat (input, "%U", cm->unformat_acl_next_index_fns[i], &tmp)) + { + next_index = tmp; + goto out; + } + } + +#define _(n,N) \ + if (unformat (input, #n)) { next_index = ACL_NEXT_INDEX_##N; goto out;} + foreach_acl_next; +#undef _ + + if (unformat (input, "permit")) + { + next_index = ~0; + goto out; + } + else if (unformat (input, "%d", &tmp)) + { + next_index = tmp; + goto out; + } + + return 0; + + out: + *next_indexp = next_index; + return 1; +} + +uword unformat_policer_next_index (unformat_input_t * input, va_list * args) +{ + u32 * next_indexp = va_arg (*args, u32 *); + vnet_classify_main_t * cm = &vnet_classify_main; + u32 next_index = 0; + u32 tmp; + int i; + + /* First try registered unformat fns, allowing override... 
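+   * there is no fixed next-node list for policers, so only a
+   * registered parser or a raw numeric index will match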
 */
+  for (i = 0; i < vec_len (cm->unformat_policer_next_index_fns); i++)
+    {
+      if (unformat (input, "%U", cm->unformat_policer_next_index_fns[i], &tmp))
+        {
+          next_index = tmp;
+          goto out;
+        }
+    }
+
+  if (unformat (input, "%d", &tmp))
+    {
+      next_index = tmp;
+      goto out;
+    }
+
+  return 0;
+
+ out:
+  *next_indexp = next_index;
+  return 1;
+}
+
+static clib_error_t *
+classify_table_command_fn (vlib_main_t * vm,
+                           unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  u32 nbuckets = 2;
+  u32 skip = ~0;
+  u32 match = ~0;
+  int is_add = 1;
+  int del_chain = 0;
+  u32 table_index = ~0;
+  u32 next_table_index = ~0;
+  u32 miss_next_index = ~0;
+  u32 memory_size = 2<<20;
+  u32 tmp;
+  u32 current_data_flag = 0;
+  int current_data_offset = 0;
+
+  u8 * mask = 0;
+  vnet_classify_main_t * cm = &vnet_classify_main;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+    if (unformat (input, "del"))
+      is_add = 0;
+    else if (unformat (input, "del-chain"))
+      {
+        is_add = 0;
+        del_chain = 1;
+      }
+    else if (unformat (input, "buckets %d", &nbuckets))
+      ;
+    else if (unformat (input, "skip %d", &skip))
+      ;
+    else if (unformat (input, "match %d", &match))
+      ;
+    else if (unformat (input, "table %d", &table_index))
+      ;
+    else if (unformat (input, "mask %U", unformat_classify_mask,
+                       cm, &mask, &skip, &match))
+      ;
+    else if (unformat (input, "memory-size %uM", &tmp))
+      memory_size = tmp<<20;
+    else if (unformat (input, "memory-size %uG", &tmp))
+      memory_size = tmp<<30;
+    else if (unformat (input, "next-table %d", &next_table_index))
+      ;
+    else if (unformat (input, "miss-next %U", unformat_ip_next_index,
+                       &miss_next_index))
+      ;
+    else if (unformat (input, "l2-input-miss-next %U",
+                       unformat_l2_input_next_index, &miss_next_index))
+      ;
+    else if (unformat (input, "l2-output-miss-next %U",
+                       unformat_l2_output_next_index, &miss_next_index))
+      ;
+    else if (unformat (input, "acl-miss-next %U", unformat_acl_next_index,
+                       &miss_next_index))
+      ;
+    else if (unformat (input, "current-data-flag %d", &current_data_flag))
+      ;
+    else if (unformat (input, "current-data-offset %d", &current_data_offset))
+      ;
+    else
+      break;
+  }
+
+  if (is_add && mask == 0 && table_index == ~0)
+    return clib_error_return (0, "Mask required");
+
+  if (is_add && skip == ~0 && table_index == ~0)
+    return clib_error_return (0, "skip count required");
+
+  if (is_add && match == ~0 && table_index == ~0)
+    return clib_error_return (0, "match count required");
+
+  if (!is_add && table_index == ~0)
+    return clib_error_return (0, "table index required for delete");
+
+  rv = vnet_classify_add_del_table (cm, mask, nbuckets, memory_size,
+        skip, match, next_table_index, miss_next_index, &table_index,
+        current_data_flag, current_data_offset, is_add, del_chain);
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    default:
+      return clib_error_return (0, "vnet_classify_add_del_table returned %d",
+                                rv);
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (classify_table, static) = {
+  .path = "classify table",
+  .short_help =
+  "classify table [miss-next|l2-input-miss-next|l2-output-miss-next|"
+  "acl-miss-next <next_index>]"
+  "\n mask <mask-value> buckets <nn> [skip <n>] [match <n>]"
+  "\n [current-data-flag <n>] [current-data-offset <n>] [table <n>]"
+  "\n [del] [del-chain]",
+  .function = classify_table_command_fn,
+};
+
+static u8 * format_vnet_classify_table (u8 * s, va_list * args)
+{
+  vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *);
+  int verbose = va_arg (*args, int);
+  u32 index = va_arg (*args, u32);
+  vnet_classify_table_t * t;
+
+  if (index == ~0)
+    {
+      s = format (s,
"%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl", + "NextNode", verbose ? "Details" : ""); + return s; + } + + t = pool_elt_at_index (cm->tables, index); + s = format (s, "%10u%10d%10d%10d", index, t->active_elements, + t->next_table_index, t->miss_next_index); + + s = format (s, "\n Heap: %U", format_mheap, t->mheap, 0 /*verbose*/); + + s = format (s, "\n nbuckets %d, skip %d match %d flag %d offset %d", + t->nbuckets, t->skip_n_vectors, t->match_n_vectors, + t->current_data_flag, t->current_data_offset); + s = format (s, "\n mask %U", format_hex_bytes, t->mask, + t->match_n_vectors * sizeof (u32x4)); + + if (verbose == 0) + return s; + + s = format (s, "\n%U", format_classify_table, t, verbose); + + return s; +} + +static clib_error_t * +show_classify_tables_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + vnet_classify_table_t * t; + u32 match_index = ~0; + u32 * indices = 0; + int verbose = 0; + int i; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "index %d", &match_index)) + ; + else if (unformat (input, "verbose %d", &verbose)) + ; + else if (unformat (input, "verbose")) + verbose = 1; + else + break; + } + + pool_foreach (t, cm->tables, + ({ + if (match_index == ~0 || (match_index == t - cm->tables)) + vec_add1 (indices, t - cm->tables); + })); + + if (vec_len(indices)) + { + vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose, + ~0 /* hdr */); + for (i = 0; i < vec_len (indices); i++) + vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, + verbose, indices[i]); + } + else + vlib_cli_output (vm, "No classifier tables configured"); + + vec_free (indices); + + return 0; +} + +VLIB_CLI_COMMAND (show_classify_table_command, static) = { + .path = "show classify tables", + .short_help = "show classify tables [index <nn>]", + .function = show_classify_tables_command_fn, +}; + +uword unformat_l4_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + + u8 * proto_header = 0; + int src_port = 0; + int dst_port = 0; + + tcpudp_header_t h; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "src_port %d", &src_port)) + ; + else if (unformat (input, "dst_port %d", &dst_port)) + ; + else + return 0; + } + + h.src_port = clib_host_to_net_u16(src_port); + h.dst_port = clib_host_to_net_u16(dst_port); + vec_validate(proto_header, sizeof(h)-1); + memcpy(proto_header, &h, sizeof(h)); + + *matchp = proto_header; + + return 1; +} + +uword unformat_ip4_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + u8 * match = 0; + ip4_header_t * ip; + int version = 0; + u32 version_val; + int hdr_length = 0; + u32 hdr_length_val; + int src = 0, dst = 0; + ip4_address_t src_val, dst_val; + int proto = 0; + u32 proto_val; + int tos = 0; + u32 tos_val; + int length = 0; + u32 length_val; + int fragment_id = 0; + u32 fragment_id_val; + int ttl = 0; + int ttl_val; + int checksum = 0; + u32 checksum_val; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "version %d", &version_val)) + version = 1; + else if (unformat (input, "hdr_length %d", &hdr_length_val)) + hdr_length = 1; + else if (unformat (input, "src %U", unformat_ip4_address, &src_val)) + src = 1; + else if (unformat (input, "dst %U", unformat_ip4_address, &dst_val)) + dst = 1; + else if (unformat (input, "proto %d", &proto_val)) + 
+        proto = 1;
+      else if (unformat (input, "tos %d", &tos_val))
+        tos = 1;
+      else if (unformat (input, "length %d", &length_val))
+        length = 1;
+      else if (unformat (input, "fragment_id %d", &fragment_id_val))
+        fragment_id = 1;
+      else if (unformat (input, "ttl %d", &ttl_val))
+        ttl = 1;
+      else if (unformat (input, "checksum %d", &checksum_val))
+        checksum = 1;
+      else
+        break;
+    }
+
+  if (version + hdr_length + src + dst + proto + tos + length + fragment_id
+      + ttl + checksum == 0)
+    return 0;
+
+  /*
+   * Aligned because we use the real comparison functions
+   */
+  vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+  ip = (ip4_header_t *) match;
+
+  /* These are realistically matched in practice */
+  if (src)
+    ip->src_address.as_u32 = src_val.as_u32;
+
+  if (dst)
+    ip->dst_address.as_u32 = dst_val.as_u32;
+
+  if (proto)
+    ip->protocol = proto_val;
+
+  /* These are not, but they're included for completeness */
+  if (version)
+    ip->ip_version_and_header_length |= (version_val & 0xF)<<4;
+
+  if (hdr_length)
+    ip->ip_version_and_header_length |= (hdr_length_val & 0xF);
+
+  if (tos)
+    ip->tos = tos_val;
+
+  if (length)
+    ip->length = clib_host_to_net_u16 (length_val);
+
+  if (ttl)
+    ip->ttl = ttl_val;
+
+  if (checksum)
+    ip->checksum = clib_host_to_net_u16 (checksum_val);
+
+  *matchp = match;
+  return 1;
+}
+
+uword unformat_ip6_match (unformat_input_t * input, va_list * args)
+{
+  u8 ** matchp = va_arg (*args, u8 **);
+  u8 * match = 0;
+  ip6_header_t * ip;
+  int version = 0;
+  u32 version_val;
+  u8 traffic_class = 0;
+  u32 traffic_class_val;
+  u8 flow_label = 0;
+  u32 flow_label_val;           /* flow label is a 20-bit field */
+  int src = 0, dst = 0;
+  ip6_address_t src_val, dst_val;
+  int proto = 0;
+  u32 proto_val;
+  int payload_length = 0;
+  u32 payload_length_val;
+  int hop_limit = 0;
+  int hop_limit_val;
+  u32 ip_version_traffic_class_and_flow_label;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "version %d", &version_val))
+        version = 1;
+      else if (unformat (input, "traffic_class %d", &traffic_class_val))
+        traffic_class = 1;
+      else if (unformat (input, "flow_label %d", &flow_label_val))
+        flow_label = 1;
+      else if (unformat (input, "src %U", unformat_ip6_address, &src_val))
+        src = 1;
+      else if (unformat (input, "dst %U", unformat_ip6_address, &dst_val))
+        dst = 1;
+      else if (unformat (input, "proto %d", &proto_val))
+        proto = 1;
+      else if (unformat (input, "payload_length %d", &payload_length_val))
+        payload_length = 1;
+      else if (unformat (input, "hop_limit %d", &hop_limit_val))
+        hop_limit = 1;
+      else
+        break;
+    }
+
+  if (version + traffic_class + flow_label + src + dst + proto
+      + payload_length + hop_limit == 0)
+    return 0;
+
+  /*
+   * Aligned because we use the real comparison functions
+   */
+  vec_validate_aligned (match, sizeof (*ip) - 1, sizeof(u32x4));
+
+  ip = (ip6_header_t *) match;
+
+  if (src)
+    clib_memcpy (&ip->src_address, &src_val, sizeof (ip->src_address));
+
+  if (dst)
+    clib_memcpy (&ip->dst_address, &dst_val, sizeof (ip->dst_address));
+
+  if (proto)
+    ip->protocol = proto_val;
+
+  ip_version_traffic_class_and_flow_label = 0;
+
+  if (version)
+    ip_version_traffic_class_and_flow_label |= (version_val & 0xF) << 28;
+
+  if (traffic_class)
+    ip_version_traffic_class_and_flow_label |= (traffic_class_val & 0xFF) << 20;
+
+  if (flow_label)
+    ip_version_traffic_class_and_flow_label |= (flow_label_val & 0xFFFFF);
+
+  ip->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (ip_version_traffic_class_and_flow_label);
+
+  if (payload_length)
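+    /* big-endian on the wire; payload_length excludes the 40-octet fixed header */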
ip->payload_length = clib_host_to_net_u16 (payload_length_val); + + if (hop_limit) + ip->hop_limit = hop_limit_val; + + *matchp = match; + return 1; +} + +uword unformat_l3_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "ip4 %U", unformat_ip4_match, matchp)) + return 1; + else if (unformat (input, "ip6 %U", unformat_ip6_match, matchp)) + return 1; + /* $$$$ add mpls */ + else + break; + } + return 0; +} + +uword unformat_vlan_tag (unformat_input_t * input, va_list * args) +{ + u8 * tagp = va_arg (*args, u8 *); + u32 tag; + + if (unformat(input, "%d", &tag)) + { + tagp[0] = (tag>>8) & 0x0F; + tagp[1] = tag & 0xFF; + return 1; + } + + return 0; +} + +uword unformat_l2_match (unformat_input_t * input, va_list * args) +{ + u8 ** matchp = va_arg (*args, u8 **); + u8 * match = 0; + u8 src = 0; + u8 src_val[6]; + u8 dst = 0; + u8 dst_val[6]; + u8 proto = 0; + u16 proto_val; + u8 tag1 = 0; + u8 tag1_val [2]; + u8 tag2 = 0; + u8 tag2_val [2]; + int len = 14; + u8 ignore_tag1 = 0; + u8 ignore_tag2 = 0; + u8 cos1 = 0; + u8 cos2 = 0; + u32 cos1_val = 0; + u32 cos2_val = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "src %U", unformat_ethernet_address, &src_val)) + src = 1; + else if (unformat (input, "dst %U", unformat_ethernet_address, &dst_val)) + dst = 1; + else if (unformat (input, "proto %U", + unformat_ethernet_type_host_byte_order, &proto_val)) + proto = 1; + else if (unformat (input, "tag1 %U", unformat_vlan_tag, tag1_val)) + tag1 = 1; + else if (unformat (input, "tag2 %U", unformat_vlan_tag, tag2_val)) + tag2 = 1; + else if (unformat (input, "ignore-tag1")) + ignore_tag1 = 1; + else if (unformat (input, "ignore-tag2")) + ignore_tag2 = 1; + else if (unformat (input, "cos1 %d", &cos1_val)) + cos1 = 1; + else if (unformat (input, "cos2 %d", &cos2_val)) + cos2 = 1; + else + break; + } + if ((src + dst + proto + tag1 + tag2 + + ignore_tag1 + ignore_tag2 + cos1 + cos2) == 0) + return 0; + + if (tag1 || ignore_tag1 || cos1) + len = 18; + if (tag2 || ignore_tag2 || cos2) + len = 22; + + vec_validate_aligned (match, len-1, sizeof(u32x4)); + + if (dst) + clib_memcpy (match, dst_val, 6); + + if (src) + clib_memcpy (match + 6, src_val, 6); + + if (tag2) + { + /* inner vlan tag */ + match[19] = tag2_val[1]; + match[18] = tag2_val[0]; + if (cos2) + match [18] |= (cos2_val & 0x7) << 5; + if (proto) + { + match[21] = proto_val & 0xff; + match[20] = proto_val >> 8; + } + if (tag1) + { + match [15] = tag1_val[1]; + match [14] = tag1_val[0]; + } + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + *matchp = match; + return 1; + } + if (tag1) + { + match [15] = tag1_val[1]; + match [14] = tag1_val[0]; + if (proto) + { + match[17] = proto_val & 0xff; + match[16] = proto_val >> 8; + } + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + + *matchp = match; + return 1; + } + if (cos2) + match [18] |= (cos2_val & 0x7) << 5; + if (cos1) + match [14] |= (cos1_val & 0x7) << 5; + if (proto) + { + match[13] = proto_val & 0xff; + match[12] = proto_val >> 8; + } + + *matchp = match; + return 1; +} + + +uword unformat_classify_match (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = va_arg (*args, vnet_classify_main_t *); + u8 ** matchp = va_arg (*args, u8 **); + u32 table_index = va_arg (*args, u32); + vnet_classify_table_t * t; + + u8 * match = 0; + u8 * l2 = 0; + u8 * l3 = 0; + u8 * l4 = 0; + + if (pool_is_free_index 
(cm->tables, table_index)) + return 0; + + t = pool_elt_at_index (cm->tables, table_index); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "hex %U", unformat_hex_string, &match)) + ; + else if (unformat (input, "l2 %U", unformat_l2_match, &l2)) + ; + else if (unformat (input, "l3 %U", unformat_l3_match, &l3)) + ; + else if (unformat (input, "l4 %U", unformat_l4_match, &l4)) + ; + else + break; + } + + if (l4 && !l3) { + vec_free (match); + vec_free (l2); + vec_free (l4); + return 0; + } + + if (match || l2 || l3 || l4) + { + if (l2 || l3 || l4) + { + /* "Win a free Ethernet header in every packet" */ + if (l2 == 0) + vec_validate_aligned (l2, 13, sizeof(u32x4)); + match = l2; + if (l3) + { + vec_append_aligned (match, l3, sizeof(u32x4)); + vec_free (l3); + } + if (l4) + { + vec_append_aligned (match, l4, sizeof(u32x4)); + vec_free (l4); + } + } + + /* Make sure the vector is big enough even if key is all 0's */ + vec_validate_aligned + (match, ((t->match_n_vectors + t->skip_n_vectors) * sizeof(u32x4)) - 1, + sizeof(u32x4)); + + /* Set size, include skipped vectors*/ + _vec_len (match) = (t->match_n_vectors+t->skip_n_vectors) * sizeof(u32x4); + + *matchp = match; + + return 1; + } + + return 0; +} + +int vnet_classify_add_del_session (vnet_classify_main_t * cm, + u32 table_index, + u8 * match, + u32 hit_next_index, + u32 opaque_index, + i32 advance, + u8 action, + u32 metadata, + int is_add) +{ + vnet_classify_table_t * t; + vnet_classify_entry_5_t _max_e __attribute__((aligned (16))); + vnet_classify_entry_t * e; + int i, rv; + + if (pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_TABLE; + + t = pool_elt_at_index (cm->tables, table_index); + + e = (vnet_classify_entry_t *)&_max_e; + e->next_index = hit_next_index; + e->opaque_index = opaque_index; + e->advance = advance; + e->hits = 0; + e->last_heard = 0; + e->flags = 0; + e->action = action; + if (e->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX) + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, metadata); + else if (e->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX) + e->metadata = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, metadata); + + /* Copy key data, honoring skip_n_vectors */ + clib_memcpy (&e->key, match + t->skip_n_vectors * sizeof (u32x4), + t->match_n_vectors * sizeof (u32x4)); + + /* Clear don't-care bits; likely when dynamically creating sessions */ + for (i = 0; i < t->match_n_vectors; i++) + e->key[i] &= t->mask[i]; + + rv = vnet_classify_add_del (t, e, is_add); + if (rv) + return VNET_API_ERROR_NO_SUCH_ENTRY; + return 0; +} + +static clib_error_t * +classify_session_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + int is_add = 1; + u32 table_index = ~0; + u32 hit_next_index = ~0; + u64 opaque_index = ~0; + u8 * match = 0; + i32 advance = 0; + u32 action = 0; + u32 metadata = 0; + int i, rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "hit-next %U", unformat_ip_next_index, + &hit_next_index)) + ; + else if (unformat (input, "l2-input-hit-next %U", unformat_l2_input_next_index, + &hit_next_index)) + ; + else if (unformat (input, "l2-output-hit-next %U", unformat_l2_output_next_index, + &hit_next_index)) + ; + else if (unformat (input, "acl-hit-next %U", unformat_acl_next_index, + &hit_next_index)) + ; + else if (unformat (input, 
"policer-hit-next %U", + unformat_policer_next_index, &hit_next_index)) + ; + else if (unformat (input, "opaque-index %lld", &opaque_index)) + ; + else if (unformat (input, "match %U", unformat_classify_match, + cm, &match, table_index)) + ; + else if (unformat (input, "advance %d", &advance)) + ; + else if (unformat (input, "table-index %d", &table_index)) + ; + else if (unformat (input, "action set-ip4-fib-id %d", &metadata)) + action = 1; + else if (unformat (input, "action set-ip6-fib-id %d", &metadata)) + action = 2; + else + { + /* Try registered opaque-index unformat fns */ + for (i = 0; i < vec_len (cm->unformat_opaque_index_fns); i++) + { + if (unformat (input, "%U", cm->unformat_opaque_index_fns[i], + &opaque_index)) + goto found_opaque; + } + break; + } + found_opaque: + ; + } + + if (table_index == ~0) + return clib_error_return (0, "Table index required"); + + if (is_add && match == 0) + return clib_error_return (0, "Match value required"); + + rv = vnet_classify_add_del_session (cm, table_index, match, + hit_next_index, + opaque_index, advance, + action, metadata, is_add); + + switch(rv) + { + case 0: + break; + + default: + return clib_error_return (0, "vnet_classify_add_del_session returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (classify_session_command, static) = { + .path = "classify session", + .short_help = + "classify session [hit-next|l2-hit-next|" + "acl-hit-next <next_index>|policer-hit-next <policer_name>]" + "\n table-index <nn> match [hex] [l2] [l3 ip4] [opaque-index <index>]" + "\n [action set-ip4-fib-id <n>] [action set-ip6-fib-id <n>] [del]", + .function = classify_session_command_fn, +}; + +static uword +unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args) +{ + u64 * opaquep = va_arg (*args, u64 *); + u32 sw_if_index; + + if (unformat (input, "opaque-sw_if_index %U", unformat_vnet_sw_interface, + vnet_get_main(), &sw_if_index)) + { + *opaquep = sw_if_index; + return 1; + } + return 0; +} + +static uword +unformat_ip_next_node (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * next_indexp = va_arg (*args, u32 *); + u32 node_index; + u32 next_index = ~0; + + if (unformat (input, "ip6-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next (cm->vlib_main, + ip6_classify_node.index, node_index); + } + else if (unformat (input, "ip4-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next (cm->vlib_main, + ip4_classify_node.index, node_index); + } + else + return 0; + + *next_indexp = next_index; + return 1; +} + +static uword +unformat_acl_next_node (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * next_indexp = va_arg (*args, u32 *); + u32 node_index; + u32 next_index; + + if (unformat (input, "ip6-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next (cm->vlib_main, + ip6_inacl_node.index, node_index); + } + else if (unformat (input, "ip4-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next (cm->vlib_main, + ip4_inacl_node.index, node_index); + } + else + return 0; + + *next_indexp = next_index; + return 1; +} + +static uword +unformat_l2_input_next_node (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * next_indexp = va_arg (*args, u32 *); + u32 node_index; + u32 next_index; + + if (unformat 
(input, "input-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next + (cm->vlib_main, l2_input_classify_node.index, node_index); + + *next_indexp = next_index; + return 1; + } + return 0; +} + +static uword +unformat_l2_output_next_node (unformat_input_t * input, va_list * args) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + u32 * next_indexp = va_arg (*args, u32 *); + u32 node_index; + u32 next_index; + + if (unformat (input, "output-node %U", unformat_vlib_node, + cm->vlib_main, &node_index)) + { + next_index = vlib_node_add_next + (cm->vlib_main, l2_output_classify_node.index, node_index); + + *next_indexp = next_index; + return 1; + } + return 0; +} + +static clib_error_t * +vnet_classify_init (vlib_main_t * vm) +{ + vnet_classify_main_t * cm = &vnet_classify_main; + + cm->vlib_main = vm; + cm->vnet_main = vnet_get_main(); + + vnet_classify_register_unformat_opaque_index_fn + (unformat_opaque_sw_if_index); + + vnet_classify_register_unformat_ip_next_index_fn + (unformat_ip_next_node); + + vnet_classify_register_unformat_l2_next_index_fn + (unformat_l2_input_next_node); + + vnet_classify_register_unformat_l2_next_index_fn + (unformat_l2_output_next_node); + + vnet_classify_register_unformat_acl_next_index_fn + (unformat_acl_next_node); + + return 0; +} + +VLIB_INIT_FUNCTION (vnet_classify_init); + +#define TEST_CODE 1 + +#if TEST_CODE > 0 +static clib_error_t * +test_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 buckets = 2; + u32 sessions = 10; + int i, rv; + vnet_classify_table_t * t = 0; + classify_data_or_mask_t * mask; + classify_data_or_mask_t * data; + u8 *mp = 0, *dp = 0; + vnet_classify_main_t * cm = &vnet_classify_main; + vnet_classify_entry_t * e; + int is_add = 1; + u32 tmp; + u32 table_index = ~0; + ip4_address_t src; + u32 deleted = 0; + u32 memory_size = 64<<20; + + /* Default starting address 1.0.0.10 */ + src.as_u32 = clib_net_to_host_u32 (0x0100000A); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "sessions %d", &sessions)) + ; + else if (unformat (input, "src %U", unformat_ip4_address, &src)) + ; + else if (unformat (input, "buckets %d", &buckets)) + ; + else if (unformat (input, "memory-size %uM", &tmp)) + memory_size = tmp<<20; + else if (unformat (input, "memory-size %uG", &tmp)) + memory_size = tmp<<30; + else if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "table %d", &table_index)) + ; + else + break; + } + + vec_validate_aligned (mp, 3 * sizeof(u32x4), sizeof(u32x4)); + vec_validate_aligned (dp, 3 * sizeof(u32x4), sizeof(u32x4)); + + mask = (classify_data_or_mask_t *) mp; + data = (classify_data_or_mask_t *) dp; + + data->ip.src_address.as_u32 = src.as_u32; + + /* Mask on src address */ + memset (&mask->ip.src_address, 0xff, 4); + + buckets = 1<<max_log2(buckets); + + if (table_index != ~0) + { + if (pool_is_free_index (cm->tables, table_index)) + { + vlib_cli_output (vm, "No such table %d", table_index); + goto out; + } + t = pool_elt_at_index (cm->tables, table_index); + } + + if (is_add) + { + if (t == 0) + { + t = vnet_classify_new_table (cm, (u8 *)mask, buckets, + memory_size, + 0 /* skip */, + 3 /* vectors to match */); + t->miss_next_index = IP_LOOKUP_NEXT_DROP; + vlib_cli_output (vm, "Create table %d", t - cm->tables); + } + + vlib_cli_output (vm, "Add %d sessions to %d buckets...", + sessions, buckets); + + for (i = 0; i < sessions; i++) + { + rv = 
vnet_classify_add_del_session (cm, t - cm->tables, (u8 *) data, + IP_LOOKUP_NEXT_DROP, + i+100 /* opaque_index */, + 0 /* advance */, 0, 0, + 1 /* is_add */); + + if (rv != 0) + clib_warning ("add: returned %d", rv); + + tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1; + data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp); + } + goto out; + } + + if (t == 0) + { + vlib_cli_output (vm, "Must specify table index to delete sessions"); + goto out; + } + + vlib_cli_output (vm, "Try to delete %d sessions...", sessions); + + for (i = 0; i < sessions; i++) + { + u8 * key_minus_skip; + u64 hash; + + hash = vnet_classify_hash_packet (t, (u8 *) data); + + e = vnet_classify_find_entry (t, (u8 *) data, hash, 0 /* time_now */); + /* Previous delete, perhaps... */ + if (e == 0) + continue; + ASSERT (e->opaque_index == (i+100)); + + key_minus_skip = (u8 *)e->key; + key_minus_skip -= t->skip_n_vectors * sizeof (u32x4); + + rv = vnet_classify_add_del_session (cm, t - cm->tables, key_minus_skip, + IP_LOOKUP_NEXT_DROP, + i+100 /* opaque_index */, + 0 /* advance */, 0, 0, + 0 /* is_add */); + if (rv != 0) + clib_warning ("del: returned %d", rv); + + tmp = clib_net_to_host_u32 (data->ip.src_address.as_u32) + 1; + data->ip.src_address.as_u32 = clib_net_to_host_u32 (tmp); + deleted++; + } + + vlib_cli_output (vm, "Deleted %d sessions...", deleted); + + out: + vec_free (mp); + vec_free (dp); + + return 0; +} + +VLIB_CLI_COMMAND (test_classify_command, static) = { + .path = "test classify", + .short_help = + "test classify [src <ip>] [sessions <nn>] [buckets <nn>] [table <nn>] [del]", + .function = test_classify_command_fn, +}; +#endif /* TEST_CODE */ diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h new file mode 100644 index 00000000000..d0b896ed7d2 --- /dev/null +++ b/src/vnet/classify/vnet_classify.h @@ -0,0 +1,523 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_vnet_classify_h__ +#define __included_vnet_classify_h__ + +#include <stdarg.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vlib/cli.h> +#include <vnet/l2/l2_input.h> +#include <vnet/l2/feat_bitmap.h> +#include <vnet/api_errno.h> /* for API error numbers */ + +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> +#include <vppinfra/xxhash.h> + +extern vlib_node_registration_t ip4_classify_node; +extern vlib_node_registration_t ip6_classify_node; + +#define CLASSIFY_TRACE 0 + +#if !defined( __aarch64__) && !defined(__arm__) +#define CLASSIFY_USE_SSE //Allow usage of SSE operations +#endif + +#define U32X4_ALIGNED(p) PREDICT_TRUE((((intptr_t)p) & 0xf) == 0) + +/* + * Classify table option to process packets + * CLASSIFY_FLAG_USE_CURR_DATA: + * - classify packets starting from VPP node’s current data pointer + */ +#define CLASSIFY_FLAG_USE_CURR_DATA 1 + +/* + * Classify session action + * CLASSIFY_ACTION_SET_IP4_FIB_INDEX: + * - Classified IP packets will be looked up + * from the specified ipv4 fib table + * CLASSIFY_ACTION_SET_IP6_FIB_INDEX: + * - Classified IP packets will be looked up + * from the specified ipv6 fib table + */ +#define CLASSIFY_ACTION_SET_IP4_FIB_INDEX 1 +#define CLASSIFY_ACTION_SET_IP6_FIB_INDEX 2 + +struct _vnet_classify_main; +typedef struct _vnet_classify_main vnet_classify_main_t; + +#define foreach_size_in_u32x4 \ +_(1) \ +_(2) \ +_(3) \ +_(4) \ +_(5) + +typedef CLIB_PACKED(struct _vnet_classify_entry { + /* Graph node next index */ + u32 next_index; + + /* put into vnet_buffer(b)->l2_classfy.opaque_index */ + union { + struct { + u32 opaque_index; + /* advance on hit, note it's a signed quantity... 
*/ + i32 advance; + }; + u64 opaque_count; + }; + + /* Really only need 1 bit */ + u8 flags; +#define VNET_CLASSIFY_ENTRY_FREE (1<<0) + + u8 action; + u16 metadata; + + /* Hit counter, last heard time */ + union { + u64 hits; + struct _vnet_classify_entry * next_free; + }; + + f64 last_heard; + + /* Must be aligned to a 16-octet boundary */ + u32x4 key[0]; +}) vnet_classify_entry_t; + +static inline int vnet_classify_entry_is_free (vnet_classify_entry_t * e) +{ + return e->flags & VNET_CLASSIFY_ENTRY_FREE; +} + +static inline int vnet_classify_entry_is_busy (vnet_classify_entry_t * e) +{ + return ((e->flags & VNET_CLASSIFY_ENTRY_FREE) == 0); +} + +/* Need these to con the vector allocator */ +#define _(size) \ +typedef CLIB_PACKED(struct { \ + u32 pad0[4]; \ + u64 pad1[2]; \ + u32x4 key[size]; \ +}) vnet_classify_entry_##size##_t; +foreach_size_in_u32x4; +#undef _ + +typedef struct { + union { + struct { + u32 offset; + u8 pad[3]; + u8 log2_pages; + }; + u64 as_u64; + }; +} vnet_classify_bucket_t; + +typedef struct { + /* Mask to apply after skipping N vectors */ + u32x4 *mask; + /* Buckets and entries */ + vnet_classify_bucket_t * buckets; + vnet_classify_entry_t * entries; + + /* Config parameters */ + u32 match_n_vectors; + u32 skip_n_vectors; + u32 nbuckets; + u32 log2_nbuckets; + int entries_per_page; + u32 active_elements; + u32 current_data_flag; + int current_data_offset; + u32 data_offset; + /* Index of next table to try */ + u32 next_table_index; + + /* Miss next index, return if next_table_index = 0 */ + u32 miss_next_index; + + /* Per-bucket working copies, one per thread */ + vnet_classify_entry_t ** working_copies; + vnet_classify_bucket_t saved_bucket; + + /* Free entry freelists */ + vnet_classify_entry_t **freelists; + + u8 * name; + + /* Private allocation arena, protected by the writer lock */ + void * mheap; + + /* Writer (only) lock for this table */ + volatile u32 * writer_lock; + +} vnet_classify_table_t; + +struct _vnet_classify_main { + /* Table pool */ + vnet_classify_table_t * tables; + + /* Registered next-index, opaque unformat fcns */ + unformat_function_t ** unformat_l2_next_index_fns; + unformat_function_t ** unformat_ip_next_index_fns; + unformat_function_t ** unformat_acl_next_index_fns; + unformat_function_t ** unformat_policer_next_index_fns; + unformat_function_t ** unformat_opaque_index_fns; + + /* convenience variables */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +}; + +extern vnet_classify_main_t vnet_classify_main; + +u8 * format_classify_table (u8 * s, va_list * args); + +u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h); + +static inline u64 +vnet_classify_hash_packet_inline (vnet_classify_table_t * t, + u8 * h) +{ + u32x4 *mask; + + union { + u32x4 as_u32x4; + u64 as_u64[2]; + } xor_sum __attribute__((aligned(sizeof(u32x4)))); + + ASSERT(t); + mask = t->mask; +#ifdef CLASSIFY_USE_SSE + if (U32X4_ALIGNED(h)) { //SSE can't handle unaligned data + u32x4 *data = (u32x4 *)h; + xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0]; + switch (t->match_n_vectors) + { + case 5: + xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4]; + /* FALLTHROUGH */ + case 4: + xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3]; + /* FALLTHROUGH */ + case 3: + xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2]; + /* FALLTHROUGH */ + case 2: + xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + abort(); + } + } else +#endif /* CLASSIFY_USE_SSE */ + { + u32 
+static inline u64
+vnet_classify_hash_packet_inline (vnet_classify_table_t * t,
+                                  u8 * h)
+{
+  u32x4 *mask;
+
+  union {
+    u32x4 as_u32x4;
+    u64 as_u64[2];
+  } xor_sum __attribute__((aligned(sizeof(u32x4))));
+
+  ASSERT(t);
+  mask = t->mask;
+#ifdef CLASSIFY_USE_SSE
+  if (U32X4_ALIGNED(h)) {  //SSE can't handle unaligned data
+    u32x4 *data = (u32x4 *)h;
+    xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
+    switch (t->match_n_vectors)
+      {
+      case 5:
+        xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+        /* FALLTHROUGH */
+      case 4:
+        xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+        /* FALLTHROUGH */
+      case 3:
+        xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+        /* FALLTHROUGH */
+      case 2:
+        xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+        /* FALLTHROUGH */
+      case 1:
+        break;
+      default:
+        abort();
+      }
+  } else
+#endif /* CLASSIFY_USE_SSE */
+  {
+    u32 skip_u64 = t->skip_n_vectors * 2;
+    u64 *data64 = (u64 *)h;
+    xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *)mask)[0];
+    xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *)mask)[1];
+    switch (t->match_n_vectors)
+      {
+      case 5:
+        xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *)mask)[8];
+        xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *)mask)[9];
+        /* FALLTHROUGH */
+      case 4:
+        xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *)mask)[6];
+        xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *)mask)[7];
+        /* FALLTHROUGH */
+      case 3:
+        xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *)mask)[4];
+        xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *)mask)[5];
+        /* FALLTHROUGH */
+      case 2:
+        xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *)mask)[2];
+        xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *)mask)[3];
+        /* FALLTHROUGH */
+      case 1:
+        break;
+
+      default:
+        abort();
+      }
+  }
+
+  return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
+}
+
+static inline void
+vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash)
+{
+  u32 bucket_index;
+
+  ASSERT (is_pow2(t->nbuckets));
+
+  bucket_index = hash & (t->nbuckets - 1);
+
+  CLIB_PREFETCH(&t->buckets[bucket_index], CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_get_entry (vnet_classify_table_t * t, uword offset)
+{
+  u8 * hp = t->mheap;
+  u8 * vp = hp + offset;
+
+  return (void *) vp;
+}
+
+static inline uword vnet_classify_get_offset (vnet_classify_table_t * t,
+                                              vnet_classify_entry_t * v)
+{
+  u8 * hp, * vp;
+
+  hp = (u8 *) t->mheap;
+  vp = (u8 *) v;
+
+  ASSERT((vp - hp) < 0x100000000ULL);
+  return vp - hp;
+}
+
+static inline vnet_classify_entry_t *
+vnet_classify_entry_at_index (vnet_classify_table_t * t,
+                              vnet_classify_entry_t * e,
+                              u32 index)
+{
+  u8 * eu8;
+
+  eu8 = (u8 *)e;
+
+  eu8 += index * (sizeof (vnet_classify_entry_t) +
+                  (t->match_n_vectors * sizeof (u32x4)));
+
+  return (vnet_classify_entry_t *) eu8;
+}
+
+static inline void
+vnet_classify_prefetch_entry (vnet_classify_table_t * t,
+                              u64 hash)
+{
+  u32 bucket_index;
+  u32 value_index;
+  vnet_classify_bucket_t * b;
+  vnet_classify_entry_t * e;
+
+  bucket_index = hash & (t->nbuckets - 1);
+
+  b = &t->buckets[bucket_index];
+
+  if (b->offset == 0)
+    return;
+
+  hash >>= t->log2_nbuckets;
+
+  e = vnet_classify_get_entry (t, b->offset);
+  value_index = hash & ((1<<b->log2_pages)-1);
+
+  e = vnet_classify_entry_at_index (t, e, value_index);
+
+  CLIB_PREFETCH(e, CLIB_CACHE_LINE_BYTES, LOAD);
+}
+
+vnet_classify_entry_t *
+vnet_classify_find_entry (vnet_classify_table_t * t,
+                          u8 * h, u64 hash, f64 now);
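+/*
+ * Entries are addressed by byte offset into the table's private arena
+ * (t->mheap) rather than by raw pointer; see vnet_classify_get_offset()
+ * and vnet_classify_get_entry() above. Illustrative round trip,
+ * assuming e points at an entry inside the arena of table t:
+ *
+ *   uword offset = vnet_classify_get_offset (t, e);
+ *   ASSERT (vnet_classify_get_entry (t, offset) == e);
+ */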
+static inline vnet_classify_entry_t *
+vnet_classify_find_entry_inline (vnet_classify_table_t * t,
+                                 u8 * h, u64 hash, f64 now)
+{
+  vnet_classify_entry_t * v;
+  u32x4 *mask, *key;
+  union {
+    u32x4 as_u32x4;
+    u64 as_u64[2];
+  } result __attribute__((aligned(sizeof(u32x4))));
+  vnet_classify_bucket_t * b;
+  u32 value_index;
+  u32 bucket_index;
+  int i;
+
+  bucket_index = hash & (t->nbuckets-1);
+  b = &t->buckets[bucket_index];
+  mask = t->mask;
+
+  if (b->offset == 0)
+    return 0;
+
+  hash >>= t->log2_nbuckets;
+
+  v = vnet_classify_get_entry (t, b->offset);
+  value_index = hash & ((1<<b->log2_pages)-1);
+  v = vnet_classify_entry_at_index (t, v, value_index);
+
+#ifdef CLASSIFY_USE_SSE
+  if (U32X4_ALIGNED(h)) {
+    u32x4 *data = (u32x4 *) h;
+    for (i = 0; i < t->entries_per_page; i++) {
+      key = v->key;
+      result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+      switch (t->match_n_vectors)
+        {
+        case 5:
+          result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
+          /* FALLTHROUGH */
+        case 4:
+          result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+          /* FALLTHROUGH */
+        case 3:
+          result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+          /* FALLTHROUGH */
+        case 2:
+          result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+          /* FALLTHROUGH */
+        case 1:
+          break;
+        default:
+          abort();
+        }
+
+      if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff) {
+        if (PREDICT_TRUE(now)) {
+          v->hits++;
+          v->last_heard = now;
+        }
+        return (v);
+      }
+      v = vnet_classify_entry_at_index (t, v, 1);
+    }
+  } else
+#endif /* CLASSIFY_USE_SSE */
+  {
+    u32 skip_u64 = t->skip_n_vectors * 2;
+    u64 *data64 = (u64 *)h;
+    for (i = 0; i < t->entries_per_page; i++) {
+      key = v->key;
+
+      result.as_u64[0] = (data64[0 + skip_u64] & ((u64 *)mask)[0]) ^ ((u64 *)key)[0];
+      result.as_u64[1] = (data64[1 + skip_u64] & ((u64 *)mask)[1]) ^ ((u64 *)key)[1];
+      switch (t->match_n_vectors)
+        {
+        case 5:
+          result.as_u64[0] |= (data64[8 + skip_u64] & ((u64 *)mask)[8]) ^ ((u64 *)key)[8];
+          result.as_u64[1] |= (data64[9 + skip_u64] & ((u64 *)mask)[9]) ^ ((u64 *)key)[9];
+          /* FALLTHROUGH */
+        case 4:
+          result.as_u64[0] |= (data64[6 + skip_u64] & ((u64 *)mask)[6]) ^ ((u64 *)key)[6];
+          result.as_u64[1] |= (data64[7 + skip_u64] & ((u64 *)mask)[7]) ^ ((u64 *)key)[7];
+          /* FALLTHROUGH */
+        case 3:
+          result.as_u64[0] |= (data64[4 + skip_u64] & ((u64 *)mask)[4]) ^ ((u64 *)key)[4];
+          result.as_u64[1] |= (data64[5 + skip_u64] & ((u64 *)mask)[5]) ^ ((u64 *)key)[5];
+          /* FALLTHROUGH */
+        case 2:
+          result.as_u64[0] |= (data64[2 + skip_u64] & ((u64 *)mask)[2]) ^ ((u64 *)key)[2];
+          result.as_u64[1] |= (data64[3 + skip_u64] & ((u64 *)mask)[3]) ^ ((u64 *)key)[3];
+          /* FALLTHROUGH */
+        case 1:
+          break;
+        default:
+          abort();
+        }
+
+      if (result.as_u64[0] == 0 && result.as_u64[1] == 0) {
+        if (PREDICT_TRUE(now)) {
+          v->hits++;
+          v->last_heard = now;
+        }
+        return (v);
+      }
+
+      v = vnet_classify_entry_at_index (t, v, 1);
+    }
+  }
+  return 0;
+}
+
+vnet_classify_table_t *
+vnet_classify_new_table (vnet_classify_main_t *cm,
+                         u8 * mask, u32 nbuckets, u32 memory_size,
+                         u32 skip_n_vectors,
+                         u32 match_n_vectors);
+
+int vnet_classify_add_del_session (vnet_classify_main_t * cm,
+                                   u32 table_index,
+                                   u8 * match,
+                                   u32 hit_next_index,
+                                   u32 opaque_index,
+                                   i32 advance,
+                                   u8 action,
+                                   u32 metadata,
+                                   int is_add);
+
+int vnet_classify_add_del_table (vnet_classify_main_t * cm,
+                                 u8 * mask,
+                                 u32 nbuckets,
+                                 u32 memory_size,
+                                 u32 skip,
+                                 u32 match,
+                                 u32 next_table_index,
+                                 u32 miss_next_index,
+                                 u32 * table_index,
+                                 u8 current_data_flag,
+                                 i16 current_data_offset,
+                                 int is_add,
+                                 int del_chain);
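+/*
+ * Illustrative call (all values hypothetical): add a session to an
+ * existing table. match points at key data laid out exactly as the
+ * table's skip/match geometry expects; hits go to graph next index 1
+ * with no opaque data, no advance, and no session action.
+ *
+ *   int rv = vnet_classify_add_del_session
+ *     (&vnet_classify_main, table_index, match,
+ *      1,    // hit_next_index
+ *      ~0,   // opaque_index
+ *      0,    // advance
+ *      0,    // action (none)
+ *      0,    // metadata
+ *      1);   // is_add
+ */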
+unformat_function_t unformat_ip4_mask;
+unformat_function_t unformat_ip6_mask;
+unformat_function_t unformat_l3_mask;
+unformat_function_t unformat_l2_mask;
+unformat_function_t unformat_classify_mask;
+unformat_function_t unformat_l2_next_index;
+unformat_function_t unformat_ip_next_index;
+unformat_function_t unformat_ip4_match;
+unformat_function_t unformat_ip6_match;
+unformat_function_t unformat_l3_match;
+unformat_function_t unformat_vlan_tag;
+unformat_function_t unformat_l2_match;
+unformat_function_t unformat_classify_match;
+
+void vnet_classify_register_unformat_ip_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_l2_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_acl_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_policer_next_index_fn
+(unformat_function_t * fn);
+
+void vnet_classify_register_unformat_opaque_index_fn (unformat_function_t * fn);
+
+#endif /* __included_vnet_classify_h__ */