author    | Sachin Saxena <sachin.saxena@freescale.com> | 2018-02-28 20:28:52 +0530
committer | Sachin Saxena <sachin.saxena@nxp.com>       | 2018-02-28 20:34:56 +0530
commit    | 0689fce93ba269c48f83a2f70f971b3976d04c90 (patch)
tree      | 4cc2908df3598507cc1828ac19d8c43b22450ffa /src/plugins/acl
parent    | 746b57564deede624261ab8a96c94f562f24d22c (diff)
parent    | d594711a5d79859a7d0bde83a516f7ab52051d9b (diff)
Merge branch 'stable/1710' of https://gerrit.fd.io/r/vpp into 1710
Diffstat (limited to 'src/plugins/acl')
-rw-r--r-- | src/plugins/acl/acl.api                |  477
-rw-r--r-- | src/plugins/acl/acl.c                  | 2709
-rw-r--r-- | src/plugins/acl/acl.h                  |  316
-rw-r--r-- | src/plugins/acl/acl_all_api_h.h        |   21
-rw-r--r-- | src/plugins/acl/acl_hash_lookup_doc.md |  241
-rw-r--r-- | src/plugins/acl/acl_msg_enum.h         |   28
-rw-r--r-- | src/plugins/acl/acl_multicore_doc.md   |  349
-rw-r--r-- | src/plugins/acl/acl_test.c             | 1219
-rw-r--r-- | src/plugins/acl/fa_node.c              | 1874
-rw-r--r-- | src/plugins/acl/fa_node.h              |  174
-rw-r--r-- | src/plugins/acl/hash_lookup.c          |  894
-rw-r--r-- | src/plugins/acl/hash_lookup.h          |   64
-rw-r--r-- | src/plugins/acl/hash_lookup_private.h  |   33
-rw-r--r-- | src/plugins/acl/hash_lookup_types.h    |  107
-rw-r--r-- | src/plugins/acl/manual_fns.h           |  408
15 files changed, 8914 insertions, 0 deletions
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
new file mode 100644
index 00000000..a0de24a2
--- /dev/null
+++ b/src/plugins/acl/acl.api
@@ -0,0 +1,477 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+    This file defines the vpp control-plane API messages
+    used to control the ACL plugin
+*/
+
+
+/** \brief Get the plugin version
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+
+define acl_plugin_get_version
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply to get the plugin version
+    @param context - returned sender context, to match reply w/ request
+    @param major - Incremented every time a known breaking behavior change is introduced
+    @param minor - Incremented with small changes, may be used to avoid buggy versions
+*/
+
+define acl_plugin_get_version_reply
+{
+  u32 context;
+  u32 major;
+  u32 minor;
+};
+
+/** \brief Control ping from client to api server request
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define acl_plugin_control_ping
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param retval - return code for the request
+    @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define acl_plugin_control_ping_reply
+{
+  u32 context;
+  i32 retval;
+  u32 client_index;
+  u32 vpe_pid;
+};
+
+/** \brief Access List Rule entry
+    @param is_permit - deny (0), permit (1), or permit+reflect (2) action on this rule
+    @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+    @param src_ip_addr - Source prefix value
+    @param src_ip_prefix_len - Source prefix length
+    @param dst_ip_addr - Destination prefix value
+    @param dst_ip_prefix_len - Destination prefix length
+    @param proto - L4 protocol (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)
+    @param srcport_or_icmptype_first - beginning of source port or ICMP4/6 type range
+    @param srcport_or_icmptype_last - end of source port or ICMP4/6 type range
+    @param dstport_or_icmpcode_first - beginning of destination port or ICMP4/6 code range
+    @param dstport_or_icmpcode_last - end of destination port or ICMP4/6 code range
+    @param tcp_flags_mask - if proto==6, mask to AND the TCP flags in the packet with
+    @param tcp_flags_value - if proto==6, match the masked TCP flags against this value
+*/
+
+typeonly manual_print define acl_rule
+{
+  u8 is_permit;
+  u8 is_ipv6;
+  u8 src_ip_addr[16];
+  u8 src_ip_prefix_len;
+  u8 dst_ip_addr[16];
+  u8 dst_ip_prefix_len;
+/*
+ * L4 protocol. IANA number. 1 = ICMP, 58 = ICMPv6, 6 = TCP, 17 = UDP.
+ * 0 => ignore L4 and ignore the ports/tcpflags when matching.
+ */
+  u8 proto;
+/*
+ * If the L4 protocol is TCP or UDP, the below
+ * hold ranges of ports, else if the L4 is ICMP/ICMPv6
+ * they hold ranges of ICMP(v6) types/codes.
+ *
+ * Ranges are inclusive, i.e. to match "any" TCP/UDP port,
+ * use first=0,last=65535. For ICMP(v6),
+ * use first=0,last=255.
+ */
+  u16 srcport_or_icmptype_first;
+  u16 srcport_or_icmptype_last;
+  u16 dstport_or_icmpcode_first;
+  u16 dstport_or_icmpcode_last;
+/*
+ * for proto = 6, this matches if the
+ * TCP flags in the packet, ANDed with tcp_flags_mask,
+ * is equal to tcp_flags_value.
+ */
+  u8 tcp_flags_mask;
+  u8 tcp_flags_value;
+};
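
For illustration only (annotation, not part of this commit): a minimal client-side sketch filling one rule against the generated vl_api_acl_rule_t type; note that the handler in acl.c below byte-swaps the port ranges with ntohs, so the client sends them in network order.

    /* annotation: permit TCP to 10.0.0.0/8 dst port 80, any source */
    vl_api_acl_rule_t r;
    memset (&r, 0, sizeof (r));
    r.is_permit = 1;                 /* 0=deny, 1=permit, 2=permit+reflect */
    r.is_ipv6 = 0;                   /* first 4 bytes of the address used */
    r.dst_ip_addr[0] = 10;
    r.dst_ip_prefix_len = 8;         /* src stays 0.0.0.0/0 = any */
    r.proto = 6;                     /* TCP */
    r.srcport_or_icmptype_first = htons (0);
    r.srcport_or_icmptype_last = htons (65535);
    r.dstport_or_icmpcode_first = htons (80);
    r.dstport_or_icmpcode_last = htons (80);
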
+
+/** \brief MACIP Access List Rule entry
+    @param is_permit - deny (0), permit (1) action on this rule
+    @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+    @param src_mac - match masked source MAC address against this value
+    @param src_mac_mask - AND source MAC address with this value before matching
+    @param src_ip_addr - Source prefix value
+    @param src_ip_prefix_len - Source prefix length
+*/
+
+typeonly manual_print define macip_acl_rule
+{
+  u8 is_permit;
+  u8 is_ipv6;
+/*
+ * The source mac of the packet ANDed with src_mac_mask.
+ * The source ip[46] address in the packet is matched
+ * against src_ip_addr, with src_ip_prefix_len set to 0.
+ *
+ * For better performance, minimize the number of
+ * (src_mac_mask, src_ip_prefix_len) combinations
+ * in a MACIP ACL.
+ */
+  u8 src_mac[6];
+  u8 src_mac_mask[6];
+  u8 src_ip_addr[16];
+  u8 src_ip_prefix_len;
+};
+
+/** \brief Replace an existing ACL in-place or create a new ACL
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - an existing ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new ACL
+    @param tag - a string value stored along with the ACL, for descriptive purposes
+    @param count - number of ACL rules
+    @param r - Rules for this access-list
+*/
+
+manual_print manual_endian define acl_add_replace
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index; /* ~0 to add, existing ACL# to replace */
+  u8 tag[64]; /* What gets in here gets out in the corresponding tag field when dumping the ACLs. */
+  u32 count;
+  vl_api_acl_rule_t r[count];
+};
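
Because acl_add_replace carries a variable-length rule array, the client must size the message as the fixed header plus count rules; a hedged sketch of the allocation arithmetic, mirroring the expected_len check performed by the handler in acl.c below (n_rules is a hypothetical local):

    /* annotation: sizing a variable-length acl_add_replace message */
    u32 count = n_rules;
    u32 msg_len = sizeof (vl_api_acl_add_replace_t)
                + count * sizeof (vl_api_acl_rule_t);
    vl_api_acl_add_replace_t *mp = vl_msg_api_alloc (msg_len);
    memset (mp, 0, msg_len);
    mp->acl_index = htonl (~0);   /* ~0 => create a new ACL */
    mp->count = htonl (count);    /* handler re-checks length against count */
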
+
+/** \brief Reply to add/replace ACL
+    @param context - returned sender context, to match reply w/ request
+    @param acl_index - index of the updated or newly created ACL
+    @param retval 0 - no error
+*/
+
+define acl_add_replace_reply
+{
+  u32 context;
+  u32 acl_index;
+  i32 retval;
+};
+
+/** \brief Delete an ACL
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - ACL index to delete
+*/
+
+autoreply manual_print define acl_del
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index;
+};
+
+/* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */
+/** \brief Use acl_interface_set_acl_list instead
+    Append/remove an ACL index to/from the list of ACLs checked for an interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - add or delete the ACL index from the list
+    @param is_input - check the ACL on input (1) or output (0)
+    @param sw_if_index - the interface to alter the list of ACLs on
+    @param acl_index - index of ACL for the operation
+*/
+
+autoreply manual_print define acl_interface_add_del
+{
+  u32 client_index;
+  u32 context;
+  u8 is_add;
+/*
+ * is_input = 0 => ACL applied on interface egress
+ * is_input = 1 => ACL applied on interface ingress
+ */
+  u8 is_input;
+  u32 sw_if_index;
+  u32 acl_index;
+};
+
+/** \brief Set the vector of input/output ACLs checked for an interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - the interface to alter the list of ACLs on
+    @param count - total number of ACL indices in the vector
+    @param n_input - this many first elements correspond to input ACLs, the rest - output
+    @param acls - vector of ACL indices
+*/
+
+autoreply manual_print define acl_interface_set_acl_list
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index;
+  u8 count;
+  u8 n_input; /* First n_input ACLs are set as a list of input ACLs, the rest are applied as output */
+  u32 acls[count];
+};
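
The input/output split in acl_interface_set_acl_list is purely positional. For illustration (annotation, ACL indices hypothetical):

    /* annotation: acls = {5, 12, 7}, count = 3, n_input = 2
       => ACLs 5 and 12 are checked on ingress, ACL 7 on egress */
    mp->count = 3;
    mp->n_input = 2;
    mp->acls[0] = htonl (5);
    mp->acls[1] = htonl (12);
    mp->acls[2] = htonl (7);
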
+
+/** \brief Reply to set the ACL list on an interface
+    @param context - returned sender context, to match reply w/ request
+    @param retval 0 - no error
+*/
+
+/** \brief Dump the specific ACL contents or all of the ACLs' contents
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - ACL index to dump, ~0 to dump all ACLs
+*/
+
+define acl_dump
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about a single ACL contents
+    @param context - returned sender context, to match reply w/ request
+    @param acl_index - ACL index whose contents are being sent in this message
+    @param tag - Descriptive tag value which was supplied at ACL creation
+    @param count - Number of rules in this ACL
+    @param r - Array of rules within this ACL
+*/
+
+manual_endian manual_print define acl_details
+{
+  u32 context;
+  u32 acl_index;
+  u8 tag[64]; /* Same blob that was supplied to us when creating the ACL, one hopes. */
+  u32 count;
+  vl_api_acl_rule_t r[count];
+};
+
+/** \brief Dump the list(s) of ACL applied to specific or all interfaces
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface to dump the ACL list for
+*/
+
+define acl_interface_list_dump
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index; /* ~0 for all interfaces */
+};
+
+/** \brief Details about the list of ACLs on a single interface
+    @param context - returned sender context, to match reply w/ request
+    @param sw_if_index - interface for which the list of ACLs is applied
+    @param count - total length of acl indices vector
+    @param n_input - this many of indices in the beginning are input ACLs, the rest - output
+    @param acls - the vector of ACL indices
+*/
+
+define acl_interface_list_details
+{
+  u32 context;
+  u32 sw_if_index;
+  u8 count;
+  u8 n_input;
+  u32 acls[count];
+};
+
+/** \brief Add a MACIP ACL
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tag - descriptive value for this MACIP ACL
+    @param count - number of rules in this MACIP ACL
+    @param r - vector of MACIP ACL rules
+*/
+
+manual_endian manual_print define macip_acl_add
+{
+  u32 client_index;
+  u32 context;
+  u8 tag[64];
+  u32 count;
+  vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Reply to add MACIP ACL
+    @param context - returned sender context, to match reply w/ request
+    @param acl_index - index of the newly created MACIP ACL
+    @param retval 0 - no error
+*/
+
+define macip_acl_add_reply
+{
+  u32 context;
+  u32 acl_index;
+  i32 retval;
+};
+
+/** \brief Add/Replace a MACIP ACL
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - an existing MACIP ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new MACIP ACL
+    @param tag - descriptive value for this MACIP ACL
+    @param count - number of rules in this MACIP ACL
+    @param r - vector of MACIP ACL rules
+*/
+
+manual_endian manual_print define macip_acl_add_replace
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index; /* ~0 to add, existing MACIP ACL# to replace */
+  u8 tag[64];
+  u32 count;
+  vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Reply to add/replace MACIP ACL
+    @param context - returned sender context, to match reply w/ request
+    @param acl_index - index of the newly created MACIP ACL
+    @param retval 0 - no error
+*/
+
+define macip_acl_add_replace_reply
+{
+  u32 context;
+  u32 acl_index;
+  i32 retval;
+};
+
+/** \brief Delete a MACIP ACL
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - MACIP ACL index to delete
+*/
+
+autoreply manual_print define macip_acl_del
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index;
+};
+
+/** \brief Add or delete a MACIP ACL to/from interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param is_add - add (1) or delete (0) MACIP ACL from being used on an interface
+    @param sw_if_index - interface to apply the action to
+    @param acl_index - MACIP ACL index
+*/
+
+autoreply manual_print define macip_acl_interface_add_del
+{
+  u32 client_index;
+  u32 context;
+  u8 is_add;
+  /* MACIP ACLs are always input */
+  u32 sw_if_index;
+  u32 acl_index;
+};
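
A hedged sketch of the match a MACIP rule expresses (the data plane actually evaluates this through the classifier tables built in acl.c below; pkt_src_mac is a hypothetical name for the packet's source MAC):

    /* annotation: conceptual MACIP rule match */
    int matches = 1;
    for (i = 0; i < 6; i++)
      if ((pkt_src_mac[i] & r->src_mac_mask[i]) !=
          (r->src_mac[i] & r->src_mac_mask[i]))
        matches = 0;
    /* plus: the packet source IP must fall within
       src_ip_addr/src_ip_prefix_len */
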
+
+/** \brief Dump one or all defined MACIP ACLs
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param acl_index - MACIP ACL index or ~0 to dump all MACIP ACLs
+*/
+
+define macip_acl_dump
+{
+  u32 client_index;
+  u32 context;
+  u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about one MACIP ACL
+    @param context - returned sender context, to match reply w/ request
+    @param acl_index - index of this MACIP ACL
+    @param tag - descriptive tag which was supplied during the creation
+    @param count - length of the vector of MACIP ACL rules
+    @param r - rules comprising this MACIP ACL
+*/
+
+manual_endian manual_print define macip_acl_details
+{
+  u32 context;
+  u32 acl_index;
+  u8 tag[64];
+  u32 count;
+  vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Get the vector of MACIP ACL IDs applied to the interfaces
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+
+define macip_acl_interface_get
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply with the vector of MACIP ACLs by sw_if_index
+    @param context - returned sender context, to match reply w/ request
+    @param count - total number of elements in the vector
+    @param acls - the vector of active MACIP ACL indices per sw_if_index
+*/
+
+define macip_acl_interface_get_reply
+{
+  u32 context;
+  u32 count;
+  u32 acls[count];
+};
+
+/** \brief Dump the list(s) of MACIP ACLs applied to specific or all interfaces
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface to dump the MACIP ACL list for
+*/
+
+define macip_acl_interface_list_dump
+{
+  u32 client_index;
+  u32 context;
+  u32 sw_if_index; /* ~0 for all interfaces */
+};
+
+/** \brief Details about a single MACIP ACL contents
+    @param context - returned sender context, to match reply w/ request
+    @param sw_if_index - interface for which the list of MACIP ACLs is applied
+    @param count - total length of acl indices vector
+    @param acls - the vector of MACIP ACL indices
+*/
+
+define macip_acl_interface_list_details
+{
+  u32 context;
+  u32 sw_if_index;
+  u8 count;
+  u32 acls[count];
+};
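
In macip_acl_interface_get_reply the vector is indexed by sw_if_index, with ~0 marking interfaces that have no MACIP ACL applied; a hedged client-side walk (annotation only, byte order assumed network):

    /* annotation: acls[] is indexed by sw_if_index, ~0 = none applied */
    u32 n = ntohl (rmp->count);
    for (u32 swif = 0; swif < n; swif++)
      {
        u32 acl = ntohl (rmp->acls[swif]);
        if (acl != ~0)
          printf ("sw_if_index %u -> MACIP ACL %u\n", swif, acl);
      }
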
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
new file mode 100644
index 00000000..efd506de
--- /dev/null
+++ b/src/plugins/acl/acl.c
@@ -0,0 +1,2709 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <acl/acl.h>
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/classify/input_acl.h>
+#include <vpp/app/version.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <acl/acl_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <acl/acl_all_api_h.h>
+#undef vl_api_version
+
+#include "fa_node.h"
+#include "hash_lookup.h"
+
+acl_main_t acl_main;
+
+#define REPLY_MSG_ID_BASE am->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* List of message types that this plugin understands */
+
+#define foreach_acl_plugin_api_msg \
+_(ACL_PLUGIN_GET_VERSION, acl_plugin_get_version) \
+_(ACL_PLUGIN_CONTROL_PING, acl_plugin_control_ping) \
+_(ACL_ADD_REPLACE, acl_add_replace) \
+_(ACL_DEL, acl_del) \
+_(ACL_INTERFACE_ADD_DEL, acl_interface_add_del) \
+_(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list) \
+_(ACL_DUMP, acl_dump) \
+_(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump) \
+_(MACIP_ACL_ADD, macip_acl_add) \
+_(MACIP_ACL_ADD_REPLACE, macip_acl_add_replace) \
+_(MACIP_ACL_DEL, macip_acl_del) \
+_(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del) \
+_(MACIP_ACL_DUMP, macip_acl_dump) \
+_(MACIP_ACL_INTERFACE_GET, macip_acl_interface_get) \
+_(MACIP_ACL_INTERFACE_LIST_DUMP, macip_acl_interface_list_dump)
+
+
+/* *INDENT-OFF* */
+VLIB_PLUGIN_REGISTER () = {
+    .version = VPP_BUILD_VER,
+    .description = "Access Control Lists",
+};
+/* *INDENT-ON* */
+
+
+static void *
+acl_set_heap(acl_main_t *am)
+{
+  if (0 == am->acl_mheap) {
+    am->acl_mheap = mheap_alloc (0 /* use VM */ , am->acl_mheap_size);
+    mheap_t *h = mheap_header (am->acl_mheap);
+    h->flags |= MHEAP_FLAG_THREAD_SAFE;
+  }
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
+  return oldheap;
+}
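
acl_set_heap switches the current clib allocator to the plugin's private mheap (allocating it lazily) and returns the previous heap; callers are expected to restore it. A sketch of the save/restore idiom used throughout the handlers below (annotation only):

    /* annotation: the heap save/restore idiom */
    void *oldheap = acl_set_heap (am);   /* vec_ and pool_ ops now hit the ACL heap */
    /* ... allocate or grow plugin state ... */
    clib_mem_set_heap (oldheap);         /* back to the caller's heap */
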
+
+void
+acl_plugin_acl_set_validate_heap(acl_main_t *am, int on)
+{
+  clib_mem_set_heap(acl_set_heap(am));
+  mheap_t *h = mheap_header (am->acl_mheap);
+  if (on) {
+    h->flags |= MHEAP_FLAG_VALIDATE;
+    h->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+    mheap_validate(h);
+  } else {
+    h->flags &= ~MHEAP_FLAG_VALIDATE;
+    h->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+  }
+}
+
+void
+acl_plugin_acl_set_trace_heap(acl_main_t *am, int on)
+{
+  clib_mem_set_heap(acl_set_heap(am));
+  mheap_t *h = mheap_header (am->acl_mheap);
+  if (on) {
+    h->flags |= MHEAP_FLAG_TRACE;
+  } else {
+    h->flags &= ~MHEAP_FLAG_TRACE;
+  }
+}
+
+static void
+vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp)
+{
+  acl_main_t *am = &acl_main;
+  vl_api_acl_plugin_get_version_reply_t *rmp;
+  int msg_size = sizeof (*rmp);
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    {
+      return;
+    }
+
+  rmp = vl_msg_api_alloc (msg_size);
+  memset (rmp, 0, msg_size);
+  rmp->_vl_msg_id =
+    ntohs (VL_API_ACL_PLUGIN_GET_VERSION_REPLY + am->msg_id_base);
+  rmp->context = mp->context;
+  rmp->major = htonl (ACL_PLUGIN_VERSION_MAJOR);
+  rmp->minor = htonl (ACL_PLUGIN_VERSION_MINOR);
+
+  vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_acl_plugin_control_ping_t_handler (vl_api_acl_plugin_control_ping_t * mp)
+{
+  vl_api_acl_plugin_control_ping_reply_t *rmp;
+  acl_main_t *am = &acl_main;
+  int rv = 0;
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_ACL_PLUGIN_CONTROL_PING_REPLY,
+  ({
+    rmp->vpe_pid = ntohl (getpid ());
+  }));
+  /* *INDENT-ON* */
+}
+
+static int
+acl_add_list (u32 count, vl_api_acl_rule_t rules[],
+              u32 * acl_list_index, u8 * tag)
+{
+  acl_main_t *am = &acl_main;
+  acl_list_t *a;
+  acl_rule_t *r;
+  acl_rule_t *acl_new_rules = 0;
+  int i;
+
+  if (*acl_list_index != ~0)
+    {
+      /* They supplied some number, let's see if this ACL exists */
+      if (pool_is_free_index (am->acls, *acl_list_index))
+        {
+          /* tried to replace a non-existent ACL, no point doing anything */
+          clib_warning("acl-plugin-error: Trying to replace nonexistent ACL %d (tag %s)", *acl_list_index, tag);
+          return -1;
+        }
+    }
+  if (0 == count) {
+    clib_warning("acl-plugin-warning: supplied no rules for ACL %d (tag %s)", *acl_list_index, tag);
+  }
+
+  void *oldheap = acl_set_heap(am);
+
+  /* Create and populate the rules */
+  if (count > 0)
+    vec_validate(acl_new_rules, count-1);
+
+  for (i = 0; i < count; i++)
+    {
+      r = vec_elt_at_index(acl_new_rules, i);
+      memset(r, 0, sizeof(*r));
+      r->is_permit = rules[i].is_permit;
+      r->is_ipv6 = rules[i].is_ipv6;
+      if (r->is_ipv6)
+        {
+          memcpy (&r->src, rules[i].src_ip_addr, sizeof (r->src));
+          memcpy (&r->dst, rules[i].dst_ip_addr, sizeof (r->dst));
+        }
+      else
+        {
+          memcpy (&r->src.ip4, rules[i].src_ip_addr, sizeof (r->src.ip4));
+          memcpy (&r->dst.ip4, rules[i].dst_ip_addr, sizeof (r->dst.ip4));
+        }
+      r->src_prefixlen = rules[i].src_ip_prefix_len;
+      r->dst_prefixlen = rules[i].dst_ip_prefix_len;
+      r->proto = rules[i].proto;
+      r->src_port_or_type_first = ntohs ( rules[i].srcport_or_icmptype_first );
+      r->src_port_or_type_last = ntohs ( rules[i].srcport_or_icmptype_last );
+      r->dst_port_or_code_first = ntohs ( rules[i].dstport_or_icmpcode_first );
+      r->dst_port_or_code_last = ntohs ( rules[i].dstport_or_icmpcode_last );
+      r->tcp_flags_value = rules[i].tcp_flags_value;
+      r->tcp_flags_mask = rules[i].tcp_flags_mask;
+    }
+
+  if (~0 == *acl_list_index)
+    {
+      /* Get ACL index */
+      pool_get_aligned (am->acls, a, CLIB_CACHE_LINE_BYTES);
+      memset (a, 0, sizeof (*a));
+      /* Will return the newly allocated ACL index */
+      *acl_list_index = a - am->acls;
+    }
+  else
+    {
+      a = am->acls + *acl_list_index;
+      hash_acl_delete(am, *acl_list_index);
+      /* Get rid of the old rules */
+      if (a->rules)
+        vec_free (a->rules);
+    }
+  a->rules = acl_new_rules;
+  a->count = count;
+  memcpy (a->tag, tag, sizeof (a->tag));
+  hash_acl_add(am, *acl_list_index);
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
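
The replace path above swaps the rule vector and re-adds the hash-lookup entries while keeping the ACL index stable, so interfaces referencing the ACL need no update. A hedged sketch of the call pattern inside the plugin (rules/n_rules hypothetical):

    /* annotation: create, then replace in place */
    u32 acl_index = ~0;                            /* ~0 => allocate new */
    acl_add_list (n_rules, rules, &acl_index, (u8 *) "web-in");
    /* later: same index, new contents; references stay valid */
    acl_add_list (n_rules2, rules2, &acl_index, (u8 *) "web-in-v2");
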
+
+static int
+acl_del_list (u32 acl_list_index)
+{
+  acl_main_t *am = &acl_main;
+  acl_list_t *a;
+  int i, ii;
+  if (pool_is_free_index (am->acls, acl_list_index))
+    {
+      return -1;
+    }
+
+  if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+    if (vec_len(vec_elt(am->input_sw_if_index_vec_by_acl, acl_list_index)) > 0) {
+      /* ACL is applied somewhere inbound. Refuse to delete */
+      return -1;
+    }
+  }
+  if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+    if (vec_len(vec_elt(am->output_sw_if_index_vec_by_acl, acl_list_index)) > 0) {
+      /* ACL is applied somewhere outbound. Refuse to delete */
+      return -1;
+    }
+  }
+
+  void *oldheap = acl_set_heap(am);
+  /* delete any references to the ACL */
+  for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++)
+    {
+      for (ii = 0; ii < vec_len (am->output_acl_vec_by_sw_if_index[i]);
+           /* see body */ )
+        {
+          if (acl_list_index == am->output_acl_vec_by_sw_if_index[i][ii])
+            {
+              vec_del1 (am->output_acl_vec_by_sw_if_index[i], ii);
+            }
+          else
+            {
+              ii++;
+            }
+        }
+    }
+  for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index); i++)
+    {
+      for (ii = 0; ii < vec_len (am->input_acl_vec_by_sw_if_index[i]);
+           /* see body */ )
+        {
+          if (acl_list_index == am->input_acl_vec_by_sw_if_index[i][ii])
+            {
+              vec_del1 (am->input_acl_vec_by_sw_if_index[i], ii);
+            }
+          else
+            {
+              ii++;
+            }
+        }
+    }
+  /* delete the hash table data */
+
+  hash_acl_delete(am, acl_list_index);
+  /* now we can delete the ACL itself */
+  a = pool_elt_at_index (am->acls, acl_list_index);
+  if (a->rules)
+    vec_free (a->rules);
+
+  pool_put (am->acls, a);
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
+
+/* Some aids in ASCII graphing the content */
+#define XX "\377"
+#define __ "\000"
+#define _(x)
+#define v
+
+u8 ip4_5tuple_mask[] =
+_(" dmac smac etype ")
+_(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+  _(" v ihl totlen ")
+  _(0x0000)
+  __ __ __ __
+  _(" ident fl+fo ")
+  _(0x0004)
+  __ __ __ __
+  _(" ttl pr checksum ")
+  _(0x0008)
+  __ XX __ __
+  _(" src address ")
+  _(0x000C)
+  XX XX XX XX
+  _(" dst address ")
+  _(0x0010)
+  XX XX XX XX
+  _("L4 T/U sport dport ")
+  _(tcpudp)
+  XX XX XX XX
+  _(padpad)
+  __ __ __ __
+  _(padpad)
+  __ __ __ __
+  _(padeth)
+  __ __;
+
+u8 ip6_5tuple_mask[] =
+  _(" dmac smac etype ")
+  _(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+  _(" v tc + flow ")
+  _(0x0000) __ __ __ __
+  _(" plen nh hl ")
+  _(0x0004) __ __ XX __
+  _(" src address ")
+  _(0x0008) XX XX XX XX
+  _(0x000C) XX XX XX XX
+  _(0x0010) XX XX XX XX
+  _(0x0014) XX XX XX XX
+  _(" dst address ")
+  _(0x0018) XX XX XX XX
+  _(0x001C) XX XX XX XX
+  _(0x0020) XX XX XX XX
+  _(0x0024) XX XX XX XX
+  _("L4T/U sport dport ")
+  _(tcpudp) XX XX XX XX _(padpad) __ __ __ __ _(padeth) __ __;
+
+#undef XX
+#undef __
+#undef _
+#undef v
+
+static int count_skip (u8 * p, u32 size)
+{
+  u64 *p64 = (u64 *) p;
+  /* Be tolerant to null pointer */
+  if (0 == p)
+    return 0;
+
+  while ((0ULL == *p64) && ((u8 *) p64 - p) < size)
+    {
+      p64++;
+    }
+  return (p64 - (u64 *) p) / 2;
+}
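
count_skip returns the number of leading all-zero 16-byte vectors in a mask, which the classifier can then skip when matching. A worked example (annotation only):

    /* annotation: a 64-byte mask whose first 16 bytes are zero */
    u8 m[64] = { 0 };
    m[20] = 0xff;                     /* first non-zero byte lies in vector #1 */
    u32 skip = count_skip (m, 64);    /* = 1 (one all-zero u32x4) */
    u32 match = (64 / 16) - skip;     /* = 3 vectors actually compared */
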
+
+static int
+acl_classify_add_del_table_tiny (vnet_classify_main_t * cm, u8 * mask,
+                                 u32 mask_len, u32 next_table_index,
+                                 u32 miss_next_index, u32 * table_index,
+                                 int is_add)
+{
+  u32 nbuckets = 1;
+  u32 memory_size = 2 << 13;
+  u32 skip = count_skip (mask, mask_len);
+  u32 match = (mask_len / 16) - skip;
+  u8 *skip_mask_ptr = mask + 16 * skip;
+  u32 current_data_flag = 0;
+  int current_data_offset = 0;
+
+  if (0 == match)
+    match = 1;
+  void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+  int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+                                      memory_size, skip, match,
+                                      next_table_index, miss_next_index,
+                                      table_index, current_data_flag,
+                                      current_data_offset, is_add,
+                                      1 /* delete_chain */);
+  clib_mem_set_heap (oldheap);
+  return ret;
+}
+
+static int
+acl_classify_add_del_table_small (vnet_classify_main_t * cm, u8 * mask,
+                                  u32 mask_len, u32 next_table_index,
+                                  u32 miss_next_index, u32 * table_index,
+                                  int is_add)
+{
+  u32 nbuckets = 32;
+  u32 memory_size = 2 << 20;
+  u32 skip = count_skip (mask, mask_len);
+  u32 match = (mask_len / 16) - skip;
+  u8 *skip_mask_ptr = mask + 16 * skip;
+  u32 current_data_flag = 0;
+  int current_data_offset = 0;
+
+  if (0 == match)
+    match = 1;
+
+  void *oldheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+  int ret = vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+                                      memory_size, skip, match,
+                                      next_table_index, miss_next_index,
+                                      table_index, current_data_flag,
+                                      current_data_offset, is_add,
+                                      1 /* delete_chain */);
+  clib_mem_set_heap (oldheap);
+  return ret;
+}
+
+
+static int
+acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  void *oldheap = acl_set_heap(am);
+
+  vec_validate_init_empty (am->acl_ip4_input_classify_table_by_sw_if_index,
+                           sw_if_index, ~0);
+  vec_validate_init_empty (am->acl_ip6_input_classify_table_by_sw_if_index,
+                           sw_if_index, ~0);
+
+  /* switch to global heap while calling vnet_* functions */
+  clib_mem_set_heap (cm->vlib_main->heap_base);
+  vnet_l2_input_classify_enable_disable (sw_if_index, 0);
+
+  if (am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+    {
+      ip4_table_index =
+        am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index];
+      am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_input_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+    }
+  if (am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+    {
+      ip6_table_index =
+        am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index];
+      am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+      acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                       sizeof (ip6_5tuple_mask) - 1, ~0,
+                                       am->l2_input_classify_next_acl_ip6,
+                                       &ip6_table_index, 0);
+    }
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
+
+static int
+acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  void *oldheap = acl_set_heap(am);
+
+  vec_validate_init_empty (am->acl_ip4_output_classify_table_by_sw_if_index,
+                           sw_if_index, ~0);
+  vec_validate_init_empty (am->acl_ip6_output_classify_table_by_sw_if_index,
+                           sw_if_index, ~0);
+
+  /* switch to global heap while calling vnet_* functions */
+  clib_mem_set_heap (cm->vlib_main->heap_base);
+
+  vnet_l2_output_classify_enable_disable (sw_if_index, 0);
+
+  if (am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+    {
+      ip4_table_index =
+        am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index];
+      am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_output_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+    }
+  if (am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+    {
+      ip6_table_index =
+        am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index];
+      am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+      acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                       sizeof (ip6_5tuple_mask) - 1, ~0,
+                                       am->l2_output_classify_next_acl_ip6,
+                                       &ip6_table_index, 0);
+    }
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
+
+static int
+acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  int rv;
+
+  void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+
+  /* in case there were previous tables attached */
+  acl_unhook_l2_input_classify (am, sw_if_index);
+  rv =
+    acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                     sizeof (ip4_5tuple_mask) - 1, ~0,
+                                     am->l2_input_classify_next_acl_ip4,
+                                     &ip4_table_index, 1);
+  if (rv)
+    goto done;
+  rv =
+    acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                     sizeof (ip6_5tuple_mask) - 1, ~0,
+                                     am->l2_input_classify_next_acl_ip6,
+                                     &ip6_table_index, 1);
+  if (rv)
+    {
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_input_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+      goto done;
+    }
+  rv =
+    vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+                                       ip6_table_index, ~0);
+  if (rv)
+    {
+      acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                       sizeof (ip6_5tuple_mask) - 1, ~0,
+                                       am->l2_input_classify_next_acl_ip6,
+                                       &ip6_table_index, 0);
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_input_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+      goto done;
+    }
+
+  am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] =
+    ip4_table_index;
+  am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] =
+    ip6_table_index;
+
+  vnet_l2_input_classify_enable_disable (sw_if_index, 1);
+done:
+  clib_mem_set_heap (prevheap);
+  return rv;
+}
+
+static int
+acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  u32 ip4_table_index = ~0;
+  u32 ip6_table_index = ~0;
+  int rv;
+
+  void *prevheap = clib_mem_set_heap (cm->vlib_main->heap_base);
+
+  /* in case there were previous tables attached */
+  acl_unhook_l2_output_classify (am, sw_if_index);
+  rv =
+    acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                     sizeof (ip4_5tuple_mask) - 1, ~0,
+                                     am->l2_output_classify_next_acl_ip4,
+                                     &ip4_table_index, 1);
+  if (rv)
+    goto done;
+  rv =
+    acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                     sizeof (ip6_5tuple_mask) - 1, ~0,
+                                     am->l2_output_classify_next_acl_ip6,
+                                     &ip6_table_index, 1);
+  if (rv)
+    {
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_output_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+      goto done;
+    }
+  rv =
+    vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+                                        ip6_table_index, ~0);
+  clib_warning
+    ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n",
+     sw_if_index, ip4_table_index, ip6_table_index);
+  if (rv)
+    {
+      acl_classify_add_del_table_tiny (cm, ip6_5tuple_mask,
+                                       sizeof (ip6_5tuple_mask) - 1, ~0,
+                                       am->l2_output_classify_next_acl_ip6,
+                                       &ip6_table_index, 0);
+      acl_classify_add_del_table_tiny (cm, ip4_5tuple_mask,
+                                       sizeof (ip4_5tuple_mask) - 1, ~0,
+                                       am->l2_output_classify_next_acl_ip4,
+                                       &ip4_table_index, 0);
+      goto done;
+    }
+
+  am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] =
+    ip4_table_index;
+  am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] =
+    ip6_table_index;
+
+  vnet_l2_output_classify_enable_disable (sw_if_index, 1);
+done:
+  clib_mem_set_heap (prevheap);
+  return rv;
+}
+
+
+
+int
+acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index,
+                                 int enable_disable)
+{
+  int rv;
+
+  /* Utterly wrong? */
+  if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+                          sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  acl_fa_enable_disable(sw_if_index, 1, enable_disable);
+
+  if (enable_disable)
+    {
+      rv = acl_hook_l2_input_classify (am, sw_if_index);
+    }
+  else
+    {
+      rv = acl_unhook_l2_input_classify (am, sw_if_index);
+    }
+
+  return rv;
+}
+
+int
+acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index,
+                                  int enable_disable)
+{
+  int rv;
+
+  /* Utterly wrong? */
+  if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+                          sw_if_index))
+    return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+  acl_fa_enable_disable(sw_if_index, 0, enable_disable);
+
+  if (enable_disable)
+    {
+      rv = acl_hook_l2_output_classify (am, sw_if_index);
+    }
+  else
+    {
+      rv = acl_unhook_l2_output_classify (am, sw_if_index);
+    }
+
+  return rv;
+}
+
+static int
+acl_is_not_defined(acl_main_t *am, u32 acl_list_index)
+{
+  return (pool_is_free_index (am->acls, acl_list_index));
+}
+
+
+static int
+acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+  acl_main_t *am = &acl_main;
+  if (acl_is_not_defined(am, acl_list_index)) {
+    /* ACL is not defined. Can not apply */
+    return -1;
+  }
+  void *oldheap = acl_set_heap(am);
+
+  if (is_input)
+    {
+      vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+
+      u32 index = vec_search(am->input_acl_vec_by_sw_if_index[sw_if_index], acl_list_index);
+      if (index < vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])) {
+        clib_warning("ACL %d is already applied inbound on sw_if_index %d (index %d)",
+                     acl_list_index, sw_if_index, index);
+        /* the entry is already there */
+        clib_mem_set_heap (oldheap);
+        return -1;
+      }
+      /* if there was no ACL applied before, enable the ACL processing */
+      if (vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index]) == 0) {
+        acl_interface_in_enable_disable (am, sw_if_index, 1);
+      }
+      vec_add (am->input_acl_vec_by_sw_if_index[sw_if_index], &acl_list_index,
+               1);
+      vec_validate (am->input_sw_if_index_vec_by_acl, acl_list_index);
+      vec_add (am->input_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index,
+               1);
+    }
+  else
+    {
+      vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+
+      u32 index = vec_search(am->output_acl_vec_by_sw_if_index[sw_if_index], acl_list_index);
+      if (index < vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])) {
+        clib_warning("ACL %d is already applied outbound on sw_if_index %d (index %d)",
+                     acl_list_index, sw_if_index, index);
+        /* the entry is already there */
+        clib_mem_set_heap (oldheap);
+        return -1;
+      }
+      /* if there was no ACL applied before, enable the ACL processing */
+      if (vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index]) == 0) {
+        acl_interface_out_enable_disable (am, sw_if_index, 1);
+      }
+      vec_add (am->output_acl_vec_by_sw_if_index[sw_if_index],
+               &acl_list_index, 1);
+      vec_validate (am->output_sw_if_index_vec_by_acl, acl_list_index);
+      vec_add (am->output_sw_if_index_vec_by_acl[acl_list_index], &sw_if_index,
+               1);
+    }
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
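
The plugin keeps two mirrored lookups, ACLs per interface and interfaces per ACL; an illustration (annotation only) of the invariant the add path above maintains:

    /* annotation: after applying ACL 7 inbound on sw_if_index 3, both
       directions of the mapping hold:
         am->input_acl_vec_by_sw_if_index[3]  contains 7
         am->input_sw_if_index_vec_by_acl[7]  contains 3
       acl_del_list() earlier refuses to delete ACL 7 for as long as
       the second vector is non-empty */
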
+
+
+static int
+acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+  acl_main_t *am = &acl_main;
+  int i;
+  int rv = -1;
+  void *oldheap = acl_set_heap(am);
+  if (is_input)
+    {
+      vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+      for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+           i++)
+        {
+          if (acl_list_index ==
+              am->input_acl_vec_by_sw_if_index[sw_if_index][i])
+            {
+              vec_del1 (am->input_acl_vec_by_sw_if_index[sw_if_index], i);
+              rv = 0;
+              break;
+            }
+        }
+
+      if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+        u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+        if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) {
+          hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+          vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index);
+        }
+      }
+
+      /* If there are no more ACLs applied on an interface, disable ACL processing */
+      if (0 == vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]))
+        {
+          acl_interface_in_enable_disable (am, sw_if_index, 0);
+        }
+    }
+  else
+    {
+      vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+      for (i = 0;
+           i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); i++)
+        {
+          if (acl_list_index ==
+              am->output_acl_vec_by_sw_if_index[sw_if_index][i])
+            {
+              vec_del1 (am->output_acl_vec_by_sw_if_index[sw_if_index], i);
+              rv = 0;
+              break;
+            }
+        }
+
+      if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+        u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+        if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) {
+          hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+          vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index);
+        }
+      }
+
+      /* If there are no more ACLs applied on an interface, disable ACL processing */
+      if (0 == vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]))
+        {
+          acl_interface_out_enable_disable (am, sw_if_index, 0);
+        }
+    }
+  clib_mem_set_heap (oldheap);
+  return rv;
+}
+
+static void
+acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input)
+{
+  acl_main_t *am = &acl_main;
+  int i;
+  void *oldheap = acl_set_heap(am);
+  if (is_input)
+    {
+      vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+      if (vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index]) > 0) {
+        acl_interface_in_enable_disable (am, sw_if_index, 0);
+      }
+
+      for(i = vec_len(am->input_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) {
+        u32 acl_list_index = am->input_acl_vec_by_sw_if_index[sw_if_index][i];
+        hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+        if (acl_list_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+          u32 index = vec_search(am->input_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+          if (index < vec_len(am->input_sw_if_index_vec_by_acl[acl_list_index])) {
+            vec_del1 (am->input_sw_if_index_vec_by_acl[acl_list_index], index);
+          }
+        }
+      }
+
+      vec_reset_length (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+    }
+  else
+    {
+      vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+      if (vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index]) > 0) {
+        acl_interface_out_enable_disable (am, sw_if_index, 0);
+      }
+
+      for(i = vec_len(am->output_acl_vec_by_sw_if_index[sw_if_index])-1; i>=0; i--) {
+        u32 acl_list_index = am->output_acl_vec_by_sw_if_index[sw_if_index][i];
+        hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+        if (acl_list_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+          u32 index = vec_search(am->output_sw_if_index_vec_by_acl[acl_list_index], sw_if_index);
+          if (index < vec_len(am->output_sw_if_index_vec_by_acl[acl_list_index])) {
+            vec_del1 (am->output_sw_if_index_vec_by_acl[acl_list_index], index);
+          }
+        }
+      }
+
+      vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+    }
+  clib_mem_set_heap (oldheap);
+}
+
+static int
+acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input,
+                                 u32 acl_list_index)
+{
+  int rv = -1;
+  acl_main_t *am = &acl_main;
+  if (is_add)
+    {
+      rv =
+        acl_interface_add_inout_acl (sw_if_index, is_input, acl_list_index);
+      if (rv == 0)
+        {
+          hash_acl_apply(am, sw_if_index, is_input, acl_list_index);
+        }
+    }
+  else
+    {
+      hash_acl_unapply(am, sw_if_index, is_input, acl_list_index);
+      rv =
+        acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index);
+    }
+  return rv;
+}
+
+
+typedef struct
+{
+  u8 is_ipv6;
+  u8 mac_mask[6];
+  u8 prefix_len;
+  u32 count;
+  u32 table_index;
+  u32 arp_table_index;
+} macip_match_type_t;
+
+static u32
+macip_find_match_type (macip_match_type_t * mv, u8 * mac_mask, u8 prefix_len,
+                       u8 is_ipv6)
+{
+  u32 i;
+  if (mv)
+    {
+      for (i = 0; i < vec_len (mv); i++)
+        {
+          if ((mv[i].prefix_len == prefix_len) && (mv[i].is_ipv6 == is_ipv6)
+              && (0 == memcmp (mv[i].mac_mask, mac_mask, 6)))
+            {
+              return i;
+            }
+        }
+    }
+  return ~0;
+}
+
+
+/* Get metric used to sort match types.
+   The more specific and the more often seen - the bigger the metric */
+static int
+match_type_metric (macip_match_type_t * m)
+{
+  unsigned int mac_bits_set = 0;
+  unsigned int mac_byte;
+  int i;
+  for (i=0; i<6; i++)
+    {
+      mac_byte = m->mac_mask[i];
+      for (; mac_byte; mac_byte >>= 1)
+        mac_bits_set += mac_byte & 1;
+    }
+  /*
+   * Attempt to place the more specific and the more used rules on top.
+   * There are obvious caveat corner cases to this, but they do not
+   * seem to be sensible in real world (e.g. specific IPv4 with wildcard MAC
+   * going with a wildcard IPv4 with a specific MAC).
+   */
+  return m->prefix_len + mac_bits_set + m->is_ipv6 + 10 * m->count;
+}
+
+static int
+match_type_compare (macip_match_type_t * m1, macip_match_type_t * m2)
+{
+  /* Ascending sort based on the metric values */
+  return match_type_metric (m1) - match_type_metric (m2);
+}
+
+/* Get the offset of L3 source within ethernet packet */
+static int
+get_l3_src_offset(int is6)
+{
+  if(is6)
+    return (sizeof(ethernet_header_t) + offsetof(ip6_header_t, src_address));
+  else
+    return (sizeof(ethernet_header_t) + offsetof(ip4_header_t, src_address));
+}
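
A worked example of match_type_metric under the definition above (annotation only):

    /* annotation:
       exact MAC (48 mask bits set), IPv4 /24, seen 3 times:
         metric = 24 + 48 + 0 + 10*3 = 102
       wildcard MAC (0 bits), IPv4 /32, seen once:
         metric = 32 + 0 + 0 + 10*1 = 42
       The ascending sort puts the exact-MAC type last, so its classifier
       table is created last, becomes last_table, and is therefore the
       head of the chain - i.e. searched first. */
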
+
+static int
+macip_create_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+  macip_match_type_t *mvec = NULL;
+  macip_match_type_t *mt;
+  macip_acl_list_t *a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+  int i;
+  u32 match_type_index;
+  u32 last_table;
+  u8 mask[5 * 16];
+  vnet_classify_main_t *cm = &vnet_classify_main;
+
+  /* Count the number of different types of rules */
+  for (i = 0; i < a->count; i++)
+    {
+      if (~0 ==
+          (match_type_index =
+           macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+                                  a->rules[i].src_prefixlen,
+                                  a->rules[i].is_ipv6)))
+        {
+          match_type_index = vec_len (mvec);
+          vec_validate (mvec, match_type_index);
+          memcpy (mvec[match_type_index].mac_mask,
+                  a->rules[i].src_mac_mask, 6);
+          mvec[match_type_index].prefix_len = a->rules[i].src_prefixlen;
+          mvec[match_type_index].is_ipv6 = a->rules[i].is_ipv6;
+          mvec[match_type_index].table_index = ~0;
+        }
+      mvec[match_type_index].count++;
+    }
+  /* Put the most frequently used tables last in the list so we can create classifier tables in reverse order */
+  vec_sort_with_function (mvec, match_type_compare);
+  /* Create the classifier tables */
+  last_table = ~0;
+  /* First add ARP tables */
+  vec_foreach (mt, mvec)
+  {
+    int mask_len;
+    int is6 = mt->is_ipv6;
+
+    mt->arp_table_index = ~0;
+    if (!is6)
+      {
+        memset (mask, 0, sizeof (mask));
+        memcpy (&mask[6], mt->mac_mask, 6);
+        memset (&mask[12], 0xff, 2); /* ethernet protocol */
+        memcpy (&mask[14 + 8], mt->mac_mask, 6);
+
+        for (i = 0; i < (mt->prefix_len / 8); i++)
+          mask[14 + 14 + i] = 0xff;
+        if (mt->prefix_len % 8)
+          mask[14 + 14 + (mt->prefix_len / 8)] = 0xff - ((1 << (8 - mt->prefix_len % 8)) - 1);
+
+        mask_len = ((14 + 14 + ((mt->prefix_len+7) / 8) +
+                     (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4);
+        acl_classify_add_del_table_small (cm, mask, mask_len, last_table,
+                               (~0 == last_table) ? 0 : ~0, &mt->arp_table_index,
+                               1);
+        last_table = mt->arp_table_index;
+      }
+  }
+  /* Now add IP[46] tables */
+  vec_foreach (mt, mvec)
+  {
+    int mask_len;
+    int is6 = mt->is_ipv6;
+    int l3_src_offs = get_l3_src_offset(is6);
+    memset (mask, 0, sizeof (mask));
+    memcpy (&mask[6], mt->mac_mask, 6);
+    for (i = 0; i < (mt->prefix_len / 8); i++)
+      {
+        mask[l3_src_offs + i] = 0xff;
+      }
+    if (mt->prefix_len % 8)
+      {
+        mask[l3_src_offs + (mt->prefix_len / 8)] =
+          0xff - ((1 << (8 - mt->prefix_len % 8)) - 1);
+      }
+    /*
+     * Round-up the number of bytes needed to store the prefix,
+     * and round up the number of vectors too
+     */
+    mask_len = ((l3_src_offs + ((mt->prefix_len+7) / 8) +
+                 (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4);
+    acl_classify_add_del_table_small (cm, mask, mask_len, last_table,
+                                (~0 == last_table) ? 0 : ~0, &mt->table_index,
+                                1);
+    last_table = mt->table_index;
+  }
+  a->ip4_table_index = last_table;
+  a->ip6_table_index = last_table;
+  a->l2_table_index = last_table;
+
+  /* Populate the classifier tables with rules from the MACIP ACL */
+  for (i = 0; i < a->count; i++)
+    {
+      u32 action = 0;
+      u32 metadata = 0;
+      int is6 = a->rules[i].is_ipv6;
+      int l3_src_offs = get_l3_src_offset(is6);
+      memset (mask, 0, sizeof (mask));
+      memcpy (&mask[6], a->rules[i].src_mac, 6);
+      memset (&mask[12], 0xff, 2); /* ethernet protocol */
+      if (is6)
+        {
+          memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip6, 16);
+          mask[12] = 0x86;
+          mask[13] = 0xdd;
+        }
+      else
+        {
+          memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip4, 4);
+          mask[12] = 0x08;
+          mask[13] = 0x00;
+        }
+      match_type_index =
+        macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+                               a->rules[i].src_prefixlen,
+                               a->rules[i].is_ipv6);
+      ASSERT(match_type_index != ~0);
+      /* add session to table mvec[match_type_index].table_index; */
+      vnet_classify_add_del_session (cm, mvec[match_type_index].table_index,
+                                     mask, a->rules[i].is_permit ? ~0 : 0, i,
+                                     0, action, metadata, 1);
+      /* add ARP table entry too */
+      if (!is6 && (mvec[match_type_index].arp_table_index != ~0))
+        {
+          memset (mask, 0, sizeof (mask));
+          memcpy (&mask[6], a->rules[i].src_mac, 6);
+          mask[12] = 0x08;
+          mask[13] = 0x06;
+          memcpy (&mask[14 + 8], a->rules[i].src_mac, 6);
+          memcpy (&mask[14 + 14], &a->rules[i].src_ip_addr.ip4, 4);
+          vnet_classify_add_del_session (cm, mvec[match_type_index].arp_table_index,
+                                         mask, a->rules[i].is_permit ? ~0 : 0, i,
+                                         0, action, metadata, 1);
+        }
+    }
+  return 0;
+}
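
The partial-byte prefix mask arithmetic above, worked for a /20 prefix (annotation only):

    /* annotation: prefix_len = 20
       full bytes: 20/8 = 2  -> mask bytes 0,1 = 0xff
       remainder : 20%8 = 4  -> 0xff - ((1 << (8-4)) - 1)
                              = 0xff - 0x0f = 0xf0
       resulting IPv4 source mask: ff ff f0 00 */
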
+
+static void
+macip_destroy_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  macip_acl_list_t *a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+
+  if (a->ip4_table_index != ~0)
+    {
+      acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip4_table_index, 0);
+      a->ip4_table_index = ~0;
+    }
+  if (a->ip6_table_index != ~0)
+    {
+      acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip6_table_index, 0);
+      a->ip6_table_index = ~0;
+    }
+  if (a->l2_table_index != ~0)
+    {
+      acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->l2_table_index, 0);
+      a->l2_table_index = ~0;
+    }
+}
+
+static int
+macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[],
+                    u32 * acl_list_index, u8 * tag)
+{
+  acl_main_t *am = &acl_main;
+  macip_acl_list_t *a;
+  macip_acl_rule_t *r;
+  macip_acl_rule_t *acl_new_rules = 0;
+  int i;
+
+  if (*acl_list_index != ~0)
+    {
+      /* They supplied some number, let's see if this MACIP ACL exists */
+      if (pool_is_free_index (am->macip_acls, *acl_list_index))
+        {
+          /* tried to replace a non-existent ACL, no point doing anything */
+          clib_warning("acl-plugin-error: Trying to replace nonexistent MACIP ACL %d (tag %s)", *acl_list_index, tag);
+          return -1;
+        }
+    }
+
+  if (0 == count) {
+    clib_warning("acl-plugin-warning: Trying to create empty MACIP ACL (tag %s)", tag);
+  }
+  void *oldheap = acl_set_heap(am);
+  /* Create and populate the rules */
+  if (count > 0)
+    vec_validate(acl_new_rules, count-1);
+
+  for (i = 0; i < count; i++)
+    {
+      r = &acl_new_rules[i];
+      r->is_permit = rules[i].is_permit;
+      r->is_ipv6 = rules[i].is_ipv6;
+      memcpy (&r->src_mac, rules[i].src_mac, 6);
+      memcpy (&r->src_mac_mask, rules[i].src_mac_mask, 6);
+      if(rules[i].is_ipv6)
+        memcpy (&r->src_ip_addr.ip6, rules[i].src_ip_addr, 16);
+      else
+        memcpy (&r->src_ip_addr.ip4, rules[i].src_ip_addr, 4);
+      r->src_prefixlen = rules[i].src_ip_prefix_len;
+    }
+
+  if (~0 == *acl_list_index)
+    {
+      /* Get ACL index */
+      pool_get_aligned (am->macip_acls, a, CLIB_CACHE_LINE_BYTES);
+      memset (a, 0, sizeof (*a));
+      /* Will return the newly allocated ACL index */
+      *acl_list_index = a - am->macip_acls;
+    }
+  else
+    {
+      a = pool_elt_at_index (am->macip_acls, *acl_list_index);
+      if (a->rules)
+        {
+          vec_free (a->rules);
+        }
+      macip_destroy_classify_tables (am, *acl_list_index);
+    }
+
+  a->rules = acl_new_rules;
+  a->count = count;
+  memcpy (a->tag, tag, sizeof (a->tag));
+
+  /* Create and populate the classifier tables */
+  macip_create_classify_tables (am, *acl_list_index);
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
+
+
+/* No check for validity of sw_if_index - the callers were supposed to validate */
+
+static int
+macip_acl_interface_del_acl (acl_main_t * am, u32 sw_if_index)
+{
+  int rv;
+  u32 macip_acl_index;
+  macip_acl_list_t *a;
+  void *oldheap = acl_set_heap(am);
+  vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+  clib_mem_set_heap (oldheap);
+  macip_acl_index = am->macip_acl_by_sw_if_index[sw_if_index];
+  /* No point in deleting MACIP ACL which is not applied */
+  if (~0 == macip_acl_index)
+    return -1;
+  a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+  /* remove the classifier tables off the interface L2 ACL */
+  rv =
+    vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+                              a->ip6_table_index, a->l2_table_index, 0);
+  /* Unset the MACIP ACL index */
+  am->macip_acl_by_sw_if_index[sw_if_index] = ~0;
+  return rv;
+}
+
+/* No check for validity of sw_if_index - the callers were supposed to validate */
+
+static int
+macip_acl_interface_add_acl (acl_main_t * am, u32 sw_if_index,
+                             u32 macip_acl_index)
+{
+  macip_acl_list_t *a;
+  int rv;
+  if (pool_is_free_index (am->macip_acls, macip_acl_index))
+    {
+      return -1;
+    }
+  void *oldheap = acl_set_heap(am);
+  a = pool_elt_at_index (am->macip_acls, macip_acl_index);
+  vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+  clib_mem_set_heap (oldheap);
+  /* If there is already a MACIP ACL applied, unapply it */
+  if (~0 != am->macip_acl_by_sw_if_index[sw_if_index])
+    macip_acl_interface_del_acl(am, sw_if_index);
+  am->macip_acl_by_sw_if_index[sw_if_index] = macip_acl_index;
+
+  /* Apply the classifier tables for L2 ACLs */
+  rv =
+    vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+                              a->ip6_table_index, a->l2_table_index, 1);
+  return rv;
+}
+
+static int
+macip_acl_del_list (u32 acl_list_index)
+{
+  acl_main_t *am = &acl_main;
+  macip_acl_list_t *a;
+  int i;
+  if (pool_is_free_index (am->macip_acls, acl_list_index))
+    {
+      return -1;
+    }
+
+  /* delete any references to the ACL */
+  for (i = 0; i < vec_len (am->macip_acl_by_sw_if_index); i++)
+    {
+      if (am->macip_acl_by_sw_if_index[i] == acl_list_index)
+        {
+          macip_acl_interface_del_acl (am, i);
+        }
+    }
+
+  void *oldheap = acl_set_heap(am);
+  /* Now that classifier tables are detached, clean them up */
+  macip_destroy_classify_tables (am, acl_list_index);
+
+  /* now we can delete the ACL itself */
+  a = pool_elt_at_index (am->macip_acls, acl_list_index);
+  if (a->rules)
+    {
+      vec_free (a->rules);
+    }
+  pool_put (am->macip_acls, a);
+  clib_mem_set_heap (oldheap);
+  return 0;
+}
+
+
+static int
+macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add,
+                                 u32 acl_list_index)
+{
+  acl_main_t *am = &acl_main;
+  int rv = -1;
+  if (is_add)
+    {
+      rv = macip_acl_interface_add_acl (am, sw_if_index, acl_list_index);
+    }
+  else
+    {
+      rv = macip_acl_interface_del_acl (am, sw_if_index);
+    }
+  return rv;
+}
+
+/*
+ * If the client does not allocate enough memory for a variable-length
+ * message, and then proceeds to use it as if the full memory were allocated,
+ * absent this check we would happily consume that on the VPP side, and go
+ * along as if nothing happened. However, the resulting
+ * effects range from just garbage in the API decode
+ * (because the decoder snoops too far), to potential memory
+ * corruptions.
+ *
+ * This verifies that the actual length of the message is
+ * at least expected_len, and complains loudly if it is not.
+ *
+ * A failing check here is 100% a software bug on the API user side,
+ * so we might as well yell.
+ *
+ */
+static int verify_message_len(void *mp, u32 expected_len, char *where)
+{
+  u32 supplied_len = vl_msg_api_get_msg_length (mp);
+  if (supplied_len < expected_len) {
+    clib_warning("%s: Supplied message length %d is less than expected %d",
+                 where, supplied_len, expected_len);
+    return 0;
+  } else {
+    return 1;
+  }
+}
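
A hedged example of the failure mode verify_message_len guards against (annotation only):

    /* annotation: client bug - allocates only the fixed header... */
    mp = vl_msg_api_alloc (sizeof (*mp));     /* no room for the rules */
    mp->count = htonl (10);                   /* ...but claims 10 rules */
    /* expected_len = sizeof(*mp) + 10*sizeof(mp->r[0]) exceeds the
       supplied length, so the handler below returns
       VNET_API_ERROR_INVALID_VALUE instead of reading past the end
       of the message */
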
+
+/* API message handler */
+static void
+vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp)
+{
+  vl_api_acl_add_replace_reply_t *rmp;
+  acl_main_t *am = &acl_main;
+  int rv;
+  u32 acl_list_index = ntohl (mp->acl_index);
+  u32 acl_count = ntohl (mp->count);
+  u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]);
+
+  if (verify_message_len(mp, expected_len, "acl_add_replace")) {
+    rv = acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag);
+  } else {
+    rv = VNET_API_ERROR_INVALID_VALUE;
+  }
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY,
+  ({
+    rmp->acl_index = htonl(acl_list_index);
+  }));
+  /* *INDENT-ON* */
+}
+
+static void
+vl_api_acl_del_t_handler (vl_api_acl_del_t * mp)
+{
+  acl_main_t *am = &acl_main;
+  vl_api_acl_del_reply_t *rmp;
+  int rv;
+
+  rv = acl_del_list (ntohl (mp->acl_index));
+
+  REPLY_MACRO (VL_API_ACL_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_add_del_t_handler (vl_api_acl_interface_add_del_t * mp)
+{
+  acl_main_t *am = &acl_main;
+  vnet_interface_main_t *im = &am->vnet_main->interface_main;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  vl_api_acl_interface_add_del_reply_t *rmp;
+  int rv = -1;
+
+  if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+    rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+  else
+    rv =
+      acl_interface_add_del_inout_acl (sw_if_index, mp->is_add,
+                                       mp->is_input, ntohl (mp->acl_index));
+
+  REPLY_MACRO (VL_API_ACL_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_set_acl_list_t_handler
+  (vl_api_acl_interface_set_acl_list_t * mp)
+{
+  acl_main_t *am = &acl_main;
+  vl_api_acl_interface_set_acl_list_reply_t *rmp;
+  int rv = 0;
+  int i;
+  vnet_interface_main_t *im = &am->vnet_main->interface_main;
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+
+  if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+    rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+  else
+    {
+      acl_interface_reset_inout_acls (sw_if_index, 0);
+      acl_interface_reset_inout_acls (sw_if_index, 1);
+
+      for (i = 0; i < mp->count; i++)
+        {
+          if(acl_is_not_defined(am, ntohl (mp->acls[i]))) {
+            /* ACL does not exist, so we can not apply it */
+            rv = -1;
+          }
+        }
+      if (0 == rv) {
+        for (i = 0; i < mp->count; i++)
+          {
+            acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input),
+                                             ntohl (mp->acls[i]));
+          }
+      }
+    }
+
+  REPLY_MACRO (VL_API_ACL_INTERFACE_SET_ACL_LIST_REPLY);
+}
+
+static void
+copy_acl_rule_to_api_rule (vl_api_acl_rule_t * api_rule, acl_rule_t * r)
+{
+  api_rule->is_permit = r->is_permit;
+  api_rule->is_ipv6 = r->is_ipv6;
+  if(r->is_ipv6)
+    {
+      memcpy (api_rule->src_ip_addr, &r->src, sizeof (r->src));
+      memcpy (api_rule->dst_ip_addr, &r->dst, sizeof (r->dst));
+    }
+  else
+    {
+      memcpy (api_rule->src_ip_addr, &r->src.ip4, sizeof (r->src.ip4));
+      memcpy (api_rule->dst_ip_addr, &r->dst.ip4, sizeof (r->dst.ip4));
+    }
+  api_rule->src_ip_prefix_len = r->src_prefixlen;
+  api_rule->dst_ip_prefix_len = r->dst_prefixlen;
+  api_rule->proto = r->proto;
+  api_rule->srcport_or_icmptype_first = htons (r->src_port_or_type_first);
+  api_rule->srcport_or_icmptype_last = htons (r->src_port_or_type_last);
+  api_rule->dstport_or_icmpcode_first = htons (r->dst_port_or_code_first);
+  api_rule->dstport_or_icmpcode_last = htons (r->dst_port_or_code_last);
+  api_rule->tcp_flags_mask = r->tcp_flags_mask;
+  api_rule->tcp_flags_value = r->tcp_flags_value;
+}
api_rule->dstport_or_icmpcode_last = htons (r->dst_port_or_code_last); + api_rule->tcp_flags_mask = r->tcp_flags_mask; + api_rule->tcp_flags_value = r->tcp_flags_value; +} + +static void +send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, + acl_list_t * acl, u32 context) +{ + vl_api_acl_details_t *mp; + vl_api_acl_rule_t *rules; + int i; + int msg_size = sizeof (*mp) + sizeof (mp->r[0]) * acl->count; + void *oldheap = acl_set_heap(am); + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->count = htonl (acl->count); + mp->acl_index = htonl (acl - am->acls); + memcpy (mp->tag, acl->tag, sizeof (mp->tag)); + // clib_memcpy (mp->r, acl->rules, acl->count * sizeof(acl->rules[0])); + rules = mp->r; + for (i = 0; i < acl->count; i++) + { + copy_acl_rule_to_api_rule (&rules[i], &acl->rules[i]); + } + + clib_mem_set_heap (oldheap); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + + +static void +vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp) +{ + acl_main_t *am = &acl_main; + u32 acl_index; + acl_list_t *acl; + + int rv = -1; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->acl_index == ~0) + { + /* *INDENT-OFF* */ + /* Just dump all ACLs */ + pool_foreach (acl, am->acls, + ({ + send_acl_details(am, q, acl, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + acl_index = ntohl (mp->acl_index); + if (!pool_is_free_index (am->acls, acl_index)) + { + acl = pool_elt_at_index (am->acls, acl_index); + send_acl_details (am, q, acl, mp->context); + } + } + + if (rv == -1) + { + /* FIXME API: should we signal an error here at all ? 
*/ + return; + } +} + +static void +send_acl_interface_list_details (acl_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index, u32 context) +{ + vl_api_acl_interface_list_details_t *mp; + int msg_size; + int n_input; + int n_output; + int count; + int i = 0; + void *oldheap = acl_set_heap(am); + + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + + n_input = vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]); + n_output = vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); + count = n_input + n_output; + + msg_size = sizeof (*mp); + msg_size += sizeof (mp->acls[0]) * count; + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = + ntohs (VL_API_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->count = count; + mp->n_input = n_input; + for (i = 0; i < n_input; i++) + { + mp->acls[i] = htonl (am->input_acl_vec_by_sw_if_index[sw_if_index][i]); + } + for (i = 0; i < n_output; i++) + { + mp->acls[n_input + i] = + htonl (am->output_acl_vec_by_sw_if_index[sw_if_index][i]); + } + clib_mem_set_heap (oldheap); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t * + mp) +{ + acl_main_t *am = &acl_main; + vnet_sw_interface_t *swif; + vnet_interface_main_t *im = &am->vnet_main->interface_main; + + u32 sw_if_index; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->sw_if_index == ~0) + { + /* *INDENT-OFF* */ + pool_foreach (swif, im->sw_interfaces, + ({ + send_acl_interface_list_details(am, q, swif->sw_if_index, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + sw_if_index = ntohl (mp->sw_if_index); + if (!pool_is_free_index(im->sw_interfaces, sw_if_index)) + send_acl_interface_list_details (am, q, sw_if_index, mp->context); + } +} + +/* MACIP ACL API handlers */ + +static void +vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp) +{ + vl_api_macip_acl_add_reply_t *rmp; + acl_main_t *am = &acl_main; + int rv; + u32 acl_list_index = ~0; + u32 acl_count = ntohl (mp->count); + u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]); + + if (verify_message_len(mp, expected_len, "macip_acl_add")) { + rv = macip_acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag); + } else { + rv = VNET_API_ERROR_INVALID_VALUE; + } + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY, + ({ + rmp->acl_index = htonl(acl_list_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_macip_acl_add_replace_t_handler (vl_api_macip_acl_add_replace_t * mp) +{ + vl_api_macip_acl_add_replace_reply_t *rmp; + acl_main_t *am = &acl_main; + int rv; + u32 acl_list_index = ntohl (mp->acl_index); + u32 acl_count = ntohl (mp->count); + u32 expected_len = sizeof(*mp) + acl_count*sizeof(mp->r[0]); + + if (verify_message_len(mp, expected_len, "macip_acl_add_replace")) { + rv = macip_acl_add_list (acl_count, mp->r, &acl_list_index, mp->tag); + } else { + rv = VNET_API_ERROR_INVALID_VALUE; + } + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLACE_REPLY, + ({ + rmp->acl_index = htonl(acl_list_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_macip_acl_del_t_handler (vl_api_macip_acl_del_t * mp) +{ + acl_main_t *am = &acl_main; + vl_api_macip_acl_del_reply_t *rmp; + int rv; + + rv = 
macip_acl_del_list (ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_MACIP_ACL_DEL_REPLY); +} + +static void +vl_api_macip_acl_interface_add_del_t_handler + (vl_api_macip_acl_interface_add_del_t * mp) +{ + acl_main_t *am = &acl_main; + vl_api_macip_acl_interface_add_del_reply_t *rmp; + int rv = -1; + vnet_interface_main_t *im = &am->vnet_main->interface_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + + if (pool_is_free_index(im->sw_interfaces, sw_if_index)) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + rv = + macip_acl_interface_add_del_acl (ntohl (mp->sw_if_index), mp->is_add, + ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_MACIP_ACL_INTERFACE_ADD_DEL_REPLY); +} + +static void +send_macip_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, + macip_acl_list_t * acl, u32 context) +{ + vl_api_macip_acl_details_t *mp; + vl_api_macip_acl_rule_t *rules; + macip_acl_rule_t *r; + int i; + int msg_size = sizeof (*mp) + (acl ? sizeof (mp->r[0]) * acl->count : 0); + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + if (acl) + { + memcpy (mp->tag, acl->tag, sizeof (mp->tag)); + mp->count = htonl (acl->count); + mp->acl_index = htonl (acl - am->macip_acls); + rules = mp->r; + for (i = 0; i < acl->count; i++) + { + r = &acl->rules[i]; + rules[i].is_permit = r->is_permit; + rules[i].is_ipv6 = r->is_ipv6; + memcpy (rules[i].src_mac, &r->src_mac, sizeof (r->src_mac)); + memcpy (rules[i].src_mac_mask, &r->src_mac_mask, + sizeof (r->src_mac_mask)); + if (r->is_ipv6) + memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip6, + sizeof (r->src_ip_addr.ip6)); + else + memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip4, + sizeof (r->src_ip_addr.ip4)); + rules[i].src_ip_prefix_len = r->src_prefixlen; + } + } + else + { + /* No martini, no party - no ACL applied to this interface. 
*/ + mp->acl_index = ~0; + mp->count = 0; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + + +static void +vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp) +{ + acl_main_t *am = &acl_main; + macip_acl_list_t *acl; + + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->acl_index == ~0) + { + /* Just dump all ACLs for now, with sw_if_index = ~0 */ + pool_foreach (acl, am->macip_acls, ( + { + send_macip_acl_details (am, q, acl, + mp-> + context);} + )); + /* *INDENT-ON* */ + } + else + { + u32 acl_index = ntohl (mp->acl_index); + if (!pool_is_free_index (am->macip_acls, acl_index)) + { + acl = pool_elt_at_index (am->macip_acls, acl_index); + send_macip_acl_details (am, q, acl, mp->context); + } + } +} + +static void +vl_api_macip_acl_interface_get_t_handler (vl_api_macip_acl_interface_get_t * + mp) +{ + acl_main_t *am = &acl_main; + vl_api_macip_acl_interface_get_reply_t *rmp; + u32 count = vec_len (am->macip_acl_by_sw_if_index); + int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]) * count; + unix_shared_memory_queue_t *q; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + rmp = vl_msg_api_alloc (msg_size); + memset (rmp, 0, msg_size); + rmp->_vl_msg_id = + ntohs (VL_API_MACIP_ACL_INTERFACE_GET_REPLY + am->msg_id_base); + rmp->context = mp->context; + rmp->count = htonl (count); + for (i = 0; i < count; i++) + { + rmp->acls[i] = htonl (am->macip_acl_by_sw_if_index[i]); + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +send_macip_acl_interface_list_details (acl_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index, + u32 acl_index, + u32 context) +{ + vl_api_macip_acl_interface_list_details_t *rmp; + /* at this time there is only ever 1 mac ip acl per interface */ + int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]); + + rmp = vl_msg_api_alloc (msg_size); + memset (rmp, 0, msg_size); + rmp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base); + + /* fill in the message */ + rmp->context = context; + rmp->count = 1; + rmp->sw_if_index = htonl (sw_if_index); + rmp->acls[0] = htonl (acl_index); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_macip_acl_interface_list_dump_t_handler (vl_api_macip_acl_interface_list_dump_t *mp) +{ + unix_shared_memory_queue_t *q; + acl_main_t *am = &acl_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (sw_if_index == ~0) + { + vec_foreach_index(sw_if_index, am->macip_acl_by_sw_if_index) + { + if (~0 != am->macip_acl_by_sw_if_index[sw_if_index]) + { + send_macip_acl_interface_list_details(am, q, sw_if_index, + am->macip_acl_by_sw_if_index[sw_if_index], + mp->context); + } + } + } + else + { + if (vec_len(am->macip_acl_by_sw_if_index) > sw_if_index) + { + send_macip_acl_interface_list_details(am, q, sw_if_index, + am->macip_acl_by_sw_if_index[sw_if_index], + mp->context); + } + } +} + +/* Set up the API message handling tables */ +static clib_error_t * +acl_plugin_api_hookup (vlib_main_t * vm) +{ + acl_main_t *am = &acl_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + am->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_acl_plugin_api_msg; +#undef _ + + return 0; +} + +#define 
vl_msg_name_crc_list +#include <acl/acl_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (acl_main_t * am, api_main_t * apim) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (apim, #n "_" #crc, id + am->msg_id_base); + foreach_vl_msg_name_crc_acl; +#undef _ +} + +static void +acl_setup_fa_nodes (void) +{ + vlib_main_t *vm = vlib_get_main (); + acl_main_t *am = &acl_main; + vlib_node_t *n, *n4, *n6; + + n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify"); + n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip4-l2"); + n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-in-ip6-l2"); + + + am->l2_input_classify_next_acl_ip4 = + vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); + am->l2_input_classify_next_acl_ip6 = + vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); + + feat_bitmap_init_next_nodes (vm, n4->index, L2INPUT_N_FEAT, + l2input_get_feat_names (), + am->fa_acl_in_ip4_l2_node_feat_next_node_index); + + feat_bitmap_init_next_nodes (vm, n6->index, L2INPUT_N_FEAT, + l2input_get_feat_names (), + am->fa_acl_in_ip6_l2_node_feat_next_node_index); + + + n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify"); + n4 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip4-l2"); + n6 = vlib_get_node_by_name (vm, (u8 *) "acl-plugin-out-ip6-l2"); + + am->l2_output_classify_next_acl_ip4 = + vlib_node_add_next_with_slot (vm, n->index, n4->index, ~0); + am->l2_output_classify_next_acl_ip6 = + vlib_node_add_next_with_slot (vm, n->index, n6->index, ~0); + + feat_bitmap_init_next_nodes (vm, n4->index, L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + am->fa_acl_out_ip4_l2_node_feat_next_node_index); + + feat_bitmap_init_next_nodes (vm, n6->index, L2OUTPUT_N_FEAT, + l2output_get_feat_names (), + am->fa_acl_out_ip6_l2_node_feat_next_node_index); +} + +static void +acl_set_timeout_sec(int timeout_type, u32 value) +{ + acl_main_t *am = &acl_main; + clib_time_t *ct = &am->vlib_main->clib_time; + + if (timeout_type < ACL_N_TIMEOUTS) { + am->session_timeout_sec[timeout_type] = value; + } else { + clib_warning("Unknown timeout type %d", timeout_type); + return; + } + am->session_timeout[timeout_type] = (u64)(((f64)value)/ct->seconds_per_clock); +} + +static void +acl_set_session_max_entries(u32 value) +{ + acl_main_t *am = &acl_main; + am->fa_conn_table_max_entries = value; +} + +static int +acl_set_skip_ipv6_eh(u32 eh, u32 value) +{ + acl_main_t *am = &acl_main; + + if ((eh < 256) && (value < 2)) + { + am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, eh, value); + return 1; + } + else + return 0; +} + + +static clib_error_t * +acl_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) +{ + acl_main_t *am = &acl_main; + if (0 == am->acl_mheap) { + /* ACL heap is not initialized, so definitely nothing to do. */ + return 0; + } + if (0 == is_add) { + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, sw_if_index); + /* also unapply any ACLs in case the users did not do so. 
*/ + macip_acl_interface_del_acl(am, sw_if_index); + acl_interface_reset_inout_acls (sw_if_index, 0); + acl_interface_reset_inout_acls (sw_if_index, 1); + } + return 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (acl_sw_interface_add_del); + + + +static clib_error_t * +acl_set_aclplugin_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + u32 timeout = 0; + u32 val = 0; + u32 eh_val = 0; + uword memory_size = 0; + acl_main_t *am = &acl_main; + + if (unformat (input, "skip-ipv6-extension-header %u %u", &eh_val, &val)) { + if(!acl_set_skip_ipv6_eh(eh_val, val)) { + error = clib_error_return(0, "expecting eh=0..255, value=0..1"); + } + goto done; + } + if (unformat (input, "use-hash-acl-matching %u", &val)) + { + am->use_hash_acl_matching = (val !=0); + goto done; + } + if (unformat (input, "l4-match-nonfirst-fragment %u", &val)) + { + am->l4_match_nonfirst_fragment = (val != 0); + goto done; + } + if (unformat (input, "heap")) + { + if (unformat(input, "main")) + { + if (unformat(input, "validate %u", &val)) + acl_plugin_acl_set_validate_heap(am, val); + else if (unformat(input, "trace %u", &val)) + acl_plugin_acl_set_trace_heap(am, val); + goto done; + } + else if (unformat(input, "hash")) + { + if (unformat(input, "validate %u", &val)) + acl_plugin_hash_acl_set_validate_heap(am, val); + else if (unformat(input, "trace %u", &val)) + acl_plugin_hash_acl_set_trace_heap(am, val); + goto done; + } + goto done; + } + if (unformat (input, "session")) { + if (unformat (input, "table")) { + /* The commands here are for tuning/testing. No user-serviceable parts inside */ + if (unformat (input, "max-entries")) { + if (!unformat(input, "%u", &val)) { + error = clib_error_return(0, + "expecting maximum number of entries, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_session_max_entries(val); + goto done; + } + } + if (unformat (input, "hash-table-buckets")) { + if (!unformat(input, "%u", &val)) { + error = clib_error_return(0, + "expecting maximum number of hash table buckets, got `%U`", + format_unformat_error, input); + goto done; + } else { + am->fa_conn_table_hash_num_buckets = val; + goto done; + } + } + if (unformat (input, "hash-table-memory")) { + if (!unformat(input, "%U", unformat_memory_size, &memory_size)) { + error = clib_error_return(0, + "expecting maximum amount of hash table memory, got `%U`", + format_unformat_error, input); + goto done; + } else { + am->fa_conn_table_hash_memory_size = memory_size; + goto done; + } + } + goto done; + } + if (unformat (input, "timeout")) { + if (unformat(input, "udp")) { + if(unformat(input, "idle")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_UDP_IDLE, timeout); + goto done; + } + } + } + if (unformat(input, "tcp")) { + if(unformat(input, "idle")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_TCP_IDLE, timeout); + goto done; + } + } + if(unformat(input, "transient")) { + if (!unformat(input, "%u", &timeout)) { + error = clib_error_return(0, + "expecting timeout value in seconds, got `%U`", + format_unformat_error, input); + goto done; + } else { + acl_set_timeout_sec(ACL_TIMEOUT_TCP_TRANSIENT, timeout); + goto done; + } + } + } + 
goto done; + } + } +done: + return error; +} + +static u8 * +my_format_mac_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + +static inline u8 * +my_macip_acl_rule_t_pretty_format (u8 *out, va_list *args) +{ + macip_acl_rule_t *a = va_arg (*args, macip_acl_rule_t *); + + out = format(out, "%s action %d ip %U/%d mac %U mask %U", + a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit, + format_ip46_address, &a->src_ip_addr, IP46_TYPE_ANY, + a->src_prefixlen, + my_format_mac_address, a->src_mac, + my_format_mac_address, a->src_mac_mask); + return(out); +} + +static void +macip_acl_print(acl_main_t *am, u32 macip_acl_index) +{ + vlib_main_t * vm = am->vlib_main; + int i; + + /* Don't try to print someone else's memory */ + if (macip_acl_index > vec_len(am->macip_acls)) + return; + + macip_acl_list_t *a = vec_elt_at_index(am->macip_acls, macip_acl_index); + int free_pool_slot = pool_is_free_index(am->macip_acls, macip_acl_index); + + vlib_cli_output(vm, "MACIP acl_index: %d, count: %d (true len %d) tag {%s} is free pool slot: %d\n", + macip_acl_index, a->count, vec_len(a->rules), a->tag, free_pool_slot); + vlib_cli_output(vm, " ip4_table_index %d, ip6_table_index %d, l2_table_index %d\n", + a->ip4_table_index, a->ip6_table_index, a->l2_table_index); + for(i=0; i<vec_len(a->rules); i++) + vlib_cli_output(vm, " rule %d: %U\n", i, my_macip_acl_rule_t_pretty_format, + vec_elt_at_index(a->rules, i)); + +} + +static clib_error_t * +acl_show_aclplugin_macip_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + acl_main_t *am = &acl_main; + int i; + if (unformat (input, "interface")) + { + for(i=0; i < vec_len(am->macip_acl_by_sw_if_index); i++) + { + vlib_cli_output(vm, " sw_if_index %d: %d\n", i, vec_elt(am->macip_acl_by_sw_if_index, i)); + } + } + else if (unformat (input, "acl")) + { + for(i=0; i < vec_len(am->macip_acls); i++) + macip_acl_print(am, i); + } + return error; +} + + +static clib_error_t * +acl_show_aclplugin_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + acl_main_t *am = &acl_main; + vnet_interface_main_t *im = &am->vnet_main->interface_main; + u32 *pj; + + vnet_sw_interface_t *swif; + + if (unformat (input, "sessions")) + { + u8 * out0 = format(0, ""); + u16 wk; + u32 show_bihash_verbose = 0; + u32 show_session_thread_id = ~0; + u32 show_session_session_index = ~0; + unformat (input, "thread %u index %u", &show_session_thread_id, &show_session_session_index); + unformat (input, "verbose %u", &show_bihash_verbose); + { + u64 n_adds = am->fa_session_total_adds; + u64 n_dels = am->fa_session_total_dels; + out0 = format(out0, "Sessions total: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels); + } + out0 = format(out0, "\n\nPer-thread data:\n"); + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + out0 = format(out0, "Thread #%d:\n", wk); + if (show_session_thread_id == wk && show_session_session_index < pool_len(pw->fa_sessions_pool)) { + out0 = format(out0, " session index %u:\n", show_session_session_index); + fa_session_t *sess = pw->fa_sessions_pool + show_session_session_index; + u64 *m = (u64 *)&sess->info; + out0 = format(out0, " info: %016llx %016llx %016llx %016llx %016llx %016llx\n", m[0], m[1], m[2], m[3], m[4], m[5]); + out0 = format(out0, " sw_if_index: %u\n", 
sess->sw_if_index); + out0 = format(out0, " tcp_flags_seen: %x\n", sess->tcp_flags_seen.as_u16); + out0 = format(out0, " last active time: %lu\n", sess->last_active_time); + out0 = format(out0, " thread index: %u\n", sess->thread_index); + out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time); + out0 = format(out0, " link next index: %u\n", sess->link_next_idx); + out0 = format(out0, " link prev index: %u\n", sess->link_prev_idx); + out0 = format(out0, " link list id: %u\n", sess->link_list_id); + } + out0 = format(out0, " connection add/del stats:\n", wk); + pool_foreach (swif, im->sw_interfaces, + ({ + u32 sw_if_index = swif->sw_if_index; + u64 n_adds = sw_if_index < vec_len(pw->fa_session_adds_by_sw_if_index) ? pw->fa_session_adds_by_sw_if_index[sw_if_index] : 0; + u64 n_dels = sw_if_index < vec_len(pw->fa_session_dels_by_sw_if_index) ? pw->fa_session_dels_by_sw_if_index[sw_if_index] : 0; + out0 = format(out0, " sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels); + })); + + out0 = format(out0, " connection timeout type lists:\n", wk); + u8 tt = 0; + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + u32 head_session_index = pw->fa_conn_list_head[tt]; + out0 = format(out0, " fa_conn_list_head[%d]: %d\n", tt, head_session_index); + if (~0 != head_session_index) { + fa_session_t *sess = pw->fa_sessions_pool + head_session_index; + out0 = format(out0, " last active time: %lu\n", sess->last_active_time); + out0 = format(out0, " link enqueue time: %lu\n", sess->link_enqueue_time); + } + } + + out0 = format(out0, " Next expiry time: %lu\n", pw->next_expiry_time); + out0 = format(out0, " Requeue until time: %lu\n", pw->requeue_until_time); + out0 = format(out0, " Current time wait interval: %lu\n", pw->current_time_wait_interval); + out0 = format(out0, " Count of deleted sessions: %lu\n", pw->cnt_deleted_sessions); + out0 = format(out0, " Delete already deleted: %lu\n", pw->cnt_already_deleted_sessions); + out0 = format(out0, " Session timers restarted: %lu\n", pw->cnt_session_timer_restarted); + out0 = format(out0, " Swipe until this time: %lu\n", pw->swipe_end_time); + out0 = format(out0, " sw_if_index serviced bitmap: %U\n", format_bitmap_hex, pw->serviced_sw_if_index_bitmap); + out0 = format(out0, " pending clear intfc bitmap : %U\n", format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap); + out0 = format(out0, " clear in progress: %u\n", pw->clear_in_process); + out0 = format(out0, " interrupt is pending: %d\n", pw->interrupt_is_pending); + out0 = format(out0, " interrupt is needed: %d\n", pw->interrupt_is_needed); + out0 = format(out0, " interrupt is unwanted: %d\n", pw->interrupt_is_unwanted); + out0 = format(out0, " interrupt generation: %d\n", pw->interrupt_generation); + } + out0 = format(out0, "\n\nConn cleaner thread counters:\n"); +#define _(cnt, desc) out0 = format(out0, " %20lu: %s\n", am->cnt, desc); + foreach_fa_cleaner_counter; +#undef _ + vec_terminate_c_string(out0); + vlib_cli_output(vm, "\n\n%s\n\n", out0); + vlib_cli_output(vm, "Interrupt generation: %d\n", am->fa_interrupt_generation); + vlib_cli_output(vm, "Sessions per interval: min %lu max %lu increment: %f ms current: %f ms", + am->fa_min_deleted_sessions_per_interval, am->fa_max_deleted_sessions_per_interval, + am->fa_cleaner_wait_time_increment * 1000.0, ((f64)am->fa_current_cleaner_timer_wait_interval) * 1000.0/(f64)vm->clib_time.clocks_per_second); + + vec_free(out0); + show_fa_sessions_hash(vm, show_bihash_verbose); + } + else if (unformat (input, 
"interface")) + { + u32 sw_if_index = ~0; + u32 swi; + u8 * out0 = format(0, ""); + unformat (input, "sw_if_index %u", &sw_if_index); + for(swi = 0; (swi < vec_len(am->input_acl_vec_by_sw_if_index)) || + (swi < vec_len(am->output_acl_vec_by_sw_if_index)); swi++) { + out0 = format(out0, "sw_if_index %d:\n", swi); + + if ((swi < vec_len(am->input_acl_vec_by_sw_if_index)) && + (vec_len(am->input_acl_vec_by_sw_if_index[swi]) > 0)) { + out0 = format(out0, " input acl(s): "); + vec_foreach(pj, am->input_acl_vec_by_sw_if_index[swi]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + + if ((swi < vec_len(am->output_acl_vec_by_sw_if_index)) && + (vec_len(am->output_acl_vec_by_sw_if_index[swi]) > 0)) { + out0 = format(out0, " output acl(s): "); + vec_foreach(pj, am->output_acl_vec_by_sw_if_index[swi]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + + } + vec_terminate_c_string(out0); + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + else if (unformat (input, "acl")) + { + u32 acl_index = ~0; + u32 i; + u8 * out0 = format(0, ""); + unformat (input, "index %u", &acl_index); + for(i=0; i<vec_len(am->acls); i++) { + if (acl_is_not_defined(am, i)) { + /* don't attempt to show the ACLs that do not exist */ + continue; + } + if ((acl_index != ~0) && (acl_index != i)) { + continue; + } + out0 = format(out0, "acl-index %u count %u tag {%s}\n", i, am->acls[i].count, am->acls[i].tag); + acl_rule_t *r; + int j; + for(j=0; j<am->acls[i].count; j++) { + r = &am->acls[i].rules[j]; + out0 = format(out0, " %4d: %s ", j, r->is_ipv6 ? "ipv6" : "ipv4"); + out0 = format_acl_action(out0, r->is_permit); + out0 = format(out0, " src %U/%d", format_ip46_address, &r->src, IP46_TYPE_ANY, r->src_prefixlen); + out0 = format(out0, " dst %U/%d", format_ip46_address, &r->dst, IP46_TYPE_ANY, r->dst_prefixlen); + out0 = format(out0, " proto %d", r->proto); + out0 = format(out0, " sport %d", r->src_port_or_type_first); + if (r->src_port_or_type_first != r->src_port_or_type_last) { + out0 = format(out0, "-%d", r->src_port_or_type_last); + } + out0 = format(out0, " dport %d", r->dst_port_or_code_first); + if (r->dst_port_or_code_first != r->dst_port_or_code_last) { + out0 = format(out0, "-%d", r->dst_port_or_code_last); + } + if (r->tcp_flags_mask || r->tcp_flags_value) { + out0 = format(out0, " tcpflags %d mask %d", r->tcp_flags_value, r->tcp_flags_mask); + } + out0 = format(out0, "\n"); + } + + if (i<vec_len(am->input_sw_if_index_vec_by_acl)) { + out0 = format(out0, " applied inbound on sw_if_index: "); + vec_foreach(pj, am->input_sw_if_index_vec_by_acl[i]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + if (i<vec_len(am->output_sw_if_index_vec_by_acl)) { + out0 = format(out0, " applied outbound on sw_if_index: "); + vec_foreach(pj, am->output_sw_if_index_vec_by_acl[i]) { + out0 = format(out0, "%d ", *pj); + } + out0 = format(out0, "\n"); + } + } + vec_terminate_c_string(out0); + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + else if (unformat (input, "memory")) + { + vlib_cli_output (vm, "ACL plugin main heap statistics:\n"); + if (am->acl_mheap) { + vlib_cli_output (vm, " %U\n", format_mheap, am->acl_mheap, 1); + } else { + vlib_cli_output (vm, " Not initialized\n"); + } + vlib_cli_output (vm, "ACL hash lookup support heap statistics:\n"); + if (am->hash_lookup_mheap) { + vlib_cli_output (vm, " %U\n", format_mheap, am->hash_lookup_mheap, 1); + } else { + vlib_cli_output (vm, " Not initialized\n"); + } + } + else if (unformat 
(input, "tables")) + { + ace_mask_type_entry_t *mte; + u32 acl_index = ~0; + u32 sw_if_index = ~0; + int show_acl_hash_info = 0; + int show_applied_info = 0; + int show_mask_type = 0; + int show_bihash = 0; + u32 show_bihash_verbose = 0; + + if (unformat (input, "acl")) { + show_acl_hash_info = 1; + /* mask-type is handy to see as well right there */ + show_mask_type = 1; + unformat (input, "index %u", &acl_index); + } else if (unformat (input, "applied")) { + show_applied_info = 1; + unformat (input, "sw_if_index %u", &sw_if_index); + } else if (unformat (input, "mask")) { + show_mask_type = 1; + } else if (unformat (input, "hash")) { + show_bihash = 1; + unformat (input, "verbose %u", &show_bihash_verbose); + } + + if ( ! (show_mask_type || show_acl_hash_info || show_applied_info || show_bihash) ) { + /* if no qualifiers specified, show all */ + show_mask_type = 1; + show_acl_hash_info = 1; + show_applied_info = 1; + show_bihash = 1; + } + + if (show_mask_type) { + vlib_cli_output(vm, "Mask-type entries:"); + /* *INDENT-OFF* */ + pool_foreach(mte, am->ace_mask_type_pool, + ({ + vlib_cli_output(vm, " %3d: %016llx %016llx %016llx %016llx %016llx %016llx refcount %d", + mte - am->ace_mask_type_pool, + mte->mask.kv.key[0], mte->mask.kv.key[1], mte->mask.kv.key[2], + mte->mask.kv.key[3], mte->mask.kv.key[4], mte->mask.kv.value, mte->refcount); + })); + /* *INDENT-ON* */ + } + + if (show_acl_hash_info) { + u32 i,j; + u8 * out0 = format(0, ""); + u64 *m; + out0 = format(out0, "Mask-ready ACL representations\n"); + for (i=0; i< vec_len(am->hash_acl_infos); i++) { + if ((acl_index != ~0) && (acl_index != i)) { + continue; + } + hash_acl_info_t *ha = &am->hash_acl_infos[i]; + out0 = format(out0, "acl-index %u bitmask-ready layout\n", i); + out0 = format(out0, " applied inbound on sw_if_index list: %U\n", format_vec32, ha->inbound_sw_if_index_list, "%d"); + out0 = format(out0, " applied outbound on sw_if_index list: %U\n", format_vec32, ha->outbound_sw_if_index_list, "%d"); + out0 = format(out0, " mask type index bitmap: %U\n", format_bitmap_hex, ha->mask_type_index_bitmap); + for(j=0; j<vec_len(ha->rules); j++) { + hash_ace_info_t *pa = &ha->rules[j]; + m = (u64 *)&pa->match; + out0 = format(out0, " %4d: %016llx %016llx %016llx %016llx %016llx %016llx mask index %d acl %d rule %d action %d src/dst portrange not ^2: %d,%d\n", + j, m[0], m[1], m[2], m[3], m[4], m[5], pa->mask_type_index, + pa->acl_index, pa->ace_index, pa->action, + pa->src_portrange_not_powerof2, pa->dst_portrange_not_powerof2); + } + } + vec_terminate_c_string(out0); + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + + if (show_applied_info) { + u32 swi, j; + u8 * out0 = format(0, ""); + out0 = format(out0, "Applied lookup entries for interfaces\n"); + + for(swi = 0; (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) || + (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) || + (swi < vec_len(am->input_hash_entry_vec_by_sw_if_index)) || + (swi < vec_len(am->output_hash_entry_vec_by_sw_if_index)); swi++) { + if ((sw_if_index != ~0) && (sw_if_index != swi)) { + continue; + } + out0 = format(out0, "sw_if_index %d:\n", swi); + if (swi < vec_len(am->input_applied_hash_acl_info_by_sw_if_index)) { + applied_hash_acl_info_t *pal = &am->input_applied_hash_acl_info_by_sw_if_index[swi]; + out0 = format(out0, " input lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap); + out0 = format(out0, " input applied acls: %U\n", format_vec32, pal->applied_acls, "%d"); + } + if (swi 
< vec_len(am->input_hash_entry_vec_by_sw_if_index)) { + out0 = format(out0, " input lookup applied entries:\n"); + for(j=0; j<vec_len(am->input_hash_entry_vec_by_sw_if_index[swi]); j++) { + applied_hash_ace_entry_t *pae = &am->input_hash_entry_vec_by_sw_if_index[swi][j]; + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d hitcount %lld\n", + j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, + pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index, pae->hitcount); + } + } + + if (swi < vec_len(am->output_applied_hash_acl_info_by_sw_if_index)) { + applied_hash_acl_info_t *pal = &am->output_applied_hash_acl_info_by_sw_if_index[swi]; + out0 = format(out0, " output lookup mask_type_index_bitmap: %U\n", format_bitmap_hex, pal->mask_type_index_bitmap); + out0 = format(out0, " output applied acls: %U\n", format_vec32, pal->applied_acls, "%d"); + } + if (swi < vec_len(am->output_hash_entry_vec_by_sw_if_index)) { + out0 = format(out0, " output lookup applied entries:\n"); + for(j=0; j<vec_len(am->output_hash_entry_vec_by_sw_if_index[swi]); j++) { + applied_hash_ace_entry_t *pae = &am->output_hash_entry_vec_by_sw_if_index[swi][j]; + out0 = format(out0, " %4d: acl %d rule %d action %d bitmask-ready rule %d next %d prev %d tail %d hitcount %lld\n", + j, pae->acl_index, pae->ace_index, pae->action, pae->hash_ace_info_index, + pae->next_applied_entry_index, pae->prev_applied_entry_index, pae->tail_applied_entry_index, pae->hitcount); + } + } + + } + vec_terminate_c_string(out0); + vlib_cli_output(vm, "\n%s\n", out0); + vec_free(out0); + } + + if (show_bihash) { + show_hash_acl_hash(vm, am, show_bihash_verbose); + } + } + return error; +} + +static clib_error_t * +acl_clear_aclplugin_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + acl_main_t *am = &acl_main; + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, ~0); + return error; +} + + /* *INDENT-OFF* */ +VLIB_CLI_COMMAND (aclplugin_set_command, static) = { + .path = "set acl-plugin", + .short_help = "set acl-plugin session timeout {{udp idle}|tcp {idle|transient}} <seconds>", + .function = acl_set_aclplugin_fn, +}; + +VLIB_CLI_COMMAND (aclplugin_show_command, static) = { + .path = "show acl-plugin", + .short_help = "show acl-plugin {sessions|acl|interface|tables}", + .function = acl_show_aclplugin_fn, +}; + +VLIB_CLI_COMMAND (aclplugin_show_macip_command, static) = { + .path = "show acl-plugin macip", + .short_help = "show acl-plugin macip {acl|interface}", + .function = acl_show_aclplugin_macip_fn, +}; + + +VLIB_CLI_COMMAND (aclplugin_clear_command, static) = { + .path = "clear acl-plugin sessions", + .short_help = "clear acl-plugin sessions", + .function = acl_clear_aclplugin_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +acl_plugin_config (vlib_main_t * vm, unformat_input_t * input) +{ + acl_main_t *am = &acl_main; + u32 conn_table_hash_buckets; + u32 conn_table_hash_memory_size; + u32 conn_table_max_entries; + u32 main_heap_size; + u32 hash_heap_size; + u32 hash_lookup_hash_buckets; + u32 hash_lookup_hash_memory; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "connection hash buckets %d", &conn_table_hash_buckets)) + am->fa_conn_table_hash_num_buckets = conn_table_hash_buckets; + else if (unformat (input, "connection hash memory %d", + &conn_table_hash_memory_size)) + 
am->fa_conn_table_hash_memory_size = conn_table_hash_memory_size; + else if (unformat (input, "connection count max %d", + &conn_table_max_entries)) + am->fa_conn_table_max_entries = conn_table_max_entries; + else if (unformat (input, "main heap size %d", + &main_heap_size)) + am->acl_mheap_size = main_heap_size; + else if (unformat (input, "hash lookup heap size %d", + &hash_heap_size)) + am->hash_lookup_mheap_size = hash_heap_size; + else if (unformat (input, "hash lookup hash buckets %d", + &hash_lookup_hash_buckets)) + am->hash_lookup_hash_buckets = hash_lookup_hash_buckets; + else if (unformat (input, "hash lookup hash memory %d", + &hash_lookup_hash_memory)) + am->hash_lookup_hash_memory = hash_lookup_hash_memory; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + return 0; +} +VLIB_CONFIG_FUNCTION (acl_plugin_config, "acl-plugin"); + +static clib_error_t * +acl_init (vlib_main_t * vm) +{ + acl_main_t *am = &acl_main; + clib_error_t *error = 0; + memset (am, 0, sizeof (*am)); + am->vlib_main = vm; + am->vnet_main = vnet_get_main (); + + u8 *name = format (0, "acl_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + am->msg_id_base = vl_msg_api_get_msg_ids ((char *) name, + VL_MSG_FIRST_AVAILABLE); + + error = acl_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (am, &api_main); + + vec_free (name); + + acl_setup_fa_nodes(); + + am->acl_mheap_size = ACL_FA_DEFAULT_HEAP_SIZE; + am->hash_lookup_mheap_size = ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE; + + am->hash_lookup_hash_buckets = ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS; + am->hash_lookup_hash_memory = ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY; + + am->session_timeout_sec[ACL_TIMEOUT_TCP_TRANSIENT] = TCP_SESSION_TRANSIENT_TIMEOUT_SEC; + am->session_timeout_sec[ACL_TIMEOUT_TCP_IDLE] = TCP_SESSION_IDLE_TIMEOUT_SEC; + am->session_timeout_sec[ACL_TIMEOUT_UDP_IDLE] = UDP_SESSION_IDLE_TIMEOUT_SEC; + + am->fa_conn_table_hash_num_buckets = ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS; + am->fa_conn_table_hash_memory_size = ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE; + am->fa_conn_table_max_entries = ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vec_validate(am->per_worker_data, tm->n_vlib_mains-1); + { + u16 wk; + u8 tt; + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + vec_validate(pw->fa_conn_list_head, ACL_N_TIMEOUTS-1); + vec_validate(pw->fa_conn_list_tail, ACL_N_TIMEOUTS-1); + for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) { + pw->fa_conn_list_head[tt] = ~0; + pw->fa_conn_list_tail[tt] = ~0; + } + } + } + + am->fa_min_deleted_sessions_per_interval = ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL; + am->fa_max_deleted_sessions_per_interval = ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL; + am->fa_cleaner_wait_time_increment = ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT; + + am->fa_cleaner_cnt_delete_by_sw_index = 0; + am->fa_cleaner_cnt_delete_by_sw_index_ok = 0; + am->fa_cleaner_cnt_unknown_event = 0; + am->fa_cleaner_cnt_timer_restarted = 0; + am->fa_cleaner_cnt_wait_with_timeout = 0; + + +#define _(N, v, s) am->fa_ipv6_known_eh_bitmap = clib_bitmap_set(am->fa_ipv6_known_eh_bitmap, v, 1); + foreach_acl_eh +#undef _ + + am->l4_match_nonfirst_fragment = 1; + + /* use the new fancy hash-based matching */ + am->use_hash_acl_matching = 1; + + return error; +} + +VLIB_INIT_FUNCTION (acl_init); 
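+
+/*
+ * Putting the startup-time knobs together: judging by the unformat
+ * strings in acl_plugin_config() above, a startup.conf stanza tuning
+ * the plugin could look like this (the values are illustrative only,
+ * not recommendations):
+ *
+ *   acl-plugin {
+ *     connection hash buckets 65536
+ *     connection hash memory 1073741824
+ *     connection count max 500000
+ *     main heap size 1073741824
+ *     hash lookup heap size 67108864
+ *   }
+ *
+ * Anything not set here keeps the ACL_FA_* / ACL_PLUGIN_* defaults
+ * assigned in acl_init().
+ */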
diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h
new file mode 100644
index 00000000..bed22e5f
--- /dev/null
+++ b/src/plugins/acl/acl.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_h
+#define included_acl_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/l2/l2_output.h>
+
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/bihash_40_8.h>
+
+#include "fa_node.h"
+#include "hash_lookup_types.h"
+
+#define ACL_PLUGIN_VERSION_MAJOR 1
+#define ACL_PLUGIN_VERSION_MINOR 3
+
+#define UDP_SESSION_IDLE_TIMEOUT_SEC 600
+#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24)
+#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120
+
+#define ACL_FA_DEFAULT_HEAP_SIZE (2 << 29)
+
+#define ACL_PLUGIN_HASH_LOOKUP_HEAP_SIZE (2 << 25)
+#define ACL_PLUGIN_HASH_LOOKUP_HASH_BUCKETS 65536
+#define ACL_PLUGIN_HASH_LOOKUP_HASH_MEMORY (2 << 25)
+
+extern vlib_node_registration_t acl_in_node;
+extern vlib_node_registration_t acl_out_node;
+
+void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+
+enum acl_timeout_e {
+  ACL_TIMEOUT_UDP_IDLE = 0,
+  ACL_TIMEOUT_TCP_IDLE,
+  ACL_TIMEOUT_TCP_TRANSIENT,
+  ACL_N_TIMEOUTS
+};
+
+
+enum address_e { IP4, IP6 };
+typedef struct
+{
+  enum address_e type;
+  union {
+    ip6_address_t ip6;
+    ip4_address_t ip4;
+  } addr;
+} address_t;
+
+/*
+ * ACL rules
+ */
+typedef struct
+{
+  u8 is_permit;
+  u8 is_ipv6;
+  ip46_address_t src;
+  u8 src_prefixlen;
+  ip46_address_t dst;
+  u8 dst_prefixlen;
+  u8 proto;
+  u16 src_port_or_type_first;
+  u16 src_port_or_type_last;
+  u16 dst_port_or_code_first;
+  u16 dst_port_or_code_last;
+  u8 tcp_flags_value;
+  u8 tcp_flags_mask;
+} acl_rule_t;
+
+typedef struct
+{
+  u8 is_permit;
+  u8 is_ipv6;
+  u8 src_mac[6];
+  u8 src_mac_mask[6];
+  ip46_address_t src_ip_addr;
+  u8 src_prefixlen;
+} macip_acl_rule_t;
+
+/*
+ * ACL
+ */
+typedef struct
+{
+  u8 tag[64];
+  u32 count;
+  acl_rule_t *rules;
+} acl_list_t;
+
+typedef struct
+{
+  u8 tag[64];
+  u32 count;
+  macip_acl_rule_t *rules;
+  /* References to the classifier tables that will enforce the rules */
+  u32 ip4_table_index;
+  u32 ip6_table_index;
+  u32 l2_table_index;
+} macip_acl_list_t;
+
+/*
+ * An element describing a particular configuration of the mask,
+ * and how many times it has been used.
+ */
+typedef struct
+{
+  fa_5tuple_t mask;
+  u32 refcount;
+} ace_mask_type_entry_t;
+
+typedef struct {
+  /* mheap to hold all the ACL module related allocations, other than hash */
+  void *acl_mheap;
+  u32 acl_mheap_size;
+
+  /* API message ID base */
+  u16 msg_id_base;
+
+  acl_list_t *acls;	/* Pool of ACLs */
+  hash_acl_info_t *hash_acl_infos; /* corresponding hash matching housekeeping info */
+  clib_bihash_48_8_t acl_lookup_hash; /* ACL lookup hash table. */
+  u32 hash_lookup_hash_buckets;
+  u32 hash_lookup_hash_memory;
+
+  /* mheap to hold all the miscellaneous allocations related to hash-based lookups */
+  void *hash_lookup_mheap;
+  u32 hash_lookup_mheap_size;
+  int acl_lookup_hash_initialized;
+  applied_hash_ace_entry_t **input_hash_entry_vec_by_sw_if_index;
+  applied_hash_ace_entry_t **output_hash_entry_vec_by_sw_if_index;
+  applied_hash_acl_info_t *input_applied_hash_acl_info_by_sw_if_index;
+  applied_hash_acl_info_t *output_applied_hash_acl_info_by_sw_if_index;
+
+  macip_acl_list_t *macip_acls;	/* Pool of MAC-IP ACLs */
+
+  /* ACLs associated with interfaces */
+  u32 **input_acl_vec_by_sw_if_index;
+  u32 **output_acl_vec_by_sw_if_index;
+
+  /* interfaces on which given ACLs are applied */
+  u32 **input_sw_if_index_vec_by_acl;
+  u32 **output_sw_if_index_vec_by_acl;
+
+  /* Total count of interface+direction pairs enabled */
+  u32 fa_total_enabled_count;
+
+  /* Do we use hash-based ACL matching or linear */
+  int use_hash_acl_matching;
+
+  /* a pool of all mask types present in all ACEs */
+  ace_mask_type_entry_t *ace_mask_type_pool;
+
+  /*
+   * Classify tables used to grab the packets for the ACL check,
+   * and serving as the 5-tuple session tables at the same time
+   */
+  u32 *acl_ip4_input_classify_table_by_sw_if_index;
+  u32 *acl_ip6_input_classify_table_by_sw_if_index;
+  u32 *acl_ip4_output_classify_table_by_sw_if_index;
+  u32 *acl_ip6_output_classify_table_by_sw_if_index;
+
+  /* MACIP (input) ACLs associated with the interfaces */
+  u32 *macip_acl_by_sw_if_index;
+
+  /* bitmaps: when set, the processing is enabled on the interface */
+  uword *fa_in_acl_on_sw_if_index;
+  uword *fa_out_acl_on_sw_if_index;
+  /* bihash holding all of the sessions */
+  int fa_sessions_hash_is_initialized;
+  clib_bihash_40_8_t fa_sessions_hash;
+  /* The process node which orchestrates the cleanup */
+  u32 fa_cleaner_node_index;
+  /* FA session timeouts, in seconds */
+  u32 session_timeout_sec[ACL_N_TIMEOUTS];
+  /* total session adds/dels */
+  u64 fa_session_total_adds;
+  u64 fa_session_total_dels;
+
+  /* L2 datapath glue */
+
+  /* next indices within L2 classifiers for ip4/ip6 fa L2 nodes */
+  u32 l2_input_classify_next_acl_ip4;
+  u32 l2_input_classify_next_acl_ip6;
+  u32 l2_output_classify_next_acl_ip4;
+  u32 l2_output_classify_next_acl_ip6;
+  /* next node indices for L2 dispatch */
+  u32 fa_acl_in_ip4_l2_node_feat_next_node_index[32];
+  u32 fa_acl_in_ip6_l2_node_feat_next_node_index[32];
+  u32 fa_acl_out_ip4_l2_node_feat_next_node_index[32];
+  u32 fa_acl_out_ip6_l2_node_feat_next_node_index[32];
+
+  /* EH values that we can skip over */
+  uword *fa_ipv6_known_eh_bitmap;
+
+  /* whether to match L4 ACEs with ports on the non-initial fragment */
+  int l4_match_nonfirst_fragment;
+
+  /* per-interface conn table parameters */
+  u32 fa_conn_table_hash_num_buckets;
+  uword fa_conn_table_hash_memory_size;
+  u64 fa_conn_table_max_entries;
+
+  /*
+   * If the cleaner has to delete more than this number
+   * of connections, it halves the sleep time.
+ */
+
+#define ACL_FA_DEFAULT_MAX_DELETED_SESSIONS_PER_INTERVAL 100
+  u64 fa_max_deleted_sessions_per_interval;
+
+  /*
+   * If the cleaner deletes fewer connections than this number,
+   * it increases the wait time by the "increment"
+   */
+
+#define ACL_FA_DEFAULT_MIN_DELETED_SESSIONS_PER_INTERVAL 1
+  u64 fa_min_deleted_sessions_per_interval;
+
+#define ACL_FA_DEFAULT_CLEANER_WAIT_TIME_INCREMENT 0.1
+  f64 fa_cleaner_wait_time_increment;
+
+  u64 fa_current_cleaner_timer_wait_interval;
+
+  int fa_interrupt_generation;
+
+  /* per-worker data related to conn management */
+  acl_fa_per_worker_data_t *per_worker_data;
+
+  /* Configured session timeout */
+  u64 session_timeout[ACL_N_TIMEOUTS];
+
+
+  /* Counters for the cleaner thread */
+
+#define foreach_fa_cleaner_counter \
+  _(fa_cleaner_cnt_delete_by_sw_index, "delete_by_sw_index events") \
+  _(fa_cleaner_cnt_delete_by_sw_index_ok, "delete_by_sw_index handled ok") \
+  _(fa_cleaner_cnt_unknown_event, "unknown events received") \
+  _(fa_cleaner_cnt_timer_restarted, "session idle timers restarted") \
+  _(fa_cleaner_cnt_wait_with_timeout, "event wait with timeout called") \
+  _(fa_cleaner_cnt_wait_without_timeout, "event wait w/o timeout called") \
+  _(fa_cleaner_cnt_event_cycles, "total event cycles") \
+/* end of counters */
+#define _(id, desc) u32 id;
+  foreach_fa_cleaner_counter
+#undef _
+
+  /* convenience */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+} acl_main_t;
+
+#define foreach_acl_eh \
+   _(HOPBYHOP , 0  , "IPv6ExtHdrHopByHop") \
+   _(ROUTING  , 43 , "IPv6ExtHdrRouting") \
+   _(DESTOPT  , 60 , "IPv6ExtHdrDestOpt") \
+   _(FRAGMENT , 44 , "IPv6ExtHdrFragment") \
+   _(MOBILITY , 135, "Mobility Header") \
+   _(HIP      , 139, "Experimental use Host Identity Protocol") \
+   _(SHIM6    , 140, "Shim6 Protocol") \
+   _(EXP1     , 253, "Use for experimentation and testing") \
+   _(EXP2     , 254, "Use for experimentation and testing")
+
+/*
+
+ "No Next Header" is not a header.
+ Also, the Fragment header needs special processing.
+
+ _(NONEXT    , 59 , "NoNextHdr") \
+
+
+ESP is hiding its internal format, so there is no point in trying to go past it.
+
+ _(ESP       , 50 , "EncapsulatingSecurityPayload") \
+
+
+AH has a special treatment of its length: it is in 32-bit words, not 64-bit words like the rest.
+
+ _(AUTH      , 51 , "Authentication Header") \
+
+
+*/
+
+
+ typedef enum {
+ #define _(N, v, s) ACL_EH_##N = v,
+ foreach_acl_eh
+ #undef _
+ } acl_eh_t;
+
+
+
+extern acl_main_t acl_main;
+
+
+#endif
diff --git a/src/plugins/acl/acl_all_api_h.h b/src/plugins/acl/acl_all_api_h.h
new file mode 100644
index 00000000..cb781cfd
--- /dev/null
+++ b/src/plugins/acl/acl_all_api_h.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <acl/acl.api.h>
+
+#ifdef vl_printfun
+#include <acl/manual_fns.h>
+#endif
+
diff --git a/src/plugins/acl/acl_hash_lookup_doc.md b/src/plugins/acl/acl_hash_lookup_doc.md
new file mode 100644
index 00000000..cb93df04
--- /dev/null
+++ b/src/plugins/acl/acl_hash_lookup_doc.md
@@ -0,0 +1,241 @@
+ACL plugin constant-time lookup design {#acl_hash_lookup}
+======================================
+
+The initial implementation of the ACL plugin performs a trivial for() cycle,
+going through the assigned ACLs on a per-packet basis. This is not very
+efficient, even though for very short ACLs its simplicity can beat
+more advanced methods.
+
+However, to cover the case of longer ACLs with acceptable performance,
+we need a better way of matching. This write-up proposes
+a mechanism to reduce a lookup from O(M), where M is the number of entries,
+to O(N), where N is the number of distinct mask combinations.
+
+Preparation of ACL(s)
+---------------------
+
+The ACL plugin will maintain a global list of "mask types", i.e. the specific
+configurations of "do not care" bits within the ACEs.
+Upon the creation of a new ACL, a pass will be made through all the
+ACEs, to assign and possibly allocate the "mask type number".
+
+Each ACL has a structure *hash_acl_info_t* representing the "hash-based"
+parts of information related to that ACL, primarily the array of
+*hash_ace_info_t* structures - each member of that array
+corresponding to one of the rules (ACEs) in the original ACL;
+for this they keep a pair of *(acl_index, ace_index)*,
+predominantly for debugging.
+
+Why do we need a whole separate structure rather than adding new fields
+to the existing rule structure? First, encapsulation, to minimize
+the pollution of the main ACL code with the hash-based lookup artifacts.
+
+Second, one rule may correspond to more than one "hash-based" ACE.
+In fact, most of the rules do correspond to two of those. Why?
+
+Consider that the current ACL lookup logic is that if a packet
+is not the initial fragment, and there is an L4 entry acting on the packet,
+the comparison will be made only on the L4 protocol field value rather
+than on the protocol and port values. This behaviour is governed by
+the *l4_match_nonfirst_fragment* flag in the *acl_main*, and was needed to
+maintain compatibility with the existing software switch implementation.
+
+While for the sequential check in *single_acl_match_5tuple()*
+it is very easy to implement by just breaking out at the right moment,
+in the case of hash-based matching this costs us two checks:
+one on the full 5-tuple with the flag *pkt.is_nonfirst_fragment* being zero,
+the second on the 3-tuple with the flag *pkt.is_nonfirst_fragment* being one,
+with the second check triggered by the *acl_main.l4_match_nonfirst_fragment*
+setting being the default 1. This dictates the necessity of having a "match"
+field in a given *hash_ace_info_t* element, which reflects the value
+we are supposed to match after applying the mask.
+
+There can be other circumstances when it might be beneficial to expand
+the given rule in the original ACL into multiple entries - for example, as an
+optimization within the port range handling for small port ranges
+(this is not done as of the time of writing).
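+To make the "mask type number" assignment above concrete, here is a
+minimal self-contained C sketch of the idea. The type names and the
+linear scan are illustrative simplifications, not the plugin's code;
+the plugin keeps an analogous mask-plus-refcount pairing in its
+*ace_mask_type_entry_t* pool.
+
+```
+#include <stdint.h>
+#include <string.h>
+
+typedef struct { uint64_t w[6]; } mask5_t;  /* stand-in for a 5-tuple mask */
+typedef struct { mask5_t mask; uint32_t refcount; } mask_type_t;
+
+/* Return the mask type number for mask m, reusing an existing entry
+ * when possible; assumes the pool has spare capacity. */
+static uint32_t
+assign_mask_type_index (mask_type_t *pool, uint32_t *n_entries,
+                        const mask5_t *m)
+{
+  uint32_t i;
+  for (i = 0; i < *n_entries; i++)
+    if (0 == memcmp (&pool[i].mask, m, sizeof (*m)))
+      {
+        pool[i].refcount++;          /* one more ACE shares this mask */
+        return i;
+      }
+  pool[*n_entries].mask = *m;        /* first ACE with this mask */
+  pool[*n_entries].refcount = 1;
+  return (*n_entries)++;
+}
+```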
+
+Assigning ACLs to an interface
+------------------------------
+
+Once the ACL list is assigned to an interface, or, rather, a new ACL
+is added to the list of the existing ACLs applied to the interface,
+we need to update the bihash accelerating the lookup.
+
+All the entries for the lookups are stored within a single *48_8* bihash,
+which captures the 5-tuple from the packet as well as the miscellaneous
+per-packet information flags, e.g. *l4_valid*, *is_non_first_fragment*,
+and so on. To facilitate the use of the single bihash by all the interfaces,
+the *is_ip6*, *is_input*, *sw_if_index* are part of the key,
+as well as *mask_type_index* - the latter being necessary because
+there can be entries with the same value but different masks, e.g.:
+`permit ::/0, permit ::/128`.
+
+At the moment an ACL is applied to an interface, we need to
+walk the list of *hash_ace_info_t* entries corresponding to that ACL,
+and update the bihash with the keys corresponding to the match
+values in these entries.
+
+The value of the hash match contains the index into a per-*sw_if_index* vector
+of *applied_hash_ace_entry_t* elements, as well as a couple of flags:
+*shadowed* (optimization: if this flag on a matched entry is zero, it means
+we can stop the lookup early and declare a match - see below),
+and *need_portrange_check* - meaning that what matched was a superset
+of the actual match, and we need to perform an extra check.
+
+Also, upon insertion, we must keep in mind there can be
+multiple *applied_hash_ace_entry_t* entries for the same key and must keep
+a list of those. This is necessary to incrementally apply/unapply
+the ACLs as part of the ACL vector: say, two ACLs have
+"permit 2001:db8::1/128 any" - we should be able to retain the entry
+for the second ACL even if we have deleted the first one.
+Also, in case there are two entries with the same key but
+different port ranges, say 0..42 and 142..65535 - we need
+to be able to sequentially match on those if we decide not
+to expand them into individual port-specific entries.
+
+Per-packet lookup
+-----------------
+
+The simple single-packet lookup is defined in
+*multi_acl_match_get_applied_ace_index*, which returns the index
+of the applied hash ACE if there was a match, or ~0 if there wasn't.
+
+The future optimized per-packet lookup may be batched in three phases:
+
+1. Prepare the keys in the per-worker vector by doing a logical AND of the
+   original 5-tuple record with the elements of the mask vector.
+2. Look up the keys in the bihash in a batch manner, collecting the
+   result with the lowest u64 (acl index within vector, ACE index) from
+   the hash lookup value, and performing the list walk if necessary
+   (for port ranges).
+3. Take the action from the ACL record as defined by (ACL#, ACE#) from the
+   resulting lookup winner, or, if no match was found, perform the default deny.
+
+Shadowed/independent/redundant ACEs
+------------------------------------
+
+During the phase of combining multiple ACLs into one rulebase, when they
+are applied to an interface, we can also perform several optimizations.
+
+If a given ACE is a strict subset of another ACE located earlier in the linear
+search order, we can ignore this ACE completely - because by definition
+it will never match. We will call such an ACE *redundant*.
Here is an example:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48   (B)
+deny 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+A bit more formally, we can define this relationship of an ACE A to ACE B as:
+
+```
+redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
+                          && contains(dstB, dstA) && is_after(A, B))
+```
+
+Here "contains" denotes an operation on the sets defined by
+the protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
+respectively, returning true if all the elements represented by
+the second argument are represented by the first argument. The "is_after"
+is true if A is located below B in the ruleset.
+
+If a given ACE does not intersect at all with any other ACE
+in front of it, we can mark it as such.
+
+Then during the sequence of lookups a successful hit on this ACE means
+we do not need to look up other mask combinations - thus potentially
+significantly speeding up the match process. Here is an example,
+assuming we have the following ACL:
+
+```
+permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
+```
+
+In this case if we match the second entry, we do not need to check whether
+we have matched the first one - the source addresses are completely
+different. We call such an ACE *independent* of the other.
+
+We can define this as
+
+```
+independent(aceA, aceB) := (!intersect(protoA, protoB) ||
+                            !intersect(srcA, srcB) ||
+                            !intersect(dstA, dstB))
+```
+
+where intersect is defined as an operation returning true if there are
+elements belonging to the sets of both arguments.
+
+If the entry A is neither redundant nor independent from B, and is below
+B in the ruleset, we call such an entry *shadowed* by B; here is an example:
+
+```
+deny tcp 2001:db8:1::/48 2001:db8:2::/48   (B)
+permit 2001:db8:1:1::/64 2001:db8:2:1::/64 (A)
+```
+
+This means the earlier rule "carves out" a subset of A, thus leaving
+a "shadow". (Evidently, the action needs to be different for the shadow
+to have an effect, but for terminology's sake we do not care.)
+
+The more formal definition:
+
+```
+shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
+                        !independent(aceA, aceB) &&
+                        is_after(aceA, aceB)
+```
+
+Using this terminology, any ruleset can be represented as
+a DAG (Directed Acyclic Graph), with the bottom being the implicit
+"deny any", pointing to the set of rules shadowing it or the ones
+it is redundant for.
+
+These rules may in turn be shadowing each other. There are no cycles in
+this graph because of the natural order of the rules - the rule located
+closer to the end of the ruleset can never shadow or make redundant a rule
+higher up.
+
+The optimization this enables is to skip matching certain
+masks on a per-lookup basis: if a given rule has matched,
+the only adjustment that can still happen is a match with one of
+the rules shadowing it.
+
+Also, another avenue for optimization is starting the lookup process
+with the mask type that maximizes the chances of an independent ACE match,
+thus resulting in an ACE lookup being a single hash table hit.
+
+
+Plumbing
+--------
+
+All the new routines are located in a separate file,
+so we can cleanly experiment with a different approach if this
+does not fit all of the use cases.
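+As a complement to the definitions in the previous section, here is a
+minimal C sketch of the contains()/intersect() predicates, restricted
+to address prefixes only (ports and protocol are omitted, and the types
+are illustrative, not the plugin's own):
+
+```
+#include <stdint.h>
+
+typedef struct { uint8_t addr[16]; uint8_t len; } prefix_t;
+
+/* true if the address set covered by a is a superset of that of b */
+static int
+contains (const prefix_t *a, const prefix_t *b)
+{
+  int i;
+  if (a->len > b->len)
+    return 0;                 /* a longer prefix covers fewer addresses */
+  for (i = 0; i < a->len; i++)
+    {
+      uint8_t bit = 0x80 >> (i & 7);
+      if ((a->addr[i >> 3] ^ b->addr[i >> 3]) & bit)
+        return 0;             /* differ within a's significant bits */
+    }
+  return 1;
+}
+
+/* two prefixes share elements iff one contains the other */
+static int
+intersect (const prefix_t *a, const prefix_t *b)
+{
+  return contains (a, b) || contains (b, a);
+}
+```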
+
+The constant-time lookup within the data path has an API with this
+signature:
+
+```
+u8
+multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+                       int is_ip6, int is_input, u32 * acl_match_p,
+                       u32 * rule_match_p, u32 * trace_bitmap)
+```
+
+There should be a new upper-level function with the same signature, which
+will decide whether to use the linear lookup, the constant-time lookup
+implemented by this work, or some other optimization (e.g. keeping a
+cache of the last N lookups).
+
+The calls to the routine doing the preparatory work should happen
+in `acl_add_list()` after creating the linear-lookup structures,
+and the routine doing the preparatory work populating the hashtable
+should be called from `acl_interface_add_del_inout_acl()` or its callees.
+
+The initial implementation will be geared towards looking up a single
+match at a time, with subsequent optimizations possible to look up
+more than one packet at a time.
+
diff --git a/src/plugins/acl/acl_msg_enum.h b/src/plugins/acl/acl_msg_enum.h
new file mode 100644
index 00000000..14d8b48c
--- /dev/null
+++ b/src/plugins/acl/acl_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_msg_enum_h
+#define included_acl_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <acl/acl_all_api_h.h>
+  /* We'll want to know how many message IDs we need... */
+  VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif
diff --git a/src/plugins/acl/acl_multicore_doc.md b/src/plugins/acl/acl_multicore_doc.md
new file mode 100644
index 00000000..b2cf7b9c
--- /dev/null
+++ b/src/plugins/acl/acl_multicore_doc.md
@@ -0,0 +1,349 @@
+Multicore support for ACL plugin {#acl_multicore}
+================================
+
+This captures some considerations and design decisions that I have made,
+both for my own memory later on ("what the hell was I thinking?!?"),
+and for anyone interested to criticize/improve/hack on this code.
+
+One of the factors taken into account while making these decisions
+was the relative emphasis on the multi-thread vs. single-thread
+use cases: the latter is vastly more prevalent. But
+one cannot optimize the single-thread performance without
+having functioning code for multi-thread.
+
+stateless ACLs
+==============
+
+The stateless case trivially parallelizes; the only potential for a
+race between the different threads is during reconfiguration,
+at the time the old ACL being checked is replaced with
+the new ACL.
+
+In case an acl_add_replace is used to replace the rules
+within an existing entry, a reallocation of the `am->acls[X].rules`
+vector will happen, and potentially a change in count.
+
+acl_match_5tuple() has the following code:
+
+```{.c}
+  a = am->acls + acl_index;
+  for (i = 0; i < a->count; i++)
+    {
+      r = a->rules + i;
+      . . .
+```
+
+Ideally we should be immune from a->rules changing,
+but the problem arises if the count changes in flight,
+and the new ruleset is smaller - then we will attempt
+to "match" against freed memory.
+
+This can(?) be solved by replacing the for() with while(),
+so the comparison happens at each iteration.
+
+full_acl_match_5tuple(), which iterates over the list
+of ACLs, is a bit less immune, since it takes the pointer
+to the vector to iterate and keeps a local copy of
+that pointer.
+
+This race can be solved by comparing the
+current pointer to the vector with the source pointer,
+and seeing if there is an (unlikely) change, and if
+there is, returning the "deny" action, or, better,
+restarting the check.
+
+Since the check reloads the ACL list on a per-packet basis,
+there is only a window of opportunity of one packet to
+"match" a packet against an incorrect rule set.
+The workers also do not change anything, only read.
+Therefore, it looks like building special structures
+to ensure that it does not happen at all might not be
+worth it.
+
+At least not until we have a unit-test able to
+reliably catch this condition and test that
+the measures applied are effective. Adding code
+which is impossible to exercise is worse than
+not adding any code at all.
+
+So, I opt for "do-nothing" here for the moment.
+
+reflexive ACLs: single-thread
+=============================
+
+Before we talk multi-thread, it is worth revisiting the
+design of the reflexive ACLs in the plugin, and
+the history of their evolution.
+
+The very first version of the ACL plugin, shipped in
+1701, mostly did the job using the existing components
+and gluing them together. Because it needed to work
+in the bridged forwarding path only, using the L2 classifier
+as an insertion point appeared natural; also, the L2 classifier,
+being a table with sessions, seemed like a good place
+to hold the sessions.
+
+So, the original design had two conceptual nodes:
+one, pointed to by the next_miss from the L2 classifier table,
+was checking the actual ACL and inserting a session into
+the L2 classifier table, and the other one, pointed
+to by the next_match within the specific session rule,
+was checking the existing session. The timing out
+of the existing connections was done in the datapath,
+by periodically calling the aging function.
+
+This decision to use the existing components,
+for all its attractiveness, did bring a few limitations as well:
+
+* L2 classifier is a simple mask-and-value match, with
+a fixed mask across the table. So, sanely supporting IPv6
+packets with extension headers in that framework was impossible.
+
+* There is no way to get backpressure from the L2 classifier
+depending on memory usage. When it runs out of memory,
+it simply crashes the box. When does it run out of memory?
+We don't really know. It depends on how it allocates it.
+
+* Since we need to match the *reflected* traffic,
+we had to create *two* full session entries
+in two different directions, which is quite wasteful memory-wise.
+
+* (showstopper): the L2 classifier runs only in
+the bridged data path, so supporting the routed data path
+would require creating something else entirely different,
+which would mean many more headaches support-wise going forward.
+
+Because of that, I have moved to a different model of
+creating a session-5-tuple from the packet data - once,
+and then doing all the matching just on that 5-tuple.
+
+This has made it possible to add support for skipping IPv6 extension headers.
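+
+As an illustration, here is a minimal standalone sketch of what that
+extension-header walk involves (the plugin's real code lives in
+acl_fill_5tuple() in fa_node.c and consults a bitmap of known extension
+header types, fa_ipv6_known_eh_bitmap; the names and simplifications
+below are mine, not the plugin's):
+
+```{.c}
+#include <stdint.h>
+#include <stddef.h>
+
+/* Extension headers that can precede the L4 header:
+   hop-by-hop (0), routing (43), fragment (44), destination options (60). */
+static int
+ip6_is_ext_hdr (uint8_t proto)
+{
+  return proto == 0 || proto == 43 || proto == 44 || proto == 60;
+}
+
+/* Walk the chain starting at *l4_offset. On success, return the real L4
+   protocol and advance *l4_offset past the last extension header.
+   Return -1 if the chain runs past the end of the packet. */
+static int
+ip6_skip_ext_hdrs (const uint8_t * pkt, size_t len, uint8_t proto,
+                   size_t * l4_offset)
+{
+  while (ip6_is_ext_hdr (proto))
+    {
+      if (*l4_offset + 8 > len)
+        return -1;
+      uint8_t next_hdr = pkt[*l4_offset];
+      /* The fragment header is fixed at 8 bytes; the others carry a
+         length in 8-octet units, not counting the first 8 octets. */
+      size_t ext_len = (proto == 44) ? 8 : 8 * (1 + pkt[*l4_offset + 1]);
+      *l4_offset += ext_len;
+      proto = next_hdr;
+    }
+  return proto;
+}
+```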
+
+Also, this new version started to store the sessions in a dedicated
+bihash-per-interface, with the session key data being
+aligned for the ingress packets, and being mirrored for the
+egress packets. This allows for significant savings in memory,
+because now we need to keep only one copy of the session table per
+interface instead of two, and also to have only ONE node for all the lookups
+(L2/L3 path, in/out, IPv4/IPv6) - significantly reducing the code complexity.
+
+Unfortunately, bihash still has the "lack of backpressure" problem,
+in the sense that if you try to insert too many entries and run out
+of memory in the heap you supplied, you get a crash.
+
+To somewhat work around that, there is a "maximum tested number of sessions"
+value, which tracks the currently inserted sessions in the bihash,
+and if this number is being approached, a more aggressive cleanup
+can happen. If this number is reached, two behaviors are possible:
+
+* attempt to do the stateless ACL matching and permit the packet
+  if it succeeds
+
+* deny the packet
+
+Currently I have opted for the second one, since it allows for
+better defined behavior, and if you have to permit
+the traffic in both directions, why use stateful anyway?
+
+In order to be able to do the cleanup, we need to discriminate between
+the session types, with each session type having its own idle timeout.
+In order to do that, we keep three lists, defined in enum acl_timeout_e:
+ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
+
+The first one is hopefully obvious - it is just all UDP connections.
+They have an idle timeout of 600 seconds.
+
+The second and third are a bit more subtle. TCP is a complicated protocol,
+and we need to tread the fine line between doing too little and doing
+too much, and triggering potential compatibility issues because of
+being a "middlebox".
+
+I decided to split the TCP connections into two classes:
+established, and everything else. "Established" means we have seen
+the SYN and ACK from both sides (with PUSH obviously masked out).
+This is the "active" state of any TCP connection and we would like
+to ensure we do not screw it up. So, the connections in this state
+have the default idle timer of 24 hours.
+
+All the rest of the connections have the idle timeout of 2 minutes
+(inspired by an old value of MSL), based on the observation
+that the states this class represents are usually very short lived.
+
+Once we have these three baskets of connections, it is trivial to
+imagine a simple cleanup mechanism to deal with this: take the
+TCP transient connection that has been hanging around the longest.
+
+It is debatable whether we want to discriminate between the
+different TCP transient connections. Assuming we do FIFO (and
+the lists allow us to do just that), it means a given connection
+on the head of the list has been hanging around for longest.
+Thus, if we are short on resources, we might just go ahead and
+reuse it within the datapath.
+
+This is where we are slowly approaching the question
+"Why in the world have you not used a timer wheel or some such?"
+
+The answer is simple: within the above constraints, it does
+not buy me much.
+
+Also, a timer wheel creates a leaky abstraction with a difficult
+to manage corner case. Which corner case?
+
+We have a set of objects (sessions) with an event that may
+or may not happen (idle timeout timer firing), and a
+necessity to reset the idle timeout when there is
+activity on the session.
+
+In the worst case, where we had 10000 one-packet
+UDP sessions just created 10 minutes ago, we would need
+to deal with a spike of 10000 expired timers.
+
+Of course, if we have active traffic on all
+of these 10000 connections, then we will not have
+to deal with that? Right, but we will still have to deal
+with canceling and requeueing the timers.
+
+In the best possible case, requeueing a timer is
+going to be something along the lines of a linked-list
+removal and reinsertion.
+
+However, keep in mind we already need to classify the
+connections for reuse, so we already have
+the linked lists!
+
+And if we just check these linked lists periodically in
+a FIFO fashion, we can get away with a very simple per-packet operation:
+writing back the timestamp of "now" into the connection structure.
+
+Then rather than requeueing the list on a per-packet or per-frame
+basis, we can defer this action until the time this session
+appears on the head of the FIFO list, and the cleaning
+routine makes the decision about whether to discard
+the session (because the interval since last activity is bigger
+than the idle timeout), or to requeue the session back to
+the end of the list (because the last activity was less
+than idle timeout ago).
+
+So, rather than using the timers, we can simply reuse our classification
+FIFOs, with the following heuristic: do not look at the session that was
+enqueued at time X until X+session_timeout. If we enqueue the sessions
+in the order of their initial activity, then we can simply use the enqueue
+timestamp of the head session as a decision criterion for when we need
+to look at it again for timeout purposes.
+
+Since the number of FIFOs is small, we get a slightly worse check
+performance than with timers, but still O(1).
+
+We seemingly do quite a few "useless" operations of requeueing the items
+back to the tail of the list - but these are the operations we do not
+have to do in the active data path, so overall it is a win.
+
+(Diversion: I believe this problem is congruent to poll vs. epoll or
+events vs. threads, some reading on this subject:
+http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
+
+We can also run a TCP-like scheme for adaptively changing
+the wait period in the routine that deals with the connection timeouts:
+we can attempt to check the connections a couple of times per second
+(same as we would advance the timer wheel), and then if we have requeued
+close to a max-per-quantum number of connections, we can halve the waiting
+interval, and if we did not requeue any, we can slowly increase the waiting
+interval - which at a steady state should stabilize, similarly to what
+the TCP rate does.
+
+reflexive ACLs: multi-thread
+=============================
+
+The single-threaded implementation in 1704 used a separate "cleaner" process
+to deal with the timing out of the connections.
+It is all good and great when you know that there is only a single core
+to run everything on, but the existence of the lists proves to be
+a massive difficulty when it comes to operating from multiple threads.
+
+Initial study shows that with a few assumptions (e.g. that the cleaner running
+in the main thread and the worker have a demarcation point in time where either
+one or the other touches the session in the list) it might be possible to make
+it work, but the resulting trickiness of doing it neatly with all the corner
+cases is quite large.
+
+So, for the multi-threaded scenario, we need to move the connection
+aging back to the same CPU as its creation.
+
+Luckily we can do this with the help of interrupts.
+
+So, the design is as follows: the aging thread (acl_fa_session_cleaner_process)
+periodically fires interrupts to the workers' interrupt nodes
+(acl_fa_worker_session_cleaner_process_node.index),
+using vlib_node_set_interrupt_pending(), and
+the interrupt node acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions(),
+which does the actual job of advancing the lists. Within the actual datapath the only thing we will be
+doing is putting the items onto a FIFO, and updating the last active time on the existing connection.
+
+The one "delicate" part is that the worker for one leg of the connection might be different from
+the worker for another leg of the connection - but even if the "owner" tries to free the connection,
+nothing terrible can happen - worst case the element of the pool (which is nominally free for a short period)
+will get the timestamp updated - same thing about the TCP flags seen.
+
+A slightly trickier issue arises when the packet is initially seen by one worker (thus owned by that worker),
+and the return packet is processed by another worker, which as a result changes
+the class of the connection (e.g. it becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa).
+If the class changes from one with the shorter idle time to the one with the longer idle time,
+then unless we are in the starvation mode where the transient connections are recycled,
+we can simply do nothing and let the normal requeue mechanism kick in. If the class changes from the longer idle
+timer to the shorter idle timer, then we risk keeping the connection around for longer than needed, which
+will affect the resource usage.
+
+One solution to that is to have NxN ring buffers (where N is the number of workers), such that the non-owner
+can signal to the owner the connection# that needs to be requeued out of order.
+
+A simpler solution, though, is to ensure that each FIFO's period is equal to that of the shortest timer.
+This way the resource starvation problem is taken care of, at the expense of some additional work.
+
+This all looks sufficiently nice and simple until a skeleton falls out of the closet:
+sometimes we want to clean the connections en masse before they expire.
+
+There are a few potential scenarios:
+1) removal of an ACL from the interface
+2) removal of an interface
+3) manual action of an operator (in the future).
+
+In order to tackle this, we need to modify the logic which decides whether to requeue the
+connection on the end of the list, or to delete it due to idle timeout:
+
+We define a point in time, and have each worker thread fast-forward through its FIFO,
+in the process deleting the sessions that satisfy the criteria and requeueing the rest.
+
+To keep the appearance simple to the outside world, we still process this as an event
+within the connection cleaner thread, but this event handler does as follows
+(a condensed sketch of this handshake appears below):
+1) it creates the bitmap of the sw_if_index values requested to be cleared
+2) for each worker, it ensures there is no cleanup operation already in progress
+(waiting if there is one), then makes a copy of the bitmap, sets the per-worker
+flag of a cleanup operation, and sends an interrupt
+3) it waits until all cleanup operations have completed.
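+
+In pseudo-C, the main-thread side of steps 1-3 could look roughly like this.
+Only vlib_node_set_interrupt_pending(), pending_clear_sw_if_index_bitmap and
+interrupt_is_needed are names from the description above; the per-worker
+structure, the clear_in_process flag, the cleaner_node_index vector and the
+yield helper are illustrative stand-ins, and the thread indexing is simplified:
+
+```{.c}
+static void wait_one_timer_tick (void);   /* hypothetical suspend/yield helper */
+
+static void
+cleaner_mass_clear_sketch (acl_main_t * am, uword * to_clear_bitmap)
+{
+  int i;
+  for (i = 0; i < vec_len (am->per_worker_data); i++)
+    {
+      acl_fa_per_worker_data_t *pw = &am->per_worker_data[i];
+      /* step 2: wait out any cleanup already in progress on this worker */
+      while (pw->clear_in_process)
+        wait_one_timer_tick ();
+      pw->pending_clear_sw_if_index_bitmap =
+        clib_bitmap_dup (to_clear_bitmap);
+      pw->clear_in_process = 1;     /* set the flag before interrupting */
+      vlib_node_set_interrupt_pending (vlib_mains[i],
+                                       am->cleaner_node_index[i]);
+    }
+  /* step 3: wait for all workers, re-sending interrupts when asked to */
+  for (i = 0; i < vec_len (am->per_worker_data); i++)
+    while (am->per_worker_data[i].clear_in_process)
+      {
+        if (am->per_worker_data[i].interrupt_is_needed)
+          vlib_node_set_interrupt_pending (vlib_mains[i],
+                                           am->cleaner_node_index[i]);
+        wait_one_timer_tick ();
+      }
+}
+```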
+
+Within the worker interrupt node, we check if the "cleanup in progress" flag is set,
+and if it is, we check the "fast forward time" value; if unset, we initialize it to the
+value of "now". We then compare the requested bitmap of sw_if_index values
+(pending_clear_sw_if_index_bitmap) with the bitmap of sw_if_index values that this worker deals with.
+
+(we set the bit in the latter bitmap every time we enqueue a packet onto a FIFO
+- serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
+
+If the result of ANDing these two bitmaps is zero, then we can clear the
+cleanup-in-progress flag and return.
+Else we kick off a quantum of cleanup, and make sure we get another interrupt ASAP
+if that cleanup operation returns non-zero, meaning there is more work to do.
+When that operation returns zero, everything has been processed, so we clear the
+"cleanup-in-progress" flag and zeroize the bitmap of sw_if_index-es requested to be cleaned.
+
+The interrupt node signals its wish to receive an interrupt ASAP by setting the interrupt_is_needed
+flag within the per-worker structure. The main thread, while waiting for the
+cleanup operation to complete, checks whether there is a request for an interrupt,
+and if there is, it sends one.
+
+This approach gives us a way to mass-clean the connections which reuses the code
+of the regular idle connection cleanup.
+
+One potential inefficiency is the bitmap values set by the session insertion
+in the data path - nothing ever clears them.
+
+So, if one rearranges the interface placement across the workers, then the cleanups
+will cause some unnecessary work.
+For now, we consider it an acceptable limitation. It can be resolved by having another
+per-worker bitmap, which, when set, would trigger the clearing of the bits in the
+serviced_sw_if_index_bitmap.
+
+=== the end ===
+
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
new file mode 100644
index 00000000..abb9643e
--- /dev/null
+++ b/src/plugins/acl/acl_test.c
@@ -0,0 +1,1219 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * acl_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <arpa/inet.h>
+
+#define __plugin_msg_base acl_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun +#include <acl/acl_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <acl/acl_all_api_h.h> +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} acl_test_main_t; + +acl_test_main_t acl_test_main; + +#define foreach_standard_reply_retval_handler \ +_(acl_del_reply) \ +_(acl_interface_add_del_reply) \ +_(macip_acl_interface_add_del_reply) \ +_(acl_interface_set_acl_list_reply) \ +_(macip_acl_del_reply) + +#define foreach_reply_retval_aclindex_handler \ +_(acl_add_replace_reply) \ +_(macip_acl_add_reply) \ +_(macip_acl_add_replace_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = acl_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = acl_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + clib_warning("ACL index: %d", ntohl(mp->acl_index)); \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_reply_retval_aclindex_handler; +#undef _ + +/* These two ought to be in a library somewhere but they aren't */ +static uword +my_unformat_mac_address (unformat_input_t * input, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return unformat (input, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5]); +} + +static u8 * +my_format_mac_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + + + +static void vl_api_acl_plugin_get_version_reply_t_handler + (vl_api_acl_plugin_get_version_reply_t * mp) + { + vat_main_t * vam = acl_test_main.vat_main; + clib_warning("ACL plugin version: %d.%d", ntohl(mp->major), ntohl(mp->minor)); + vam->result_ready = 1; + } + +static void vl_api_acl_interface_list_details_t_handler + (vl_api_acl_interface_list_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + u8 *out = 0; + vl_api_acl_interface_list_details_t_endian(mp); + out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); + out = format(out, " input "); + for(i=0; i<mp->count; i++) { + if (i == mp->n_input) + out = format(out, "\n output "); + out = format(out, "%d ", ntohl (mp->acls[i])); + } + out = format(out, "\n"); + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + + +static inline u8 * +vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a) +{ + int af = a->is_ipv6 ? AF_INET6 : AF_INET; + u8 src[INET6_ADDRSTRLEN]; + u8 dst[INET6_ADDRSTRLEN]; + inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src)); + inet_ntop(af, a->dst_ip_addr, (void *)dst, sizeof(dst)); + + out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d", + a->is_ipv6 ? 
"ipv6" : "ipv4", a->is_permit, + src, a->src_ip_prefix_len, + dst, a->dst_ip_prefix_len, + a->proto, + a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, + a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, + a->tcp_flags_value, a->tcp_flags_mask); + return(out); +} + + + +static void vl_api_acl_details_t_handler + (vl_api_acl_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + vl_api_acl_details_t_endian(mp); + u8 *out = 0; + out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + for(i=0; i<mp->count; i++) { + out = format(out, " "); + out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]); + out = format(out, "%s\n", i<mp->count-1 ? "," : ""); + } + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + +static inline u8 * +vl_api_macip_acl_rule_t_pretty_format (u8 *out, vl_api_macip_acl_rule_t * a) +{ + int af = a->is_ipv6 ? AF_INET6 : AF_INET; + u8 src[INET6_ADDRSTRLEN]; + inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src)); + + out = format(out, "%s action %d ip %s/%d mac %U mask %U", + a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit, + src, a->src_ip_prefix_len, + my_format_mac_address, a->src_mac, + my_format_mac_address, a->src_mac_mask); + return(out); +} + + +static void vl_api_macip_acl_details_t_handler + (vl_api_macip_acl_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + vl_api_macip_acl_details_t_endian(mp); + u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + for(i=0; i<mp->count; i++) { + out = format(out, " "); + out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]); + out = format(out, "%s\n", i<mp->count-1 ? "," : ""); + } + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + +static void vl_api_macip_acl_interface_get_reply_t_handler + (vl_api_macip_acl_interface_get_reply_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + u8 *out = format(0, "sw_if_index with MACIP ACL count: %d\n", ntohl(mp->count)); + for(i=0; i<ntohl(mp->count); i++) { + out = format(out, " macip_acl_interface_add_del sw_if_index %d add acl %d\n", i, ntohl(mp->acls[i])); + } + out = format(out, "\n"); + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + +static void vl_api_acl_plugin_control_ping_reply_t_handler + (vl_api_acl_plugin_control_ping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(ACL_ADD_REPLACE_REPLY, acl_add_replace_reply) \ +_(ACL_DEL_REPLY, acl_del_reply) \ +_(ACL_INTERFACE_ADD_DEL_REPLY, acl_interface_add_del_reply) \ +_(ACL_INTERFACE_SET_ACL_LIST_REPLY, acl_interface_set_acl_list_reply) \ +_(ACL_INTERFACE_LIST_DETAILS, acl_interface_list_details) \ +_(ACL_DETAILS, acl_details) \ +_(MACIP_ACL_ADD_REPLY, macip_acl_add_reply) \ +_(MACIP_ACL_ADD_REPLACE_REPLY, macip_acl_add_replace_reply) \ +_(MACIP_ACL_DEL_REPLY, macip_acl_del_reply) \ +_(MACIP_ACL_DETAILS, macip_acl_details) \ +_(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \ +_(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \ +_(ACL_PLUGIN_CONTROL_PING_REPLY, acl_plugin_control_ping_reply) \ +_(ACL_PLUGIN_GET_VERSION_REPLY, 
acl_plugin_get_version_reply) + +static int api_acl_plugin_get_version (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + vl_api_acl_plugin_get_version_t * mp; + u32 msg_size = sizeof(*mp); + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_PLUGIN_GET_VERSION + sm->msg_id_base); + mp->client_index = vam->my_client_index; + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_macip_acl_interface_get (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + vl_api_acl_plugin_get_version_t * mp; + u32 msg_size = sizeof(*mp); + int ret; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_GET + sm->msg_id_base); + mp->client_index = vam->my_client_index; + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +#define vec_validate_acl_rules(v, idx) \ + do { \ + if (vec_len(v) < idx+1) { \ + vec_validate(v, idx); \ + v[idx].is_permit = 0x1; \ + v[idx].srcport_or_icmptype_last = 0xffff; \ + v[idx].dstport_or_icmpcode_last = 0xffff; \ + } \ + } while (0) + + +static int api_acl_add_replace (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + vl_api_acl_add_replace_t * mp; + u32 acl_index = ~0; + u32 msg_size = sizeof (*mp); /* without the rules */ + + vl_api_acl_rule_t *rules = 0; + int rule_idx = 0; + int n_rules = 0; + int n_rules_override = -1; + u32 proto = 0; + u32 port1 = 0; + u32 port2 = 0; + u32 action = 0; + u32 tcpflags, tcpmask; + u32 src_prefix_length = 0, dst_prefix_length = 0; + ip4_address_t src_v4address, dst_v4address; + ip6_address_t src_v6address, dst_v6address; + u8 *tag = 0; + int ret; + + if (!unformat (i, "%d", &acl_index)) { + /* Just assume -1 */ + } + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ipv6")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "ipv4")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "permit+reflect")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 2; + } + else if (unformat (i, "permit")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 1; + } + else if (unformat (i, "deny")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 0; + } + else if (unformat (i, "count %d", &n_rules_override)) + { + /* we will use this later */ + } + else if (unformat (i, "action %d", &action)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = action; + } + else if (unformat (i, "src %U/%d", + unformat_ip4_address, &src_v4address, &src_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "src %U/%d", + unformat_ip6_address, &src_v6address, &src_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "dst %U/%d", + unformat_ip4_address, &dst_v4address, &dst_prefix_length)) + { + 
vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].dst_ip_addr, &dst_v4address, 4); + rules[rule_idx].dst_ip_prefix_len = dst_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "dst %U/%d", + unformat_ip6_address, &dst_v6address, &dst_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].dst_ip_addr, &dst_v6address, 16); + rules[rule_idx].dst_ip_prefix_len = dst_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "sport %d-%d", &port1, &port2)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].srcport_or_icmptype_first = htons(port1); + rules[rule_idx].srcport_or_icmptype_last = htons(port2); + } + else if (unformat (i, "sport %d", &port1)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].srcport_or_icmptype_first = htons(port1); + rules[rule_idx].srcport_or_icmptype_last = htons(port1); + } + else if (unformat (i, "dport %d-%d", &port1, &port2)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].dstport_or_icmpcode_first = htons(port1); + rules[rule_idx].dstport_or_icmpcode_last = htons(port2); + } + else if (unformat (i, "dport %d", &port1)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].dstport_or_icmpcode_first = htons(port1); + rules[rule_idx].dstport_or_icmpcode_last = htons(port1); + } + else if (unformat (i, "tcpflags %d %d", &tcpflags, &tcpmask)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].tcp_flags_value = tcpflags; + rules[rule_idx].tcp_flags_mask = tcpmask; + } + else if (unformat (i, "tcpflags %d mask %d", &tcpflags, &tcpmask)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].tcp_flags_value = tcpflags; + rules[rule_idx].tcp_flags_mask = tcpmask; + } + else if (unformat (i, "proto %d", &proto)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].proto = proto; + } + else if (unformat (i, "tag %s", &tag)) + { + } + else if (unformat (i, ",")) + { + rule_idx++; + vec_validate_acl_rules(rules, rule_idx); + } + else + break; + } + + /* Construct the API message */ + vam->result_ready = 0; + + if(rules) + n_rules = vec_len(rules); + else + n_rules = 0; + + if (n_rules_override >= 0) + n_rules = n_rules_override; + + msg_size += n_rules*sizeof(rules[0]); + + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_ADD_REPLACE + sm->msg_id_base); + mp->client_index = vam->my_client_index; + if ((n_rules > 0) && rules) + clib_memcpy(mp->r, rules, n_rules*sizeof (vl_api_acl_rule_t)); + if (tag) + { + if (vec_len(tag) >= sizeof(mp->tag)) + { + tag[sizeof(mp->tag)-1] = 0; + _vec_len(tag) = sizeof(mp->tag); + } + clib_memcpy(mp->tag, tag, vec_len(tag)); + vec_free(tag); + } + mp->acl_index = ntohl(acl_index); + mp->count = htonl(n_rules); + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_acl_del (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + vl_api_acl_del_t * mp; + u32 acl_index = ~0; + int ret; + + if (!unformat (i, "%d", &acl_index)) { + errmsg ("missing acl index\n"); + return -99; + } + + /* Construct the API message */ + M(ACL_DEL, mp); + mp->acl_index = ntohl(acl_index); + + /* send it... */ + S(mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static int api_macip_acl_del (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + vl_api_acl_del_t * mp; + u32 acl_index = ~0; + int ret; + + if (!unformat (i, "%d", &acl_index)) { + errmsg ("missing acl index\n"); + return -99; + } + + /* Construct the API message */ + M(MACIP_ACL_DEL, mp); + mp->acl_index = ntohl(acl_index); + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_acl_interface_add_del (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + vl_api_acl_interface_add_del_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u8 is_input = 0; + u8 is_add = 0; + int ret; + +// acl_interface_add_del <intfc> | sw_if_index <if-idx> acl_index <acl-idx> [out] [del] + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "add")) + is_add = 1; + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "acl %d", &acl_index)) + ; + else if (unformat (i, "input")) + is_input = 1; + else if (unformat (i, "output")) + is_input = 0; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + if (acl_index == ~0) { + errmsg ("missing ACL index\n"); + return -99; + } + + + + /* Construct the API message */ + M(ACL_INTERFACE_ADD_DEL, mp); + mp->acl_index = ntohl(acl_index); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + mp->is_input = is_input; + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_macip_acl_interface_add_del (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + vl_api_macip_acl_interface_add_del_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u8 is_add = 0; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "add")) + is_add = 1; + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "acl %d", &acl_index)) + ; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + if (acl_index == ~0) { + errmsg ("missing ACL index\n"); + return -99; + } + + + + /* Construct the API message */ + M(MACIP_ACL_INTERFACE_ADD_DEL, mp); + mp->acl_index = ntohl(acl_index); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + + /* send it... */ + S(mp); + + /* Wait for a reply... 
*/ + W (ret); + return ret; +} + +static int api_acl_interface_set_acl_list (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + vl_api_acl_interface_set_acl_list_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u32 *inacls = 0; + u32 *outacls = 0; + u8 is_input = 0; + int ret; + +// acl_interface_set_acl_list <intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list] + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "%d", &acl_index)) + { + if(is_input) + vec_add1(inacls, htonl(acl_index)); + else + vec_add1(outacls, htonl(acl_index)); + } + else if (unformat (i, "acl %d", &acl_index)) + ; + else if (unformat (i, "input")) + is_input = 1; + else if (unformat (i, "output")) + is_input = 0; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + /* Construct the API message */ + M2(ACL_INTERFACE_SET_ACL_LIST, mp, sizeof(u32) * (vec_len(inacls) + vec_len(outacls))); + mp->sw_if_index = ntohl(sw_if_index); + mp->n_input = vec_len(inacls); + mp->count = vec_len(inacls) + vec_len(outacls); + vec_append(inacls, outacls); + if (vec_len(inacls) > 0) + clib_memcpy(mp->acls, inacls, vec_len(inacls)*sizeof(u32)); + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static void +api_acl_send_control_ping(vat_main_t *vam) +{ + vl_api_acl_plugin_control_ping_t *mp_ping; + + M(ACL_PLUGIN_CONTROL_PING, mp_ping); + S(mp_ping); +} + + +static int api_acl_interface_list_dump (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + u32 sw_if_index = ~0; + vl_api_acl_interface_list_dump_t * mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else + break; + } + + /* Construct the API message */ + M(ACL_INTERFACE_LIST_DUMP, mp); + mp->sw_if_index = ntohl (sw_if_index); + + /* send it... */ + S(mp); + + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_acl_dump (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + u32 acl_index = ~0; + vl_api_acl_dump_t * mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + /* Construct the API message */ + M(ACL_DUMP, mp); + mp->acl_index = ntohl (acl_index); + + /* send it... */ + S(mp); + + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_macip_acl_dump (vat_main_t * vam) +{ + unformat_input_t * i = vam->input; + u32 acl_index = ~0; + vl_api_acl_dump_t * mp; + int ret; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + /* Construct the API message */ + M(MACIP_ACL_DUMP, mp); + mp->acl_index = ntohl (acl_index); + + /* send it... 
*/ + S(mp); + + /* Use control ping for synchronization */ + api_acl_send_control_ping(vam); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +#define vec_validate_macip_acl_rules(v, idx) \ + do { \ + if (vec_len(v) < idx+1) { \ + vec_validate(v, idx); \ + v[idx].is_permit = 0x1; \ + } \ + } while (0) + + +static int api_macip_acl_add (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + vl_api_macip_acl_add_t * mp; + u32 msg_size = sizeof (*mp); /* without the rules */ + + vl_api_macip_acl_rule_t *rules = 0; + int rule_idx = 0; + int n_rules = 0; + int n_rules_override = -1; + u32 src_prefix_length = 0; + u32 action = 0; + ip4_address_t src_v4address; + ip6_address_t src_v6address; + u8 src_mac[6]; + u8 *tag = 0; + u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + int ret; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ipv6")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "ipv4")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "permit")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 1; + } + else if (unformat (i, "deny")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 0; + } + else if (unformat (i, "count %d", &n_rules_override)) + { + /* we will use this later */ + } + else if (unformat (i, "action %d", &action)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = action; + } + else if (unformat (i, "ip %U/%d", + unformat_ip4_address, &src_v4address, &src_prefix_length) || + unformat (i, "ip %U", + unformat_ip4_address, &src_v4address)) + { + if (src_prefix_length == 0) + src_prefix_length = 32; + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "src")) + { + /* Everything in MACIP is "source" but allow this verbosity */ + } + else if (unformat (i, "ip %U/%d", + unformat_ip6_address, &src_v6address, &src_prefix_length) || + unformat (i, "ip %U", + unformat_ip6_address, &src_v6address)) + { + if (src_prefix_length == 0) + src_prefix_length = 128; + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "mac %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac, &src_mac, 6); + memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6); + } + else if (unformat (i, "mask %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6); + } + else if (unformat (i, "tag %s", &tag)) + { + } + else if (unformat (i, ",")) + { + rule_idx++; + vec_validate_macip_acl_rules(rules, rule_idx); + } + else + break; + } + + /* Construct the API message */ + vam->result_ready = 0; + + if(rules) + n_rules = vec_len(rules); + + if (n_rules_override >= 0) + n_rules = n_rules_override; + + msg_size += n_rules*sizeof(rules[0]); + + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD + sm->msg_id_base); + 
mp->client_index = vam->my_client_index; + if ((n_rules > 0) && rules) + clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0])); + if (tag) + { + if (vec_len(tag) >= sizeof(mp->tag)) + { + tag[sizeof(mp->tag)-1] = 0; + _vec_len(tag) = sizeof(mp->tag); + } + clib_memcpy(mp->tag, tag, vec_len(tag)); + vec_free(tag); + } + + mp->count = htonl(n_rules); + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +static int api_macip_acl_add_replace (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + vl_api_macip_acl_add_replace_t * mp; + u32 acl_index = ~0; + u32 msg_size = sizeof (*mp); /* without the rules */ + + vl_api_macip_acl_rule_t *rules = 0; + int rule_idx = 0; + int n_rules = 0; + int n_rules_override = -1; + u32 src_prefix_length = 0; + u32 action = 0; + ip4_address_t src_v4address; + ip6_address_t src_v6address; + u8 src_mac[6]; + u8 *tag = 0; + u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + int ret; + + if (!unformat (i, "%d", &acl_index)) { + /* Just assume -1 */ + } + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ipv6")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "ipv4")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "permit")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 1; + } + else if (unformat (i, "deny")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 0; + } + else if (unformat (i, "count %d", &n_rules_override)) + { + /* we will use this later */ + } + else if (unformat (i, "action %d", &action)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = action; + } + else if (unformat (i, "ip %U/%d", + unformat_ip4_address, &src_v4address, &src_prefix_length) || + unformat (i, "ip %U", + unformat_ip4_address, &src_v4address)) + { + if (src_prefix_length == 0) + src_prefix_length = 32; + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "src")) + { + /* Everything in MACIP is "source" but allow this verbosity */ + } + else if (unformat (i, "ip %U/%d", + unformat_ip6_address, &src_v6address, &src_prefix_length) || + unformat (i, "ip %U", + unformat_ip6_address, &src_v6address)) + { + if (src_prefix_length == 0) + src_prefix_length = 128; + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "mac %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac, &src_mac, 6); + memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6); + } + else if (unformat (i, "mask %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6); + } + else if (unformat (i, "tag %s", &tag)) + { + } + else if (unformat (i, ",")) + { + rule_idx++; + vec_validate_macip_acl_rules(rules, rule_idx); + } + else + break; + } + + if (!rules) + { + errmsg ("rule/s required\n"); + return -99; + } + /* Construct the API message */ + vam->result_ready = 0; 
+ + if(rules) + n_rules = vec_len(rules); + + if (n_rules_override >= 0) + n_rules = n_rules_override; + + msg_size += n_rules*sizeof(rules[0]); + + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD_REPLACE + sm->msg_id_base); + mp->client_index = vam->my_client_index; + if ((n_rules > 0) && rules) + clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0])); + if (tag) + { + if (vec_len(tag) >= sizeof(mp->tag)) + { + tag[sizeof(mp->tag)-1] = 0; + _vec_len(tag) = sizeof(mp->tag); + } + clib_memcpy(mp->tag, tag, vec_len(tag)); + vec_free(tag); + } + + mp->acl_index = ntohl(acl_index); + mp->count = htonl(n_rules); + + /* send it... */ + S(mp); + + /* Wait for a reply... */ + W (ret); + return ret; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(acl_plugin_get_version, "") \ +_(acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|permit+reflect|deny|action N> [src IP/plen] [dst IP/plen] [sport X-Y] [dport X-Y] [proto P] [tcpflags FL MASK], ... , ...") \ +_(acl_del, "<acl-idx>") \ +_(acl_dump, "[<acl-idx>]") \ +_(acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] [input|output] acl <acl-idx>") \ +_(acl_interface_set_acl_list, "<intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]") \ +_(acl_interface_list_dump, "[<intfc> | sw_if_index <if-idx>]") \ +_(macip_acl_add, "...") \ +_(macip_acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|deny|action N> [count <count>] [src] ip <ipaddress/[plen]> mac <mac> mask <mac_mask>, ... , ...") \ +_(macip_acl_del, "<acl-idx>")\ +_(macip_acl_dump, "[<acl-idx>]") \ +_(macip_acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] acl <acl-idx>") \ +_(macip_acl_interface_get, "") + + +static +void acl_vat_api_hookup (vat_main_t *vam) +{ + acl_test_main_t * sm = &acl_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + acl_test_main_t * sm = &acl_test_main; + u8 * name; + + sm->vat_main = vam; + + name = format (0, "acl_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~0) + acl_vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c new file mode 100644 index 00000000..a4ba967d --- /dev/null +++ b/src/plugins/acl/fa_node.c @@ -0,0 +1,1874 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stddef.h>
+#include <netinet/in.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <acl/acl.h>
+#include <vppinfra/bihash_40_8.h>
+
+#include <vppinfra/bihash_template.h>
+#include <vppinfra/bihash_template.c>
+
+#include "fa_node.h"
+#include "hash_lookup.h"
+
+typedef struct
+{
+  u32 next_index;
+  u32 sw_if_index;
+  u32 match_acl_in_index;
+  u32 match_rule_index;
+  u64 packet_info[6];
+  u32 trace_bitmap;
+  u8 action;
+} acl_fa_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_acl_fa_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  acl_fa_trace_t *t = va_arg (*args, acl_fa_trace_t *);
+
+  s =
+    format (s,
+            "acl-plugin: sw_if_index %d, next index %d, action: %d, match: acl %d rule %d trace_bits %08x\n"
+            "  pkt info %016llx %016llx %016llx %016llx %016llx %016llx",
+            t->sw_if_index, t->next_index, t->action, t->match_acl_in_index,
+            t->match_rule_index, t->trace_bitmap,
+            t->packet_info[0], t->packet_info[1], t->packet_info[2],
+            t->packet_info[3], t->packet_info[4], t->packet_info[5]);
+  return s;
+}
+
+/* *INDENT-OFF* */
+#define foreach_acl_fa_error \
+_(ACL_DROP, "ACL deny packets")  \
+_(ACL_PERMIT, "ACL permit packets")  \
+_(ACL_NEW_SESSION, "new sessions added") \
+_(ACL_EXIST_SESSION, "existing session packets") \
+_(ACL_CHECK, "checked packets") \
+_(ACL_RESTART_SESSION_TIMER, "restart session timer") \
+_(ACL_TOO_MANY_SESSIONS, "too many sessions to add new") \
+/* end  of errors */
+
+typedef enum
+{
+#define _(sym,str) ACL_FA_ERROR_##sym,
+  foreach_acl_fa_error
+#undef _
+    ACL_FA_N_ERROR,
+} acl_fa_error_t;
+
+static char *acl_fa_error_strings[] = {
+#define _(sym,string) string,
+  foreach_acl_fa_error
+#undef _
+};
+/* *INDENT-ON* */
+
+static void *
+get_ptr_to_offset (vlib_buffer_t * b0, int offset)
+{
+  u8 *p = vlib_buffer_get_current (b0) + offset;
+  return p;
+}
+
+
+static int
+fa_acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2,
+                   int prefixlen, int is_ip6)
+{
+  if (prefixlen == 0)
+    {
+      /* match any always succeeds */
+      return 1;
+    }
+  if (is_ip6)
+    {
+      if (memcmp (addr1, addr2, prefixlen / 8))
+        {
+          /* If the starting full bytes do not match, no point in bit-twiddling further */
+          return 0;
+        }
+      if (prefixlen % 8)
+        {
+          /* the partial byte sits right after the prefixlen / 8 full bytes */
+          u8 b1 = *((u8 *) addr1 + prefixlen / 8);
+          u8 b2 = *((u8 *) addr2 + prefixlen / 8);
+          u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
+          return (b1 & mask0) == b2;
+        }
+      else
+        {
+          /* The prefix fits into an integer number of bytes, so nothing left to do */
+          return 1;
+        }
+    }
+  else
+    {
+      uint32_t a1 = ntohl (addr1->ip4.as_u32);
+      uint32_t a2 = ntohl (addr2->ip4.as_u32);
+      uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
+      return (a1 & mask0) == a2;
+    }
+}
+
+static int
+fa_acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
+{
+  return ((port >= port_first) && (port <= port_last));
+}
+
+int
+single_acl_match_5tuple (acl_main_t * am, u32 acl_index,
fa_5tuple_t * pkt_5tuple, + int is_ip6, u8 * r_action, u32 * r_acl_match_p, + u32 * r_rule_match_p, u32 * trace_bitmap) +{ + int i; + acl_list_t *a; + acl_rule_t *r; + + if (pool_is_free_index (am->acls, acl_index)) + { + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = -1; + /* the ACL does not exist but is used for policy. Block traffic. */ + return 0; + } + a = am->acls + acl_index; + for (i = 0; i < a->count; i++) + { + r = a->rules + i; + if (is_ip6 != r->is_ipv6) + { + continue; + } + if (!fa_acl_match_addr + (&pkt_5tuple->addr[1], &r->dst, r->dst_prefixlen, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt dst addr %U match rule addr %U/%d", + acl_index, i, format_ip46_address, &pkt_5tuple->addr[1], + IP46_TYPE_ANY, format_ip46_address, &r->dst, IP46_TYPE_ANY, + r->dst_prefixlen); +#endif + + if (!fa_acl_match_addr + (&pkt_5tuple->addr[0], &r->src, r->src_prefixlen, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt src addr %U match rule addr %U/%d", + acl_index, i, format_ip46_address, &pkt_5tuple->addr[0], + IP46_TYPE_ANY, format_ip46_address, &r->src, IP46_TYPE_ANY, + r->src_prefixlen); + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d trying to match pkt proto %d with rule %d", + acl_index, i, pkt_5tuple->l4.proto, r->proto); +#endif + if (r->proto) + { + if (pkt_5tuple->l4.proto != r->proto) + continue; + + if (PREDICT_FALSE (pkt_5tuple->pkt.is_nonfirst_fragment && + am->l4_match_nonfirst_fragment)) + { + /* non-initial fragment with frag match configured - match this rule */ + *trace_bitmap |= 0x80000000; + *r_action = r->is_permit; + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = i; + return 1; + } + + /* A sanity check just to ensure we are about to match the ports extracted from the packet */ + if (PREDICT_FALSE (!pkt_5tuple->pkt.l4_valid)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt proto %d match rule %d", + acl_index, i, pkt_5tuple->l4.proto, r->proto); +#endif + + if (!fa_acl_match_port + (pkt_5tuple->l4.port[0], r->src_port_or_type_first, + r->src_port_or_type_last, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt sport %d match rule [%d..%d]", + acl_index, i, pkt_5tuple->l4.port[0], r->src_port_or_type_first, + r->src_port_or_type_last); +#endif + + if (!fa_acl_match_port + (pkt_5tuple->l4.port[1], r->dst_port_or_code_first, + r->dst_port_or_code_last, is_ip6)) + continue; + +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG acl %d rule %d pkt dport %d match rule [%d..%d]", + acl_index, i, pkt_5tuple->l4.port[1], r->dst_port_or_code_first, + r->dst_port_or_code_last); +#endif + if (pkt_5tuple->pkt.tcp_flags_valid + && ((pkt_5tuple->pkt.tcp_flags & r->tcp_flags_mask) != + r->tcp_flags_value)) + continue; + } + /* everything matches! 
*/ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG acl %d rule %d FULL-MATCH, action %d", + acl_index, i, r->is_permit); +#endif + *r_action = r->is_permit; + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = i; + return 1; + } + return 0; +} + +static u8 +linear_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + int i; + u32 *acl_vector; + u8 action = 0; + + if (is_input) + { + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + acl_vector = am->input_acl_vec_by_sw_if_index[sw_if_index]; + } + else + { + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + acl_vector = am->output_acl_vec_by_sw_if_index[sw_if_index]; + } + for (i = 0; i < vec_len (acl_vector); i++) + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: Trying to match ACL: %d", + acl_vector[i]); +#endif + if (single_acl_match_5tuple + (am, acl_vector[i], pkt_5tuple, is_ip6, &action, + acl_match_p, rule_match_p, trace_bitmap)) + { + return action; + } + } + if (vec_len (acl_vector) > 0) + { + /* If there are ACLs and none matched, deny by default */ + return 0; + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: No ACL on sw_if_index %d", sw_if_index); +#endif + /* Deny by default. If there are no ACLs defined we should not be here. */ + return 0; +} + +static u8 +multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2, + int is_ip6, int is_input, u32 * acl_match_p, + u32 * rule_match_p, u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + if (am->use_hash_acl_matching) { + return hash_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, + is_input, acl_match_p, rule_match_p, trace_bitmap); + } else { + return linear_multi_acl_match_5tuple(sw_if_index, pkt_5tuple, is_l2, is_ip6, + is_input, acl_match_p, rule_match_p, trace_bitmap); + } +} + +static int +offset_within_packet (vlib_buffer_t * b0, int offset) +{ + /* For the purposes of this code, "within" means we have at least 8 bytes after it */ + return (offset <= (b0->current_length - 8)); +} + +static void +acl_fill_5tuple (acl_main_t * am, vlib_buffer_t * b0, int is_ip6, + int is_input, int is_l2_path, fa_5tuple_t * p5tuple_pkt) +{ + int l3_offset = ethernet_buffer_header_size(b0); + int l4_offset; + u16 ports[2]; + u16 proto; + /* IP4 and IP6 protocol numbers of ICMP */ + static u8 icmp_protos[] = { IP_PROTOCOL_ICMP, IP_PROTOCOL_ICMP6 }; + + if (is_input && !(is_l2_path)) + { + l3_offset = 0; + } + + /* key[0..3] contains src/dst address and is cleared/set below */ + /* Remainder of the key and per-packet non-key data */ + p5tuple_pkt->kv.key[4] = 0; + p5tuple_pkt->kv.value = 0; + + if (is_ip6) + { + clib_memcpy (&p5tuple_pkt->addr, + get_ptr_to_offset (b0, + offsetof (ip6_header_t, + src_address) + l3_offset), + sizeof (p5tuple_pkt->addr)); + proto = + *(u8 *) get_ptr_to_offset (b0, + offsetof (ip6_header_t, + protocol) + l3_offset); + l4_offset = l3_offset + sizeof (ip6_header_t); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: proto: %d, l4_offset: %d", proto, + l4_offset); +#endif + /* IP6 EH handling is here, increment l4_offset if needs to, update the proto */ + int need_skip_eh = clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); + if (PREDICT_FALSE (need_skip_eh)) + { + while (need_skip_eh && offset_within_packet (b0, l4_offset)) + { + /* Fragment header 
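+	       (per RFC 8200: byte 0 holds the next header; the upper 13 bits
+	       of the 16-bit word at offset 2 hold the fragment offset in
+	       8-octet units, hence the ntohs() and ">> 3" below)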
needs special handling */ + if (PREDICT_FALSE(ACL_EH_FRAGMENT == proto)) + { + proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); + u16 frag_offset; + clib_memcpy (&frag_offset, get_ptr_to_offset (b0, 2 + l4_offset), sizeof(frag_offset)); + frag_offset = ntohs(frag_offset) >> 3; + if (frag_offset) + { + p5tuple_pkt->pkt.is_nonfirst_fragment = 1; + /* invalidate L4 offset so we don't try to find L4 info */ + l4_offset += b0->current_length; + } + else + { + /* First fragment: skip the frag header and move on. */ + l4_offset += 8; + } + } + else + { + u8 nwords = *(u8 *) get_ptr_to_offset (b0, 1 + l4_offset); + proto = *(u8 *) get_ptr_to_offset (b0, l4_offset); + l4_offset += 8 * (1 + (u16) nwords); + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_DBG: new proto: %d, new offset: %d", + proto, l4_offset); +#endif + need_skip_eh = + clib_bitmap_get (am->fa_ipv6_known_eh_bitmap, proto); + } + } + } + else + { + p5tuple_pkt->kv.key[0] = 0; + p5tuple_pkt->kv.key[1] = 0; + p5tuple_pkt->kv.key[2] = 0; + p5tuple_pkt->kv.key[3] = 0; + clib_memcpy (&p5tuple_pkt->addr[0].ip4, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + src_address) + l3_offset), + sizeof (p5tuple_pkt->addr[0].ip4)); + clib_memcpy (&p5tuple_pkt->addr[1].ip4, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + dst_address) + l3_offset), + sizeof (p5tuple_pkt->addr[1].ip4)); + proto = + *(u8 *) get_ptr_to_offset (b0, + offsetof (ip4_header_t, + protocol) + l3_offset); + l4_offset = l3_offset + sizeof (ip4_header_t); + u16 flags_and_fragment_offset; + clib_memcpy (&flags_and_fragment_offset, + get_ptr_to_offset (b0, + offsetof (ip4_header_t, + flags_and_fragment_offset)) + l3_offset, + sizeof(flags_and_fragment_offset)); + flags_and_fragment_offset = ntohs (flags_and_fragment_offset); + + /* non-initial fragments have non-zero offset */ + if ((PREDICT_FALSE(0xfff & flags_and_fragment_offset))) + { + p5tuple_pkt->pkt.is_nonfirst_fragment = 1; + /* invalidate L4 offset so we don't try to find L4 info */ + l4_offset += b0->current_length; + } + + } + p5tuple_pkt->l4.proto = proto; + if (PREDICT_TRUE (offset_within_packet (b0, l4_offset))) + { + p5tuple_pkt->pkt.l4_valid = 1; + if (icmp_protos[is_ip6] == proto) + { + /* type */ + p5tuple_pkt->l4.port[0] = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (icmp46_header_t, + type)); + /* code */ + p5tuple_pkt->l4.port[1] = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (icmp46_header_t, + code)); + } + else if ((IPPROTO_TCP == proto) || (IPPROTO_UDP == proto)) + { + clib_memcpy (&ports, + get_ptr_to_offset (b0, + l4_offset + offsetof (tcp_header_t, + src_port)), + sizeof (ports)); + p5tuple_pkt->l4.port[0] = ntohs (ports[0]); + p5tuple_pkt->l4.port[1] = ntohs (ports[1]); + + p5tuple_pkt->pkt.tcp_flags = + *(u8 *) get_ptr_to_offset (b0, + l4_offset + offsetof (tcp_header_t, + flags)); + p5tuple_pkt->pkt.tcp_flags_valid = (proto == IPPROTO_TCP); + } + /* + * FIXME: rather than the above conditional, here could + * be a nice generic mechanism to extract two L4 values: + * + * have a per-protocol array of 4 elements like this: + * u8 offset; to take the byte from, off L4 header + * u8 mask; to mask it with, before storing + * + * this way we can describe UDP, TCP and ICMP[46] semantics, + * and add a sort of FPM-type behavior for other protocols. + * + * Of course, is it faster ? and is it needed ? 
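+	 *
+	 * A possible sketch of that idea (purely hypothetical, the names
+	 * below do not exist in this plugin):
+	 *
+	 *   typedef struct { u8 offset; u8 mask; } l4_extract_t;
+	 *   static const l4_extract_t l4_extract_by_proto[256][4];
+	 *
+	 *   // per packet, for each of the up-to-4 descriptors:
+	 *   // val = mask & *(u8 *) get_ptr_to_offset (b0, l4_offset + offset);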
+ * + */ + } +} + + +/* Session keys match the packets received, and mirror the packets sent */ +static void +acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt, + fa_5tuple_t * p5tuple_sess) +{ + int src_index = is_input ? 0 : 1; + int dst_index = is_input ? 1 : 0; + p5tuple_sess->addr[src_index] = p5tuple_pkt->addr[0]; + p5tuple_sess->addr[dst_index] = p5tuple_pkt->addr[1]; + p5tuple_sess->l4.as_u64 = p5tuple_pkt->l4.as_u64; + p5tuple_sess->l4.port[src_index] = p5tuple_pkt->l4.port[0]; + p5tuple_sess->l4.port[dst_index] = p5tuple_pkt->l4.port[1]; +} + + +static int +acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0) +{ + return am->fa_sessions_hash_is_initialized; +} + +static int +acl_fa_ifc_has_in_acl (acl_main_t * am, int sw_if_index0) +{ + int it_has = clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index0); + return it_has; +} + +static int +acl_fa_ifc_has_out_acl (acl_main_t * am, int sw_if_index0) +{ + int it_has = clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index0); + return it_has; +} + + +static int +fa_session_get_timeout_type (acl_main_t * am, fa_session_t * sess) +{ + /* seen both SYNs and ACKs but not FINs means we are in establshed state */ + u16 masked_flags = + sess->tcp_flags_seen.as_u16 & ((TCP_FLAGS_RSTFINACKSYN << 8) + + TCP_FLAGS_RSTFINACKSYN); + switch (sess->info.l4.proto) + { + case IPPROTO_TCP: + if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags) + { + return ACL_TIMEOUT_TCP_IDLE; + } + else + { + return ACL_TIMEOUT_TCP_TRANSIENT; + } + break; + case IPPROTO_UDP: + return ACL_TIMEOUT_UDP_IDLE; + break; + default: + return ACL_TIMEOUT_UDP_IDLE; + } +} + + +static u64 +fa_session_get_shortest_timeout(acl_main_t * am) +{ + int timeout_type; + u64 timeout = ~0LL; + for(timeout_type = 0; timeout_type < ACL_N_TIMEOUTS; timeout_type++) { + if (timeout > am->session_timeout_sec[timeout_type]) { + timeout = am->session_timeout_sec[timeout_type]; + } + } + return timeout; +} + +/* + * Get the timeout of the session in a list since its enqueue time. + */ + +static u64 +fa_session_get_list_timeout (acl_main_t * am, fa_session_t * sess) +{ + u64 timeout = am->vlib_main->clib_time.clocks_per_second; + /* + * we have the shortest possible timeout type in all the lists + * (see README-multicore for the rationale) + */ + timeout *= fa_session_get_shortest_timeout(am); + return timeout; +} + +/* + * Get the idle timeout of a session. + */ + +static u64 +fa_session_get_timeout (acl_main_t * am, fa_session_t * sess) +{ + u64 timeout = am->vlib_main->clib_time.clocks_per_second; + int timeout_type = fa_session_get_timeout_type (am, sess); + timeout *= am->session_timeout_sec[timeout_type]; + return timeout; +} + +static void +acl_fa_verify_init_sessions (acl_main_t * am) +{ + if (!am->fa_sessions_hash_is_initialized) { + u16 wk; + /* Allocate the per-worker sessions pools */ + for (wk = 0; wk < vec_len (am->per_worker_data); wk++) { + acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk]; + pool_alloc_aligned(pw->fa_sessions_pool, am->fa_conn_table_max_entries, CLIB_CACHE_LINE_BYTES); + } + + /* ... 
and the interface session hash table */ + BV (clib_bihash_init) (&am->fa_sessions_hash, + "ACL plugin FA session bihash", + am->fa_conn_table_hash_num_buckets, + am->fa_conn_table_hash_memory_size); + am->fa_sessions_hash_is_initialized = 1; + } +} + +static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + fa_session_t *sess = pool_is_free_index (pw->fa_sessions_pool, session_index) ? 0 : pool_elt_at_index(pw->fa_sessions_pool, session_index); + return sess; +} + +static inline int is_valid_session_ptr(acl_main_t *am, u16 thread_index, fa_session_t *sess) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + return ((sess != 0) && ((sess - pw->fa_sessions_pool) < pool_len(pw->fa_sessions_pool))); +} + +static void +acl_fa_conn_list_add_session (acl_main_t * am, fa_full_session_id_t sess_id, u64 now) +{ + fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index); + u8 list_id = fa_session_get_timeout_type(am, sess); + uword thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + /* the retrieved session thread index must be necessarily the same as the one in the key */ + ASSERT (sess->thread_index == sess_id.thread_index); + /* the retrieved session thread index must be the same as current thread */ + ASSERT (sess->thread_index == thread_index); + sess->link_enqueue_time = now; + sess->link_list_id = list_id; + sess->link_next_idx = ~0; + sess->link_prev_idx = pw->fa_conn_list_tail[list_id]; + if (~0 != pw->fa_conn_list_tail[list_id]) { + fa_session_t *prev_sess = get_session_ptr(am, thread_index, pw->fa_conn_list_tail[list_id]); + prev_sess->link_next_idx = sess_id.session_index; + /* We should never try to link with a session on another thread */ + ASSERT(prev_sess->thread_index == sess->thread_index); + } + pw->fa_conn_list_tail[list_id] = sess_id.session_index; + pw->serviced_sw_if_index_bitmap = clib_bitmap_set(pw->serviced_sw_if_index_bitmap, sess->sw_if_index, 1); + + if (~0 == pw->fa_conn_list_head[list_id]) { + pw->fa_conn_list_head[list_id] = sess_id.session_index; + } +} + +static int +acl_fa_conn_list_delete_session (acl_main_t *am, fa_full_session_id_t sess_id) +{ + uword thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + if (thread_index != sess_id.thread_index) { + /* If another thread attempts to delete the session, fail it. 
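+       (the per-worker connection lists are written only by the owning
+       worker thread; the caller, acl_fa_restart_timer_for_session() below,
+       treats the 0 return as "leave the session where it is")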
*/
+#ifdef FA_NODE_VERBOSE_DEBUG
+    clib_warning("thread id in key %d != curr thread index, not deleting",
+		 sess_id.thread_index);
+#endif
+    return 0;
+  }
+  fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+  /* we should never try to delete the session with another thread index */
+  ASSERT(sess->thread_index == thread_index);
+  if (~0 != sess->link_prev_idx) {
+    fa_session_t *prev_sess = get_session_ptr(am, thread_index, sess->link_prev_idx);
+    /* the previous session must be in the same list as this one */
+    ASSERT(prev_sess->link_list_id == sess->link_list_id);
+    prev_sess->link_next_idx = sess->link_next_idx;
+  }
+  if (~0 != sess->link_next_idx) {
+    fa_session_t *next_sess = get_session_ptr(am, thread_index, sess->link_next_idx);
+    /* The next session must be in the same list as the one we are deleting */
+    ASSERT(next_sess->link_list_id == sess->link_list_id);
+    next_sess->link_prev_idx = sess->link_prev_idx;
+  }
+  if (pw->fa_conn_list_head[sess->link_list_id] == sess_id.session_index) {
+    pw->fa_conn_list_head[sess->link_list_id] = sess->link_next_idx;
+  }
+  if (pw->fa_conn_list_tail[sess->link_list_id] == sess_id.session_index) {
+    pw->fa_conn_list_tail[sess->link_list_id] = sess->link_prev_idx;
+  }
+  return 1;
+}
+
+static int
+acl_fa_restart_timer_for_session (acl_main_t * am, u64 now, fa_full_session_id_t sess_id)
+{
+  if (acl_fa_conn_list_delete_session(am, sess_id)) {
+    acl_fa_conn_list_add_session(am, sess_id, now);
+    return 1;
+  } else {
+    /*
+     * Our thread does not own this connection, so we cannot delete
+     * the session. To avoid the complicated signaling, we simply
+     * pick the list waiting time to be the shortest of the timeouts.
+     * This way we do not have to do anything special, and let
+     * the regular requeue check take care of everything.
+     */
+    return 0;
+  }
+}
+
+
+static u8
+acl_fa_track_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
+		      fa_session_t * sess, fa_5tuple_t * pkt_5tuple)
+{
+  sess->last_active_time = now;
+  if (pkt_5tuple->pkt.tcp_flags_valid)
+    {
+      sess->tcp_flags_seen.as_u8[is_input] |= pkt_5tuple->pkt.tcp_flags;
+    }
+  return 3;
+}
+
+
+static void
+acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t sess_id)
+{
+  void *oldheap = clib_mem_set_heap(am->acl_mheap);
+  fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
+  ASSERT(sess->thread_index == os_get_thread_index ());
+  BV (clib_bihash_add_del) (&am->fa_sessions_hash,
+			    &sess->info.kv, 0);
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
+  pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
+  /* Deleting from timer structures not needed,
+     as the caller must have dealt with the timers.
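+     (both call sites, acl_fa_check_idle_sessions() and
+     acl_fa_try_recycle_session(), unlink the session with
+     acl_fa_conn_list_delete_session() before calling us)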
*/
+  vec_validate (pw->fa_session_dels_by_sw_if_index, sw_if_index);
+  clib_mem_set_heap (oldheap);
+  pw->fa_session_dels_by_sw_if_index[sw_if_index]++;
+  clib_smp_atomic_add(&am->fa_session_total_dels, 1);
+}
+
+static int
+acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
+{
+  u64 curr_sess_count;
+  curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
+  return (curr_sess_count < am->fa_conn_table_max_entries);
+}
+
+static u64
+acl_fa_get_list_head_expiry_time(acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, int timeout_type)
+{
+  fa_session_t *sess = get_session_ptr(am, thread_index, pw->fa_conn_list_head[timeout_type]);
+  /*
+   * We cannot check just the index here, because in between the worker thread
+   * might dequeue the connection from the head just as we are about to check it.
+   */
+  if (!is_valid_session_ptr(am, thread_index, sess)) {
+    return ~0LL; // infinity.
+  } else {
+    u64 timeout_time =
+      sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+    return timeout_time;
+  }
+}
+
+static int
+acl_fa_conn_time_to_check (acl_main_t *am, acl_fa_per_worker_data_t *pw, u64 now, u16 thread_index, u32 session_index)
+{
+  fa_session_t *sess = get_session_ptr(am, thread_index, session_index);
+  u64 timeout_time =
+    sess->link_enqueue_time + fa_session_get_list_timeout (am, sess);
+  return (timeout_time < now) || (sess->link_enqueue_time <= pw->swipe_end_time);
+}
+
+/*
+ * see if there are sessions ready to be checked,
+ * do the maintenance (requeue or delete), and
+ * return the total number of sessions reclaimed.
+ */
+static int
+acl_fa_check_idle_sessions(acl_main_t *am, u16 thread_index, u64 now)
+{
+  acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index];
+  fa_full_session_id_t fsid;
+  fsid.thread_index = thread_index;
+  int total_expired = 0;
+
+  {
+    u8 tt = 0;
+    for(tt = 0; tt < ACL_N_TIMEOUTS; tt++) {
+      while((vec_len(pw->expired) < am->fa_max_deleted_sessions_per_interval)
+	    && (~0 != pw->fa_conn_list_head[tt])
+	    && (acl_fa_conn_time_to_check(am, pw, now, thread_index,
+					  pw->fa_conn_list_head[tt]))) {
+	fsid.session_index = pw->fa_conn_list_head[tt];
+	vec_add1(pw->expired, fsid.session_index);
+	acl_fa_conn_list_delete_session(am, fsid);
+      }
+    }
+  }
+
+  u32 *psid = NULL;
+  vec_foreach (psid, pw->expired)
+    {
+      fsid.session_index = *psid;
+      if (!pool_is_free_index (pw->fa_sessions_pool, fsid.session_index))
+	{
+	  fa_session_t *sess = get_session_ptr(am, thread_index, fsid.session_index);
+	  u32 sw_if_index = sess->sw_if_index;
+	  u64 sess_timeout_time =
+	    sess->last_active_time + fa_session_get_timeout (am, sess);
+	  if ((now < sess_timeout_time) && (0 == clib_bitmap_get(pw->pending_clear_sw_if_index_bitmap, sw_if_index)))
+	    {
+#ifdef FA_NODE_VERBOSE_DEBUG
+	      clib_warning ("ACL_FA_NODE_CLEAN: Restarting timer for session %d",
+			    (int) fsid.session_index);
+#endif
+	      /* There was activity on the session, so the idle timeout
+	         has not passed. Enqueue for another time period.
*/ + + acl_fa_conn_list_add_session(am, fsid, now); + pw->cnt_session_timer_restarted++; + } + else + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL_FA_NODE_CLEAN: Deleting session %d", + (int) session_index); +#endif + acl_fa_delete_session (am, sw_if_index, fsid); + pw->cnt_deleted_sessions++; + } + } + else + { + pw->cnt_already_deleted_sessions++; + } + } + total_expired = vec_len(pw->expired); + /* zero out the vector which we have acted on */ + if (pw->expired) + _vec_len (pw->expired) = 0; + /* if we were advancing and reached the end + * (no more sessions to recycle), reset the fast-forward timestamp */ + + if (pw->swipe_end_time && 0 == total_expired) + pw->swipe_end_time = 0; + return (total_expired); +} + +always_inline void +acl_fa_try_recycle_session (acl_main_t * am, int is_input, u16 thread_index, u32 sw_if_index) +{ + /* try to recycle a TCP transient session */ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + u8 timeout_type = ACL_TIMEOUT_TCP_TRANSIENT; + fa_full_session_id_t sess_id; + sess_id.session_index = pw->fa_conn_list_head[timeout_type]; + if (~0 != sess_id.session_index) { + sess_id.thread_index = thread_index; + acl_fa_conn_list_delete_session(am, sess_id); + acl_fa_delete_session(am, sw_if_index, sess_id); + } +} + +static fa_session_t * +acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now, + fa_5tuple_t * p5tuple) +{ + clib_bihash_kv_40_8_t *pkv = &p5tuple->kv; + clib_bihash_kv_40_8_t kv; + fa_full_session_id_t f_sess_id; + uword thread_index = os_get_thread_index(); + void *oldheap = clib_mem_set_heap(am->acl_mheap); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + + f_sess_id.thread_index = thread_index; + fa_session_t *sess; + + pool_get_aligned (pw->fa_sessions_pool, sess, CLIB_CACHE_LINE_BYTES); + f_sess_id.session_index = sess - pw->fa_sessions_pool; + + kv.key[0] = pkv->key[0]; + kv.key[1] = pkv->key[1]; + kv.key[2] = pkv->key[2]; + kv.key[3] = pkv->key[3]; + kv.key[4] = pkv->key[4]; + kv.value = f_sess_id.as_u64; + + memcpy (sess, pkv, sizeof (pkv->key)); + sess->last_active_time = now; + sess->sw_if_index = sw_if_index; + sess->tcp_flags_seen.as_u16 = 0; + sess->thread_index = thread_index; + sess->link_list_id = ~0; + sess->link_prev_idx = ~0; + sess->link_next_idx = ~0; + + + + ASSERT(am->fa_sessions_hash_is_initialized == 1); + BV (clib_bihash_add_del) (&am->fa_sessions_hash, + &kv, 1); + acl_fa_conn_list_add_session(am, f_sess_id, now); + + vec_validate (pw->fa_session_adds_by_sw_if_index, sw_if_index); + clib_mem_set_heap (oldheap); + pw->fa_session_adds_by_sw_if_index[sw_if_index]++; + clib_smp_atomic_add(&am->fa_session_total_adds, 1); + return sess; +} + +static int +acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple, + clib_bihash_kv_40_8_t * pvalue_sess) +{ + return (BV (clib_bihash_search) + (&am->fa_sessions_hash, &p5tuple->kv, + pvalue_sess) == 0); +} + + +always_inline uword +acl_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip6, + int is_input, int is_l2_path, u32 * l2_feat_next_node_index, + vlib_node_registration_t * acl_fa_node) +{ + u32 n_left_from, *from, *to_next; + acl_fa_next_t next_index; + u32 pkts_acl_checked = 0; + u32 pkts_new_session = 0; + u32 pkts_exist_session = 0; + u32 pkts_acl_permit = 0; + u32 pkts_restart_session_timer = 0; + u32 trace_bitmap = 0; + acl_main_t *am = &acl_main; + fa_5tuple_t fa_5tuple, kv_sess; + clib_bihash_kv_40_8_t value_sess; + vlib_node_runtime_t 
*error_node; + u64 now = clib_cpu_time_now (); + uword thread_index = os_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + error_node = vlib_node_get_runtime (vm, acl_fa_node->index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = 0; + u8 action = 0; + u32 sw_if_index0; + int acl_check_needed = 1; + u32 match_acl_in_index = ~0; + u32 match_rule_index = ~0; + u8 error0 = 0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + if (is_input) + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + else + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + + /* + * Extract the L3/L4 matching info into a 5-tuple structure, + * then create a session key whose layout is independent on forward or reverse + * direction of the packet. + */ + + acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple); + fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff; + acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess); + fa_5tuple.pkt.sw_if_index = sw_if_index0; + fa_5tuple.pkt.is_ip6 = is_ip6; + fa_5tuple.pkt.is_input = is_input; + fa_5tuple.pkt.mask_type_index_lsb = ~0; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_DBG: session 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx", + kv_sess.kv.key[0], kv_sess.kv.key[1], kv_sess.kv.key[2], + kv_sess.kv.key[3], kv_sess.kv.key[4], kv_sess.kv.value); + clib_warning + ("ACL_FA_NODE_DBG: packet 5-tuple %016llx %016llx %016llx %016llx %016llx : %016llx", + fa_5tuple.kv.key[0], fa_5tuple.kv.key[1], fa_5tuple.kv.key[2], + fa_5tuple.kv.key[3], fa_5tuple.kv.key[4], fa_5tuple.kv.value); +#endif + + /* Try to match an existing session first */ + + if (acl_fa_ifc_has_sessions (am, sw_if_index0)) + { + if (acl_fa_find_session + (am, sw_if_index0, &kv_sess, &value_sess)) + { + trace_bitmap |= 0x80000000; + error0 = ACL_FA_ERROR_ACL_EXIST_SESSION; + fa_full_session_id_t f_sess_id; + + f_sess_id.as_u64 = value_sess.value; + ASSERT(f_sess_id.thread_index < vec_len(vlib_mains)); + + fa_session_t *sess = get_session_ptr(am, f_sess_id.thread_index, f_sess_id.session_index); + int old_timeout_type = + fa_session_get_timeout_type (am, sess); + action = + acl_fa_track_session (am, is_input, sw_if_index0, now, + sess, &fa_5tuple); + /* expose the session id to the tracer */ + match_rule_index = f_sess_id.session_index; + int new_timeout_type = + fa_session_get_timeout_type (am, sess); + acl_check_needed = 0; + pkts_exist_session += 1; + /* Tracking might have changed the session timeout type, e.g. from transient to established */ + if (PREDICT_FALSE (old_timeout_type != new_timeout_type)) + { + acl_fa_restart_timer_for_session (am, now, f_sess_id); + pkts_restart_session_timer++; + trace_bitmap |= + 0x00010000 + ((0xff & old_timeout_type) << 8) + + (0xff & new_timeout_type); + } + /* + * I estimate the likelihood to be very low - the VPP needs + * to have >64K interfaces to start with and then on + * exactly 64K indices apart needs to be exactly the same + * 5-tuple... Anyway, since this probability is nonzero - + * print an error and drop the unlucky packet. 
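+		   * (only the low 16 bits of the sw_if_index are stored in
+		   * the session key, as l4.lsb_of_sw_if_index =
+		   * sw_if_index & 0xffff, so two interfaces can alias only
+		   * when their indices are equal modulo 65536.)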
+ * If this shows up in real world, we would need to bump + * the hash key length. + */ + if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) { + clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!"); + acl_check_needed = 0; + action = 0; + } + } + } + + if (acl_check_needed) + { + action = + multi_acl_match_5tuple (sw_if_index0, &fa_5tuple, is_l2_path, + is_ip6, is_input, &match_acl_in_index, + &match_rule_index, &trace_bitmap); + error0 = action; + if (1 == action) + pkts_acl_permit += 1; + if (2 == action) + { + if (!acl_fa_can_add_session (am, is_input, sw_if_index0)) + acl_fa_try_recycle_session (am, is_input, thread_index, sw_if_index0); + + if (acl_fa_can_add_session (am, is_input, sw_if_index0)) + { + fa_session_t *sess = acl_fa_add_session (am, is_input, sw_if_index0, now, + &kv_sess); + acl_fa_track_session (am, is_input, sw_if_index0, now, + sess, &fa_5tuple); + pkts_new_session += 1; + } + else + { + action = 0; + error0 = ACL_FA_ERROR_ACL_TOO_MANY_SESSIONS; + } + } + } + + + + if (action > 0) + { + if (is_l2_path) + next0 = vnet_l2_feature_next (b0, l2_feat_next_node_index, 0); + else + vnet_feature_next (sw_if_index0, &next0, b0); + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + acl_fa_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->match_acl_in_index = match_acl_in_index; + t->match_rule_index = match_rule_index; + t->packet_info[0] = fa_5tuple.kv.key[0]; + t->packet_info[1] = fa_5tuple.kv.key[1]; + t->packet_info[2] = fa_5tuple.kv.key[2]; + t->packet_info[3] = fa_5tuple.kv.key[3]; + t->packet_info[4] = fa_5tuple.kv.key[4]; + t->packet_info[5] = fa_5tuple.kv.value; + t->action = action; + t->trace_bitmap = trace_bitmap; + } + + next0 = next0 < node->n_next_nodes ? 
next0 : 0; + if (0 == next0) + b0->error = error_node->errors[error0]; + + pkts_acl_checked += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, bi0, + next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, acl_fa_node->index, + ACL_FA_ERROR_ACL_CHECK, pkts_acl_checked); + vlib_node_increment_counter (vm, acl_fa_node->index, + ACL_FA_ERROR_ACL_PERMIT, pkts_acl_permit); + vlib_node_increment_counter (vm, acl_fa_node->index, + ACL_FA_ERROR_ACL_NEW_SESSION, + pkts_new_session); + vlib_node_increment_counter (vm, acl_fa_node->index, + ACL_FA_ERROR_ACL_EXIST_SESSION, + pkts_exist_session); + vlib_node_increment_counter (vm, acl_fa_node->index, + ACL_FA_ERROR_ACL_RESTART_SESSION_TIMER, + pkts_restart_session_timer); + return frame->n_vectors; +} + + +vlib_node_registration_t acl_in_l2_ip6_node; +static uword +acl_in_ip6_l2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + acl_main_t *am = &acl_main; + return acl_fa_node_fn (vm, node, frame, 1, 1, 1, + am->fa_acl_in_ip6_l2_node_feat_next_node_index, + &acl_in_l2_ip6_node); +} + +vlib_node_registration_t acl_in_l2_ip4_node; +static uword +acl_in_ip4_l2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + acl_main_t *am = &acl_main; + return acl_fa_node_fn (vm, node, frame, 0, 1, 1, + am->fa_acl_in_ip4_l2_node_feat_next_node_index, + &acl_in_l2_ip4_node); +} + +vlib_node_registration_t acl_out_l2_ip6_node; +static uword +acl_out_ip6_l2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + acl_main_t *am = &acl_main; + return acl_fa_node_fn (vm, node, frame, 1, 0, 1, + am->fa_acl_out_ip6_l2_node_feat_next_node_index, + &acl_out_l2_ip6_node); +} + +vlib_node_registration_t acl_out_l2_ip4_node; +static uword +acl_out_ip4_l2_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + acl_main_t *am = &acl_main; + return acl_fa_node_fn (vm, node, frame, 0, 0, 1, + am->fa_acl_out_ip4_l2_node_feat_next_node_index, + &acl_out_l2_ip4_node); +} + + +/**** L3 processing path nodes ****/ + + +vlib_node_registration_t acl_in_fa_ip6_node; +static uword +acl_in_ip6_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return acl_fa_node_fn (vm, node, frame, 1, 1, 0, 0, &acl_in_fa_ip6_node); +} + +vlib_node_registration_t acl_in_fa_ip4_node; +static uword +acl_in_ip4_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return acl_fa_node_fn (vm, node, frame, 0, 1, 0, 0, &acl_in_fa_ip4_node); +} + +vlib_node_registration_t acl_out_fa_ip6_node; +static uword +acl_out_ip6_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return acl_fa_node_fn (vm, node, frame, 1, 0, 0, 0, &acl_out_fa_ip6_node); +} + +vlib_node_registration_t acl_out_fa_ip4_node; +static uword +acl_out_ip4_fa_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return acl_fa_node_fn (vm, node, frame, 0, 0, 0, 0, &acl_out_fa_ip4_node); +} + +/* + * This process ensures the connection cleanup happens every so often + * even in absence of traffic, as well as provides general orchestration + * for requests like connection deletion on a given sw_if_index. 
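+ * (the latter arrives as the ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX event
+ * handled below; a sw_if_index of ~0 means "clear all interfaces")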
+ */ + + +/* *INDENT-OFF* */ +#define foreach_acl_fa_cleaner_error \ +_(UNKNOWN_EVENT, "unknown event received") \ +/* end of errors */ + +typedef enum +{ +#define _(sym,str) ACL_FA_CLEANER_ERROR_##sym, + foreach_acl_fa_cleaner_error +#undef _ + ACL_FA_CLEANER_N_ERROR, +} acl_fa_cleaner_error_t; + +static char *acl_fa_cleaner_error_strings[] = { +#define _(sym,string) string, + foreach_acl_fa_cleaner_error +#undef _ +}; + +/* *INDENT-ON* */ + +static vlib_node_registration_t acl_fa_session_cleaner_process_node; +static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node; + +/* + * Per-worker thread interrupt-driven cleaner thread + * to clean idle connections if there are no packets + */ +static uword +acl_fa_worker_conn_cleaner_process(vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + acl_main_t *am = &acl_main; + u64 now = clib_cpu_time_now (); + u16 thread_index = os_get_thread_index (); + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + int num_expired; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("\nacl_fa_worker_conn_cleaner: thread index %d now %lu\n\n", thread_index, now); +#endif + /* allow another interrupt to be queued */ + pw->interrupt_is_pending = 0; + if (pw->clear_in_process) { + if (0 == pw->swipe_end_time) { + /* + * Someone has just set the flag to start clearing. + * we do this by combing through the connections up to a "time T" + * which is now, and requeueing everything except the expired + * connections and those matching the interface(s) being cleared. + */ + + /* + * first filter the sw_if_index bitmap that they want from us, by + * a bitmap of sw_if_index for which we actually have connections. + */ + if ((pw->pending_clear_sw_if_index_bitmap == 0) + || (pw->serviced_sw_if_index_bitmap == 0)) { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: someone tried to call clear, but one of the bitmaps are empty"); +#endif + clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: (before and) swiping sw-if-index bitmap: %U, my serviced bitmap %U", + format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, + format_bitmap_hex, pw->serviced_sw_if_index_bitmap); +#endif + pw->pending_clear_sw_if_index_bitmap = clib_bitmap_and(pw->pending_clear_sw_if_index_bitmap, + pw->serviced_sw_if_index_bitmap); + } + + if (clib_bitmap_is_zero(pw->pending_clear_sw_if_index_bitmap)) { + /* if the cross-section is a zero vector, no need to do anything. */ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER: clearing done - nothing to do"); +#endif + pw->clear_in_process = 0; + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: swiping sw-if-index bitmap: %U, my serviced bitmap %U", + format_bitmap_hex, pw->pending_clear_sw_if_index_bitmap, + format_bitmap_hex, pw->serviced_sw_if_index_bitmap); +#endif + /* swipe through the connection lists until enqueue timestamps become above "now" */ + pw->swipe_end_time = now; + } + } + } + num_expired = acl_fa_check_idle_sessions(am, thread_index, now); + // clib_warning("WORKER-CLEAR: checked %d sessions (clear_in_progress: %d)", num_expired, pw->clear_in_process); + if (pw->clear_in_process) { + if (0 == num_expired) { + /* we were clearing but we could not process any more connections. time to stop. 
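+	 (every session enqueued up to swipe_end_time has by now been
+	 either requeued with a fresh enqueue time or deleted)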
*/ + clib_bitmap_zero(pw->pending_clear_sw_if_index_bitmap); + pw->clear_in_process = 0; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER: clearing done, all done"); +#endif + } else { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("WORKER-CLEAR: more work to do, raising interrupt"); +#endif + /* should continue clearing.. So could they please sent an interrupt again? */ + pw->interrupt_is_needed = 1; + } + } else { + if (num_expired >= am->fa_max_deleted_sessions_per_interval) { + /* there was too much work, we should get an interrupt ASAP */ + pw->interrupt_is_needed = 1; + pw->interrupt_is_unwanted = 0; + } else if (num_expired <= am->fa_min_deleted_sessions_per_interval) { + /* signal that they should trigger us less */ + pw->interrupt_is_needed = 0; + pw->interrupt_is_unwanted = 1; + } else { + /* the current rate of interrupts is ok */ + pw->interrupt_is_needed = 0; + pw->interrupt_is_unwanted = 0; + } + } + pw->interrupt_generation = am->fa_interrupt_generation; + return 0; +} + +static void +send_one_worker_interrupt (vlib_main_t * vm, acl_main_t *am, int thread_index) +{ + acl_fa_per_worker_data_t *pw = &am->per_worker_data[thread_index]; + if (!pw->interrupt_is_pending) { + pw->interrupt_is_pending = 1; + vlib_node_set_interrupt_pending (vlib_mains[thread_index], + acl_fa_worker_session_cleaner_process_node.index); + /* if the interrupt was requested, mark that done. */ + /* pw->interrupt_is_needed = 0; */ + } +} + +static void +send_interrupts_to_workers (vlib_main_t * vm, acl_main_t *am) +{ + int i; + /* Can't use vec_len(am->per_worker_data) since the threads might not have come up yet; */ + int n_threads = vec_len(vlib_mains); + for (i = n_threads > 1 ? 1 : 0; i < n_threads; i++) { + send_one_worker_interrupt(vm, am, i); + } +} + +/* centralized process to drive per-worker cleaners */ +static uword +acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + acl_main_t *am = &acl_main; + u64 now; + f64 cpu_cps = vm->clib_time.clocks_per_second; + u64 next_expire; + /* We should check if there are connections to clean up - at least twice a second */ + u64 max_timer_wait_interval = cpu_cps / 2; + uword event_type, *event_data = 0; + acl_fa_per_worker_data_t *pw0; + + am->fa_current_cleaner_timer_wait_interval = max_timer_wait_interval; + am->fa_cleaner_node_index = acl_fa_session_cleaner_process_node.index; + am->fa_interrupt_generation = 1; + while (1) + { + now = clib_cpu_time_now (); + next_expire = now + am->fa_current_cleaner_timer_wait_interval; + int has_pending_conns = 0; + u16 ti; + u8 tt; + + /* + * walk over all per-thread list heads of different timeouts, + * and see if there are any connections pending. + * If there aren't - we do not need to wake up until the + * worker code signals that it has added a connection. + * + * Also, while we are at it, calculate the earliest we need to wake up. 
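+       *
+       * Roughly, in the names used in this file:
+       *   head_expiry = link_enqueue_time
+       *                 + clocks_per_second * min(session_timeout_sec[*]);
+       *   sleep until min(now + fa_current_cleaner_timer_wait_interval,
+       *                   min(head_expiry over all per-worker lists));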
+ */ + for(ti = 0; ti < vec_len(vlib_mains); ti++) { + if (ti >= vec_len(am->per_worker_data)) { + continue; + } + acl_fa_per_worker_data_t *pw = &am->per_worker_data[ti]; + for(tt = 0; tt < vec_len(pw->fa_conn_list_head); tt++) { + u64 head_expiry = acl_fa_get_list_head_expiry_time(am, pw, now, ti, tt); + if ((head_expiry < next_expire) && !pw->interrupt_is_pending) { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("Head expiry: %lu, now: %lu, next_expire: %lu (worker: %d, tt: %d)", head_expiry, now, next_expire, ti, tt); +#endif + next_expire = head_expiry; + } + if (~0 != pw->fa_conn_list_head[tt]) { + has_pending_conns = 1; + } + } + } + + /* If no pending connections and no ACL applied then no point in timing out */ + if (!has_pending_conns && (0 == am->fa_total_enabled_count)) + { + am->fa_cleaner_cnt_wait_without_timeout++; + (void) vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + } + else + { + f64 timeout = ((i64) next_expire - (i64) now) / cpu_cps; + if (timeout <= 0) + { + /* skip waiting altogether */ + event_type = ~0; + } + else + { + am->fa_cleaner_cnt_wait_with_timeout++; + (void) vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + } + } + + switch (event_type) + { + case ~0: + /* nothing to do */ + break; + case ACL_FA_CLEANER_RESCHEDULE: + /* Nothing to do. */ + break; + case ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: + { + uword *clear_sw_if_index_bitmap = 0; + uword *sw_if_index0; + int clear_all = 0; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX received"); +#endif + vec_foreach (sw_if_index0, event_data) + { + am->fa_cleaner_cnt_delete_by_sw_index++; +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning + ("ACL_FA_NODE_CLEAN: ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX: %d", + *sw_if_index0); +#endif + if (*sw_if_index0 == ~0) + { + clear_all = 1; + } + else + { + if (!pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, *sw_if_index0)) + { + clear_sw_if_index_bitmap = clib_bitmap_set(clear_sw_if_index_bitmap, *sw_if_index0, 1); + } + } + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX bitmap: %U", format_bitmap_hex, clear_sw_if_index_bitmap); +#endif + vec_foreach(pw0, am->per_worker_data) { + CLIB_MEMORY_BARRIER (); + while (pw0->clear_in_process) { + CLIB_MEMORY_BARRIER (); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: waiting previous cleaning cycle to finish on %d...", pw0 - am->per_worker_data); +#endif + vlib_process_suspend(vm, 0.0001); + if (pw0->interrupt_is_needed) { + send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); + } + } + if (pw0->clear_in_process) { + clib_warning("ERROR-BUG! 
Could not initiate cleaning on worker because another cleanup in progress"); + } else { + if (clear_all) + { + /* if we need to clear all, then just clear the interfaces that we are servicing */ + pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(pw0->serviced_sw_if_index_bitmap); + } + else + { + pw0->pending_clear_sw_if_index_bitmap = clib_bitmap_dup(clear_sw_if_index_bitmap); + } + pw0->clear_in_process = 1; + } + } + /* send some interrupts so they can start working */ + send_interrupts_to_workers(vm, am); + + /* now wait till they all complete */ +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("CLEANER mains len: %d per-worker len: %d", vec_len(vlib_mains), vec_len(am->per_worker_data)); +#endif + vec_foreach(pw0, am->per_worker_data) { + CLIB_MEMORY_BARRIER (); + while (pw0->clear_in_process) { + CLIB_MEMORY_BARRIER (); +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: waiting for my cleaning cycle to finish on %d...", pw0 - am->per_worker_data); +#endif + vlib_process_suspend(vm, 0.0001); + if (pw0->interrupt_is_needed) { + send_one_worker_interrupt(vm, am, (pw0 - am->per_worker_data)); + } + } + } +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ACL_FA_NODE_CLEAN: cleaning done"); +#endif + clib_bitmap_free(clear_sw_if_index_bitmap); + } + break; + default: +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning ("ACL plugin connection cleaner: unknown event %u", + event_type); +#endif + vlib_node_increment_counter (vm, + acl_fa_session_cleaner_process_node. + index, + ACL_FA_CLEANER_ERROR_UNKNOWN_EVENT, 1); + am->fa_cleaner_cnt_unknown_event++; + break; + } + + send_interrupts_to_workers(vm, am); + + if (event_data) + _vec_len (event_data) = 0; + + /* + * If the interrupts were not processed yet, ensure we wait a bit, + * but up to a point. 
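+       * (in the loop below this is bounded to max_wait_cycles = 100
+       * iterations of a 0.1 ms vlib_process_suspend() each)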
+ */ + int need_more_wait = 0; + int max_wait_cycles = 100; + do { + need_more_wait = 0; + vec_foreach(pw0, am->per_worker_data) { + if (pw0->interrupt_generation != am->fa_interrupt_generation) { + need_more_wait = 1; + } + } + if (need_more_wait) { + vlib_process_suspend(vm, 0.0001); + } + } while (need_more_wait && (--max_wait_cycles > 0)); + + int interrupts_needed = 0; + int interrupts_unwanted = 0; + + vec_foreach(pw0, am->per_worker_data) { + if (pw0->interrupt_is_needed) { + interrupts_needed++; + /* the per-worker value is reset when sending the interrupt */ + } + if (pw0->interrupt_is_unwanted) { + interrupts_unwanted++; + pw0->interrupt_is_unwanted = 0; + } + } + if (interrupts_needed) { + /* they need more interrupts, do less waiting around next time */ + am->fa_current_cleaner_timer_wait_interval /= 2; + /* never go into zero-wait either though - we need to give the space to others */ + am->fa_current_cleaner_timer_wait_interval += 1; + } else if (interrupts_unwanted) { + /* slowly increase the amount of sleep up to a limit */ + if (am->fa_current_cleaner_timer_wait_interval < max_timer_wait_interval) + am->fa_current_cleaner_timer_wait_interval += cpu_cps * am->fa_cleaner_wait_time_increment; + } + am->fa_cleaner_cnt_event_cycles++; + am->fa_interrupt_generation++; + } + /* NOT REACHED */ + return 0; +} + + +void +acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable) +{ + acl_main_t *am = &acl_main; + if (enable_disable) { + acl_fa_verify_init_sessions(am); + am->fa_total_enabled_count++; + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_RESCHEDULE, 0); + clib_mem_set_heap (oldheap); + } else { + am->fa_total_enabled_count--; + } + + if (is_input) + { + ASSERT(clib_bitmap_get(am->fa_in_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); + vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa", + sw_if_index, enable_disable, 0, 0); + vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa", + sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); + am->fa_in_acl_on_sw_if_index = + clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index, + enable_disable); + } + else + { + ASSERT(clib_bitmap_get(am->fa_out_acl_on_sw_if_index, sw_if_index) != enable_disable); + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); + vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa", + sw_if_index, enable_disable, 0, 0); + vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa", + sw_if_index, enable_disable, 0, 0); + clib_mem_set_heap (oldheap); + am->fa_out_acl_on_sw_if_index = + clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index, + enable_disable); + } + if ((!enable_disable) && (!acl_fa_ifc_has_in_acl (am, sw_if_index)) + && (!acl_fa_ifc_has_out_acl (am, sw_if_index))) + { +#ifdef FA_NODE_VERBOSE_DEBUG + clib_warning("ENABLE-DISABLE: clean the connections on interface %d", sw_if_index); +#endif + void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base); + vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, + sw_if_index); + clib_mem_set_heap (oldheap); + } +} + +void +show_fa_sessions_hash(vlib_main_t * vm, u32 verbose) +{ + acl_main_t *am = &acl_main; + if (am->fa_sessions_hash_is_initialized) { + vlib_cli_output(vm, "\nSession lookup hash table:\n%U\n\n", + BV 
(format_bihash), &am->fa_sessions_hash, verbose); + } else { + vlib_cli_output(vm, "\nSession lookup hash table is not allocated.\n\n"); + } +} + + +/* *INDENT-OFF* */ + +VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = { + .function = acl_fa_worker_conn_cleaner_process, + .name = "acl-plugin-fa-worker-cleaner-process", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_INTERRUPT, +}; + +VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = { + .function = acl_fa_session_cleaner_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "acl-plugin-fa-cleaner-process", + .n_errors = ARRAY_LEN (acl_fa_cleaner_error_strings), + .error_strings = acl_fa_cleaner_error_strings, + .n_next_nodes = 0, + .next_nodes = {}, +}; + + +VLIB_REGISTER_NODE (acl_in_l2_ip6_node) = +{ + .function = acl_in_ip6_l2_node_fn, + .name = "acl-plugin-in-ip6-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_in_l2_ip4_node) = +{ + .function = acl_in_ip4_l2_node_fn, + .name = "acl-plugin-in-ip4-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_out_l2_ip6_node) = +{ + .function = acl_out_ip6_l2_node_fn, + .name = "acl-plugin-out-ip6-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VLIB_REGISTER_NODE (acl_out_l2_ip4_node) = +{ + .function = acl_out_ip4_l2_node_fn, + .name = "acl-plugin-out-ip4-l2", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + + +VLIB_REGISTER_NODE (acl_in_fa_ip6_node) = +{ + .function = acl_in_ip6_fa_node_fn, + .name = "acl-plugin-in-ip6-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_in_ip6_fa_feature, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "acl-plugin-in-ip6-fa", + .runs_before = VNET_FEATURES ("ip6-flow-classify"), +}; + +VLIB_REGISTER_NODE (acl_in_fa_ip4_node) = +{ + .function = acl_in_ip4_fa_node_fn, + .name = "acl-plugin-in-ip4-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_in_ip4_fa_feature, static) = +{ + .arc_name = "ip4-unicast", + .node_name = 
"acl-plugin-in-ip4-fa", + .runs_before = VNET_FEATURES ("ip4-flow-classify"), +}; + + +VLIB_REGISTER_NODE (acl_out_fa_ip6_node) = +{ + .function = acl_out_ip6_fa_node_fn, + .name = "acl-plugin-out-ip6-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_out_ip6_fa_feature, static) = +{ + .arc_name = "ip6-output", + .node_name = "acl-plugin-out-ip6-fa", + .runs_before = VNET_FEATURES ("interface-output"), +}; + +VLIB_REGISTER_NODE (acl_out_fa_ip4_node) = +{ + .function = acl_out_ip4_fa_node_fn, + .name = "acl-plugin-out-ip4-fa", + .vector_size = sizeof (u32), + .format_trace = format_acl_fa_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (acl_fa_error_strings), + .error_strings = acl_fa_error_strings, + .n_next_nodes = ACL_FA_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [ACL_FA_ERROR_DROP] = "error-drop", + } +}; + +VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) = +{ + .arc_name = "ip4-output", + .node_name = "acl-plugin-out-ip4-fa", + .runs_before = VNET_FEATURES ("interface-output"), +}; + + +/* *INDENT-ON* */ diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h new file mode 100644 index 00000000..fa9a2303 --- /dev/null +++ b/src/plugins/acl/fa_node.h @@ -0,0 +1,174 @@ +#ifndef _FA_NODE_H_ +#define _FA_NODE_H_ + +#include <stddef.h> +#include <vppinfra/bihash_40_8.h> + +#define TCP_FLAG_FIN 0x01 +#define TCP_FLAG_SYN 0x02 +#define TCP_FLAG_RST 0x04 +#define TCP_FLAG_PUSH 0x08 +#define TCP_FLAG_ACK 0x10 +#define TCP_FLAG_URG 0x20 +#define TCP_FLAG_ECE 0x40 +#define TCP_FLAG_CWR 0x80 +#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK) +#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK) + +#define ACL_FA_CONN_TABLE_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define ACL_FA_CONN_TABLE_DEFAULT_HASH_MEMORY_SIZE (1<<30) +#define ACL_FA_CONN_TABLE_DEFAULT_MAX_ENTRIES 1000000 + +typedef union { + u64 as_u64; + struct { + u32 sw_if_index; + u16 mask_type_index_lsb; + u8 tcp_flags; + u8 tcp_flags_valid:1; + u8 is_input:1; + u8 l4_valid:1; + u8 is_nonfirst_fragment:1; + u8 is_ip6:1; + u8 flags_reserved:3; + }; +} fa_packet_info_t; + +typedef union { + u64 as_u64; + struct { + u16 port[2]; + u16 proto; + u16 lsb_of_sw_if_index; + }; +} fa_session_l4_key_t; + +typedef union { + struct { + ip46_address_t addr[2]; + fa_session_l4_key_t l4; + /* This field should align with u64 value in bihash_40_8 keyvalue struct */ + fa_packet_info_t pkt; + }; + clib_bihash_kv_40_8_t kv; +} fa_5tuple_t; + + +typedef struct { + fa_5tuple_t info; /* (5+1)*8 = 48 bytes */ + u64 last_active_time; /* +8 bytes = 56 */ + u32 sw_if_index; /* +4 bytes = 60 */ + union { + u8 as_u8[2]; + u16 as_u16; + } tcp_flags_seen; ; /* +2 bytes = 62 */ + u16 thread_index; /* +2 bytes = 64 */ + u64 link_enqueue_time; /* 8 byte = 8 */ + u32 link_prev_idx; /* +4 bytes = 12 */ + u32 link_next_idx; /* +4 bytes = 16 */ + u8 link_list_id; /* +1 bytes = 17 */ + u8 reserved1[7]; /* +7 bytes = 24 */ + u64 reserved2[5]; /* +5*8 bytes = 64 */ +} fa_session_t; + + +/* This structure is used to fill in the u64 value + in the per-sw-if-index hash table */ +typedef struct { + union { + u64 as_u64; + struct { + u32 session_index; + u16 thread_index; + u16 reserved0; + }; + }; +} 
fa_full_session_id_t; + +/* + * A few compile-time constraints on the size and the layout of the union, to ensure + * it makes sense both for bihash and for us. + */ + +#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1] +CT_ASSERT_EQUAL(fa_l3_key_size_is_40, offsetof(fa_5tuple_t, pkt), offsetof(clib_bihash_kv_40_8_t, value)); +CT_ASSERT_EQUAL(fa_l4_key_t_is_8, sizeof(fa_session_l4_key_t), sizeof(u64)); +CT_ASSERT_EQUAL(fa_packet_info_t_is_8, sizeof(fa_packet_info_t), sizeof(u64)); +CT_ASSERT_EQUAL(fa_l3_kv_size_is_48, sizeof(fa_5tuple_t), sizeof(clib_bihash_kv_40_8_t)); + +/* Let's try to fit within two cachelines */ +CT_ASSERT_EQUAL(fa_session_t_size_is_128, sizeof(fa_session_t), 128); + +/* Session ID MUST be the same as u64 */ +CT_ASSERT_EQUAL(fa_full_session_id_size_is_64, sizeof(fa_full_session_id_t), sizeof(u64)); +#undef CT_ASSERT_EQUAL + +typedef struct { + /* The pool of sessions managed by this worker */ + fa_session_t *fa_sessions_pool; + /* per-worker ACL_N_TIMEOUTS of conn lists */ + u32 *fa_conn_list_head; + u32 *fa_conn_list_tail; + /* adds and deletes per-worker-per-interface */ + u64 *fa_session_dels_by_sw_if_index; + u64 *fa_session_adds_by_sw_if_index; + /* Vector of expired connections retrieved from lists */ + u32 *expired; + /* the earliest next expiry time */ + u64 next_expiry_time; + /* if not zero, look at all the elements until their enqueue timestamp is after below one */ + u64 requeue_until_time; + /* Current time between the checks */ + u64 current_time_wait_interval; + /* Counter of how many sessions we did delete */ + u64 cnt_deleted_sessions; + /* Counter of already deleted sessions being deleted - should not increment unless a bug */ + u64 cnt_already_deleted_sessions; + /* Number of times we requeued a session to a head of the list */ + u64 cnt_session_timer_restarted; + /* swipe up to this enqueue time, rather than following the timeouts */ + u64 swipe_end_time; + /* bitmap of sw_if_index serviced by this worker */ + uword *serviced_sw_if_index_bitmap; + /* bitmap of sw_if_indices to clear. set by main thread, cleared by worker */ + uword *pending_clear_sw_if_index_bitmap; + /* atomic, indicates that the swipe-deletion of connections is in progress */ + u32 clear_in_process; + /* Interrupt is pending from main thread */ + int interrupt_is_pending; + /* + * Interrupt node on the worker thread sets this if it knows there is + * more work to do, but it has to finish to avoid hogging the + * core for too long. + */ + int interrupt_is_needed; + /* + * Set to indicate that the interrupt node wants to get less interrupts + * because there is not enough work for the current rate. + */ + int interrupt_is_unwanted; + /* + * Set to copy of a "generation" counter in main thread so we can sync the interrupts. 
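+   * The worker interrupt node copies am->fa_interrupt_generation into this
+   * field when it finishes a pass; the main cleaner process bumps the
+   * generation each cycle and waits (with short suspends) until every
+   * worker has caught up.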
+ */ + int interrupt_generation; +} acl_fa_per_worker_data_t; + + +typedef enum { + ACL_FA_ERROR_DROP, + ACL_FA_N_NEXT, +} acl_fa_next_t; + + +enum +{ + ACL_FA_CLEANER_RESCHEDULE = 1, + ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX, +} acl_fa_cleaner_process_event_e; + +void acl_fa_enable_disable(u32 sw_if_index, int is_input, int enable_disable); + +void show_fa_sessions_hash(vlib_main_t * vm, u32 verbose); + + +#endif diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c new file mode 100644 index 00000000..7869027b --- /dev/null +++ b/src/plugins/acl/hash_lookup.c @@ -0,0 +1,894 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stddef.h> +#include <netinet/in.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/plugin/plugin.h> +#include <acl/acl.h> +#include <vppinfra/bihash_48_8.h> + +#include "hash_lookup.h" +#include "hash_lookup_private.h" + + +static inline applied_hash_ace_entry_t **get_applied_hash_aces(acl_main_t *am, int is_input, u32 sw_if_index) +{ + applied_hash_ace_entry_t **applied_hash_aces = is_input ? vec_elt_at_index(am->input_hash_entry_vec_by_sw_if_index, sw_if_index) + : vec_elt_at_index(am->output_hash_entry_vec_by_sw_if_index, sw_if_index); + return applied_hash_aces; +} + + + +/* + * This returns true if there is indeed a match on the portranges. + * With all these levels of indirections, this is not going to be very fast, + * so, best use the individual ports or wildcard ports for performance. 
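+ *
+ * For example (illustrative values): the range 1024..2047 is mask-expressible,
+ * (port & 0xfc00) == 0x0400, so the hashed lookup alone can match it; a range
+ * like 100..400 is not, so such entries carry the need_portrange_check flag
+ * and get re-verified here.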
+ */ +static int +match_portranges(acl_main_t *am, fa_5tuple_t *match, u32 index) +{ + + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, match->pkt.is_input, match->pkt.sw_if_index); + applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), index); + + acl_rule_t *r = &(am->acls[pae->acl_index].rules[pae->ace_index]); + DBG("PORTMATCH: %d <= %d <= %d && %d <= %d <= %d ?", + r->src_port_or_type_first, match->l4.port[0], r->src_port_or_type_last, + r->dst_port_or_code_first, match->l4.port[1], r->dst_port_or_code_last); + + return ( ((r->src_port_or_type_first <= match->l4.port[0]) && r->src_port_or_type_last >= match->l4.port[0]) && + ((r->dst_port_or_code_first <= match->l4.port[1]) && r->dst_port_or_code_last >= match->l4.port[1]) ); +} + +static u32 +multi_acl_match_get_applied_ace_index(acl_main_t *am, fa_5tuple_t *match) +{ + clib_bihash_kv_48_8_t kv; + clib_bihash_kv_48_8_t result; + fa_5tuple_t *kv_key = (fa_5tuple_t *)kv.key; + hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value; + u64 *pmatch = (u64 *)match; + u64 *pmask; + u64 *pkey; + int mask_type_index; + u32 curr_match_index = ~0; + + u32 sw_if_index = match->pkt.sw_if_index; + u8 is_input = match->pkt.is_input; + applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index); + applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index : + &am->output_applied_hash_acl_info_by_sw_if_index; + + DBG("TRYING TO MATCH: %016llx %016llx %016llx %016llx %016llx %016llx", + pmatch[0], pmatch[1], pmatch[2], pmatch[3], pmatch[4], pmatch[5]); + + for(mask_type_index=0; mask_type_index < pool_len(am->ace_mask_type_pool); mask_type_index++) { + if (!clib_bitmap_get(vec_elt_at_index((*applied_hash_acls), sw_if_index)->mask_type_index_bitmap, mask_type_index)) { + /* This bit is not set. Avoid trying to match */ + continue; + } + ace_mask_type_entry_t *mte = vec_elt_at_index(am->ace_mask_type_pool, mask_type_index); + pmatch = (u64 *)match; + pmask = (u64 *)&mte->mask; + pkey = (u64 *)kv.key; + /* + * unrolling the below loop results in a noticeable performance increase. + int i; + for(i=0; i<6; i++) { + kv.key[i] = pmatch[i] & pmask[i]; + } + */ + + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + *pkey++ = *pmatch++ & *pmask++; + + kv_key->pkt.mask_type_index_lsb = mask_type_index; + DBG(" KEY %3d: %016llx %016llx %016llx %016llx %016llx %016llx", mask_type_index, + kv.key[0], kv.key[1], kv.key[2], kv.key[3], kv.key[4], kv.key[5]); + int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result); + if (res == 0) { + DBG("ACL-MATCH! result_val: %016llx", result_val->as_u64); + if (result_val->applied_entry_index < curr_match_index) { + if (PREDICT_FALSE(result_val->need_portrange_check)) { + /* + * This is going to be slow, since we can have multiple superset + * entries for narrow-ish portranges, e.g.: + * 0..42 100..400, 230..60000, + * so we need to walk linearly and check if they match. + */ + + u32 curr_index = result_val->applied_entry_index; + while ((curr_index != ~0) && !match_portranges(am, match, curr_index)) { + /* while no match and there are more entries, walk... 
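+ * e.g. with a collision chain 5 -> 9 -> ~0: if entry 5 fails the
+ * port check we advance to entry 9; if entry 9 fails as well,
+ * curr_index becomes ~0 and the candidate match index stays unchanged.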
+ */
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces),curr_index);
+ DBG("entry %d did not portmatch, advancing to %d", curr_index, pae->next_applied_entry_index);
+ curr_index = pae->next_applied_entry_index;
+ }
+ if (curr_index < curr_match_index) {
+ DBG("The index %d is the new candidate in portrange matches.", curr_index);
+ curr_match_index = curr_index;
+ } else {
+ DBG("Curr portmatch index %d is too big vs. current matched one %d", curr_index, curr_match_index);
+ }
+ } else {
+ /* The usual path is here. Found an entry in front of the current candidate - so it's a new one */
+ DBG("This match is the new candidate");
+ curr_match_index = result_val->applied_entry_index;
+ if (!result_val->shadowed) {
+ /* new result is known to not be shadowed, so no point in looking up further */
+ break;
+ }
+ }
+ }
+ }
+ }
+ DBG("MATCH-RESULT: %d", curr_match_index);
+ return curr_match_index;
+}
+
+static void
+hashtable_add_del(acl_main_t *am, clib_bihash_kv_48_8_t *kv, int is_add)
+{
+ DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx %016llx add %d",
+ kv->key[0], kv->key[1], kv->key[2],
+ kv->key[3], kv->key[4], kv->key[5], kv->value, is_add);
+ BV (clib_bihash_add_del) (&am->acl_lookup_hash, kv, is_add);
+}
+
+static void
+fill_applied_hash_ace_kv(acl_main_t *am,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 sw_if_index, u8 is_input,
+ u32 new_index, clib_bihash_kv_48_8_t *kv)
+{
+ fa_5tuple_t *kv_key = (fa_5tuple_t *)kv->key;
+ hash_acl_lookup_value_t *kv_val = (hash_acl_lookup_value_t *)&kv->value;
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);
+
+ memcpy(kv_key, &(vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->match), sizeof(*kv_key));
+ /* initialize the sw_if_index and direction */
+ kv_key->pkt.sw_if_index = sw_if_index;
+ kv_key->pkt.is_input = is_input;
+ kv_val->as_u64 = 0;
+ kv_val->applied_entry_index = new_index;
+ kv_val->need_portrange_check = vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->src_portrange_not_powerof2 ||
+ vec_elt_at_index(ha->rules, pae->hash_ace_info_index)->dst_portrange_not_powerof2;
+ /* by default assume all values are shadowed -> check all mask types */
+ kv_val->shadowed = 1;
+}
+
+static void
+add_del_hashtable_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 index, int is_add)
+{
+ clib_bihash_kv_48_8_t kv;
+
+ fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, index, &kv);
+ hashtable_add_del(am, &kv, is_add);
+}
+
+
+
+static void
+activate_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 new_index)
+{
+ clib_bihash_kv_48_8_t kv;
+ ASSERT(new_index != ~0);
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ DBG("activate_applied_ace_hash_entry sw_if_index %d is_input %d new_index %d", sw_if_index, is_input, new_index);
+
+ fill_applied_hash_ace_kv(am, applied_hash_aces, sw_if_index, is_input, new_index, &kv);
+
+ DBG("APPLY ADD KEY: %016llx %016llx %016llx %016llx %016llx %016llx",
+ kv.key[0], kv.key[1], kv.key[2],
+ kv.key[3], kv.key[4], kv.key[5]);
+
+ clib_bihash_kv_48_8_t result;
+ hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value;
+ int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result);
+ ASSERT(new_index != ~0);
+ ASSERT(new_index < vec_len((*applied_hash_aces)));
+ if (res == 0) {
+ /* One or more entries already exist for this key. Append at the end. */
+ u32 first_index = result_val->applied_entry_index;
+ ASSERT(first_index != ~0);
+ DBG("A key already exists, with applied entry index: %d", first_index);
+ applied_hash_ace_entry_t *first_pae = vec_elt_at_index((*applied_hash_aces), first_index);
+ u32 last_index = first_pae->tail_applied_entry_index;
+ ASSERT(last_index != ~0);
+ applied_hash_ace_entry_t *last_pae = vec_elt_at_index((*applied_hash_aces), last_index);
+ DBG("...advance to chained entry index: %d", last_index);
+ /* link ourselves in */
+ last_pae->next_applied_entry_index = new_index;
+ pae->prev_applied_entry_index = last_index;
+ /* adjust the pointer to the new tail */
+ first_pae->tail_applied_entry_index = new_index;
+ } else {
+ /* It's the very first entry */
+ hashtable_add_del(am, &kv, 1);
+ ASSERT(new_index != ~0);
+ pae->tail_applied_entry_index = new_index;
+ }
+}
+
+static void
+applied_hash_entries_analyze(acl_main_t *am, applied_hash_ace_entry_t **applied_hash_aces)
+{
+ /*
+ * Go over the rules and check which ones are shadowed and which aren't.
+ * Naive approach: try to match the match value from every ACE as if it
+ * was a live packet, and see if the resulting match happens earlier in the list.
+ * If it does not match, or the match is later in the ACL, then the entry is not shadowed.
+ *
+ * This approach fails; an example:
+ *   deny tcp 2001:db8::/32 2001:db8::/32
+ *   permit ip 2001:db8::1/128 2001:db8::2/128
+ */
+}
+
+static void *
+hash_acl_set_heap(acl_main_t *am)
+{
+ if (0 == am->hash_lookup_mheap) {
+ am->hash_lookup_mheap = mheap_alloc (0 /* use VM */ , am->hash_lookup_mheap_size);
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+ }
+ void *oldheap = clib_mem_set_heap(am->hash_lookup_mheap);
+ return oldheap;
+}
+
+void
+acl_plugin_hash_acl_set_validate_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(hash_acl_set_heap(am));
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_VALIDATE;
+ h->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ mheap_validate(h);
+ } else {
+ h->flags &= ~MHEAP_FLAG_VALIDATE;
+ h->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ }
+}
+
+void
+acl_plugin_hash_acl_set_trace_heap(acl_main_t *am, int on)
+{
+ clib_mem_set_heap(hash_acl_set_heap(am));
+ mheap_t *h = mheap_header (am->hash_lookup_mheap);
+ if (on) {
+ h->flags |= MHEAP_FLAG_TRACE;
+ } else {
+ h->flags &= ~MHEAP_FLAG_TRACE;
+ }
+}
+
+void
+hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ int i;
+
+ DBG0("HASH ACL apply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index);
+ if (!am->acl_lookup_hash_initialized) {
+ BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash",
+ am->hash_lookup_hash_buckets, am->hash_lookup_hash_memory);
+ am->acl_lookup_hash_initialized = 1;
+ }
+
+ void *oldheap = hash_acl_set_heap(am);
+ if (is_input) {
+ vec_validate(am->input_hash_entry_vec_by_sw_if_index, sw_if_index);
+ } else {
+ vec_validate(am->output_hash_entry_vec_by_sw_if_index, sw_if_index);
+ }
+ vec_validate(am->hash_acl_infos, acl_index);
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 **hash_acl_applied_sw_if_index = is_input ? &ha->inbound_sw_if_index_list : &ha->outbound_sw_if_index_list;
+
+ int base_offset = vec_len(*applied_hash_aces);
+
+ /* Update the bitmap of the mask types with which the lookup
+ needs to happen for the ACLs applied to this sw_if_index */
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index :
+ &am->output_applied_hash_acl_info_by_sw_if_index;
+ vec_validate((*applied_hash_acls), sw_if_index);
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+
+ /* ensure the list of applied hash acls is initialized and add this acl# to it */
+ u32 index = vec_search(pal->applied_acls, acl_index);
+ if (index != ~0) {
+ clib_warning("BUG: trying to apply acl_index %d twice on sw_if_index %d is_input %d",
+ acl_index, sw_if_index, is_input);
+ goto done;
+ }
+ vec_add1(pal->applied_acls, acl_index);
+ u32 index2 = vec_search((*hash_acl_applied_sw_if_index), sw_if_index);
+ if (index2 != ~0) {
+ clib_warning("BUG: trying to apply acl_index %d twice on (sw_if_index %d) is_input %d",
+ acl_index, sw_if_index, is_input);
+ goto done;
+ }
+ vec_add1((*hash_acl_applied_sw_if_index), sw_if_index);
+
+ pal->mask_type_index_bitmap = clib_bitmap_or(pal->mask_type_index_bitmap,
+ ha->mask_type_index_bitmap);
+ /*
+ * if the applied ACL is empty, the current code will cause a
+ * different behavior compared to the current linear search: an empty ACL will
+ * simply fall through to the next ACL, or the default deny in the end.
+ *
+ * This is not a problem, because after vpp-dev discussion,
+ * the consensus was it should not be possible to apply the non-existent
+ * ACL, so the change adding this code also takes care of that.
+ */
+
+ /* expand the applied aces vector by the necessary amount */
+ vec_resize((*applied_hash_aces), vec_len(ha->rules));
+
+ /* add the rules from the ACL to the hash table for lookup and append to the vector */
+ for(i=0; i < vec_len(ha->rules); i++) {
+ u32 new_index = base_offset + i;
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
+ pae->acl_index = acl_index;
+ pae->ace_index = ha->rules[i].ace_index;
+ pae->action = ha->rules[i].action;
+ pae->hitcount = 0;
+ pae->hash_ace_info_index = i;
+ /* we might link it in later */
+ pae->next_applied_entry_index = ~0;
+ pae->prev_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+ activate_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, new_index);
+ }
+ applied_hash_entries_analyze(am, applied_hash_aces);
+done:
+ clib_mem_set_heap (oldheap);
+}
+
+static u32
+find_head_applied_ace_index(applied_hash_ace_entry_t **applied_hash_aces, u32 curr_index)
+{
+ /*
+ * Walk back to the first entry. Inefficient, so this might need to be a bit cleverer
+ * if it proves to be a problem.
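+ * e.g. for a chain whose prev_applied_entry_index links read
+ * 12 -> 7 -> 3 -> ~0, starting from curr_index 12 the loop below steps
+ * to 7, then to 3, whose prev_applied_entry_index is ~0, so 3 is
+ * returned as the head.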
+ */
+ u32 an_index = curr_index;
+ ASSERT(an_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), an_index);
+ while(head_pae->prev_applied_entry_index != ~0) {
+ an_index = head_pae->prev_applied_entry_index;
+ ASSERT(an_index != ~0);
+ head_pae = vec_elt_at_index((*applied_hash_aces), an_index);
+ }
+ return an_index;
+}
+
+static void
+move_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 old_index, u32 new_index)
+{
+ ASSERT(old_index != ~0);
+ ASSERT(new_index != ~0);
+ /* move the entry */
+ *vec_elt_at_index((*applied_hash_aces), new_index) = *vec_elt_at_index((*applied_hash_aces), old_index);
+
+ /* update the linkage and hash table if necessary */
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
+
+ if (pae->prev_applied_entry_index != ~0) {
+ applied_hash_ace_entry_t *prev_pae = vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index);
+ ASSERT(prev_pae->next_applied_entry_index == old_index);
+ prev_pae->next_applied_entry_index = new_index;
+ } else {
+ /* first entry - so the hash points to it, update */
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, new_index, 1);
+ ASSERT(pae->tail_applied_entry_index != ~0);
+ }
+ if (pae->next_applied_entry_index != ~0) {
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ ASSERT(next_pae->prev_applied_entry_index == old_index);
+ next_pae->prev_applied_entry_index = new_index;
+ } else {
+ /*
+ * Moving the very last entry, so we need to update the tail pointer in the first one.
+ */
+ u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index);
+ ASSERT(head_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index);
+
+ ASSERT(head_pae->tail_applied_entry_index == old_index);
+ head_pae->tail_applied_entry_index = new_index;
+ }
+ /* invalidate the old entry */
+ pae->prev_applied_entry_index = ~0;
+ pae->next_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+}
+
+static void
+deactivate_applied_ace_hash_entry(acl_main_t *am,
+ u32 sw_if_index, u8 is_input,
+ applied_hash_ace_entry_t **applied_hash_aces,
+ u32 old_index)
+{
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
+ DBG("UNAPPLY DEACTIVATE: sw_if_index %d is_input %d, applied index %d", sw_if_index, is_input, old_index);
+
+ if (pae->prev_applied_entry_index != ~0) {
+ DBG("UNAPPLY = index %d has prev_applied_entry_index %d", old_index, pae->prev_applied_entry_index);
+ applied_hash_ace_entry_t *prev_pae = vec_elt_at_index((*applied_hash_aces), pae->prev_applied_entry_index);
+ ASSERT(prev_pae->next_applied_entry_index == old_index);
+ prev_pae->next_applied_entry_index = pae->next_applied_entry_index;
+ if (pae->next_applied_entry_index == ~0) {
+ /* it was the last entry we removed, update the pointer on the first one */
+ u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index);
+ DBG("UNAPPLY = index %d head index to update %d", old_index, head_index);
+ ASSERT(head_index != ~0);
+ applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index);
+
+ ASSERT(head_pae->tail_applied_entry_index == old_index);
+ head_pae->tail_applied_entry_index = pae->prev_applied_entry_index;
+ } else {
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ next_pae->prev_applied_entry_index = pae->prev_applied_entry_index;
+ }
+ } else {
+ /* It was the first entry. We need either to reset the hash entry or delete it */
+ if (pae->next_applied_entry_index != ~0) {
+ /* the next element becomes the new first one, so it needs the tail pointer to be set */
+ applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), pae->next_applied_entry_index);
+ ASSERT(pae->tail_applied_entry_index != ~0);
+ next_pae->tail_applied_entry_index = pae->tail_applied_entry_index;
+ DBG("Resetting the hash table entry from %d to %d, setting tail index to %d", old_index, pae->next_applied_entry_index, pae->tail_applied_entry_index);
+ /* unlink from the next element */
+ next_pae->prev_applied_entry_index = ~0;
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, pae->next_applied_entry_index, 1);
+ } else {
+ /* no next entry, so just delete the entry in the hash table */
+ add_del_hashtable_entry(am, sw_if_index, is_input,
+ applied_hash_aces, old_index, 0);
+ }
+ }
+ /* invalidate the old entry */
+ pae->prev_applied_entry_index = ~0;
+ pae->next_applied_entry_index = ~0;
+ pae->tail_applied_entry_index = ~0;
+}
+
+
+static void
+hash_acl_build_applied_lookup_bitmap(acl_main_t *am, u32 sw_if_index, u8 is_input)
+{
+ int i;
+ uword *new_lookup_bitmap = 0;
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index
+ : &am->output_applied_hash_acl_info_by_sw_if_index;
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+ for(i=0; i < vec_len(pal->applied_acls); i++) {
+ u32 a_acl_index = *vec_elt_at_index((pal->applied_acls), i);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, a_acl_index);
+ DBG("Update bitmask = %U or %U (acl_index %d)\n", format_bitmap_hex, new_lookup_bitmap,
+ format_bitmap_hex, ha->mask_type_index_bitmap, a_acl_index);
+ new_lookup_bitmap = clib_bitmap_or(new_lookup_bitmap,
+ ha->mask_type_index_bitmap);
+ }
+ uword *old_lookup_bitmap = pal->mask_type_index_bitmap;
+ pal->mask_type_index_bitmap = new_lookup_bitmap;
+ clib_bitmap_free(old_lookup_bitmap);
+}
+
+void
+hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ int i;
+
+ DBG0("HASH ACL unapply: sw_if_index %d is_input %d acl %d", sw_if_index, is_input, acl_index);
+ applied_hash_acl_info_t **applied_hash_acls = is_input ? &am->input_applied_hash_acl_info_by_sw_if_index
+ : &am->output_applied_hash_acl_info_by_sw_if_index;
+ applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), sw_if_index);
+
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 **hash_acl_applied_sw_if_index = is_input ? &ha->inbound_sw_if_index_list : &ha->outbound_sw_if_index_list;
+
+ /* remove this acl# from the list of applied hash acls */
+ u32 index = vec_search(pal->applied_acls, acl_index);
+ if (index == ~0) {
+ clib_warning("BUG: trying to unapply unapplied acl_index %d on sw_if_index %d is_input %d",
+ acl_index, sw_if_index, is_input);
+ return;
+ }
+ vec_del1(pal->applied_acls, index);
+
+ u32 index2 = vec_search((*hash_acl_applied_sw_if_index), sw_if_index);
+ if (index2 == ~0) {
+ clib_warning("BUG: trying to unapply acl_index %d twice on (sw_if_index %d) is_input %d",
+ acl_index, sw_if_index, is_input);
+ return;
+ }
+ vec_del1((*hash_acl_applied_sw_if_index), index2);
+
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+
+ for(i=0; i < vec_len((*applied_hash_aces)); i++) {
+ if (vec_elt_at_index(*applied_hash_aces,i)->acl_index == acl_index) {
+ DBG("Found applied ACL#%d at applied index %d", acl_index, i);
+ break;
+ }
+ }
+ if (vec_len((*applied_hash_aces)) <= i) {
+ DBG("Did not find applied ACL#%d at sw_if_index %d", acl_index, sw_if_index);
+ /* we went all the way without finding any entries. Probably the list was empty. */
+ return;
+ }
+
+ void *oldheap = hash_acl_set_heap(am);
+ int base_offset = i;
+ int tail_offset = base_offset + vec_len(ha->rules);
+ int tail_len = vec_len((*applied_hash_aces)) - tail_offset;
+ DBG("base_offset: %d, tail_offset: %d, tail_len: %d", base_offset, tail_offset, tail_len);
+
+ for(i=0; i < vec_len(ha->rules); i ++) {
+ deactivate_applied_ace_hash_entry(am, sw_if_index, is_input,
+ applied_hash_aces, base_offset + i);
+ }
+ for(i=0; i < tail_len; i ++) {
+ /* move the entry at tail offset to base offset */
+ /* that is, from (tail_offset+i) -> (base_offset+i) */
+ DBG("UNAPPLY MOVE: sw_if_index %d is_input %d, applied index %d -> %d", sw_if_index, is_input, tail_offset+i, base_offset + i);
+ move_applied_ace_hash_entry(am, sw_if_index, is_input, applied_hash_aces, tail_offset + i, base_offset + i);
+ }
+ /* trim the end of the vector */
+ _vec_len((*applied_hash_aces)) -= vec_len(ha->rules);
+
+ applied_hash_entries_analyze(am, applied_hash_aces);
+
+ /* After deletion we might not need some of the mask-types anymore... */
+ hash_acl_build_applied_lookup_bitmap(am, sw_if_index, is_input);
+ clib_mem_set_heap (oldheap);
+}
+
+/*
+ * Create the applied ACEs and update the hash table,
+ * taking into account that the ACL may not be the last
+ * in the vector of applied ACLs.
+ *
+ * For now, walk from the end of the vector and unapply the ACLs,
+ * then apply the one in question and reapply the rest.
+ */
+
+void
+hash_acl_reapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index)
+{
+ u32 **applied_acls = is_input ? vec_elt_at_index(am->input_acl_vec_by_sw_if_index, sw_if_index)
+ : vec_elt_at_index(am->output_acl_vec_by_sw_if_index, sw_if_index);
+ int i;
+ int start_index = vec_search((*applied_acls), acl_index);
+ /*
+ * This function is called after we find out the sw_if_index where ACL is applied.
+ * If the by-sw_if_index vector does not have the ACL#, then it's a bug.
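+ * For example, with applied ACLs [4, 7, 9] and acl_index == 7,
+ * start_index is 1: ACL 9 is unapplied first (walking from the end),
+ * then ACLs 7 and 9 are applied in order, preserving the relative
+ * order of the applied entries.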
+ */
+ ASSERT(start_index < vec_len(*applied_acls));
+
+ /* unapply all the ACLs till the current one */
+ for(i = vec_len(*applied_acls) - 1; i > start_index; i--) {
+ hash_acl_unapply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i));
+ }
+ for(i = start_index; i < vec_len(*applied_acls); i++) {
+ hash_acl_apply(am, sw_if_index, is_input, *vec_elt_at_index(*applied_acls, i));
+ }
+}
+
+static void
+make_address_mask(ip46_address_t *addr, u8 is_ipv6, u8 prefix_len)
+{
+ if (is_ipv6) {
+ ip6_address_mask_from_width(&addr->ip6, prefix_len);
+ } else {
+ /* FIXME: this may not be the correct way */
+ ip6_address_mask_from_width(&addr->ip6, prefix_len + 3*32);
+ ip46_address_mask_ip4(addr);
+ }
+}
+
+static u8
+make_port_mask(u16 *portmask, u16 port_first, u16 port_last)
+{
+ if (port_first == port_last) {
+ *portmask = 0xffff;
+ /* single port is representable by masked value */
+ return 0;
+ }
+ if ((port_first == 0) && (port_last == 65535)) {
+ *portmask = 0;
+ /* wildcard port is representable by a masked value */
+ return 0;
+ }
+
+ /*
+ * For now match all the ports; later there might be
+ * a better optimization here which would
+ * pick out bitmaskable portranges.
+ *
+ * However, adding a new mask type potentially
+ * adds a per-packet extra lookup, so the benefit is not clear.
+ */
+ *portmask = 0;
+ /* This port range can't be represented via bitmask exactly. */
+ return 1;
+}
+
+static void
+make_mask_and_match_from_rule(fa_5tuple_t *mask, acl_rule_t *r, hash_ace_info_t *hi, int match_nonfirst_fragment)
+{
+ memset(mask, 0, sizeof(*mask));
+ memset(&hi->match, 0, sizeof(hi->match));
+ hi->action = r->is_permit;
+
+ /* we will need to be matching based on sw_if_index, direction, and mask_type_index when applied */
+ mask->pkt.sw_if_index = ~0;
+ mask->pkt.is_input = 1;
+ /* we will assign the match of mask_type_index later when we find it */
+ mask->pkt.mask_type_index_lsb = ~0;
+
+ mask->pkt.is_ip6 = 1;
+ hi->match.pkt.is_ip6 = r->is_ipv6;
+
+ make_address_mask(&mask->addr[0], r->is_ipv6, r->src_prefixlen);
+ hi->match.addr[0] = r->src;
+ make_address_mask(&mask->addr[1], r->is_ipv6, r->dst_prefixlen);
+ hi->match.addr[1] = r->dst;
+
+ if (r->proto != 0) {
+ mask->l4.proto = ~0; /* L4 proto needs to be matched */
+ hi->match.l4.proto = r->proto;
+ if (match_nonfirst_fragment) {
+ /* match the non-first fragments only */
+ mask->pkt.is_nonfirst_fragment = 1;
+ hi->match.pkt.is_nonfirst_fragment = 1;
+ } else {
+ /* Calculate the src/dst port masks and make the src/dst port matches accordingly */
+ hi->src_portrange_not_powerof2 = make_port_mask(&mask->l4.port[0], r->src_port_or_type_first, r->src_port_or_type_last);
+ hi->match.l4.port[0] = r->src_port_or_type_first & mask->l4.port[0];
+ hi->dst_portrange_not_powerof2 = make_port_mask(&mask->l4.port[1], r->dst_port_or_code_first, r->dst_port_or_code_last);
+ hi->match.l4.port[1] = r->dst_port_or_code_first & mask->l4.port[1];
+ /* L4 info must be valid in order to match */
+ mask->pkt.l4_valid = 1;
+ hi->match.pkt.l4_valid = 1;
+ /* And we must set the mask to check that it is an initial fragment */
+ mask->pkt.is_nonfirst_fragment = 1;
+ hi->match.pkt.is_nonfirst_fragment = 0;
+ if ((r->proto == IPPROTO_TCP) && (r->tcp_flags_mask != 0)) {
+ /* if we want to match on TCP flags, they must be masked off as well */
+ mask->pkt.tcp_flags = r->tcp_flags_mask;
+ hi->match.pkt.tcp_flags = r->tcp_flags_value;
+ /* and the flags need to be present within the packet being matched */
+ mask->pkt.tcp_flags_valid = 1;
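+ /*
+ * Example of the masked match this sets up: a rule with
+ * tcp_flags_mask 0x12 (SYN|ACK) and tcp_flags_value 0x02 (SYN)
+ * ANDs the packet's flags with 0x12 and compares the result with
+ * 0x02 - SYN must be set and ACK clear - so only initial SYNs
+ * match, not SYN-ACKs.
+ */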
+ hi->match.pkt.tcp_flags_valid = 1;
+ }
+ }
+ }
+ /* Sanitize the mask and the match */
+ u64 *pmask = (u64 *)mask;
+ u64 *pmatch = (u64 *)&hi->match;
+ int j;
+ for(j=0; j<6; j++) {
+ pmatch[j] = pmatch[j] & pmask[j];
+ }
+}
+
+static u32
+find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
+{
+ ace_mask_type_entry_t *mte;
+ /* *INDENT-OFF* */
+ pool_foreach(mte, am->ace_mask_type_pool,
+ ({
+ if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0)
+ return (mte - am->ace_mask_type_pool);
+ }));
+ /* *INDENT-ON* */
+ return ~0;
+}
+
+static u32
+assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
+{
+ u32 mask_type_index = find_mask_type_index(am, mask);
+ ace_mask_type_entry_t *mte;
+ if(~0 == mask_type_index) {
+ pool_get_aligned (am->ace_mask_type_pool, mte, CLIB_CACHE_LINE_BYTES);
+ mask_type_index = mte - am->ace_mask_type_pool;
+ clib_memcpy(&mte->mask, mask, sizeof(mte->mask));
+ mte->refcount = 0;
+ /*
+ * We can use only 16 bits, since in the match there is only a u16 field.
+ * Realistically, once you go to 64K of mask types, it is a huge
+ * problem anyway, so we might as well stop halfway.
+ */
+ ASSERT(mask_type_index < 32768);
+ }
+ mte = am->ace_mask_type_pool + mask_type_index;
+ mte->refcount++;
+ return mask_type_index;
+}
+
+static void
+release_mask_type_index(acl_main_t *am, u32 mask_type_index)
+{
+ ace_mask_type_entry_t *mte = pool_elt_at_index(am->ace_mask_type_pool, mask_type_index);
+ mte->refcount--;
+ if (mte->refcount == 0) {
+ /* we are not using this entry anymore */
+ pool_put(am->ace_mask_type_pool, mte);
+ }
+}
+
+void hash_acl_add(acl_main_t *am, int acl_index)
+{
+ void *oldheap = hash_acl_set_heap(am);
+ DBG("HASH ACL add : %d", acl_index);
+ int i;
+ acl_list_t *a = &am->acls[acl_index];
+ vec_validate(am->hash_acl_infos, acl_index);
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ memset(ha, 0, sizeof(*ha));
+
+ /* walk the newly added ACL entries and ensure that for each of them there
+ is a mask type, and increment the reference count for that mask type */
+ for(i=0; i < a->count; i++) {
+ hash_ace_info_t ace_info;
+ fa_5tuple_t mask;
+ memset(&ace_info, 0, sizeof(ace_info));
+ ace_info.acl_index = acl_index;
+ ace_info.ace_index = i;
+
+ make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 0);
+ ace_info.mask_type_index = assign_mask_type_index(am, &mask);
+ /* assign the mask type index for matching itself */
+ ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index;
+ DBG("ACE: %d mask_type_index: %d", i, ace_info.mask_type_index);
+ /* Ensure a given index is set in the mask type index bitmap for this ACL */
+ ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1);
+ vec_add1(ha->rules, ace_info);
+ if (am->l4_match_nonfirst_fragment) {
+ /* add the second rule which matches the noninitial fragments with the respective mask */
+ make_mask_and_match_from_rule(&mask, &a->rules[i], &ace_info, 1);
+ ace_info.mask_type_index = assign_mask_type_index(am, &mask);
+ ace_info.match.pkt.mask_type_index_lsb = ace_info.mask_type_index;
+ DBG("ACE: %d (non-initial frags) mask_type_index: %d", i, ace_info.mask_type_index);
+ /* Ensure a given index is set in the mask type index bitmap for this ACL */
+ ha->mask_type_index_bitmap = clib_bitmap_set(ha->mask_type_index_bitmap, ace_info.mask_type_index, 1);
+ vec_add1(ha->rules, ace_info);
+ }
+ }
+ /*
+ * if an ACL is applied somewhere, fill the corresponding lookup data structures.
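+ * (For example, if ACL 5 is applied inbound on sw_if_index 1 and 3,
+ * the loops below end up calling hash_acl_reapply(am, 1, 1, 5) and
+ * hash_acl_reapply(am, 3, 1, 5).)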
+ * We need to take care if the ACL is not the last one in the vector of ACLs applied to the interface.
+ */
+ if (acl_index < vec_len(am->input_sw_if_index_vec_by_acl)) {
+ u32 *sw_if_index;
+ vec_foreach(sw_if_index, am->input_sw_if_index_vec_by_acl[acl_index]) {
+ hash_acl_reapply(am, *sw_if_index, 1, acl_index);
+ }
+ }
+ if (acl_index < vec_len(am->output_sw_if_index_vec_by_acl)) {
+ u32 *sw_if_index;
+ vec_foreach(sw_if_index, am->output_sw_if_index_vec_by_acl[acl_index]) {
+ hash_acl_reapply(am, *sw_if_index, 0, acl_index);
+ }
+ }
+ clib_mem_set_heap (oldheap);
+}
+
+void hash_acl_delete(acl_main_t *am, int acl_index)
+{
+ void *oldheap = hash_acl_set_heap(am);
+ DBG0("HASH ACL delete : %d", acl_index);
+ /*
+ * If the ACL is applied somewhere, remove the references to it (call hash_acl_unapply);
+ * this is a different behavior from the linear lookup, where an empty ACL is "deny all".
+ *
+ * However, following the vpp-dev discussion, it should not be possible to delete
+ * an ACL that is referenced elsewhere, and the change adding this also adds
+ * the safeguards in that respect, so this is not a problem.
+ *
+ * The part to remember is that this routine is called in the process of reapplication
+ * during the acl_add_replace() API call - the old acl ruleset is deleted, then
+ * the new one is added, without any change in the applied ACLs - so this case
+ * has to be handled.
+ */
+ hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
+ u32 *interface_list_copy = 0;
+ {
+ u32 *sw_if_index;
+ interface_list_copy = vec_dup(ha->inbound_sw_if_index_list);
+ vec_foreach(sw_if_index, interface_list_copy) {
+ hash_acl_unapply(am, *sw_if_index, 1, acl_index);
+ }
+ vec_free(interface_list_copy);
+ interface_list_copy = vec_dup(ha->outbound_sw_if_index_list);
+ vec_foreach(sw_if_index, interface_list_copy) {
+ hash_acl_unapply(am, *sw_if_index, 0, acl_index);
+ }
+ }
+
+ /* walk the mask types for the ACL about-to-be-deleted, and decrease
+ * the reference count, possibly freeing up some of them */
+ int i;
+ for(i=0; i < vec_len(ha->rules); i++) {
+ release_mask_type_index(am, ha->rules[i].mask_type_index);
+ }
+ clib_bitmap_free(ha->mask_type_index_bitmap);
+ vec_free(ha->rules);
+ clib_mem_set_heap (oldheap);
+}
+
+u8
+hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, is_input, sw_if_index);
+ u32 match_index = multi_acl_match_get_applied_ace_index(am, pkt_5tuple);
+ if (match_index < vec_len((*applied_hash_aces))) {
+ applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), match_index);
+ pae->hitcount++;
+ *acl_match_p = pae->acl_index;
+ *rule_match_p = pae->ace_index;
+ return pae->action;
+ }
+ return 0;
+}
+
+
+void
+show_hash_acl_hash (vlib_main_t * vm, acl_main_t *am, u32 verbose)
+{
+ vlib_cli_output(vm, "\nACL lookup hash table:\n%U\n",
+ BV (format_bihash), &am->acl_lookup_hash, verbose);
+}
diff --git a/src/plugins/acl/hash_lookup.h b/src/plugins/acl/hash_lookup.h
new file mode 100644
index 00000000..2d7058e8
--- /dev/null
+++ b/src/plugins/acl/hash_lookup.h
@@ -0,0 +1,64 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _ACL_HASH_LOOKUP_H_
+#define _ACL_HASH_LOOKUP_H_
+
+#include <stddef.h>
+#include "acl.h"
+
+/*
+ * Do the work necessary to logically apply the ACL to the existing vector of ACLs
+ * looked up during packet processing
+ */
+
+void hash_acl_apply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index);
+
+/* Remove the ACL from the packet processing lookups on a given interface */
+
+void hash_acl_unapply(acl_main_t *am, u32 sw_if_index, u8 is_input, int acl_index);
+
+/*
+ * Add an ACL or delete an ACL. The ACL may already have been referenced elsewhere,
+ * so potentially we also need to do the work to enable the lookups.
+ */
+
+void hash_acl_add(acl_main_t *am, int acl_index);
+void hash_acl_delete(acl_main_t *am, int acl_index);
+
+/*
+ * Do the work required to match a given 5-tuple from the packet,
+ * and return the action as well as populate the values pointed
+ * to by the *_match_p pointers and maybe trace_bitmap.
+ */
+
+u8
+hash_multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap);
+
+
+/*
+ * The debug function to show the contents of the ACL lookup hash
+ */
+void show_hash_acl_hash(vlib_main_t * vm, acl_main_t *am, u32 verbose);
+
+/* Debug functions to turn validate/trace on and off */
+void acl_plugin_hash_acl_set_validate_heap(acl_main_t *am, int on);
+void acl_plugin_hash_acl_set_trace_heap(acl_main_t *am, int on);
+
+#endif
diff --git a/src/plugins/acl/hash_lookup_private.h b/src/plugins/acl/hash_lookup_private.h
new file mode 100644
index 00000000..bc621416
--- /dev/null
+++ b/src/plugins/acl/hash_lookup_private.h
@@ -0,0 +1,33 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define ACL_HASH_LOOKUP_DEBUG 0
+
+#if ACL_HASH_LOOKUP_DEBUG == 1
+#define DBG0(...) clib_warning(__VA_ARGS__)
+#define DBG(...)
+#define DBG_UNIX_LOG(...)
+#elif ACL_HASH_LOOKUP_DEBUG == 2
+#define DBG0(...) clib_warning(__VA_ARGS__)
+#define DBG(...) clib_warning(__VA_ARGS__)
+#define DBG_UNIX_LOG(...) clib_unix_warning(__VA_ARGS__)
+#else
+#define DBG0(...)
+#define DBG(...)
+#define DBG_UNIX_LOG(...)
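+
+/*
+ * Usage sketch for the levels above: with ACL_HASH_LOOKUP_DEBUG set to 1,
+ * only the coarse DBG0() messages are emitted; at 2 the verbose DBG() and
+ * DBG_UNIX_LOG() output appears as well, e.g.:
+ *
+ *   DBG0("HASH ACL apply: sw_if_index %d acl %d", sw_if_index, acl_index);
+ *   DBG("KEY %016llx %016llx", kv.key[0], kv.key[1]);
+ */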
+#endif
+
diff --git a/src/plugins/acl/hash_lookup_types.h b/src/plugins/acl/hash_lookup_types.h
new file mode 100644
index 00000000..1fa197ec
--- /dev/null
+++ b/src/plugins/acl/hash_lookup_types.h
@@ -0,0 +1,107 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _ACL_HASH_LOOKUP_TYPES_H_
+#define _ACL_HASH_LOOKUP_TYPES_H_
+
+/* The structure representing a single entry with its hash representation */
+typedef struct {
+ /* these two entries refer to the original ACL# and rule# within that ACL */
+ u32 acl_index;
+ u32 ace_index;
+
+ u32 mask_type_index;
+ u8 src_portrange_not_powerof2;
+ u8 dst_portrange_not_powerof2;
+
+ fa_5tuple_t match;
+ u8 action;
+} hash_ace_info_t;
+
+/*
+ * The structure holding the information necessary for the hash-based ACL operation
+ */
+typedef struct {
+ /* The mask types present in this ACL */
+ uword *mask_type_index_bitmap;
+ /* hash ACL applied on these interfaces */
+ u32 *inbound_sw_if_index_list;
+ u32 *outbound_sw_if_index_list;
+ hash_ace_info_t *rules;
+} hash_acl_info_t;
+
+typedef struct {
+ /* original non-compiled ACL */
+ u32 acl_index;
+ u32 ace_index;
+ /* the index of the hash_ace_info_t */
+ u32 hash_ace_info_index;
+ /*
+ * in case of the same key having multiple entries,
+ * this holds the index of the next entry.
+ */
+ u32 next_applied_entry_index;
+ /*
+ * previous entry in the list of the chained ones;
+ * if ~0 then this is the entry pointed to from the hash.
+ */
+ u32 prev_applied_entry_index;
+ /*
+ * chain tail, if this is the first entry
+ */
+ u32 tail_applied_entry_index;
+ /*
+ * number of hits on this entry
+ */
+ u64 hitcount;
+ /*
+ * Action of this applied ACE
+ */
+ u8 action;
+} applied_hash_ace_entry_t;
+
+typedef struct {
+ /*
+ * A logical OR of all the applied_ace_hash_entry_t=>
+ * hash_ace_info_t=>mask_type_index bits set
+ */
+ uword *mask_type_index_bitmap;
+ /* applied ACLs so we can track them independently from main ACL module */
+ u32 *applied_acls;
+} applied_hash_acl_info_t;
+
+
+typedef union {
+ u64 as_u64;
+ struct {
+ u32 applied_entry_index;
+ u16 reserved_u16;
+ u8 reserved_u8;
+ /* means there is some other entry in front intersecting with this one */
+ u8 shadowed:1;
+ u8 need_portrange_check:1;
+ u8 reserved_flags:6;
+ };
+} hash_acl_lookup_value_t;
+
+#define CT_ASSERT_EQUAL(name, x,y) typedef int assert_ ## name ## _compile_time_assertion_failed[((x) == (y))-1]
+
+CT_ASSERT_EQUAL(hash_acl_lookup_value_t_is_u64, sizeof(hash_acl_lookup_value_t), sizeof(u64));
+
+#undef CT_ASSERT_EQUAL
+
+#endif
diff --git a/src/plugins/acl/manual_fns.h b/src/plugins/acl/manual_fns.h
new file mode 100644
index 00000000..e00f1abc
--- /dev/null
+++ b/src/plugins/acl/manual_fns.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_manual_fns_h +#define included_manual_fns_h + +#include <vnet/ip/format.h> +#include <vnet/ethernet/ethernet.h> + +/* Macro to finish up custom dump fns */ +#define PRINT_S \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); + +static inline void +vl_api_acl_rule_t_array_endian(vl_api_acl_rule_t *rules, u32 count) +{ + u32 i; + for(i=0; i<count; i++) { + vl_api_acl_rule_t_endian (&rules[i]); + } +} + +static inline void +vl_api_macip_acl_rule_t_array_endian(vl_api_macip_acl_rule_t *rules, u32 count) +{ + u32 i; + for(i=0; i<count; i++) { + vl_api_macip_acl_rule_t_endian (&rules[i]); + } +} + +static inline void +vl_api_acl_details_t_endian (vl_api_acl_details_t * a) +{ + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->context = clib_net_to_host_u32 (a->context); + a->acl_index = clib_net_to_host_u32 (a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32 (a->count); + vl_api_acl_rule_t_array_endian (a->r, a->count); +} + +static inline void +vl_api_macip_acl_details_t_endian (vl_api_macip_acl_details_t * a) +{ + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->context = clib_net_to_host_u32 (a->context); + a->acl_index = clib_net_to_host_u32 (a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32 (a->count); + vl_api_macip_acl_rule_t_array_endian (a->r, a->count); +} + + +static inline void +vl_api_acl_add_replace_t_endian (vl_api_acl_add_replace_t * a) +{ + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->client_index = clib_net_to_host_u32 (a->client_index); + a->context = clib_net_to_host_u32 (a->context); + a->acl_index = clib_net_to_host_u32 (a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32 (a->count); + vl_api_acl_rule_t_array_endian (a->r, a->count); +} + +static inline void +vl_api_macip_acl_add_t_endian (vl_api_macip_acl_add_t * a) +{ + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->client_index = clib_net_to_host_u32 (a->client_index); + a->context = clib_net_to_host_u32 (a->context); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32 (a->count); + vl_api_macip_acl_rule_t_array_endian (a->r, a->count); +} + +static inline void +vl_api_macip_acl_add_replace_t_endian (vl_api_macip_acl_add_replace_t * a) +{ + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->client_index = clib_net_to_host_u32 (a->client_index); + a->context = clib_net_to_host_u32 (a->context); + a->acl_index = clib_net_to_host_u32 (a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32 (a->count); + vl_api_macip_acl_rule_t_array_endian (a->r, a->count); +} + +static inline u8 * +format_acl_action(u8 *s, u8 action) +{ + switch(action) { + case 0: + s = format (s, "deny"); + break; + case 1: + s = format (s, "permit"); + break; + case 2: + s = format (s, "permit+reflect"); + 
break;
+ default:
+ s = format (s, "action %d", action);
+ }
+ return(s);
+}
+
+static inline void *
+vl_api_acl_rule_t_print (vl_api_acl_rule_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "  %s ", a->is_ipv6 ? "ipv6" : "ipv4");
+ s = format_acl_action (s, a->is_permit);
+ s = format (s, " \\\n");
+
+ if (a->is_ipv6)
+ s = format (s, "   src %U/%d dst %U/%d \\\n",
+ format_ip6_address, a->src_ip_addr, a->src_ip_prefix_len,
+ format_ip6_address, a->dst_ip_addr, a->dst_ip_prefix_len);
+ else
+ s = format (s, "   src %U/%d dst %U/%d \\\n",
+ format_ip4_address, a->src_ip_addr, a->src_ip_prefix_len,
+ format_ip4_address, a->dst_ip_addr, a->dst_ip_prefix_len);
+ s = format (s, "   proto %d \\\n", a->proto);
+ s = format (s, "   sport %d-%d dport %d-%d \\\n",
+ clib_net_to_host_u16 (a->srcport_or_icmptype_first),
+ clib_net_to_host_u16 (a->srcport_or_icmptype_last),
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_first),
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_last));
+
+ s = format (s, "   tcpflags %u mask %u, \\",
+ a->tcp_flags_value, a->tcp_flags_mask);
+ PRINT_S;
+ return handle;
+}
+
+
+
+static inline void *
+vl_api_macip_acl_rule_t_print (vl_api_macip_acl_rule_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, " %s %s \\\n", a->is_ipv6 ? "ipv6" : "ipv4",
+ a->is_permit ? "permit" : "deny");
+
+ s = format (s, "   src mac %U mask %U \\\n",
+ format_ethernet_address, a->src_mac,
+ format_ethernet_address, a->src_mac_mask);
+
+ if (a->is_ipv6)
+ s = format (s, "   src ip %U/%d, \\",
+ format_ip6_address, a->src_ip_addr, a->src_ip_prefix_len);
+ else
+ s = format (s, "   src ip %U/%d, \\",
+ format_ip4_address, a->src_ip_addr, a->src_ip_prefix_len);
+
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_add_replace_t_print (vl_api_acl_add_replace_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: acl_add_replace count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "SCRIPT: acl_add_replace %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\\\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_acl_rule_t_print (&a->r[i], handle);
+
+ s = format(s, "\n");
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_del_t_print (vl_api_acl_del_t * a, void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: acl_del %d ",
+ clib_host_to_net_u32 (a->acl_index));
+
+ PRINT_S;
+ return handle;
+}
+
+
+static inline void *
+vl_api_acl_details_t_print (vl_api_acl_details_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: acl_details count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "acl_details index %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_acl_rule_t_print (&a->r[i], handle);
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_details_t_print (vl_api_macip_acl_details_t * a,
+ void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: macip_acl_details count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "macip_acl_details index %d count %d ",
+ acl_index, count);
+
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format(s, "\n");
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_add_t_print (vl_api_macip_acl_add_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: macip_acl_add count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "SCRIPT: macip_acl_add ");
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format (s, "count %d \\\n", count);
+
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ s = format (0, "\n");
+ PRINT_S;
+
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_add_replace_t_print (vl_api_macip_acl_add_replace_t * a, void *handle)
+{
+ u8 *s = 0;
+ int i;
+ u32 acl_index = clib_net_to_host_u32 (a->acl_index);
+ u32 count = clib_net_to_host_u32 (a->count);
+ if (count > 0x100000)
+ {
+ s = format (s, "WARN: macip_acl_add_replace count endianness wrong? Fixup to avoid long loop.\n");
+ count = a->count;
+ }
+
+ s = format (s, "SCRIPT: macip_acl_add_replace %d count %d ",
+ acl_index, count);
+ if (a->tag[0])
+ s = format (s, "tag %s ", a->tag);
+
+ s = format (s, "\\\n");
+
+ PRINT_S;
+
+ for (i = 0; i < count; i++)
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+
+ s = format (0, "\n");
+ PRINT_S;
+
+ return handle;
+}
+
+static inline void *
+vl_api_acl_interface_set_acl_list_t_print (vl_api_acl_interface_set_acl_list_t
+ * a, void *handle)
+{
+ u8 *s;
+ int i;
+
+ s = format
+ (0, "SCRIPT: acl_interface_set_acl_list sw_if_index %d count %d\n",
+ clib_net_to_host_u32 (a->sw_if_index), (u32) a->count);
+
+ s = format (s, "  input ");
+
+ for (i = 0; i < a->count; i++)
+ {
+ if (i == a->n_input)
+ s = format (s, "output ");
+ s = format (s, "%d ", clib_net_to_host_u32 (a->acls[i]));
+ }
+
+ PRINT_S;
+ return handle;
+}
+
+static inline void *
+vl_api_acl_interface_add_del_t_print (vl_api_acl_interface_add_del_t * a,
+ void *handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: acl_interface_add_del sw_if_index %d acl %d ",
+ clib_net_to_host_u32 (a->sw_if_index),
+ clib_net_to_host_u32 (a->acl_index));
+ s = format (s, "%s %s",
+ a->is_input ? "input" : "output", a->is_add ? "add" : "del");
"add" : "del"); + + PRINT_S; + return handle; +} + +static inline void *vl_api_macip_acl_interface_add_del_t_print + (vl_api_macip_acl_interface_add_del_t * a, void *handle) +{ + u8 *s; + + s = format + (0, + "SCRIPT: macip_acl_interface_add_del sw_if_index %d acl_index %d ", + clib_net_to_host_u32 (a->sw_if_index), + clib_net_to_host_u32 (a->acl_index)); + s = format (s, "%s", a->is_add ? "add" : "del"); + + PRINT_S; + return handle; +} + + +static inline void * +vl_api_macip_acl_del_t_print (vl_api_macip_acl_del_t * a, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: macip_acl_del %d ", + clib_host_to_net_u32 (a->acl_index)); + + PRINT_S; + return handle; +} + + +#endif /* included_manual_fns_h */ |