diff options
author | Damjan Marion <damarion@cisco.com> | 2016-12-28 18:38:59 +0100 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2017-01-01 18:11:43 +0100 |
commit | cb034b9b374927c7552e36dcbc306d8456b2a0cb (patch) | |
tree | 9ff64f9792560630c8cf8faa2f74fc20671c30f1 /src/plugins | |
parent | fdc62abdc113ea63dc867375bd49ef3043dcd290 (diff) |
Move java,lua api and remaining plugins to src/
Change-Id: I1c3b87e886603678368428ae56a6bd3327cbc90d
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins')
95 files changed, 25541 insertions, 0 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am index ffc4b3abddc..987310b7b40 100644 --- a/src/plugins/Makefile.am +++ b/src/plugins/Makefile.am @@ -22,10 +22,15 @@ BUILT_SOURCES = vppplugins_LTLIBRARIES = vppapitestplugins_LTLIBRARIES = noinst_HEADERS = +nobase_apiinclude_HEADERS = vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins vpppluginsdir = ${libdir}/vpp_plugins +if ENABLE_ACL_PLUGIN +include acl.am +endif + if ENABLE_FLOWPERPKT_PLUGIN include flowperpkt.am endif @@ -34,10 +39,22 @@ if ENABLE_ILA_PLUGIN include ila.am endif +if ENABLE_IOAM_PLUGIN +include ioam.am +endif + +if ENABLE_LB_PLUGIN +include lb.am +endif + if ENABLE_SIXRD_PLUGIN include sixrd.am endif +if ENABLE_SNAT_PLUGIN +include snat.am +endif + include ../suffix-rules.mk # Remove *.la files @@ -50,6 +67,7 @@ install-data-hook: ############################################################################### apidir = $(prefix)/share/vpp/api/plugins +apiincludedir = ${includedir}/vpp_plugins api_DATA = \ $(patsubst %.api,%.api.json,$(API_FILES)) diff --git a/src/plugins/acl.am b/src/plugins/acl.am new file mode 100644 index 00000000000..efed31c2912 --- /dev/null +++ b/src/plugins/acl.am @@ -0,0 +1,35 @@ +# Copyright (c) 2016 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += acl_test_plugin.la +vppplugins_LTLIBRARIES += acl_plugin.la + +acl_plugin_la_SOURCES = \ + acl/acl.c \ + acl/node_in.c \ + acl/node_out.c \ + acl/l2sess.c \ + acl/l2sess_node.c \ + acl/l2sess.h \ + acl/acl_plugin.api.h + +API_FILES += acl/acl.api + +nobase_apiinclude_HEADERS += \ + acl/acl_all_api_h.h \ + acl/acl_msg_enum.h \ + acl/acl.api.h + +acl_test_plugin_la_SOURCES = acl/acl_test.c acl/acl_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api new file mode 100644 index 00000000000..58a5a17180e --- /dev/null +++ b/src/plugins/acl/acl.api @@ -0,0 +1,444 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + This file defines the vpp control-plane API messages + used to control the ACL plugin +*/ + + +/** \brief Get the plugin version + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ + +define acl_plugin_get_version +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply to get the plugin version + @param context - returned sender context, to match reply w/ request + @param major - Incremented every time a known breaking behavior change is introduced + @param minor - Incremented with small changes, may be used to avoid buggy versions +*/ + +define acl_plugin_get_version_reply +{ + u32 context; + u32 major; + u32 minor; +}; + +/** \brief Access List Rule entry + @param is_permit - deny (0), permit (1), or permit+reflect(2) action on this rule. + @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0) + @param src_ip_addr - Source prefix value + @param src_ip_prefix_len - Source prefix length + @param dst_ip_addr - Destination prefix value + @param dst_ip_prefix_len - Destination prefix length + @param proto - L4 protocol (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml) + @param srcport_or_icmptype_first - beginning of source port or ICMP4/6 type range + @param srcport_or_icmptype_last - end of source port or ICMP4/6 type range + @param dstport_or_icmpcode_first - beginning of destination port or ICMP4/6 code range + @param dstport_or_icmpcode_last - end of destination port or ICMP4/6 code range + @param tcp_flags_mask - if proto==6, match masked TCP flags with this value + @param tcp_flags_value - if proto==6, mask to AND the TCP flags in the packet with +*/ + +typeonly manual_print manual_endian define acl_rule +{ + u8 is_permit; + u8 is_ipv6; + u8 src_ip_addr[16]; + u8 src_ip_prefix_len; + u8 dst_ip_addr[16]; + u8 dst_ip_prefix_len; +/* + * L4 protocol. IANA number. 1 = ICMP, 58 = ICMPv6, 6 = TCP, 17 = UDP. + * 0 => ignore L4 and ignore the ports/tcpflags when matching. + */ + u8 proto; +/* + * If the L4 protocol is TCP or UDP, the below + * hold ranges of ports, else if the L4 is ICMP/ICMPv6 + * they hold ranges of ICMP(v6) types/codes. + * + * Ranges are inclusive, i.e. to match "any" TCP/UDP port, + * use first=0,last=65535. For ICMP(v6), + * use first=0,last=255. + */ + u16 srcport_or_icmptype_first; + u16 srcport_or_icmptype_last; + u16 dstport_or_icmpcode_first; + u16 dstport_or_icmpcode_last; +/* + * for proto = 6, this matches if the + * TCP flags in the packet, ANDed with tcp_flags_mask, + * is equal to tcp_flags_value. + */ + u8 tcp_flags_mask; + u8 tcp_flags_value; +}; + +/** \brief MACIP Access List Rule entry + @param is_permit - deny (0), permit (1) action on this rule. + @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0) + @param src_mac - match masked source MAC address against this value + @param src_mac_mask - AND source MAC address with this value before matching + @param src_ip_addr - Source prefix value + @param src_ip_prefix_len - Source prefix length +*/ + +typeonly manual_print manual_endian define macip_acl_rule +{ + u8 is_permit; + u8 is_ipv6; +/* + * The source mac of the packet ANDed with src_mac_mask. + * The source ip[46] address in the packet is matched + * against src_ip_addr, with src_ip_prefix_len set to 0. + * + * For better performance, minimize the number of + * (src_mac_mask, src_ip_prefix_len) combinations + * in a MACIP ACL. + */ + u8 src_mac[6]; + u8 src_mac_mask[6]; + u8 src_ip_addr[16]; + u8 src_ip_prefix_len; +}; + +/** \brief Replace an existing ACL in-place or create a new ACL + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param acl_index - an existing ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new ACL + @param tag - a string value stored along with the ACL, for descriptive purposes + @param count - number of ACL rules + @r - Rules for this access-list +*/ + +manual_print manual_endian define acl_add_replace +{ + u32 client_index; + u32 context; + u32 acl_index; /* ~0 to add, existing ACL# to replace */ + u8 tag[64]; /* What gets in here gets out in the corresponding tag field when dumping the ACLs. */ + u32 count; + vl_api_acl_rule_t r[count]; +}; + +/** \brief Reply to add/replace ACL + @param context - returned sender context, to match reply w/ request + @param acl_index - index of the updated or newly created ACL + @param retval 0 - no error +*/ + +define acl_add_replace_reply +{ + u32 context; + u32 acl_index; + i32 retval; +}; + +/** \brief Delete an ACL + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param acl_index - ACL index to delete +*/ + +define acl_del +{ + u32 client_index; + u32 context; + u32 acl_index; +}; + +/** \brief Reply to delete the ACL + @param context - returned sender context, to match reply w/ request + @param retval 0 - no error +*/ + +define acl_del_reply +{ + u32 context; + i32 retval; +}; + +/* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */ +/** \brief Use acl_interface_set_acl_list instead + Append/remove an ACL index to/from the list of ACLs checked for an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add or delete the ACL index from the list + @param is_input - check the ACL on input (1) or output (0) + @param sw_if_index - the interface to alter the list of ACLs on + @param acl_index - index of ACL for the operation +*/ + +define acl_interface_add_del +{ + u32 client_index; + u32 context; + u8 is_add; +/* + * is_input = 0 => ACL applied on interface egress + * is_input = 1 => ACL applied on interface ingress + */ + u8 is_input; + u32 sw_if_index; + u32 acl_index; +}; + +/** \brief Reply to alter the ACL list + @param context - returned sender context, to match reply w/ request + @param retval 0 - no error +*/ + +define acl_interface_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Set the vector of input/output ACLs checked for an interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface to alter the list of ACLs on + @param count - total number of ACL indices in the vector + @param n_input - this many first elements correspond to input ACLs, the rest - output + @param acls - vector of ACL indices +*/ + +manual_endian define acl_interface_set_acl_list +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 count; + u8 n_input; /* First n_input ACLs are set as a list of input ACLs, the rest are applied as output */ + u32 acls[count]; +}; + +/** \brief Reply to set the ACL list on an interface + @param context - returned sender context, to match reply w/ request + @param retval 0 - no error +*/ + +define acl_interface_set_acl_list_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Dump the specific ACL contents or all of the ACLs' contents + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param acl_index - ACL index to dump, ~0 to dump all ACLs +*/ + +define acl_dump +{ + u32 client_index; + u32 context; + u32 acl_index; /* ~0 for all ACLs */ +}; + +/** \brief Details about a single ACL contents + @param context - returned sender context, to match reply w/ request + @param acl_index - ACL index whose contents are being sent in this message + @param tag - Descriptive tag value which was supplied at ACL creation + @param count - Number of rules in this ACL + @param r - Array of rules within this ACL +*/ + +manual_print manual_endian define acl_details +{ + u32 context; + u32 acl_index; + u8 tag[64]; /* Same blob that was supplied to us when creating the ACL, one hopes. */ + u32 count; + vl_api_acl_rule_t r[count]; +}; + +/** \brief Dump the list(s) of ACL applied to specific or all interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface to dump the ACL list for +*/ + +define acl_interface_list_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; /* ~0 for all interfaces */ +}; + +/** \brief Details about a single ACL contents + @param context - returned sender context, to match reply w/ request + @param sw_if_index - interface for which the list of ACLs is applied + @param count - total length of acl indices vector + @param n_input - this many of indices in the beginning are input ACLs, the rest - output + @param acls - the vector of ACL indices +*/ + +manual_endian define acl_interface_list_details +{ + u32 context; + u32 sw_if_index; + u8 count; + u8 n_input; + u32 acls[count]; +}; + +/** \brief Add a MACIP ACL + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param tag - descriptive value for this MACIP ACL + @param count - number of rules in this ACL + @param r - vector of MACIP ACL rules +*/ + +manual_print manual_endian define macip_acl_add +{ + u32 client_index; + u32 context; + u8 tag[64]; + u32 count; + vl_api_macip_acl_rule_t r[count]; +}; + +/** \brief Reply to add MACIP ACL + @param context - returned sender context, to match reply w/ request + @param acl_index - index of the newly created ACL + @param retval 0 - no error +*/ + +define macip_acl_add_reply +{ + u32 context; + u32 acl_index; + i32 retval; +}; + +/** \brief Delete a MACIP ACL + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param acl_index - MACIP ACL index to delete +*/ + +define macip_acl_del +{ + u32 client_index; + u32 context; + u32 acl_index; +}; + +/** \brief Reply to delete the MACIP ACL + @param context - returned sender context, to match reply w/ request + @param retval 0 - no error +*/ + +define macip_acl_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Add or delete a MACIP ACL to/from interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - add (1) or delete (0) ACL from being used on an interface + @param sw_if_index - interface to apply the action to + @param acl_index - MACIP ACL index +*/ + +define macip_acl_interface_add_del +{ + u32 client_index; + u32 context; + u8 is_add; + /* macip ACLs are always input */ + u32 sw_if_index; + u32 acl_index; +}; + +/** \brief Reply to apply/unapply the MACIP ACL + @param context - returned sender context, to match reply w/ request + @param retval 0 - no error +*/ + +define macip_acl_interface_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Dump one or all defined MACIP ACLs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param acl_index - MACIP ACL index or ~0 to dump all ACLs +*/ + +define macip_acl_dump +{ + u32 client_index; + u32 context; + u32 acl_index; /* ~0 for all ACLs */ +}; + +/** \brief Details about one MACIP ACL + @param context - returned sender context, to match reply w/ request + @param acl_index - index of this MACIP ACL + @param tag - descriptive tag which was supplied during the creation + @param count - length of the vector of MACIP ACL rules + @param r - rules comprising this ACL +*/ + +manual_print manual_endian define macip_acl_details +{ + u32 context; + u32 acl_index; + u8 tag[64]; + u32 count; + vl_api_macip_acl_rule_t r[count]; +}; + +/** \brief Get the vector of MACIP ACL IDs applied to the interfaces + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ + +define macip_acl_interface_get +{ + u32 client_index; + u32 context; +}; + +/** \brief Reply with the vector of MACIP ACLs by sw_if_index + @param context - returned sender context, to match reply w/ request + @param count - total number of elements in the vector + @param acls - the vector of active MACACL indices per sw_if_index +*/ + +define macip_acl_interface_get_reply +{ + u32 context; + u32 count; + u32 acls[count]; +}; + diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c new file mode 100644 index 00000000000..8ff5a6b721c --- /dev/null +++ b/src/plugins/acl/acl.c @@ -0,0 +1,1901 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stddef.h> + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <acl/acl.h> +#include <acl/l2sess.h> + +#include <vnet/l2/l2_classify.h> +#include <vnet/classify/input_acl.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +/* define message IDs */ +#include <acl/acl_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <acl/acl_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <acl/acl_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <acl/acl_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <acl/acl_all_api_h.h> +#undef vl_api_version + +#include "node_in.h" +#include "node_out.h" + +acl_main_t acl_main; + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + q = vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+am->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO3(t, n, body) \ +do { \ + unix_shared_memory_queue_t * q; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + q = vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \ + rmp->_vl_msg_id = ntohs((t)+am->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* List of message types that this plugin understands */ + +#define foreach_acl_plugin_api_msg \ +_(ACL_PLUGIN_GET_VERSION, acl_plugin_get_version) \ +_(ACL_ADD_REPLACE, acl_add_replace) \ +_(ACL_DEL, acl_del) \ +_(ACL_INTERFACE_ADD_DEL, acl_interface_add_del) \ +_(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list) \ +_(ACL_DUMP, acl_dump) \ +_(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump) \ +_(MACIP_ACL_ADD, macip_acl_add) \ +_(MACIP_ACL_DEL, macip_acl_del) \ +_(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del) \ +_(MACIP_ACL_DUMP, macip_acl_dump) \ +_(MACIP_ACL_INTERFACE_GET, macip_acl_interface_get) + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + acl_main_t *am = &acl_main; + clib_error_t *error = 0; + + am->vlib_main = vm; + am->vnet_main = h->vnet_main; + am->ethernet_main = h->ethernet_main; + + l2sess_vlib_plugin_register(vm, h, from_early_init); + + return error; +} + + +static void +vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp) +{ + acl_main_t *am = &acl_main; + vl_api_acl_plugin_get_version_reply_t *rmp; + int msg_size = sizeof (*rmp); + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + rmp = vl_msg_api_alloc (msg_size); + memset (rmp, 0, msg_size); + rmp->_vl_msg_id = + ntohs (VL_API_ACL_PLUGIN_GET_VERSION_REPLY + am->msg_id_base); + rmp->context = mp->context; + rmp->major = htonl (ACL_PLUGIN_VERSION_MAJOR); + rmp->minor = htonl (ACL_PLUGIN_VERSION_MINOR); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + + +static int +acl_add_list (u32 count, vl_api_acl_rule_t rules[], + u32 * acl_list_index, u8 * tag) +{ + acl_main_t *am = &acl_main; + acl_list_t *a; + acl_rule_t *r; + acl_rule_t *acl_new_rules; + int i; + + if (*acl_list_index != ~0) + { + /* They supplied some number, let's see if this ACL exists */ + if (pool_is_free_index (am->acls, *acl_list_index)) + { + /* tried to replace a non-existent ACL, no point doing anything */ + return -1; + } + } + + /* Create and populate the rules */ + acl_new_rules = clib_mem_alloc_aligned (sizeof (acl_rule_t) * count, + CLIB_CACHE_LINE_BYTES); + if (!acl_new_rules) + { + /* Could not allocate rules. New or existing ACL - bail out regardless */ + return -1; + } + + for (i = 0; i < count; i++) + { + r = &acl_new_rules[i]; + r->is_permit = rules[i].is_permit; + r->is_ipv6 = rules[i].is_ipv6; + if (r->is_ipv6) + { + memcpy (&r->src, rules[i].src_ip_addr, sizeof (r->src)); + memcpy (&r->dst, rules[i].dst_ip_addr, sizeof (r->dst)); + } + else + { + memcpy (&r->src.ip4, rules[i].src_ip_addr, sizeof (r->src.ip4)); + memcpy (&r->dst.ip4, rules[i].dst_ip_addr, sizeof (r->dst.ip4)); + } + r->src_prefixlen = rules[i].src_ip_prefix_len; + r->dst_prefixlen = rules[i].dst_ip_prefix_len; + r->proto = rules[i].proto; + r->src_port_or_type_first = rules[i].srcport_or_icmptype_first; + r->src_port_or_type_last = rules[i].srcport_or_icmptype_last; + r->dst_port_or_code_first = rules[i].dstport_or_icmpcode_first; + r->dst_port_or_code_last = rules[i].dstport_or_icmpcode_last; + r->tcp_flags_value = rules[i].tcp_flags_value; + r->tcp_flags_mask = rules[i].tcp_flags_mask; + } + + if (~0 == *acl_list_index) + { + /* Get ACL index */ + pool_get_aligned (am->acls, a, CLIB_CACHE_LINE_BYTES); + memset (a, 0, sizeof (*a)); + /* Will return the newly allocated ACL index */ + *acl_list_index = a - am->acls; + } + else + { + a = am->acls + *acl_list_index; + /* Get rid of the old rules */ + clib_mem_free (a->rules); + } + a->rules = acl_new_rules; + a->count = count; + memcpy (a->tag, tag, sizeof (a->tag)); + + return 0; +} + +static int +acl_del_list (u32 acl_list_index) +{ + acl_main_t *am = &acl_main; + acl_list_t *a; + int i, ii; + if (pool_is_free_index (am->acls, acl_list_index)) + { + return -1; + } + + /* delete any references to the ACL */ + for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++) + { + for (ii = 0; ii < vec_len (am->output_acl_vec_by_sw_if_index[i]); + /* see body */ ) + { + if (acl_list_index == am->output_acl_vec_by_sw_if_index[i][ii]) + { + vec_del1 (am->output_acl_vec_by_sw_if_index[i], ii); + } + else + { + ii++; + } + } + } + for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index); i++) + { + for (ii = 0; ii < vec_len (am->input_acl_vec_by_sw_if_index[i]); + /* see body */ ) + { + if (acl_list_index == am->input_acl_vec_by_sw_if_index[i][ii]) + { + vec_del1 (am->input_acl_vec_by_sw_if_index[i], ii); + } + else + { + ii++; + } + } + } + + /* now we can delete the ACL itself */ + a = &am->acls[acl_list_index]; + if (a->rules) + { + clib_mem_free (a->rules); + } + pool_put (am->acls, a); + return 0; +} + +/* Some aids in ASCII graphing the content */ +#define XX "\377" +#define __ "\000" +#define _(x) +#define v + +u8 ip4_5tuple_mask[] = +_(" dmac smac etype ") +_(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v + _(" v ihl totlen ") + _(0x0000) + __ __ __ __ + _(" ident fl+fo ") + _(0x0004) + __ __ __ __ + _(" ttl pr checksum ") + _(0x0008) + __ XX __ __ + _(" src address ") + _(0x000C) + XX XX XX XX + _(" dst address ") + _(0x0010) + XX XX XX XX + _("L4 T/U sport dport ") + _(tcpudp) + XX XX XX XX + _(padpad) + __ __ __ __ + _(padpad) + __ __ __ __ + _(padeth) + __ __; + + u8 ip6_5tuple_mask[] = + _(" dmac smac etype ") + _(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v + _(" v tc + flow ") + _(0x0000) __ __ __ __ + _(" plen nh hl ") + _(0x0004) __ __ XX __ + _(" src address ") + _(0x0008) XX XX XX XX + _(0x000C) XX XX XX XX + _(0x0010) XX XX XX XX + _(0x0014) XX XX XX XX + _(" dst address ") + _(0x0018) XX XX XX XX + _(0x001C) XX XX XX XX + _(0x0020) XX XX XX XX + _(0x0024) XX XX XX XX + _("L4T/U sport dport ") + _(tcpudp) XX XX XX XX _(padpad) __ __ __ __ _(padeth) __ __; + +#undef XX +#undef __ +#undef _ +#undef v + + static int count_skip (u8 * p, u32 size) +{ + u64 *p64 = (u64 *) p; + /* Be tolerant to null pointer */ + if (0 == p) + return 0; + + while ((0ULL == *p64) && ((u8 *) p64 - p) < size) + { + p64++; + } + return (p64 - (u64 *) p) / 2; +} + +static int +acl_classify_add_del_table_big (vnet_classify_main_t * cm, u8 * mask, + u32 mask_len, u32 next_table_index, + u32 miss_next_index, u32 * table_index, + int is_add) +{ + u32 nbuckets = 65536; + u32 memory_size = 2 << 30; + u32 skip = count_skip (mask, mask_len); + u32 match = (mask_len / 16) - skip; + u8 *skip_mask_ptr = mask + 16 * skip; + u32 current_data_flag = 0; + int current_data_offset = 0; + + if (0 == match) + match = 1; + + return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, + memory_size, skip, match, + next_table_index, miss_next_index, + table_index, current_data_flag, + current_data_offset, is_add, + 1 /* delete_chain */); +} + +static int +acl_classify_add_del_table_small (vnet_classify_main_t * cm, u8 * mask, + u32 mask_len, u32 next_table_index, + u32 miss_next_index, u32 * table_index, + int is_add) +{ + u32 nbuckets = 32; + u32 memory_size = 2 << 20; + u32 skip = count_skip (mask, mask_len); + u32 match = (mask_len / 16) - skip; + u8 *skip_mask_ptr = mask + 16 * skip; + u32 current_data_flag = 0; + int current_data_offset = 0; + + if (0 == match) + match = 1; + + return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets, + memory_size, skip, match, + next_table_index, miss_next_index, + table_index, current_data_flag, + current_data_offset, is_add, + 1 /* delete_chain */); +} + + +static int +acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + + vec_validate_init_empty (am->acl_ip4_input_classify_table_by_sw_if_index, + sw_if_index, ~0); + vec_validate_init_empty (am->acl_ip6_input_classify_table_by_sw_if_index, + sw_if_index, ~0); + + vnet_l2_input_classify_enable_disable (sw_if_index, 0); + + if (am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] != ~0) + { + ip4_table_index = + am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index]; + am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = ~0; + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip4_table_index, 0); + } + if (am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] != ~0) + { + ip6_table_index = + am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index]; + am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = ~0; + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip6_table_index, 0); + } + + return 0; +} + +static int +acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + + vec_validate_init_empty (am->acl_ip4_output_classify_table_by_sw_if_index, + sw_if_index, ~0); + vec_validate_init_empty (am->acl_ip6_output_classify_table_by_sw_if_index, + sw_if_index, ~0); + + vnet_l2_output_classify_enable_disable (sw_if_index, 0); + + if (am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] != ~0) + { + ip4_table_index = + am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index]; + am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = ~0; + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip4_table_index, 0); + } + if (am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] != ~0) + { + ip6_table_index = + am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index]; + am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = ~0; + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip6_table_index, 0); + } + + return 0; +} + +static int +acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + int rv; + + /* in case there were previous tables attached */ + acl_unhook_l2_input_classify (am, sw_if_index); + rv = + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip4_table_index, 1); + if (rv) + return rv; + rv = + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip6_table_index, 1); + if (rv) + { + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip4_table_index, 0); + return rv; + } + rv = + vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, ~0); + clib_warning + ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n", + sw_if_index, ip4_table_index, ip6_table_index); + if (rv) + { + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip6_table_index, 0); + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_input_classify_next_acl, + &ip4_table_index, 0); + return rv; + } + + am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = + ip4_table_index; + am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = + ip6_table_index; + + vnet_l2_input_classify_enable_disable (sw_if_index, 1); + return rv; +} + +static int +acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u32 ip4_table_index = ~0; + u32 ip6_table_index = ~0; + int rv; + + /* in case there were previous tables attached */ + acl_unhook_l2_output_classify (am, sw_if_index); + rv = + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip4_table_index, 1); + if (rv) + return rv; + rv = + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip6_table_index, 1); + if (rv) + { + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip4_table_index, 0); + return rv; + } + rv = + vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index, + ip6_table_index, ~0); + clib_warning + ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n", + sw_if_index, ip4_table_index, ip6_table_index); + if (rv) + { + acl_classify_add_del_table_big (cm, ip6_5tuple_mask, + sizeof (ip6_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip6_table_index, 0); + acl_classify_add_del_table_big (cm, ip4_5tuple_mask, + sizeof (ip4_5tuple_mask) - 1, ~0, + am->l2_output_classify_next_acl, + &ip4_table_index, 0); + return rv; + } + + am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = + ip4_table_index; + am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = + ip6_table_index; + + vnet_l2_output_classify_enable_disable (sw_if_index, 1); + return rv; +} + + +int +acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index, + int enable_disable) +{ + int rv; + + /* Utterly wrong? */ + if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + rv = acl_hook_l2_input_classify (am, sw_if_index); + } + else + { + rv = acl_unhook_l2_input_classify (am, sw_if_index); + } + + return rv; +} + +int +acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index, + int enable_disable) +{ + int rv; + + /* Utterly wrong? */ + if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (enable_disable) + { + rv = acl_hook_l2_output_classify (am, sw_if_index); + } + else + { + rv = acl_unhook_l2_output_classify (am, sw_if_index); + } + + return rv; +} + + +static int +acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) +{ + acl_main_t *am = &acl_main; + if (is_input) + { + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + vec_add (am->input_acl_vec_by_sw_if_index[sw_if_index], &acl_list_index, + 1); + acl_interface_in_enable_disable (am, sw_if_index, 1); + } + else + { + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + vec_add (am->output_acl_vec_by_sw_if_index[sw_if_index], + &acl_list_index, 1); + acl_interface_out_enable_disable (am, sw_if_index, 1); + } + return 0; +} + +static int +acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index) +{ + acl_main_t *am = &acl_main; + int i; + int rv = -1; + if (is_input) + { + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]); + i++) + { + if (acl_list_index == + am->input_acl_vec_by_sw_if_index[sw_if_index][i]) + { + vec_del1 (am->input_acl_vec_by_sw_if_index[sw_if_index], i); + rv = 0; + break; + } + } + if (0 == vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index])) + { + acl_interface_in_enable_disable (am, sw_if_index, 0); + } + } + else + { + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + for (i = 0; + i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); i++) + { + if (acl_list_index == + am->output_acl_vec_by_sw_if_index[sw_if_index][i]) + { + vec_del1 (am->output_acl_vec_by_sw_if_index[sw_if_index], i); + rv = 0; + break; + } + } + if (0 == vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index])) + { + acl_interface_out_enable_disable (am, sw_if_index, 0); + } + } + return rv; +} + +static void +acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input) +{ + acl_main_t *am = &acl_main; + if (is_input) + { + acl_interface_in_enable_disable (am, sw_if_index, 0); + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + vec_reset_length (am->input_acl_vec_by_sw_if_index[sw_if_index]); + } + else + { + acl_interface_out_enable_disable (am, sw_if_index, 0); + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]); + } +} + +static int +acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input, + u32 acl_list_index) +{ + int rv = -1; + if (is_add) + { + rv = + acl_interface_add_inout_acl (sw_if_index, is_input, acl_list_index); + } + else + { + rv = + acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index); + } + return rv; +} + + +static void * +get_ptr_to_offset (vlib_buffer_t * b0, int offset) +{ + u8 *p = vlib_buffer_get_current (b0) + offset; + return p; +} + +static u8 +acl_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6) +{ + u8 proto; + int proto_offset; + if (node_is_ip6) + { + proto_offset = 20; + } + else + { + proto_offset = 23; + } + proto = *((u8 *) vlib_buffer_get_current (b0) + proto_offset); + return proto; +} + +static int +acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2, int prefixlen, + int is_ip6) +{ + if (prefixlen == 0) + { + /* match any always succeeds */ + return 1; + } + if (is_ip6) + { + if (memcmp (addr1, addr2, prefixlen / 8)) + { + /* If the starting full bytes do not match, no point in bittwidling the thumbs further */ + return 0; + } + if (prefixlen % 8) + { + u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8); + u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8); + u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1)); + return (b1 & mask0) == b2; + } + else + { + /* The prefix fits into integer number of bytes, so nothing left to do */ + return 1; + } + } + else + { + uint32_t a1 = ntohl (addr1->ip4.as_u32); + uint32_t a2 = ntohl (addr2->ip4.as_u32); + uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1); + return (a1 & mask0) == a2; + } +} + +static int +acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6) +{ + return ((port >= port_first) && (port <= port_last)); +} + +static int +acl_packet_match (acl_main_t * am, u32 acl_index, vlib_buffer_t * b0, + u8 * r_action, int *r_is_ip6, u32 * r_acl_match_p, + u32 * r_rule_match_p, u32 * trace_bitmap) +{ + ethernet_header_t *h0; + u16 type0; + + ip46_address_t src, dst; + int is_ip6; + int is_ip4; + u8 proto; + u16 src_port; + u16 dst_port; + u8 tcp_flags = 0; + int i; + acl_list_t *a; + acl_rule_t *r; + + h0 = vlib_buffer_get_current (b0); + type0 = clib_net_to_host_u16 (h0->type); + is_ip4 = (type0 == ETHERNET_TYPE_IP4); + is_ip6 = (type0 == ETHERNET_TYPE_IP6); + + if (!(is_ip4 || is_ip6)) + { + return 0; + } + /* The bunch of hardcoded offsets here is intentional to get rid of them + ASAP, when getting to a faster matching code */ + if (is_ip4) + { + clib_memcpy (&src.ip4, get_ptr_to_offset (b0, 26), 4); + clib_memcpy (&dst.ip4, get_ptr_to_offset (b0, 30), 4); + proto = acl_get_l4_proto (b0, 0); + if (1 == proto) + { + *trace_bitmap |= 0x00000001; + /* type */ + src_port = *(u8 *) get_ptr_to_offset (b0, 34); + /* code */ + dst_port = *(u8 *) get_ptr_to_offset (b0, 35); + } + else + { + /* assume TCP/UDP */ + src_port = (*(u16 *) get_ptr_to_offset (b0, 34)); + dst_port = (*(u16 *) get_ptr_to_offset (b0, 36)); + /* UDP gets ability to check on an oddball data byte as a bonus */ + tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 20 + 13); + } + } + else /* is_ipv6 implicitly */ + { + clib_memcpy (&src, get_ptr_to_offset (b0, 22), 16); + clib_memcpy (&dst, get_ptr_to_offset (b0, 38), 16); + proto = acl_get_l4_proto (b0, 1); + if (58 == proto) + { + *trace_bitmap |= 0x00000002; + /* type */ + src_port = *(u8 *) get_ptr_to_offset (b0, 54); + /* code */ + dst_port = *(u8 *) get_ptr_to_offset (b0, 55); + } + else + { + /* assume TCP/UDP */ + src_port = (*(u16 *) get_ptr_to_offset (b0, 54)); + dst_port = (*(u16 *) get_ptr_to_offset (b0, 56)); + tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 40 + 13); + } + } + if (pool_is_free_index (am->acls, acl_index)) + { + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = -1; + /* the ACL does not exist but is used for policy. Block traffic. */ + return 0; + } + a = am->acls + acl_index; + for (i = 0; i < a->count; i++) + { + r = a->rules + i; + if (is_ip6 != r->is_ipv6) + { + continue; + } + if (!acl_match_addr (&dst, &r->dst, r->dst_prefixlen, is_ip6)) + continue; + if (!acl_match_addr (&src, &r->src, r->src_prefixlen, is_ip6)) + continue; + if (r->proto) + { + if (proto != r->proto) + continue; + if (!acl_match_port + (src_port, r->src_port_or_type_first, r->src_port_or_type_last, + is_ip6)) + continue; + if (!acl_match_port + (dst_port, r->dst_port_or_code_first, r->dst_port_or_code_last, + is_ip6)) + continue; + /* No need for check of proto == TCP, since in other rules both fields should be zero, so this match will succeed */ + if ((tcp_flags & r->tcp_flags_mask) != r->tcp_flags_value) + continue; + } + /* everything matches! */ + *r_action = r->is_permit; + *r_is_ip6 = is_ip6; + if (r_acl_match_p) + *r_acl_match_p = acl_index; + if (r_rule_match_p) + *r_rule_match_p = i; + return 1; + } + return 0; +} + +void +input_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp, + u32 * acl_match_p, u32 * rule_match_p, + u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + uint8_t action = 0; + int is_ip6 = 0; + int i; + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]); + i++) + { + if (acl_packet_match + (am, am->input_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action, + &is_ip6, acl_match_p, rule_match_p, trace_bitmap)) + { + if (is_ip6) + { + *nextp = am->acl_in_ip6_match_next[action]; + } + else + { + *nextp = am->acl_in_ip4_match_next[action]; + } + return; + } + } + if (vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]) > 0) + { + /* If there are ACLs and none matched, deny by default */ + *nextp = 0; + } + +} + +void +output_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp, + u32 * acl_match_p, u32 * rule_match_p, + u32 * trace_bitmap) +{ + acl_main_t *am = &acl_main; + uint8_t action = 0; + int is_ip6 = 0; + int i; + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); + i++) + { + if (acl_packet_match + (am, am->output_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action, + &is_ip6, acl_match_p, rule_match_p, trace_bitmap)) + { + if (is_ip6) + { + *nextp = am->acl_out_ip6_match_next[action]; + } + else + { + *nextp = am->acl_out_ip4_match_next[action]; + } + return; + } + } + if (vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]) > 0) + { + /* If there are ACLs and none matched, deny by default */ + *nextp = 0; + } +} + +typedef struct +{ + u8 is_ipv6; + u8 mac_mask[6]; + u8 prefix_len; + u32 count; + u32 table_index; +} macip_match_type_t; + +static u32 +macip_find_match_type (macip_match_type_t * mv, u8 * mac_mask, u8 prefix_len, + u8 is_ipv6) +{ + u32 i; + if (mv) + { + for (i = 0; i < vec_len (mv); i++) + { + if ((mv[i].prefix_len == prefix_len) && (mv[i].is_ipv6 == is_ipv6) + && (0 == memcmp (mv[i].mac_mask, mac_mask, 6))) + { + return i; + } + } + } + return ~0; +} + + +/* Get metric used to sort match types. + The more specific and the more often seen - the bigger the metric */ +static int +match_type_metric (macip_match_type_t * m) +{ + /* FIXME: count the ones in the MAC mask as well, check how well this heuristic works in real life */ + return m->prefix_len + m->is_ipv6 + 10 * m->count; +} + +static int +match_type_compare (macip_match_type_t * m1, macip_match_type_t * m2) +{ + /* Ascending sort based on the metric values */ + return match_type_metric (m1) - match_type_metric (m2); +} + +/* Get the offset of L3 source within ethernet packet */ +static int +get_l3_src_offset(int is6) +{ + if(is6) + return (sizeof(ethernet_header_t) + offsetof(ip6_header_t, src_address)); + else + return (sizeof(ethernet_header_t) + offsetof(ip4_header_t, src_address)); +} + +static int +macip_create_classify_tables (acl_main_t * am, u32 macip_acl_index) +{ + macip_match_type_t *mvec = NULL; + macip_match_type_t *mt; + macip_acl_list_t *a = &am->macip_acls[macip_acl_index]; + int i; + u32 match_type_index; + u32 last_table; + u8 mask[5 * 16]; + vnet_classify_main_t *cm = &vnet_classify_main; + + /* Count the number of different types of rules */ + for (i = 0; i < a->count; i++) + { + if (~0 == + (match_type_index = + macip_find_match_type (mvec, a->rules[i].src_mac_mask, + a->rules[i].src_prefixlen, + a->rules[i].is_ipv6))) + { + match_type_index = vec_len (mvec); + vec_validate (mvec, match_type_index); + memcpy (mvec[match_type_index].mac_mask, + a->rules[match_type_index].src_mac_mask, 6); + mvec[match_type_index].prefix_len = a->rules[i].src_prefixlen; + mvec[match_type_index].is_ipv6 = a->rules[i].is_ipv6; + mvec[match_type_index].table_index = ~0; + } + mvec[match_type_index].count++; + } + /* Put the most frequently used tables last in the list so we can create classifier tables in reverse order */ + vec_sort_with_function (mvec, match_type_compare); + /* Create the classifier tables */ + last_table = ~0; + vec_foreach (mt, mvec) + { + int mask_len; + int is6 = mt->is_ipv6; + int l3_src_offs = get_l3_src_offset(is6); + memset (mask, 0, sizeof (mask)); + memcpy (&mask[6], mt->mac_mask, 6); + for (i = 0; i < (mt->prefix_len / 8); i++) + { + mask[l3_src_offs + i] = 0xff; + } + if (mt->prefix_len % 8) + { + mask[l3_src_offs + (mt->prefix_len / 8)] = + 0xff - ((1 << (8 - mt->prefix_len % 8)) - 1); + } + /* + * Round-up the number of bytes needed to store the prefix, + * and round up the number of vectors too + */ + mask_len = ((l3_src_offs + ((mt->prefix_len+7) / 8) + + (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4); + acl_classify_add_del_table_small (cm, mask, mask_len, last_table, + (~0 == last_table) ? 0 : ~0, &mt->table_index, + 1); + last_table = mt->table_index; + } + a->ip4_table_index = ~0; + a->ip6_table_index = ~0; + a->l2_table_index = last_table; + + /* Populate the classifier tables with rules from the MACIP ACL */ + for (i = 0; i < a->count; i++) + { + u32 action = 0; + u32 metadata = 0; + int is6 = a->rules[i].is_ipv6; + int l3_src_offs = get_l3_src_offset(is6); + memset (mask, 0, sizeof (mask)); + memcpy (&mask[6], a->rules[i].src_mac, 6); + if (is6) + { + memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip6, 16); + } + else + { + memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip4, 4); + } + match_type_index = + macip_find_match_type (mvec, a->rules[i].src_mac_mask, + a->rules[i].src_prefixlen, + a->rules[i].is_ipv6); + /* add session to table mvec[match_type_index].table_index; */ + vnet_classify_add_del_session (cm, mvec[match_type_index].table_index, + mask, a->rules[i].is_permit ? ~0 : 0, i, + 0, action, metadata, 1); + } + return 0; +} + +static void +macip_destroy_classify_tables (acl_main_t * am, u32 macip_acl_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + macip_acl_list_t *a = &am->macip_acls[macip_acl_index]; + + if (a->ip4_table_index != ~0) + { + acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip4_table_index, 0); + a->ip4_table_index = ~0; + } + if (a->ip6_table_index != ~0) + { + acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip6_table_index, 0); + a->ip6_table_index = ~0; + } + if (a->l2_table_index != ~0) + { + acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->l2_table_index, 0); + a->l2_table_index = ~0; + } +} + +static int +macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[], + u32 * acl_list_index, u8 * tag) +{ + acl_main_t *am = &acl_main; + macip_acl_list_t *a; + macip_acl_rule_t *r; + macip_acl_rule_t *acl_new_rules; + int i; + + /* Create and populate the rules */ + acl_new_rules = clib_mem_alloc_aligned (sizeof (macip_acl_rule_t) * count, + CLIB_CACHE_LINE_BYTES); + if (!acl_new_rules) + { + /* Could not allocate rules. New or existing ACL - bail out regardless */ + return -1; + } + + for (i = 0; i < count; i++) + { + r = &acl_new_rules[i]; + r->is_permit = rules[i].is_permit; + r->is_ipv6 = rules[i].is_ipv6; + memcpy (&r->src_mac, rules[i].src_mac, 6); + memcpy (&r->src_mac_mask, rules[i].src_mac_mask, 6); + if(rules[i].is_ipv6) + memcpy (&r->src_ip_addr.ip6, rules[i].src_ip_addr, 16); + else + memcpy (&r->src_ip_addr.ip4, rules[i].src_ip_addr, 4); + r->src_prefixlen = rules[i].src_ip_prefix_len; + } + + /* Get ACL index */ + pool_get_aligned (am->macip_acls, a, CLIB_CACHE_LINE_BYTES); + memset (a, 0, sizeof (*a)); + /* Will return the newly allocated ACL index */ + *acl_list_index = a - am->macip_acls; + + a->rules = acl_new_rules; + a->count = count; + memcpy (a->tag, tag, sizeof (a->tag)); + + /* Create and populate the classifer tables */ + macip_create_classify_tables (am, *acl_list_index); + + return 0; +} + + +/* No check for validity of sw_if_index - the callers were supposed to validate */ + +static int +macip_acl_interface_del_acl (acl_main_t * am, u32 sw_if_index) +{ + int rv; + u32 macip_acl_index; + macip_acl_list_t *a; + vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0); + macip_acl_index = am->macip_acl_by_sw_if_index[sw_if_index]; + /* No point in deleting MACIP ACL which is not applied */ + if (~0 == macip_acl_index) + return -1; + a = &am->macip_acls[macip_acl_index]; + /* remove the classifier tables off the interface L2 ACL */ + rv = + vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index, + a->ip6_table_index, a->l2_table_index, 0); + /* Unset the MACIP ACL index */ + am->macip_acl_by_sw_if_index[sw_if_index] = ~0; + return rv; +} + +/* No check for validity of sw_if_index - the callers were supposed to validate */ + +static int +macip_acl_interface_add_acl (acl_main_t * am, u32 sw_if_index, + u32 macip_acl_index) +{ + macip_acl_list_t *a; + int rv; + if (pool_is_free_index (am->macip_acls, macip_acl_index)) + { + return -1; + } + a = &am->macip_acls[macip_acl_index]; + vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0); + /* If there already a MACIP ACL applied, unapply it */ + if (~0 != am->macip_acl_by_sw_if_index[sw_if_index]) + macip_acl_interface_del_acl(am, sw_if_index); + am->macip_acl_by_sw_if_index[sw_if_index] = macip_acl_index; + /* Apply the classifier tables for L2 ACLs */ + rv = + vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index, + a->ip6_table_index, a->l2_table_index, 1); + return rv; +} + +static int +macip_acl_del_list (u32 acl_list_index) +{ + acl_main_t *am = &acl_main; + macip_acl_list_t *a; + int i; + if (pool_is_free_index (am->macip_acls, acl_list_index)) + { + return -1; + } + + /* delete any references to the ACL */ + for (i = 0; i < vec_len (am->macip_acl_by_sw_if_index); i++) + { + if (am->macip_acl_by_sw_if_index[i] == acl_list_index) + { + macip_acl_interface_del_acl (am, i); + } + } + + /* Now that classifier tables are detached, clean them up */ + macip_destroy_classify_tables (am, acl_list_index); + + /* now we can delete the ACL itself */ + a = &am->macip_acls[acl_list_index]; + if (a->rules) + { + clib_mem_free (a->rules); + } + pool_put (am->macip_acls, a); + return 0; +} + + +static int +macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add, + u32 acl_list_index) +{ + acl_main_t *am = &acl_main; + int rv = -1; + if (is_add) + { + rv = macip_acl_interface_add_acl (am, sw_if_index, acl_list_index); + } + else + { + rv = macip_acl_interface_del_acl (am, sw_if_index); + } + return rv; +} + +/* API message handler */ +static void +vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp) +{ + vl_api_acl_add_replace_reply_t *rmp; + acl_main_t *am = &acl_main; + int rv; + u32 acl_list_index = ntohl (mp->acl_index); + + rv = acl_add_list (ntohl (mp->count), mp->r, &acl_list_index, mp->tag); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY, + ({ + rmp->acl_index = htonl(acl_list_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_acl_del_t_handler (vl_api_acl_del_t * mp) +{ + acl_main_t *sm = &acl_main; + vl_api_acl_del_reply_t *rmp; + int rv; + + rv = acl_del_list (ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_ACL_DEL_REPLY); +} + +static void +vl_api_acl_interface_add_del_t_handler (vl_api_acl_interface_add_del_t * mp) +{ + acl_main_t *sm = &acl_main; + vnet_interface_main_t *im = &sm->vnet_main->interface_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + vl_api_acl_interface_add_del_reply_t *rmp; + int rv = -1; + + if (pool_is_free_index(im->sw_interfaces, sw_if_index)) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + rv = + acl_interface_add_del_inout_acl (sw_if_index, mp->is_add, + mp->is_input, ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_ACL_INTERFACE_ADD_DEL_REPLY); +} + +static void +vl_api_acl_interface_set_acl_list_t_handler + (vl_api_acl_interface_set_acl_list_t * mp) +{ + acl_main_t *sm = &acl_main; + vl_api_acl_interface_set_acl_list_reply_t *rmp; + int rv = 0; + int i; + vnet_interface_main_t *im = &sm->vnet_main->interface_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + + if (pool_is_free_index(im->sw_interfaces, sw_if_index)) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + { + acl_interface_reset_inout_acls (sw_if_index, 0); + acl_interface_reset_inout_acls (sw_if_index, 1); + + for (i = 0; i < mp->count; i++) + { + acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input), + ntohl (mp->acls[i])); + } + } + + REPLY_MACRO (VL_API_ACL_INTERFACE_SET_ACL_LIST_REPLY); +} + +static void +copy_acl_rule_to_api_rule (vl_api_acl_rule_t * api_rule, acl_rule_t * r) +{ + api_rule->is_permit = r->is_permit; + api_rule->is_ipv6 = r->is_ipv6; + if(r->is_ipv6) + { + memcpy (api_rule->src_ip_addr, &r->src, sizeof (r->src)); + memcpy (api_rule->dst_ip_addr, &r->dst, sizeof (r->dst)); + } + else + { + memcpy (api_rule->src_ip_addr, &r->src.ip4, sizeof (r->src.ip4)); + memcpy (api_rule->dst_ip_addr, &r->dst.ip4, sizeof (r->dst.ip4)); + } + api_rule->src_ip_prefix_len = r->src_prefixlen; + api_rule->dst_ip_prefix_len = r->dst_prefixlen; + api_rule->proto = r->proto; + api_rule->srcport_or_icmptype_first = r->src_port_or_type_first; + api_rule->srcport_or_icmptype_last = r->src_port_or_type_last; + api_rule->dstport_or_icmpcode_first = r->dst_port_or_code_first; + api_rule->dstport_or_icmpcode_last = r->dst_port_or_code_last; + api_rule->tcp_flags_mask = r->tcp_flags_mask; + api_rule->tcp_flags_value = r->tcp_flags_value; +} + +static void +send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, + acl_list_t * acl, u32 context) +{ + vl_api_acl_details_t *mp; + vl_api_acl_rule_t *rules; + int i; + int msg_size = sizeof (*mp) + sizeof (mp->r[0]) * acl->count; + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->count = htonl (acl->count); + mp->acl_index = htonl (acl - am->acls); + memcpy (mp->tag, acl->tag, sizeof (mp->tag)); + // clib_memcpy (mp->r, acl->rules, acl->count * sizeof(acl->rules[0])); + rules = mp->r; + for (i = 0; i < acl->count; i++) + { + copy_acl_rule_to_api_rule (&rules[i], &acl->rules[i]); + } + + clib_warning("Sending acl details for ACL index %d", ntohl(mp->acl_index)); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + + +static void +vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp) +{ + acl_main_t *am = &acl_main; + u32 acl_index; + acl_list_t *acl; + + int rv = -1; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->acl_index == ~0) + { + /* *INDENT-OFF* */ + /* Just dump all ACLs */ + pool_foreach (acl, am->acls, + ({ + send_acl_details(am, q, acl, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + acl_index = ntohl (mp->acl_index); + if (!pool_is_free_index (am->acls, acl_index)) + { + acl = &am->acls[acl_index]; + send_acl_details (am, q, acl, mp->context); + } + } + + if (rv == -1) + { + /* FIXME API: should we signal an error here at all ? */ + return; + } +} + +static void +send_acl_interface_list_details (acl_main_t * am, + unix_shared_memory_queue_t * q, + u32 sw_if_index, u32 context) +{ + vl_api_acl_interface_list_details_t *mp; + int msg_size; + int n_input; + int n_output; + int count; + int i = 0; + + vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index); + vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index); + + n_input = vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]); + n_output = vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); + count = n_input + n_output; + + msg_size = sizeof (*mp); + msg_size += sizeof (mp->acls[0]) * count; + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = + ntohs (VL_API_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->count = count; + mp->n_input = n_input; + for (i = 0; i < n_input; i++) + { + mp->acls[i] = htonl (am->input_acl_vec_by_sw_if_index[sw_if_index][i]); + } + for (i = 0; i < n_output; i++) + { + mp->acls[n_input + i] = + htonl (am->output_acl_vec_by_sw_if_index[sw_if_index][i]); + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t * + mp) +{ + acl_main_t *am = &acl_main; + vnet_sw_interface_t *swif; + vnet_interface_main_t *im = &am->vnet_main->interface_main; + + u32 sw_if_index; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->sw_if_index == ~0) + { + /* *INDENT-OFF* */ + pool_foreach (swif, im->sw_interfaces, + ({ + send_acl_interface_list_details(am, q, swif->sw_if_index, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + sw_if_index = ntohl (mp->sw_if_index); + if (!pool_is_free_index(im->sw_interfaces, sw_if_index)) + send_acl_interface_list_details (am, q, sw_if_index, mp->context); + } +} + +/* MACIP ACL API handlers */ + +static void +vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp) +{ + vl_api_macip_acl_add_reply_t *rmp; + acl_main_t *am = &acl_main; + int rv; + u32 acl_list_index = ~0; + + rv = + macip_acl_add_list (ntohl (mp->count), mp->r, &acl_list_index, mp->tag); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY, + ({ + rmp->acl_index = htonl(acl_list_index); + })); + /* *INDENT-ON* */ +} + +static void +vl_api_macip_acl_del_t_handler (vl_api_macip_acl_del_t * mp) +{ + acl_main_t *sm = &acl_main; + vl_api_macip_acl_del_reply_t *rmp; + int rv; + + rv = macip_acl_del_list (ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_MACIP_ACL_DEL_REPLY); +} + +static void +vl_api_macip_acl_interface_add_del_t_handler + (vl_api_macip_acl_interface_add_del_t * mp) +{ + acl_main_t *sm = &acl_main; + vl_api_macip_acl_interface_add_del_reply_t *rmp; + int rv = -1; + vnet_interface_main_t *im = &sm->vnet_main->interface_main; + u32 sw_if_index = ntohl (mp->sw_if_index); + + if (pool_is_free_index(im->sw_interfaces, sw_if_index)) + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; + else + rv = + macip_acl_interface_add_del_acl (ntohl (mp->sw_if_index), mp->is_add, + ntohl (mp->acl_index)); + + REPLY_MACRO (VL_API_MACIP_ACL_INTERFACE_ADD_DEL_REPLY); +} + +static void +send_macip_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q, + macip_acl_list_t * acl, u32 context) +{ + vl_api_macip_acl_details_t *mp; + vl_api_macip_acl_rule_t *rules; + macip_acl_rule_t *r; + int i; + int msg_size = sizeof (*mp) + (acl ? sizeof (mp->r[0]) * acl->count : 0); + + mp = vl_msg_api_alloc (msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_DETAILS + am->msg_id_base); + + /* fill in the message */ + mp->context = context; + if (acl) + { + memcpy (mp->tag, acl->tag, sizeof (mp->tag)); + mp->count = htonl (acl->count); + mp->acl_index = htonl (acl - am->macip_acls); + rules = mp->r; + for (i = 0; i < acl->count; i++) + { + r = &acl->rules[i]; + rules[i].is_permit = r->is_permit; + rules[i].is_ipv6 = r->is_ipv6; + memcpy (rules[i].src_mac, &r->src_mac, sizeof (r->src_mac)); + memcpy (rules[i].src_mac_mask, &r->src_mac_mask, + sizeof (r->src_mac_mask)); + if (r->is_ipv6) + memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip6, + sizeof (r->src_ip_addr.ip6)); + else + memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip4, + sizeof (r->src_ip_addr.ip4)); + rules[i].src_ip_prefix_len = r->src_prefixlen; + } + } + else + { + /* No martini, no party - no ACL applied to this interface. */ + mp->acl_index = ~0; + mp->count = 0; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + + +static void +vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp) +{ + acl_main_t *am = &acl_main; + macip_acl_list_t *acl; + + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + if (mp->acl_index == ~0) + { + /* Just dump all ACLs for now, with sw_if_index = ~0 */ + pool_foreach (acl, am->macip_acls, ( + { + send_macip_acl_details (am, q, acl, + mp-> + context);} + )); + /* *INDENT-ON* */ + } + else + { + u32 acl_index = ntohl (mp->acl_index); + if (!pool_is_free_index (am->macip_acls, acl_index)) + { + acl = &am->macip_acls[acl_index]; + send_macip_acl_details (am, q, acl, mp->context); + } + } +} + +static void +vl_api_macip_acl_interface_get_t_handler (vl_api_macip_acl_interface_get_t * + mp) +{ + acl_main_t *am = &acl_main; + vl_api_macip_acl_interface_get_reply_t *rmp; + u32 count = vec_len (am->macip_acl_by_sw_if_index); + int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]) * count; + unix_shared_memory_queue_t *q; + int i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + rmp = vl_msg_api_alloc (msg_size); + memset (rmp, 0, msg_size); + rmp->_vl_msg_id = + ntohs (VL_API_MACIP_ACL_INTERFACE_GET_REPLY + am->msg_id_base); + rmp->context = mp->context; + rmp->count = htonl (count); + for (i = 0; i < count; i++) + { + rmp->acls[i] = htonl (am->macip_acl_by_sw_if_index[i]); + } + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + + + +/* Set up the API message handling tables */ +static clib_error_t * +acl_plugin_api_hookup (vlib_main_t * vm) +{ + acl_main_t *sm = &acl_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_acl_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <acl/acl_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (acl_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_acl; +#undef _ +} + +u32 +register_match_action_nexts (u32 next_in_ip4, u32 next_in_ip6, + u32 next_out_ip4, u32 next_out_ip6) +{ + acl_main_t *am = &acl_main; + u32 act = am->n_match_actions; + if (am->n_match_actions == 255) + { + return ~0; + } + am->n_match_actions++; + am->acl_in_ip4_match_next[act] = next_in_ip4; + am->acl_in_ip6_match_next[act] = next_in_ip6; + am->acl_out_ip4_match_next[act] = next_out_ip4; + am->acl_out_ip6_match_next[act] = next_out_ip6; + return act; +} + +void +acl_setup_nodes (void) +{ + vlib_main_t *vm = vlib_get_main (); + acl_main_t *am = &acl_main; + vlib_node_t *n; + + n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify"); + am->l2_input_classify_next_acl = + vlib_node_add_next_with_slot (vm, n->index, acl_in_node.index, ~0); + n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify"); + am->l2_output_classify_next_acl = + vlib_node_add_next_with_slot (vm, n->index, acl_out_node.index, ~0); + + feat_bitmap_init_next_nodes (vm, acl_in_node.index, L2INPUT_N_FEAT, + l2input_get_feat_names (), + am->acl_in_node_input_next_node_index); + + memset (&am->acl_in_ip4_match_next[0], 0, + sizeof (am->acl_in_ip4_match_next)); + memset (&am->acl_in_ip6_match_next[0], 0, + sizeof (am->acl_in_ip6_match_next)); + memset (&am->acl_out_ip4_match_next[0], 0, + sizeof (am->acl_out_ip4_match_next)); + memset (&am->acl_out_ip6_match_next[0], 0, + sizeof (am->acl_out_ip6_match_next)); + am->n_match_actions = 0; + + register_match_action_nexts (0, 0, 0, 0); /* drop */ + register_match_action_nexts (~0, ~0, ~0, ~0); /* permit */ + register_match_action_nexts (ACL_IN_L2S_INPUT_IP4_ADD, ACL_IN_L2S_INPUT_IP6_ADD, ACL_OUT_L2S_OUTPUT_IP4_ADD, ACL_OUT_L2S_OUTPUT_IP6_ADD); /* permit + create session */ +} + + + +static clib_error_t * +acl_init (vlib_main_t * vm) +{ + acl_main_t *am = &acl_main; + clib_error_t *error = 0; + memset (am, 0, sizeof (*am)); + am->vlib_main = vm; + am->vnet_main = vnet_get_main (); + + u8 *name = format (0, "acl_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + am->msg_id_base = vl_msg_api_get_msg_ids ((char *) name, + VL_MSG_FIRST_AVAILABLE); + + error = acl_plugin_api_hookup (vm); + acl_setup_nodes (); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (am, &api_main); + + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (acl_init); diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h new file mode 100644 index 00000000000..afc9b289cee --- /dev/null +++ b/src/plugins/acl/acl.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_acl_h +#define included_acl_h + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/l2/l2_output.h> + + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#define ACL_PLUGIN_VERSION_MAJOR 1 +#define ACL_PLUGIN_VERSION_MINOR 1 + +extern vlib_node_registration_t acl_in_node; +extern vlib_node_registration_t acl_out_node; + +void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); +void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap); + +enum address_e { IP4, IP6 }; +typedef struct +{ + enum address_e type; + union { + ip6_address_t ip6; + ip4_address_t ip4; + } addr; +} address_t; + +/* + * ACL rules + */ +typedef struct +{ + u8 is_permit; + u8 is_ipv6; + ip46_address_t src; + u8 src_prefixlen; + ip46_address_t dst; + u8 dst_prefixlen; + u8 proto; + u16 src_port_or_type_first; + u16 src_port_or_type_last; + u16 dst_port_or_code_first; + u16 dst_port_or_code_last; + u8 tcp_flags_value; + u8 tcp_flags_mask; +} acl_rule_t; + +typedef struct +{ + u8 is_permit; + u8 is_ipv6; + u8 src_mac[6]; + u8 src_mac_mask[6]; + ip46_address_t src_ip_addr; + u8 src_prefixlen; +} macip_acl_rule_t; + +/* + * ACL + */ +typedef struct +{ + u8 tag[64]; + u32 count; + acl_rule_t *rules; +} acl_list_t; + +typedef struct +{ + u8 tag[64]; + u32 count; + macip_acl_rule_t *rules; + /* References to the classifier tables that will enforce the rules */ + u32 ip4_table_index; + u32 ip6_table_index; + u32 l2_table_index; +} macip_acl_list_t; + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + + acl_list_t *acls; /* Pool of ACLs */ + macip_acl_list_t *macip_acls; /* Pool of MAC-IP ACLs */ + + /* ACLs associated with interfaces */ + u32 **input_acl_vec_by_sw_if_index; + u32 **output_acl_vec_by_sw_if_index; + + /* + * Classify tables used to grab the packets for the ACL check, + * and serving as the 5-tuple session tables at the same time + */ + u32 *acl_ip4_input_classify_table_by_sw_if_index; + u32 *acl_ip6_input_classify_table_by_sw_if_index; + u32 *acl_ip4_output_classify_table_by_sw_if_index; + u32 *acl_ip6_output_classify_table_by_sw_if_index; + + /* MACIP (input) ACLs associated with the interfaces */ + u32 *macip_acl_by_sw_if_index; + + /* next indices for our nodes in the l2-classify tables */ + u32 l2_input_classify_next_acl; + u32 l2_output_classify_next_acl; + + /* next node indices for feature bitmap */ + u32 acl_in_node_input_next_node_index[32]; + /* the respective thing for the output feature */ + l2_output_next_nodes_st acl_out_output_next_nodes; + + /* ACL match actions (must be coherent across in/out ACLs to next indices (can differ) */ + + u32 acl_in_ip4_match_next[256]; + u32 acl_in_ip6_match_next[256]; + u32 acl_out_ip4_match_next[256]; + u32 acl_out_ip6_match_next[256]; + u32 n_match_actions; + + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + ethernet_main_t * ethernet_main; +} acl_main_t; + +extern acl_main_t acl_main; + + +#endif diff --git a/src/plugins/acl/acl_all_api_h.h b/src/plugins/acl/acl_all_api_h.h new file mode 100644 index 00000000000..96eca56d31c --- /dev/null +++ b/src/plugins/acl/acl_all_api_h.h @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <acl/acl.api.h> + +#ifdef vl_printfun + +#ifdef LP64 +#define _uword_fmt "%lld" +#define _uword_cast (long long) +#else +#define _uword_fmt "%ld" +#define _uword_cast long +#endif + +static inline void * +vl_api_acl_rule_t_print (vl_api_acl_rule_t * a, void *handle) +{ + vl_print (handle, "vl_api_acl_rule_t:\n"); + vl_print (handle, "is_permit: %u\n", (unsigned) a->is_permit); + vl_print (handle, "is_ipv6: %u\n", (unsigned) a->is_ipv6); + { + int _i; + for (_i = 0; _i < 16; _i++) + { + vl_print (handle, "src_ip_addr[%d]: %u\n", _i, a->src_ip_addr[_i]); + } + } + vl_print (handle, "src_ip_prefix_len: %u\n", + (unsigned) a->src_ip_prefix_len); + { + int _i; + for (_i = 0; _i < 16; _i++) + { + vl_print (handle, "dst_ip_addr[%d]: %u\n", _i, a->dst_ip_addr[_i]); + } + } + vl_print (handle, "dst_ip_prefix_len: %u\n", + (unsigned) a->dst_ip_prefix_len); + vl_print (handle, "proto: %u\n", (unsigned) a->proto); + vl_print (handle, "srcport_or_icmptype_first: %u\n", + (unsigned) a->srcport_or_icmptype_first); + vl_print (handle, "srcport_or_icmptype_last: %u\n", + (unsigned) a->srcport_or_icmptype_last); + vl_print (handle, "dstport_or_icmpcode_first: %u\n", + (unsigned) a->dstport_or_icmpcode_first); + vl_print (handle, "dstport_or_icmpcode_last: %u\n", + (unsigned) a->dstport_or_icmpcode_last); + vl_print (handle, "tcp_flags_mask: %u\n", (unsigned) a->tcp_flags_mask); + vl_print (handle, "tcp_flags_value: %u\n", (unsigned) a->tcp_flags_value); + return handle; +} + +static inline void * +vl_api_acl_add_replace_t_print (vl_api_acl_add_replace_t * a, void *handle) +{ + int i; + vl_print (handle, "vl_api_acl_add_replace_t:\n"); + vl_print (handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id); + vl_print (handle, "client_index: %u\n", (unsigned) a->client_index); + vl_print (handle, "context: %u\n", (unsigned) a->context); + vl_print (handle, "acl_index: %u\n", (unsigned) a->acl_index); + vl_print (handle, "count: %u\n", (unsigned) a->count); + vl_print (handle, "r ----- \n"); + for (i = 0; i < a->count; i++) + { + vl_print (handle, " r[%d]:\n", i); + vl_api_acl_rule_t_print (&a->r[i], handle); + } + vl_print (handle, "r ----- END \n"); + return handle; +} + + +static inline void *vl_api_acl_details_t_print (vl_api_acl_details_t *a,void *handle) +{ + vl_print(handle, "vl_api_acl_details_t:\n"); + vl_print(handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id); + vl_print(handle, "context: %u\n", (unsigned) a->context); + vl_print(handle, "acl_index: %u\n", (unsigned) a->acl_index); + { + int _i; + for (_i = 0; _i < 64; _i++) { + vl_print(handle, "tag[%d]: %u\n", _i, a->tag[_i]); + } + } + vl_print(handle, "count: %u\n", (unsigned) a->count); + vl_print(handle, "r ----- \n"); + // FIXME vl_api_acl_rule_t_print(&a->r, handle); + vl_print(handle, "r ----- END \n"); + return handle; +} + +static inline void * +vl_api_macip_acl_rule_t_print (vl_api_macip_acl_rule_t * a, void *handle) +{ + vl_print (handle, "vl_api_macip_acl_rule_t:\n"); + vl_print (handle, "is_permit: %u\n", (unsigned) a->is_permit); + vl_print (handle, "is_ipv6: %u\n", (unsigned) a->is_ipv6); + { + int _i; + for (_i = 0; _i < 6; _i++) + { + vl_print (handle, "src_mac[%d]: %u\n", _i, a->src_mac[_i]); + } + } + { + int _i; + for (_i = 0; _i < 6; _i++) + { + vl_print (handle, "src_mac_mask[%d]: %u\n", _i, a->src_mac_mask[_i]); + } + } + { + int _i; + for (_i = 0; _i < 16; _i++) + { + vl_print (handle, "src_ip_addr[%d]: %u\n", _i, a->src_ip_addr[_i]); + } + } + vl_print (handle, "src_ip_prefix_len: %u\n", + (unsigned) a->src_ip_prefix_len); + return handle; +} + +static inline void * +vl_api_macip_acl_add_t_print (vl_api_macip_acl_add_t * a, void *handle) +{ + int i; + vl_print (handle, "vl_api_macip_acl_add_t:\n"); + vl_print (handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id); + vl_print (handle, "client_index: %u\n", (unsigned) a->client_index); + vl_print (handle, "context: %u\n", (unsigned) a->context); + vl_print (handle, "count: %u\n", (unsigned) a->count); + vl_print (handle, "r ----- \n"); + for (i = 0; i < a->count; i++) + { + vl_print (handle, " r[%d]:\n", i); + vl_api_macip_acl_rule_t_print (&a->r[i], handle); + } + vl_print (handle, "r ----- END \n"); + return handle; +} + +static inline void *vl_api_macip_acl_details_t_print (vl_api_macip_acl_details_t *a,void *handle) +{ + int i; + vl_print(handle, "vl_api_macip_acl_details_t:\n"); + vl_print(handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id); + vl_print(handle, "context: %u\n", (unsigned) a->context); + vl_print(handle, "acl_index: %u\n", (unsigned) a->acl_index); + { + int _i; + for (_i = 0; _i < 64; _i++) { + vl_print(handle, "tag[%d]: %u\n", _i, a->tag[_i]); + } + } + vl_print(handle, "count: %u\n", (unsigned) a->count); + vl_print(handle, "r ----- \n"); + for (i = 0; i < a->count; i++) + { + vl_print (handle, " r[%d]:\n", i); + vl_api_macip_acl_rule_t_print (&a->r[i], handle); + } + vl_print(handle, "r ----- END \n"); + return handle; +} + +#endif /* vl_printfun */ + + +#ifdef vl_endianfun + +#undef clib_net_to_host_uword +#ifdef LP64 +#define clib_net_to_host_uword clib_net_to_host_u64 +#else +#define clib_net_to_host_uword clib_net_to_host_u32 +#endif + +/* + * Manual endian/print functions created by copypasting the automatically + * generated ones with small required adjustments. Appears the codegen + * can't make code to print the contents of custom-type array. + */ + +static inline void +vl_api_acl_rule_t_endian (vl_api_acl_rule_t * a) +{ + /* a->is_permit = a->is_permit (no-op) */ + /* a->is_ipv6 = a->is_ipv6 (no-op) */ + /* a->src_ip_addr[0..15] = a->src_ip_addr[0..15] (no-op) */ + /* a->src_ip_prefix_len = a->src_ip_prefix_len (no-op) */ + /* a->dst_ip_addr[0..15] = a->dst_ip_addr[0..15] (no-op) */ + /* a->dst_ip_prefix_len = a->dst_ip_prefix_len (no-op) */ + /* a->proto = a->proto (no-op) */ + a->srcport_or_icmptype_first = + clib_net_to_host_u16 (a->srcport_or_icmptype_first); + a->srcport_or_icmptype_last = + clib_net_to_host_u16 (a->srcport_or_icmptype_last); + a->dstport_or_icmpcode_first = + clib_net_to_host_u16 (a->dstport_or_icmpcode_first); + a->dstport_or_icmpcode_last = + clib_net_to_host_u16 (a->dstport_or_icmpcode_last); + /* a->tcp_flags_mask = a->tcp_flags_mask (no-op) */ + /* a->tcp_flags_value = a->tcp_flags_value (no-op) */ +} + +static inline void +vl_api_acl_add_replace_t_endian (vl_api_acl_add_replace_t * a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->client_index = clib_net_to_host_u32 (a->client_index); + a->context = clib_net_to_host_u32 (a->context); + a->acl_index = clib_net_to_host_u32 (a->acl_index); + a->count = clib_net_to_host_u32 (a->count); + for (i = 0; i < a->count; i++) + { + vl_api_acl_rule_t_endian (&a->r[i]); + } +} + +static inline void vl_api_acl_details_t_endian (vl_api_acl_details_t *a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id); + a->context = clib_net_to_host_u32(a->context); + a->acl_index = clib_net_to_host_u32(a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32(a->count); + for (i = 0; i < a->count; i++) + { + vl_api_acl_rule_t_endian (&a->r[i]); + } +} + +static inline void vl_api_acl_interface_list_details_t_endian (vl_api_acl_interface_list_details_t *a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id); + a->context = clib_net_to_host_u32(a->context); + a->sw_if_index = clib_net_to_host_u32(a->sw_if_index); + /* a->count = a->count (no-op) */ + /* a->n_input = a->n_input (no-op) */ + for(i=0; i<a->count; i++) { + a->acls[i] = clib_net_to_host_u32(a->acls[i]); + } +} + +static inline void vl_api_acl_interface_set_acl_list_t_endian (vl_api_acl_interface_set_acl_list_t *a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id); + a->client_index = clib_net_to_host_u32(a->client_index); + a->context = clib_net_to_host_u32(a->context); + a->sw_if_index = clib_net_to_host_u32(a->sw_if_index); + /* a->count = a->count (no-op) */ + /* a->n_input = a->n_input (no-op) */ + for(i=0; i<a->count; i++) { + a->acls[i] = clib_net_to_host_u32(a->acls[i]); + } +} + +static inline void +vl_api_macip_acl_rule_t_endian (vl_api_macip_acl_rule_t * a) +{ + /* a->is_permit = a->is_permit (no-op) */ + /* a->is_ipv6 = a->is_ipv6 (no-op) */ + /* a->src_mac[0..5] = a->src_mac[0..5] (no-op) */ + /* a->src_mac_mask[0..5] = a->src_mac_mask[0..5] (no-op) */ + /* a->src_ip_addr[0..15] = a->src_ip_addr[0..15] (no-op) */ + /* a->src_ip_prefix_len = a->src_ip_prefix_len (no-op) */ +} + +static inline void +vl_api_macip_acl_add_t_endian (vl_api_macip_acl_add_t * a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id); + a->client_index = clib_net_to_host_u32 (a->client_index); + a->context = clib_net_to_host_u32 (a->context); + a->count = clib_net_to_host_u32 (a->count); + for (i = 0; i < a->count; i++) + { + vl_api_macip_acl_rule_t_endian (&a->r[i]); + } +} + +static inline void vl_api_macip_acl_details_t_endian (vl_api_macip_acl_details_t *a) +{ + int i; + a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id); + a->context = clib_net_to_host_u32(a->context); + a->acl_index = clib_net_to_host_u32(a->acl_index); + /* a->tag[0..63] = a->tag[0..63] (no-op) */ + a->count = clib_net_to_host_u32(a->count); + for (i = 0; i < a->count; i++) + { + vl_api_macip_acl_rule_t_endian (&a->r[i]); + } +} + + + + +#endif /* vl_printfun */ + + diff --git a/src/plugins/acl/acl_msg_enum.h b/src/plugins/acl/acl_msg_enum.h new file mode 100644 index 00000000000..14d8b48c207 --- /dev/null +++ b/src/plugins/acl/acl_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_acl_msg_enum_h +#define included_acl_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <acl/acl_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c new file mode 100644 index 00000000000..a0e413e16da --- /dev/null +++ b/src/plugins/acl/acl_test.c @@ -0,0 +1,1024 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * acl_test.c - test harness plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <arpa/inet.h> + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include <acl/acl_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <acl/acl_all_api_h.h> +#undef vl_typedefs + +/* define message structures */ +#define vl_endianfun +#include <acl/acl_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <acl/acl_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <acl/acl_all_api_h.h> +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} acl_test_main_t; + +acl_test_main_t acl_test_main; + +#define foreach_standard_reply_retval_handler \ +_(acl_del_reply) \ +_(acl_interface_add_del_reply) \ +_(macip_acl_interface_add_del_reply) \ +_(acl_interface_set_acl_list_reply) \ +_(macip_acl_del_reply) + +#define foreach_reply_retval_aclindex_handler \ +_(acl_add_replace_reply) \ +_(macip_acl_add_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = acl_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = acl_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + clib_warning("ACL index: %d", ntohl(mp->acl_index)); \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_reply_retval_aclindex_handler; +#undef _ + +/* These two ought to be in a library somewhere but they aren't */ +static uword +my_unformat_mac_address (unformat_input_t * input, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return unformat (input, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3], + &a[4], &a[5]); +} + +static u8 * +my_format_mac_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%02x:%02x:%02x:%02x:%02x:%02x", + a[0], a[1], a[2], a[3], a[4], a[5]); +} + + + +static void vl_api_acl_plugin_get_version_reply_t_handler + (vl_api_acl_plugin_get_version_reply_t * mp) + { + vat_main_t * vam = acl_test_main.vat_main; + clib_warning("ACL plugin version: %d.%d", ntohl(mp->major), ntohl(mp->minor)); + vam->result_ready = 1; + } + +static void vl_api_acl_interface_list_details_t_handler + (vl_api_acl_interface_list_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + u8 *out = 0; + vl_api_acl_interface_list_details_t_endian(mp); + out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); + out = format(out, " input "); + for(i=0; i<mp->count; i++) { + out = format(out, "%d ", mp->acls[i]); + if (i == mp->n_input-1) + out = format(out, "\n output "); + } + out = format(out, "\n"); + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + + +static inline u8 * +vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a) +{ + int af = a->is_ipv6 ? AF_INET6 : AF_INET; + u8 src[INET6_ADDRSTRLEN]; + u8 dst[INET6_ADDRSTRLEN]; + inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src)); + inet_ntop(af, a->dst_ip_addr, (void *)dst, sizeof(dst)); + + out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d %d", + a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit, + src, a->src_ip_prefix_len, + dst, a->dst_ip_prefix_len, + a->proto, + a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, + a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, + a->tcp_flags_mask, a->tcp_flags_value); + return(out); +} + + + +static void vl_api_acl_details_t_handler + (vl_api_acl_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + vl_api_acl_details_t_endian(mp); + u8 *out = 0; + out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + for(i=0; i<mp->count; i++) { + out = format(out, " "); + out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]); + out = format(out, "%s\n", i<mp->count-1 ? "," : ""); + } + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + +static inline u8 * +vl_api_macip_acl_rule_t_pretty_format (u8 *out, vl_api_macip_acl_rule_t * a) +{ + int af = a->is_ipv6 ? AF_INET6 : AF_INET; + u8 src[INET6_ADDRSTRLEN]; + inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src)); + + out = format(out, "%s action %d ip %s/%d mac %U mask %U", + a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit, + src, a->src_ip_prefix_len, + my_format_mac_address, a->src_mac, + my_format_mac_address, a->src_mac_mask); + return(out); +} + + +static void vl_api_macip_acl_details_t_handler + (vl_api_macip_acl_details_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + vl_api_macip_acl_details_t_endian(mp); + u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + for(i=0; i<mp->count; i++) { + out = format(out, " "); + out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]); + out = format(out, "%s\n", i<mp->count-1 ? "," : ""); + } + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + +static void vl_api_macip_acl_interface_get_reply_t_handler + (vl_api_macip_acl_interface_get_reply_t * mp) + { + int i; + vat_main_t * vam = acl_test_main.vat_main; + u8 *out = format(0, "sw_if_index with MACIP ACL count: %d\n", ntohl(mp->count)); + for(i=0; i<ntohl(mp->count); i++) { + out = format(out, " macip_acl_interface_add_del sw_if_index %d add acl %d\n", i, ntohl(mp->acls[i])); + } + out = format(out, "\n"); + clib_warning("%s", out); + vec_free(out); + vam->result_ready = 1; + } + + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(ACL_ADD_REPLACE_REPLY, acl_add_replace_reply) \ +_(ACL_DEL_REPLY, acl_del_reply) \ +_(ACL_INTERFACE_ADD_DEL_REPLY, acl_interface_add_del_reply) \ +_(ACL_INTERFACE_SET_ACL_LIST_REPLY, acl_interface_set_acl_list_reply) \ +_(ACL_INTERFACE_LIST_DETAILS, acl_interface_list_details) \ +_(ACL_DETAILS, acl_details) \ +_(MACIP_ACL_ADD_REPLY, macip_acl_add_reply) \ +_(MACIP_ACL_DEL_REPLY, macip_acl_del_reply) \ +_(MACIP_ACL_DETAILS, macip_acl_details) \ +_(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \ +_(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \ +_(ACL_PLUGIN_GET_VERSION_REPLY, acl_plugin_get_version_reply) + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int api_acl_plugin_get_version (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + vl_api_acl_plugin_get_version_t * mp; + u32 msg_size = sizeof(*mp); + f64 timeout; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_PLUGIN_GET_VERSION + sm->msg_id_base); + mp->client_index = vam->my_client_index; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; + + return 0; +} + +static int api_macip_acl_interface_get (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + vl_api_acl_plugin_get_version_t * mp; + u32 msg_size = sizeof(*mp); + f64 timeout; + + vam->result_ready = 0; + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_GET + sm->msg_id_base); + mp->client_index = vam->my_client_index; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; + + return 0; +} + +#define vec_validate_acl_rules(v, idx) \ + do { \ + if (vec_len(v) < idx+1) { \ + vec_validate(v, idx); \ + v[idx].is_permit = 0x1; \ + v[idx].srcport_or_icmptype_last = 0xffff; \ + v[idx].dstport_or_icmpcode_last = 0xffff; \ + } \ + } while (0) + + +static int api_acl_add_replace (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_acl_add_replace_t * mp; + u32 acl_index = ~0; + u32 msg_size = sizeof (*mp); /* without the rules */ + + vl_api_acl_rule_t *rules = 0; + int rule_idx = 0; + int n_rules = 0; + u32 proto = 0; + u32 port1 = 0; + u32 port2 = 0; + u32 action = 0; + u32 tcpflags, tcpmask; + u32 src_prefix_length = 0, dst_prefix_length = 0; + ip4_address_t src_v4address, dst_v4address; + ip6_address_t src_v6address, dst_v6address; + u8 *tag = 0; + + if (!unformat (i, "%d", &acl_index)) { + /* Just assume -1 */ + } + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ipv6")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "ipv4")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "permit+reflect")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 2; + } + else if (unformat (i, "permit")) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 1; + } + else if (unformat (i, "action %d", &action)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = action; + } + else if (unformat (i, "src %U/%d", + unformat_ip4_address, &src_v4address, &src_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "src %U/%d", + unformat_ip6_address, &src_v6address, &src_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "dst %U/%d", + unformat_ip4_address, &dst_v4address, &dst_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].dst_ip_addr, &dst_v4address, 4); + rules[rule_idx].dst_ip_prefix_len = dst_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "dst %U/%d", + unformat_ip6_address, &dst_v6address, &dst_prefix_length)) + { + vec_validate_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].dst_ip_addr, &dst_v6address, 16); + rules[rule_idx].dst_ip_prefix_len = dst_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "sport %d-%d", &port1, &port2)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].srcport_or_icmptype_first = htons(port1); + rules[rule_idx].srcport_or_icmptype_last = htons(port2); + } + else if (unformat (i, "sport %d", &port1)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].srcport_or_icmptype_first = htons(port1); + rules[rule_idx].srcport_or_icmptype_last = htons(port1); + } + else if (unformat (i, "dport %d-%d", &port1, &port2)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].dstport_or_icmpcode_first = htons(port1); + rules[rule_idx].dstport_or_icmpcode_last = htons(port2); + } + else if (unformat (i, "dport %d", &port1)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].dstport_or_icmpcode_first = htons(port1); + rules[rule_idx].dstport_or_icmpcode_last = htons(port1); + } + else if (unformat (i, "tcpflags %d %d", &tcpflags, &tcpmask)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].tcp_flags_value = tcpflags; + rules[rule_idx].tcp_flags_mask = tcpmask; + } + else if (unformat (i, "proto %d", &proto)) + { + vec_validate_acl_rules(rules, rule_idx); + rules[rule_idx].proto = proto; + } + else if (unformat (i, "tag %s", &tag)) + { + } + else if (unformat (i, ",")) + { + rule_idx++; + vec_validate_acl_rules(rules, rule_idx); + } + else + break; + } + + /* Construct the API message */ + vam->result_ready = 0; + + if(rules) + n_rules = vec_len(rules); + else + n_rules = 0; + + msg_size += n_rules*sizeof(rules[0]); + + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_ACL_ADD_REPLACE + sm->msg_id_base); + mp->client_index = vam->my_client_index; + if (n_rules > 0) + clib_memcpy(mp->r, rules, n_rules*sizeof (vl_api_acl_rule_t)); + if (tag) + { + if (vec_len(tag) >= sizeof(mp->tag)) + { + tag[sizeof(mp->tag)-1] = 0; + _vec_len(tag) = sizeof(mp->tag); + } + clib_memcpy(mp->tag, tag, vec_len(tag)); + vec_free(tag); + } + mp->acl_index = ntohl(acl_index); + mp->count = htonl(n_rules); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_acl_del (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_acl_del_t * mp; + u32 acl_index = ~0; + + if (!unformat (i, "%d", &acl_index)) { + errmsg ("missing acl index\n"); + return -99; + } + + /* Construct the API message */ + M(ACL_DEL, acl_del); + mp->acl_index = ntohl(acl_index); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_macip_acl_del (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_acl_del_t * mp; + u32 acl_index = ~0; + + if (!unformat (i, "%d", &acl_index)) { + errmsg ("missing acl index\n"); + return -99; + } + + /* Construct the API message */ + M(MACIP_ACL_DEL, acl_del); + mp->acl_index = ntohl(acl_index); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_acl_interface_add_del (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_acl_interface_add_del_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u8 is_input = 0; + u8 is_add = 0; + +// acl_interface_add_del <intfc> | sw_if_index <if-idx> acl_index <acl-idx> [out] [del] + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "add")) + is_add = 1; + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "acl %d", &acl_index)) + ; + else if (unformat (i, "input")) + is_input = 1; + else if (unformat (i, "output")) + is_input = 0; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + if (acl_index == ~0) { + errmsg ("missing ACL index\n"); + return -99; + } + + + + /* Construct the API message */ + M(ACL_INTERFACE_ADD_DEL, acl_interface_add_del); + mp->acl_index = ntohl(acl_index); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + mp->is_input = is_input; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_macip_acl_interface_add_del (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_macip_acl_interface_add_del_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u8 is_add = 0; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "add")) + is_add = 1; + else if (unformat (i, "del")) + is_add = 0; + else if (unformat (i, "acl %d", &acl_index)) + ; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + if (acl_index == ~0) { + errmsg ("missing ACL index\n"); + return -99; + } + + + + /* Construct the API message */ + M(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del); + mp->acl_index = ntohl(acl_index); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_acl_interface_set_acl_list (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_acl_interface_set_acl_list_t * mp; + u32 sw_if_index = ~0; + u32 acl_index = ~0; + u32 *inacls = 0; + u32 *outacls = 0; + u8 is_input = 0; + +// acl_interface_set_acl_list <intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list] + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else if (unformat (i, "%d", &acl_index)) + { + if(is_input) + vec_add1(inacls, htonl(acl_index)); + else + vec_add1(outacls, htonl(acl_index)); + } + else if (unformat (i, "acl %d", &acl_index)) + ; + else if (unformat (i, "input")) + is_input = 1; + else if (unformat (i, "output")) + is_input = 0; + else + break; + } + + if (sw_if_index == ~0) { + errmsg ("missing interface name / explicit sw_if_index number \n"); + return -99; + } + + /* Construct the API message */ + M2(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list, sizeof(u32) * (vec_len(inacls) + vec_len(outacls))); + mp->sw_if_index = ntohl(sw_if_index); + mp->n_input = vec_len(inacls); + mp->count = vec_len(inacls) + vec_len(outacls); + vec_append(inacls, outacls); + if (vec_len(inacls) > 0) + clib_memcpy(mp->acls, inacls, vec_len(inacls)*sizeof(u32)); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + + +static int api_acl_interface_list_dump (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + u32 sw_if_index = ~0; + vl_api_acl_interface_list_dump_t * mp; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + ; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + ; + else + break; + } + + /* Construct the API message */ + M(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump); + mp->sw_if_index = ntohl (sw_if_index); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_acl_dump (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + u32 acl_index = ~0; + vl_api_acl_dump_t * mp; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + /* Construct the API message */ + M(ACL_DUMP, acl_dump); + mp->acl_index = ntohl (acl_index); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +static int api_macip_acl_dump (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + u32 acl_index = ~0; + vl_api_acl_dump_t * mp; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) { + if (unformat (i, "%d", &acl_index)) + ; + else + break; + } + + /* Construct the API message */ + M(MACIP_ACL_DUMP, macip_acl_dump); + mp->acl_index = ntohl (acl_index); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +#define vec_validate_macip_acl_rules(v, idx) \ + do { \ + if (vec_len(v) < idx+1) { \ + vec_validate(v, idx); \ + v[idx].is_permit = 0x1; \ + } \ + } while (0) + + +static int api_macip_acl_add (vat_main_t * vam) +{ + acl_test_main_t * sm = &acl_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_macip_acl_add_t * mp; + u32 msg_size = sizeof (*mp); /* without the rules */ + + vl_api_macip_acl_rule_t *rules = 0; + int rule_idx = 0; + int n_rules = 0; + u32 src_prefix_length = 0; + u32 action = 0; + ip4_address_t src_v4address; + ip6_address_t src_v6address; + u8 src_mac[6]; + u8 *tag = 0; + u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "ipv6")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "ipv4")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "permit")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 1; + } + else if (unformat (i, "deny")) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = 0; + } + else if (unformat (i, "action %d", &action)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + rules[rule_idx].is_permit = action; + } + else if (unformat (i, "ip %U/%d", + unformat_ip4_address, &src_v4address, &src_prefix_length)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 0; + } + else if (unformat (i, "ip %U/%d", + unformat_ip6_address, &src_v6address, &src_prefix_length)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16); + rules[rule_idx].src_ip_prefix_len = src_prefix_length; + rules[rule_idx].is_ipv6 = 1; + } + else if (unformat (i, "mac %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac, &src_mac, 6); + memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6); + } + else if (unformat (i, "mask %U", + my_unformat_mac_address, &src_mac)) + { + vec_validate_macip_acl_rules(rules, rule_idx); + memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6); + } + else if (unformat (i, "tag %s", &tag)) + { + } + else if (unformat (i, ",")) + { + rule_idx++; + vec_validate_macip_acl_rules(rules, rule_idx); + } + else + break; + } + + /* Construct the API message */ + vam->result_ready = 0; + + if(rules) + n_rules = vec_len(rules); + else + n_rules = 0; + + msg_size += n_rules*sizeof(rules[0]); + + mp = vl_msg_api_alloc_as_if_client(msg_size); + memset (mp, 0, msg_size); + mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD + sm->msg_id_base); + mp->client_index = vam->my_client_index; + if (n_rules > 0) + clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0])); + if (tag) + { + if (vec_len(tag) >= sizeof(mp->tag)) + { + tag[sizeof(mp->tag)-1] = 0; + _vec_len(tag) = sizeof(mp->tag); + } + clib_memcpy(mp->tag, tag, vec_len(tag)); + vec_free(tag); + } + + mp->count = htonl(n_rules); + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(acl_plugin_get_version, "") \ +_(acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|permit+reflect|deny|action N> [src IP/plen] [dst IP/plen] [sport X-Y] [dport X-Y] [proto P] [tcpflags FL MASK], ... , ...") \ +_(acl_del, "<acl-idx>") \ +_(acl_dump, "[<acl-idx>]") \ +_(acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] [input|output] acl <acl-idx>") \ +_(acl_interface_set_acl_list, "<intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]") \ +_(acl_interface_list_dump, "[<intfc> | sw_if_index <if-idx>]") \ +_(macip_acl_add, "...") \ +_(macip_acl_del, "<acl-idx>")\ +_(macip_acl_dump, "[<acl-idx>]") \ +_(macip_acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] acl <acl-idx>") \ +_(macip_acl_interface_get, "") + + + +void vat_api_hookup (vat_main_t *vam) +{ + acl_test_main_t * sm = &acl_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + acl_test_main_t * sm = &acl_test_main; + u8 * name; + + sm->vat_main = vam; + + name = format (0, "acl_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/acl/l2sess.c b/src/plugins/acl/l2sess.c new file mode 100644 index 00000000000..cc9bde4417d --- /dev/null +++ b/src/plugins/acl/l2sess.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * l2sess.c - simple MAC-swap API / debug CLI handling + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <acl/l2sess.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/timing_wheel.h> + +#include <vnet/l2/l2_output.h> +#include <vnet/l2/l2_input.h> + +void +l2sess_vlib_plugin_register (vlib_main_t * vm, void* hh, + int from_early_init) +{ + l2sess_main_t *sm = &l2sess_main; + vnet_plugin_handoff_t * h = hh; + memset (sm, 0, sizeof (*sm)); + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + sm->ethernet_main = h->ethernet_main; +} + +void +l2sess_init_next_features_input (vlib_main_t * vm, l2sess_main_t * sm) +{ +#define _(node_name, node_var, is_out, is_ip6, is_track) \ + if (!is_out) feat_bitmap_init_next_nodes(vm, node_var.index, L2INPUT_N_FEAT, l2input_get_feat_names (), sm->node_var ## _input_next_node_index); + foreach_l2sess_node +#undef _ +} + +void +l2sess_add_our_next_nodes (vlib_main_t * vm, l2sess_main_t * sm, + u8 * prev_node_name, int add_output_nodes) +{ + vlib_node_t *n; + n = vlib_get_node_by_name (vm, prev_node_name); +#define _(node_name, node_var, is_out, is_ip6, is_track) \ + if (is_out == add_output_nodes) { \ + u32 idx = vlib_node_add_next_with_slot(vm, n->index, node_var.index, ~0); \ + if (is_track) { \ + sm->next_slot_track_node_by_is_ip6_is_out[is_ip6][is_out] = idx; \ + } \ + } + foreach_l2sess_node +#undef _ +} + +void +l2sess_setup_nodes (void) +{ + vlib_main_t *vm = vlib_get_main (); + l2sess_main_t *sm = &l2sess_main; + + l2sess_init_next_features_input (vm, sm); + + l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-input-classify", 0); + l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-output-classify", 1); + +} + +static char * +get_l4_proto_str (int is_ip6, uint8_t l4_proto) +{ + switch (l4_proto) + { + case 6: + return "tcp"; + case 17: + return "udp"; + case 1: + return "icmp"; + case 58: + return "icmp6"; + default: + return "<?l4-unknown?>"; + } +} + +static clib_error_t * +l2sess_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + l2sess_main_t *sm = &l2sess_main; + clib_time_t *ct = &vm->clib_time; + l2s_session_t *s; + u64 now = clib_cpu_time_now (); + + vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel, + &sm->timing_wheel, 255); + + pool_foreach (s, sm->sessions, ( + { + f64 ctime = + (now - + s->create_time) * ct->seconds_per_clock; + f64 atime0 = + (now - + s->side[0].active_time) * + ct->seconds_per_clock; + f64 atime1 = + (now - + s->side[1].active_time) * + ct->seconds_per_clock; +/* + f64 ctime = (s->create_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; + f64 atime0 = (s->side[0].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; + f64 atime1 = (s->side[1].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock; +*/ + u8 * out0 = + format (0, + "%5d: create time: %U pkts/bytes/active time: [ %ld %ld %U : %ld %ld %U ]\n", + (s - sm->sessions), + format_time_interval, "h:m:s:u", + ctime, s->side[0].n_packets, + s->side[0].n_bytes, + format_time_interval, "h:m:s:u", + atime0, s->side[1].n_packets, + s->side[1].n_bytes, + format_time_interval, "h:m:s:u", + atime1); u8 * out1 = 0; + if (s->is_ip6) + { + out1 = + format (0, "%s %U :%u <-> %U :%u", + get_l4_proto_str (s->is_ip6, + s->l4_proto), + format_ip6_address, + &s->side[0].addr.ip6, + s->side[0].port, + format_ip6_address, + &s->side[1].addr.ip6, + s->side[1].port);} + else + { + out1 = + format (0, "%s %U :%u <-> %U :%u", + get_l4_proto_str (s->is_ip6, + s->l4_proto), + format_ip4_address, + &s->side[0].addr.ip4, + s->side[0].port, + format_ip4_address, + &s->side[1].addr.ip4, + s->side[1].port);} + vlib_cli_output (vm, "%s %s", out0, + out1); vec_free (out0); + vec_free (out1);} + )); + return 0; +} + +static clib_error_t * +l2sess_show_count_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + l2sess_main_t *sm = &l2sess_main; + + vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel, + &sm->timing_wheel, 255); + vlib_cli_output (vm, "session pool len: %d, pool elts: %d", + pool_len (sm->sessions), pool_elts (sm->sessions)); + vlib_cli_output (vm, + "attempted to delete sessions which were already free: %d", + sm->counter_attempted_delete_free_session); + return 0; +} + + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (l2sess_show_command, static) = { + .path = "show l2sess", + .short_help = "show l2sess", + .function = l2sess_show_command_fn, +}; + +VLIB_CLI_COMMAND (l2sess_show_count_command, static) = { + .path = "show l2sess count", + .short_help = "show l2sess count", + .function = l2sess_show_count_command_fn, +}; +/* *INDENT-OFF* */ + +static inline u64 +time_sec_to_clock( clib_time_t *ct, f64 sec) +{ + return (u64)(((f64)sec)/ct->seconds_per_clock); +} + +static clib_error_t * l2sess_init (vlib_main_t * vm) +{ + l2sess_main_t * sm = &l2sess_main; + clib_error_t * error = 0; + u64 cpu_time_now = clib_cpu_time_now(); + + + clib_time_t *ct = &vm->clib_time; + sm->udp_session_idle_timeout = time_sec_to_clock(ct, UDP_SESSION_IDLE_TIMEOUT_SEC); + sm->tcp_session_idle_timeout = time_sec_to_clock(ct, TCP_SESSION_IDLE_TIMEOUT_SEC); + sm->tcp_session_transient_timeout = time_sec_to_clock(ct, TCP_SESSION_TRANSIENT_TIMEOUT_SEC); + + /* The min sched time of 10e-1 causes erroneous behavior... */ + sm->timing_wheel.min_sched_time = 10e-2; + sm->timing_wheel.max_sched_time = 3600.0*48.0; + timing_wheel_init (&sm->timing_wheel, cpu_time_now, vm->clib_time.clocks_per_second); + sm->timer_wheel_next_expiring_time = 0; + sm->timer_wheel_tick = time_sec_to_clock(ct, sm->timing_wheel.min_sched_time); + /* Pre-allocate expired nodes. */ + vec_alloc (sm->data_from_advancing_timing_wheel, 32); + + l2sess_setup_nodes(); + l2output_init_output_node_vec (&sm->output_next_nodes.output_node_index_vec); + + return error; +} + +VLIB_INIT_FUNCTION (l2sess_init); + + diff --git a/src/plugins/acl/l2sess.h b/src/plugins/acl/l2sess.h new file mode 100644 index 00000000000..db899917113 --- /dev/null +++ b/src/plugins/acl/l2sess.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_l2sess_h__ +#define __included_l2sess_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> +#include <vppinfra/timing_wheel.h> + +#include <vnet/l2/l2_output.h> +#include <vnet/l2/l2_input.h> + +#define _(node_name, node_var, is_out, is_ip6, is_track) +#undef _ +#define foreach_l2sess_node \ + _("aclp-l2s-input-ip4-add", l2sess_in_ip4_add, 0, 0, 0) \ + _("aclp-l2s-input-ip6-add", l2sess_in_ip6_add, 0, 1, 0) \ + _("aclp-l2s-output-ip4-add", l2sess_out_ip4_add, 1, 0, 0) \ + _("aclp-l2s-output-ip6-add", l2sess_out_ip6_add, 1, 1, 0) \ + _("aclp-l2s-input-ip4-track", l2sess_in_ip4_track, 0, 0, 1) \ + _("aclp-l2s-input-ip6-track", l2sess_in_ip6_track, 0, 1, 1) \ + _("aclp-l2s-output-ip4-track",l2sess_out_ip4_track, 1, 0, 1) \ + _("aclp-l2s-output-ip6-track", l2sess_out_ip6_track, 1, 1, 1) + +#define _(node_name, node_var, is_out, is_ip6, is_track) \ + extern vlib_node_registration_t node_var; +foreach_l2sess_node +#undef _ + +#define TCP_FLAG_FIN 0x01 +#define TCP_FLAG_SYN 0x02 +#define TCP_FLAG_RST 0x04 +#define TCP_FLAG_PUSH 0x08 +#define TCP_FLAG_ACK 0x10 +#define TCP_FLAG_URG 0x20 +#define TCP_FLAG_ECE 0x40 +#define TCP_FLAG_CWR 0x80 +#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK) +#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK) + +typedef struct { + ip46_address_t addr; + u64 active_time; + u64 n_packets; + u64 n_bytes; + u16 port; +} l2s_session_side_t; + +enum { + L2S_SESSION_SIDE_IN = 0, + L2S_SESSION_SIDE_OUT, + L2S_N_SESSION_SIDES +}; + +typedef struct { + u64 create_time; + l2s_session_side_t side[L2S_N_SESSION_SIDES]; + u8 l4_proto; + u8 is_ip6; + u16 tcp_flags_seen; /* u16 because of two sides */ +} l2s_session_t; + +#define PROD +#ifdef PROD +#define UDP_SESSION_IDLE_TIMEOUT_SEC 600 +#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24) +#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120 +#else +#define UDP_SESSION_IDLE_TIMEOUT_SEC 15 +#define TCP_SESSION_IDLE_TIMEOUT_SEC 15 +#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 5 +#endif + +typedef struct { + /* + * the next two fields are present for all nodes, but + * only one of them is used per node - depending + * on whether the node is an input or output one. + */ +#define _(node_name, node_var, is_out, is_ip6, is_track) \ + u32 node_var ## _input_next_node_index[32]; \ + l2_output_next_nodes_st node_var ## _next_nodes; +foreach_l2sess_node +#undef _ + l2_output_next_nodes_st output_next_nodes; + + /* Next indices of the tracker nodes */ + u32 next_slot_track_node_by_is_ip6_is_out[2][2]; + + /* + * Pairing of "forward" and "reverse" tables by table index. + * Each relationship has two entries - for one and the other table, + * so it is bidirectional. + */ + + u32 *fwd_to_rev_by_table_index; + + /* + * The vector of per-interface session pools + */ + + l2s_session_t *sessions; + + /* The session timeouts */ + u64 tcp_session_transient_timeout; + u64 tcp_session_idle_timeout; + u64 udp_session_idle_timeout; + + /* Timing wheel to time out the idle sessions */ + timing_wheel_t timing_wheel; + u32 *data_from_advancing_timing_wheel; + u64 timer_wheel_next_expiring_time; + u64 timer_wheel_tick; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + ethernet_main_t * ethernet_main; + + /* Counter(s) */ + u64 counter_attempted_delete_free_session; +} l2sess_main_t; + +l2sess_main_t l2sess_main; + +/* Just exposed for acl.c */ + +void +l2sess_vlib_plugin_register (vlib_main_t * vm, void * hh, + int from_early_init); + + +#endif /* __included_l2sess_h__ */ diff --git a/src/plugins/acl/l2sess_node.c b/src/plugins/acl/l2sess_node.c new file mode 100644 index 00000000000..520e5929b4b --- /dev/null +++ b/src/plugins/acl/l2sess_node.c @@ -0,0 +1,816 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <netinet/in.h> +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <acl/l2sess.h> +#include <vnet/l2/l2_classify.h> + + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 trace_flags; + u32 session_tables[2]; + u32 session_nexts[2]; + u8 l4_proto; +} l2sess_trace_t; + +/* packet trace format function */ + +#define _(node_name, node_var, is_out, is_ip6, is_track) \ +static u8 * format_## node_var ##_trace (u8 * s, va_list * args) \ +{ \ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); \ + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); \ + l2sess_trace_t * t = va_arg (*args, l2sess_trace_t *); \ + \ + s = format (s, node_name ": sw_if_index %d, next index %d trace_flags %08x L4 proto %d\n" \ + " tables [ %d, %d ] nexts [ %d, %d ]", \ + t->sw_if_index, t->next_index, t->trace_flags, t->l4_proto, \ + t->session_tables[0], t->session_tables[1], \ + t->session_nexts[0], t->session_nexts[1]); \ + return s; \ +} +foreach_l2sess_node +#undef _ +#define foreach_l2sess_error \ +_(SWAPPED, "Mac swap packets processed") + typedef enum +{ +#define _(sym,str) L2SESS_ERROR_##sym, + foreach_l2sess_error +#undef _ + L2SESS_N_ERROR, +} l2sess_error_t; + +static char *l2sess_error_strings[] = { +#define _(sym,string) string, + foreach_l2sess_error +#undef _ +}; + +typedef enum +{ + L2SESS_NEXT_DROP, + L2SESS_N_NEXT, +} l2sess_next_t; + +u8 +l2sess_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6) +{ + u8 proto; + int proto_offset; + if (node_is_ip6) + { + proto_offset = 20; + } + else + { + proto_offset = 23; + } + proto = *((u8 *) vlib_buffer_get_current (b0) + proto_offset); + return proto; +} + + +u8 +l2sess_get_tcp_flags (vlib_buffer_t * b0, int node_is_ip6) +{ + u8 flags; + int flags_offset; + if (node_is_ip6) + { + flags_offset = 14 + 40 + 13; /* FIXME: no extension headers assumed */ + } + else + { + flags_offset = 14 + 20 + 13; + } + flags = *((u8 *) vlib_buffer_get_current (b0) + flags_offset); + return flags; +} + +static inline int +l4_tcp_or_udp (u8 proto) +{ + return ((proto == 6) || (proto == 17)); +} + +void +l2sess_get_session_tables (l2sess_main_t * sm, u32 sw_if_index, + int node_is_out, int node_is_ip6, u8 l4_proto, + u32 * session_tables) +{ +/* + * Based on the direction, l3 and l4 protocol, fill a u32[2] array: + * [0] is index for the "direct match" path, [1] is for "mirrored match". + * Store the indices of the tables to add the session to in session_tables[] + */ + l2_output_classify_main_t *l2om = &l2_output_classify_main; + l2_input_classify_main_t *l2im = &l2_input_classify_main; + + u32 output_table_index; + u32 input_table_index; + + if (!l4_tcp_or_udp (l4_proto)) + { + return; + } + + if (node_is_ip6) + { + vec_validate_init_empty (l2im-> + classify_table_index_by_sw_if_index + [L2_INPUT_CLASSIFY_TABLE_IP6], sw_if_index, + ~0); + input_table_index = + l2im-> + classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6] + [sw_if_index]; + vec_validate_init_empty (l2om-> + classify_table_index_by_sw_if_index + [L2_OUTPUT_CLASSIFY_TABLE_IP6], sw_if_index, + ~0); + output_table_index = + l2om-> + classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6] + [sw_if_index]; + } + else + { + vec_validate_init_empty (l2im-> + classify_table_index_by_sw_if_index + [L2_INPUT_CLASSIFY_TABLE_IP4], sw_if_index, + ~0); + input_table_index = + l2im-> + classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4] + [sw_if_index]; + vec_validate_init_empty (l2om-> + classify_table_index_by_sw_if_index + [L2_OUTPUT_CLASSIFY_TABLE_IP4], sw_if_index, + ~0); + output_table_index = + l2om-> + classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4] + [sw_if_index]; + } + + if (node_is_out) + { + session_tables[0] = output_table_index; + session_tables[1] = input_table_index; + } + else + { + session_tables[0] = input_table_index; + session_tables[1] = output_table_index; + } +} + +void +l2sess_get_session_nexts (l2sess_main_t * sm, u32 sw_if_index, + int node_is_out, int node_is_ip6, u8 l4_proto, + u32 * session_nexts) +{ +/* + * Based on the direction, l3 and l4 protocol, fill a u32[2] array: + * [0] is the index for the "direct match" path, [1] is for "mirrored match". + * Store the match_next_index in session_nexts[] for a new session entry which is being added to session tables. + */ + u32 input_node_index; + u32 output_node_index; + + if (!l4_tcp_or_udp (l4_proto)) + { + return; + } + + input_node_index = + sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][0]; + output_node_index = + sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][1]; + + if (node_is_out) + { + session_nexts[0] = output_node_index; + session_nexts[1] = input_node_index; + } + else + { + session_nexts[0] = input_node_index; + session_nexts[1] = output_node_index; + } +} + + +static inline void +swap_bytes (vlib_buffer_t * b0, int off_a, int off_b, int nbytes) +{ + u8 tmp; + u8 *pa = vlib_buffer_get_current (b0) + off_a; + u8 *pb = vlib_buffer_get_current (b0) + off_b; + while (nbytes--) + { + tmp = *pa; + *pa++ = *pb; + *pb++ = tmp; + } +} + +/* + * This quite pro[bv]ably is a terrible idea performance wise. Moreso doing it twice. + * Would having a long (ish) chunk of memory work better for this ? + * We will see when we get to the performance of this. + */ +void +l2sess_flip_l3l4_fields (vlib_buffer_t * b0, int node_is_ip6, u8 l4_proto) +{ + if (!l4_tcp_or_udp (l4_proto)) + { + return; + } + if (node_is_ip6) + { + swap_bytes (b0, 22, 38, 16); /* L3 */ + swap_bytes (b0, 54, 56, 2); /* L4 (when no EH!) */ + } + else + { + swap_bytes (b0, 26, 30, 4); /* L3 */ + swap_bytes (b0, 34, 36, 2); /* L4 */ + } +} + +void +l2sess_add_session (vlib_buffer_t * b0, int node_is_out, int node_is_ip6, + u32 session_table, u32 session_match_next, + u32 opaque_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u32 action = 0; + u32 metadata = 0; + +#ifdef DEBUG_SESSIONS + printf ("Adding session to table %d with next %d\n", session_table, + session_match_next); +#endif + vnet_classify_add_del_session (cm, session_table, + vlib_buffer_get_current (b0), + session_match_next, opaque_index, 0, action, + metadata, 1); +} + + + +static void * +get_ptr_to_offset (vlib_buffer_t * b0, int offset) +{ + u8 *p = vlib_buffer_get_current (b0) + offset; + return p; +} + + +/* + * FIXME: Hardcoded offsets are ugly, although if casting to structs one + * would need to take care about alignment.. So let's for now be naive and simple. + */ + +void +session_store_ip4_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess, + int node_is_out) +{ + clib_memcpy (&sess->side[1 - node_is_out].addr.ip4, + get_ptr_to_offset (b0, 26), 4); + clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 30), + 4); + sess->side[1 - node_is_out].port = + ntohs (*(u16 *) get_ptr_to_offset (b0, 34)); + sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 36)); +} + +void +session_store_ip6_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess, + int node_is_out) +{ + clib_memcpy (&sess->side[1 - node_is_out].addr.ip6, + get_ptr_to_offset (b0, 22), 16); + clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 38), + 16); + sess->side[1 - node_is_out].port = + ntohs (*(u16 *) get_ptr_to_offset (b0, 54)); + sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 56)); +} + +static void +build_match_from_session (l2sess_main_t * sm, u8 * match, + l2s_session_t * sess, int is_out) +{ + if (sess->is_ip6) + { + match[20] = sess->l4_proto; + clib_memcpy (&match[22], &sess->side[1 - is_out].addr.ip6, 16); + clib_memcpy (&match[38], &sess->side[is_out].addr.ip4, 16); + *(u16 *) & match[54] = htons (sess->side[1 - is_out].port); + *(u16 *) & match[56] = htons (sess->side[is_out].port); + } + else + { + match[23] = sess->l4_proto; + clib_memcpy (&match[26], &sess->side[1 - is_out].addr.ip6, 4); + clib_memcpy (&match[30], &sess->side[is_out].addr.ip4, 4); + *(u16 *) & match[34] = htons (sess->side[1 - is_out].port); + *(u16 *) & match[36] = htons (sess->side[is_out].port); + } +} + +static void +delete_session (l2sess_main_t * sm, u32 sw_if_index, u32 session_index) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + u8 match[5 * 16]; /* For building the mock of the packet to delete the classifier session */ + u32 session_tables[2] = { ~0, ~0 }; + l2s_session_t *sess = sm->sessions + session_index; + if (pool_is_free (sm->sessions, sess)) + { + sm->counter_attempted_delete_free_session++; + return; + } + l2sess_get_session_tables (sm, sw_if_index, 0, sess->is_ip6, sess->l4_proto, + session_tables); + if (session_tables[1] != ~0) + { + build_match_from_session (sm, match, sess, 1); + vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0, + 0, 0); + } + if (session_tables[1] != ~0) + { + build_match_from_session (sm, match, sess, 1); + vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0, + 0, 0); + } + pool_put (sm->sessions, sess); +} + +static void +udp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s, + int which_side, u64 now) +{ + l2s_session_side_t *ss = &s->side[which_side]; + ss->active_time = now; + ss->n_packets++; + ss->n_bytes += b0->current_data + b0->current_length; +} + +static inline u64 +udp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) +{ + return (sm->udp_session_idle_timeout); +} + +static void +tcp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s, + int which_side, u64 now) +{ + l2s_session_side_t *ss = &s->side[which_side]; + ss->active_time = now; + ss->n_packets++; + ss->n_bytes += b0->current_data + b0->current_length; + /* Very very lightweight TCP state tracking: just record which flags were seen */ + s->tcp_flags_seen |= + l2sess_get_tcp_flags (b0, s->is_ip6) << (8 * which_side); +} + +/* + * Since we are tracking for the purposes of timing the sessions out, + * we mostly care about two states: established (maximize the idle timeouts) + * and transient (halfopen/halfclosed/reset) - we need to have a reasonably short timeout to + * quickly get rid of sessions but not short enough to violate the TCP specs. + */ + +static inline u64 +tcp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) +{ + /* seen both SYNs and ACKs but not FINs means we are in establshed state */ + u16 masked_flags = + sess->tcp_flags_seen & ((TCP_FLAGS_RSTFINACKSYN << 8) + + TCP_FLAGS_RSTFINACKSYN); + if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags) + { + return (sm->tcp_session_idle_timeout); + } + else + { + return (sm->tcp_session_transient_timeout); + } +} + +static inline u64 +session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now) +{ + u64 timeout; + + switch (sess->l4_proto) + { + case 6: + timeout = tcp_session_get_timeout (sm, sess, now); + break; + case 17: + timeout = udp_session_get_timeout (sm, sess, now); + break; + default: + timeout = 0; + } + + return timeout; +} + +static inline u64 +get_session_last_active_time(l2s_session_t * sess) +{ + u64 last_active = + sess->side[0].active_time > + sess->side[1].active_time ? sess->side[0].active_time : sess->side[1]. + active_time; + return last_active; +} + +static int +session_is_alive (l2sess_main_t * sm, l2s_session_t * sess, u64 now, u64 *last_active_cache) +{ + u64 last_active = get_session_last_active_time(sess); + u64 timeout = session_get_timeout (sm, sess, now); + int is_alive = ((now - last_active) < timeout); + if (last_active_cache) + *last_active_cache = last_active; + return is_alive; +} + +static void +check_idle_sessions (l2sess_main_t * sm, u32 sw_if_index, u64 now) +{ + sm->timer_wheel_next_expiring_time = 0; + sm->data_from_advancing_timing_wheel + = + timing_wheel_advance (&sm->timing_wheel, now, + sm->data_from_advancing_timing_wheel, + &sm->timer_wheel_next_expiring_time); +#ifdef DEBUG_SESSIONS_VERBOSE + { + clib_time_t *ct = &sm->vlib_main->clib_time; + f64 ctime; + ctime = now * ct->seconds_per_clock; + clib_warning ("Now : %U", format_time_interval, "h:m:s:u", ctime); + ctime = sm->timer_wheel_next_expiring_time * ct->seconds_per_clock; + clib_warning ("Next expire: %U", format_time_interval, "h:m:s:u", ctime); + clib_warning ("Expired items: %d", + (int) vec_len (sm->data_from_advancing_timing_wheel)); + } +#endif + + sm->timer_wheel_next_expiring_time = now + sm->timer_wheel_tick; + if (PREDICT_FALSE ( 0 == sm->data_from_advancing_timing_wheel )) { + return; + } + + if (PREDICT_FALSE (_vec_len (sm->data_from_advancing_timing_wheel) > 0)) + { + uword i; + for (i = 0; i < _vec_len (sm->data_from_advancing_timing_wheel); i++) + { + u32 session_index = sm->data_from_advancing_timing_wheel[i]; + if (!pool_is_free_index (sm->sessions, session_index)) + { + l2s_session_t *sess = sm->sessions + session_index; + u64 last_active; + if (session_is_alive (sm, sess, now, &last_active)) + { +#ifdef DEBUG_SESSIONS + clib_warning ("Restarting timer for session %d", (int) session_index); +#endif + /* Pretend we did this in the past, at last_active moment */ + timing_wheel_insert (&sm->timing_wheel, + last_active + session_get_timeout (sm, sess, + last_active), + session_index); + } + else + { +#ifdef DEBUG_SESSIONS + clib_warning ("Deleting session %d", (int) session_index); +#endif + delete_session (sm, sw_if_index, session_index); + } + } + } + _vec_len (sm->data_from_advancing_timing_wheel) = 0; + } +} + +static uword +l2sess_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + l2sess_next_t next_index; + u32 pkts_swapped = 0; + u32 cached_sw_if_index = (u32) ~ 0; + u32 cached_next_index = (u32) ~ 0; + u32 feature_bitmap0; + u32 trace_flags0; + + l2sess_main_t *sm = &l2sess_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Only a single loop for now for simplicity */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = L2SESS_NEXT_DROP; + u32 sw_if_index0; + //ethernet_header_t *en0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + //en0 = vlib_buffer_get_current (b0); + +/* + * The non-boilerplate is in the block below. + * Note first a magic macro block that sets up the behavior qualifiers: + * node_is_out : 1 = is output, 0 = is input + * node_is_ip6 : 1 = is ip6, 0 = is ip4 + * node_is_track : 1 = is a state tracking node, 0 - is a session addition node + * + * Subsequently the code adjusts its behavior depending on these variables. + * It's most probably not great performance wise but much easier to work with. + * + */ + { + int node_is_out = -1; + CLIB_UNUSED (int node_is_ip6) = -1; + CLIB_UNUSED (int node_is_track) = -1; + u32 node_index = 0; + u32 session_tables[2] = { ~0, ~0 }; + u32 session_nexts[2] = { ~0, ~0 }; + l2_output_next_nodes_st *next_nodes = 0; + u32 *input_feat_next_node_index; + u8 l4_proto; + u64 now = clib_cpu_time_now (); + +/* + * Set the variables according to which of the 8 nodes we are. + * Hopefully the compiler is smart enough to eliminate the extraneous. + */ +#define _(node_name, node_var, is_out, is_ip6, is_track) \ +if(node_var.index == node->node_index) \ + { \ + node_is_out = is_out; \ + node_is_ip6 = is_ip6; \ + node_is_track = is_track; \ + node_index = node_var.index; \ + next_nodes = &sm->node_var ## _next_nodes; \ + input_feat_next_node_index = sm->node_var ## _input_next_node_index; \ + } + foreach_l2sess_node +#undef _ + trace_flags0 = 0; + if (node_is_out) + { + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + } + else + { + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + } + /* potentially also remove the nodes here */ + feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; + + if (node_is_track) + { + u32 sess_index = vnet_buffer (b0)->l2_classify.opaque_index; + l2s_session_t *sess = sm->sessions + sess_index; + l4_proto = sess->l4_proto; + + if (session_is_alive (sm, sess, now, 0)) + { + if (6 == l4_proto) + { + tcp_session_account_buffer (b0, sess, node_is_out, + now); + } + else + { + udp_session_account_buffer (b0, sess, node_is_out, + now); + } + } + else + { + timing_wheel_delete (&sm->timing_wheel, sess_index); + delete_session (sm, sw_if_index0, sess_index); + /* FIXME: drop the packet that hit the obsolete node, for now. We really ought to recycle it. */ + next0 = 0; + } + } + else + { + /* + * "-add" node: take l2opaque which arrived to us, and deduce + * the tables out of that. ~0 means the topmost classifier table + * applied for this AF on the RX(for input)/TX(for output)) sw_if_index. + * Also add the mirrored session to the paired table. + */ + l2s_session_t *sess; + u32 sess_index; + + l4_proto = l2sess_get_l4_proto (b0, node_is_ip6); + + pool_get (sm->sessions, sess); + sess_index = sess - sm->sessions; + sess->create_time = now; + sess->side[node_is_out].active_time = now; + sess->side[1 - node_is_out].active_time = now; + sess->l4_proto = l4_proto; + sess->is_ip6 = node_is_ip6; + if (node_is_ip6) + { + session_store_ip6_l3l4_info (b0, sess, node_is_out); + } + else + { + session_store_ip4_l3l4_info (b0, sess, node_is_out); + } + + l2sess_get_session_tables (sm, sw_if_index0, node_is_out, + node_is_ip6, l4_proto, + session_tables); + l2sess_get_session_nexts (sm, sw_if_index0, node_is_out, + node_is_ip6, l4_proto, + session_nexts); + l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto); + if (session_tables[1] != ~0) + { + l2sess_add_session (b0, node_is_out, node_is_ip6, + session_tables[1], session_nexts[1], + sess_index); + } + l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto); + if (session_tables[0] != ~0) + { + l2sess_add_session (b0, node_is_out, node_is_ip6, + session_tables[0], session_nexts[0], + sess_index); + } + if (6 == sess->l4_proto) + { + tcp_session_account_buffer (b0, sess, node_is_out, now); + } + else + { + udp_session_account_buffer (b0, sess, node_is_out, now); + } + timing_wheel_insert (&sm->timing_wheel, + now + session_get_timeout (sm, sess, + now), + sess_index); + } + + if (now >= sm->timer_wheel_next_expiring_time) + { + check_idle_sessions (sm, sw_if_index0, now); + } + + if (node_is_out) + { + if (feature_bitmap0) + { + trace_flags0 |= 0x10; + } + if (sw_if_index0 == cached_sw_if_index) + { + trace_flags0 |= 0x20; + } + l2_output_dispatch (sm->vlib_main, + sm->vnet_main, + node, + node_index, + &cached_sw_if_index, + &cached_next_index, + next_nodes, + b0, sw_if_index0, feature_bitmap0, + &next0); + trace_flags0 |= 2; + + } + else + { + next0 = + feat_bitmap_get_next_node_index (input_feat_next_node_index, + feature_bitmap0); + trace_flags0 |= 4; + + } + + + + if (next0 >= node->n_next_nodes) + { + trace_flags0 |= 1; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + l2sess_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->trace_flags = trace_flags0; + t->l4_proto = l4_proto; + t->session_tables[0] = session_tables[0]; + t->session_tables[1] = session_tables[1]; + t->session_nexts[0] = session_nexts[0]; + t->session_nexts[1] = session_nexts[1]; + } + + } + pkts_swapped += 1; + if (next0 >= node->n_next_nodes) + { + next0 = 0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, node->node_index, + L2SESS_ERROR_SWAPPED, pkts_swapped); + return frame->n_vectors; +} + + +#define _(node_name, node_var, is_out, is_ip6, is_track) \ +static uword \ +node_var ## node_fn (vlib_main_t * vm, \ + vlib_node_runtime_t * node, \ + vlib_frame_t * frame) \ +{ \ + return l2sess_node_fn(vm, node, frame); \ +} \ +VLIB_REGISTER_NODE (node_var) = { \ + .function = node_var ## node_fn, \ + .name = node_name, \ + .vector_size = sizeof (u32), \ + .format_trace = format_ ## node_var ## _trace, \ + .type = VLIB_NODE_TYPE_INTERNAL, \ + \ + .n_errors = ARRAY_LEN(l2sess_error_strings), \ + .error_strings = l2sess_error_strings, \ + \ + .n_next_nodes = L2SESS_N_NEXT, \ + .next_nodes = { \ + [L2SESS_NEXT_DROP] = "error-drop", \ + }, \ +}; +foreach_l2sess_node +#undef _ diff --git a/src/plugins/acl/node_in.c b/src/plugins/acl/node_in.c new file mode 100644 index 00000000000..2a5199a9ab8 --- /dev/null +++ b/src/plugins/acl/node_in.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <acl/acl.h> +#include "node_in.h" + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 match_acl_index; + u32 match_rule_index; + u32 trace_bitmap; +} acl_in_trace_t; + +/* packet trace format function */ +static u8 * +format_acl_in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + acl_in_trace_t *t = va_arg (*args, acl_in_trace_t *); + + s = + format (s, + "ACL_IN: sw_if_index %d, next index %d, match: inacl %d rule %d trace_bits %08x", + t->sw_if_index, t->next_index, t->match_acl_index, + t->match_rule_index, t->trace_bitmap); + return s; +} + +vlib_node_registration_t acl_in_node; + +#define foreach_acl_in_error \ +_(ACL_CHECK, "InACL check packets processed") + +typedef enum +{ +#define _(sym,str) ACL_IN_ERROR_##sym, + foreach_acl_in_error +#undef _ + ACL_IN_N_ERROR, +} acl_in_error_t; + +static char *acl_in_error_strings[] = { +#define _(sym,string) string, + foreach_acl_in_error +#undef _ +}; + +static uword +acl_in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + acl_in_next_t next_index; + u32 pkts_acl_checked = 0; + u32 feature_bitmap0; + u32 trace_bitmap = 0; + u32 *input_feat_next_node_index = + acl_main.acl_in_node_input_next_node_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = ~0; + u32 sw_if_index0; + u32 next = ~0; + u32 match_acl_index = ~0; + u32 match_rule_index = ~0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; + + input_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index, + &match_rule_index, &trace_bitmap); + if (next != ~0) + { + next0 = next; + } + if (next0 == ~0) + { + next0 = + feat_bitmap_get_next_node_index (input_feat_next_node_index, + feature_bitmap0); + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + acl_in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->match_acl_index = match_acl_index; + t->match_rule_index = match_rule_index; + t->trace_bitmap = trace_bitmap; + } + + next0 = next0 < node->n_next_nodes ? next0 : 0; + + pkts_acl_checked += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, acl_in_node.index, + ACL_IN_ERROR_ACL_CHECK, pkts_acl_checked); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (acl_in_node) = +{ + .function = acl_in_node_fn,.name = "acl-plugin-in",.vector_size = + sizeof (u32),.format_trace = format_acl_in_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (acl_in_error_strings),.error_strings = + acl_in_error_strings,.n_next_nodes = ACL_IN_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [ACL_IN_ERROR_DROP] = "error-drop", + [ACL_IN_ETHERNET_INPUT] = "ethernet-input", + [ACL_IN_L2S_INPUT_IP4_ADD] = "aclp-l2s-input-ip4-add", + [ACL_IN_L2S_INPUT_IP6_ADD] = "aclp-l2s-input-ip6-add",} +,}; diff --git a/src/plugins/acl/node_in.h b/src/plugins/acl/node_in.h new file mode 100644 index 00000000000..502bbf8dd1d --- /dev/null +++ b/src/plugins/acl/node_in.h @@ -0,0 +1,12 @@ +#ifndef _NODE_IN_H_ +#define _NODE_IN_H_ + +typedef enum { + ACL_IN_ERROR_DROP, + ACL_IN_ETHERNET_INPUT, + ACL_IN_L2S_INPUT_IP4_ADD, + ACL_IN_L2S_INPUT_IP6_ADD, + ACL_IN_N_NEXT, +} acl_in_next_t; + +#endif diff --git a/src/plugins/acl/node_out.c b/src/plugins/acl/node_out.c new file mode 100644 index 00000000000..50af3679b6e --- /dev/null +++ b/src/plugins/acl/node_out.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <acl/acl.h> + +#include "node_out.h" + +typedef struct +{ + u32 next_index; + u32 sw_if_index; + u32 match_acl_index; + u32 match_rule_index; + u32 trace_bitmap; +} acl_out_trace_t; + +/* packet trace format function */ +static u8 * +format_acl_out_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + acl_out_trace_t *t = va_arg (*args, acl_out_trace_t *); + s = + format (s, + "ACL_OUT: sw_if_index %d, next index %d, match: outacl %d rule %d trace_bits %08x", + t->sw_if_index, t->next_index, t->match_acl_index, + t->match_rule_index, t->trace_bitmap); + return s; +} + +vlib_node_registration_t acl_out_node; + +#define foreach_acl_out_error \ +_(ACL_CHECK, "OutACL check packets processed") + +typedef enum +{ +#define _(sym,str) ACL_OUT_ERROR_##sym, + foreach_acl_out_error +#undef _ + ACL_OUT_N_ERROR, +} acl_out_error_t; + +static char *acl_out_error_strings[] = { +#define _(sym,string) string, + foreach_acl_out_error +#undef _ +}; + +static uword +acl_out_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + acl_main_t *am = &acl_main; + l2_output_next_nodes_st *next_nodes = &am->acl_out_output_next_nodes; + u32 n_left_from, *from, *to_next; + acl_out_next_t next_index; + u32 pkts_acl_checked = 0; + u32 feature_bitmap0; + u32 cached_sw_if_index = (u32) ~ 0; + u32 cached_next_index = (u32) ~ 0; + u32 match_acl_index = ~0; + u32 match_rule_index = ~0; + u32 trace_bitmap = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = ~0; + u32 next = 0; + u32 sw_if_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX]; + feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap; + + output_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index, + &match_rule_index, &trace_bitmap); + if (next != ~0) + { + next0 = next; + } + if (next0 == ~0) + { + l2_output_dispatch (vm, + am->vnet_main, + node, + acl_out_node.index, + &cached_sw_if_index, + &cached_next_index, + next_nodes, + b0, sw_if_index0, feature_bitmap0, &next0); + } + + + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + acl_out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->match_acl_index = match_acl_index; + t->match_rule_index = match_rule_index; + t->trace_bitmap = trace_bitmap; + } + + pkts_acl_checked += 1; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, acl_out_node.index, + ACL_OUT_ERROR_ACL_CHECK, pkts_acl_checked); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (acl_out_node) = +{ + .function = acl_out_node_fn,.name = "acl-plugin-out",.vector_size = + sizeof (u32),.format_trace = format_acl_out_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.n_errors = + ARRAY_LEN (acl_out_error_strings),.error_strings = + acl_out_error_strings,.n_next_nodes = ACL_OUT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [ACL_OUT_ERROR_DROP] = "error-drop", + [ACL_OUT_INTERFACE_OUTPUT] = "interface-output", + [ACL_OUT_L2S_OUTPUT_IP4_ADD] = "aclp-l2s-output-ip4-add", + [ACL_OUT_L2S_OUTPUT_IP6_ADD] = "aclp-l2s-output-ip6-add",} +,}; diff --git a/src/plugins/acl/node_out.h b/src/plugins/acl/node_out.h new file mode 100644 index 00000000000..c919f3b701c --- /dev/null +++ b/src/plugins/acl/node_out.h @@ -0,0 +1,12 @@ +#ifndef _NODE_OUT_H_ +#define _NODE_OUT_H_ + +typedef enum { + ACL_OUT_ERROR_DROP, + ACL_OUT_INTERFACE_OUTPUT, + ACL_OUT_L2S_OUTPUT_IP4_ADD, + ACL_OUT_L2S_OUTPUT_IP6_ADD, + ACL_OUT_N_NEXT, +} acl_out_next_t; + +#endif diff --git a/src/plugins/acl/test/run-python b/src/plugins/acl/test/run-python new file mode 100755 index 00000000000..215eb17aa8d --- /dev/null +++ b/src/plugins/acl/test/run-python @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Do all the legwork to run a scapy shell with APIs available for load +# +CURR_DIR=`pwd` +ROOT_DIR=`git rev-parse --show-toplevel` +cd $ROOT_DIR +sudo apt-get install -y python-virtualenv +# uncomment the line below to enable build of plugins and api each time +# make plugins && make build-vpp-api || exit +virtualenv virtualenv +virtualenv/bin/pip install ipaddress +virtualenv/bin/pip install scapy +# install the python API into the virtualenv +cd $ROOT_DIR/vpp-api/python/ +$ROOT_DIR/virtualenv/bin/python setup.py install +# install the python ACL plugin API into the virtualenv +ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin` +cd $ACL_PLUGIN_SETUP_DIR; +$ROOT_DIR/virtualenv/bin/python setup.py install +cd $ROOT_DIR +# figure out the shared library path and start scapy +export LD_LIBRARY_PATH=`pwd`/`find . -name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1` +cd $CURR_DIR +sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH $ROOT_DIR/virtualenv/bin/python $1 $2 $3 $4 $5 $6 $7 $8 $9 + + + diff --git a/src/plugins/acl/test/run-scapy b/src/plugins/acl/test/run-scapy new file mode 100755 index 00000000000..266f07d1b1a --- /dev/null +++ b/src/plugins/acl/test/run-scapy @@ -0,0 +1,26 @@ +#!/bin/sh +# +# Do all the legwork to run a scapy shell with APIs available for load +# +ROOT_DIR=`git rev-parse --show-toplevel` +cd $ROOT_DIR +sudo apt-get install -y python-virtualenv +# uncomment the line below to enable the build of plugins and API each time.. +# make plugins && make build-vpp-api || exit +virtualenv virtualenv +virtualenv/bin/pip install ipaddress +virtualenv/bin/pip install scapy +# install the python API into the virtualenv +cd $ROOT_DIR/vpp-api/python/ +$ROOT_DIR/virtualenv/bin/python setup.py install +# install the python ACL plugin API into the virtualenv +ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin` +cd $ACL_PLUGIN_SETUP_DIR; +$ROOT_DIR/virtualenv/bin/python setup.py install +cd $ROOT_DIR +# figure out the shared library path and start scapy +export LD_LIBRARY_PATH=`pwd`/`find . -name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1` +sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH virtualenv/bin/scapy + + + diff --git a/src/plugins/acl/test/test_acl_plugin.py b/src/plugins/acl/test/test_acl_plugin.py new file mode 100644 index 00000000000..7fc72d670a5 --- /dev/null +++ b/src/plugins/acl/test/test_acl_plugin.py @@ -0,0 +1,118 @@ +from __future__ import print_function +import unittest, sys, time, threading, struct, logging, os +import vpp_papi +# import vpp_papi_plugins.acl +from ipaddress import * +papi_event = threading.Event() +print(vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS) +def papi_event_handler(result): + if result.vl_msg_id == vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS: + return + if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_INTERFACE_COUNTERS: + print('Interface counters', result) + return + if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_IP6_FIB_COUNTERS: + print('IPv6 FIB counters', result) + papi_event.set() + return + + print('Unknown message id:', result.vl_msg_id) + +import glob, subprocess +class TestAclPlugin(unittest.TestCase): + @classmethod + def setUpClass(cls): + print("Setup") + @classmethod + def tearDownClass(cls): + print("Teardown") + + def setUp(self): + print("Connecting API") + r = vpp_papi.connect("test_papi") + self.assertEqual(r, 0) + + def tearDown(self): + r = vpp_papi.disconnect() + self.assertEqual(r, 0) + + # + # The tests themselves + # + + # + # Basic request / reply + # + def test_show_version(self): + t = vpp_papi.show_version() + print('T', t); + program = t.program.decode().rstrip('\x00') + self.assertEqual('vpe', program) + + def x_test_acl_add(self): + print("Test ACL add") + self.assertEqual(1, 1) + + # + # Details / Dump + # + def x_test_details_dump(self): + t = vpp_papi.sw_interface_dump(0, b'') + print('Dump/details T', t) + + # + # Arrays + # + def x_test_arrays(self): + t = vpp_papi.vnet_get_summary_stats() + print('Summary stats', t) + print('Packets:', t.total_pkts[0]) + print('Packets:', t.total_pkts[1]) + # + # Variable sized arrays and counters + # + #@unittest.skip("stats") + def x_test_want_stats(self): + pid = 123 + vpp_papi.register_event_callback(papi_event_handler) + papi_event.clear() + + # Need to configure IPv6 to get som IPv6 FIB stats + t = vpp_papi.create_loopback('') + print(t) + self.assertEqual(t.retval, 0) + + ifindex = t.sw_if_index + addr = str(IPv6Address(u'1::1').packed) + t = vpp_papi.sw_interface_add_del_address(ifindex, 1, 1, 0, 16, addr) + print(t) + self.assertEqual(t.retval, 0) + + # Check if interface is up + # XXX: Add new API to query interface state based on ifindex, instead of dump all. + t = vpp_papi.sw_interface_set_flags(ifindex, 1, 1, 0) + self.assertEqual(t.retval, 0) + + t = vpp_papi.want_stats(True, pid) + + print (t) + + # + # Wait for some stats + # + self.assertEqual(papi_event.wait(15), True) + t = vpp_papi.want_stats(False, pid) + print (t) + + + # + # Plugins? + # + +if __name__ == '__main__' or __name__ == '__builtin__': + print("This is main") + suite = unittest.TestLoader().loadTestsFromTestCase(TestAclPlugin) + unittest.TextTestRunner(verbosity=2).run(suite) + #logging.basicConfig(level=logging.DEBUG) + # unittest.main() + diff --git a/src/plugins/ioam.am b/src/plugins/ioam.am new file mode 100644 index 00000000000..a4984b18299 --- /dev/null +++ b/src/plugins/ioam.am @@ -0,0 +1,150 @@ +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +######################################## +# iOAM Proof of Transit +######################################## + +ioam_pot_plugin_la_SOURCES = \ + ioam/lib-pot/pot_util.c \ + ioam/encap/ip6_ioam_pot.c \ + ioam/lib-pot/pot_util.h \ + ioam/lib-pot/math64.h \ + ioam/lib-pot/pot_api.c + +noinst_HEADERS += \ + ioam/lib-pot/pot_all_api_h.h \ + ioam/lib-pot/pot_msg_enum.h \ + ioam/lib-pot/pot.api.h \ + ioam/lib-pot/pot_util.h \ + ioam/lib-pot/math64.h + +API_FILES += ioam/lib-pot/pot.api + +ioam_pot_test_plugin_la_SOURCES = \ + ioam/lib-pot/pot_test.c \ + ioam/lib-pot/pot_plugin.api.h + +vppapitestplugins_LTLIBRARIES += ioam_pot_test_plugin.la +vppplugins_LTLIBRARIES += ioam_pot_plugin.la + +######################################## +# iOAM trace export for IPv6 +######################################## + +ioam_export_plugin_la_SOURCES = \ +ioam/export/ioam_export.c \ +ioam/export/node.c \ +ioam/export/ioam_export.api.h \ +ioam/export/ioam_export_thread.c + +noinst_HEADERS += \ + ioam/export/ioam_export_all_api_h.h \ + ioam/export/ioam_export_msg_enum.h \ + ioam/export/ioam_export.api.h + +API_FILES += ioam/export/ioam_export.api + +ioam_export_test_plugin_la_SOURCES = \ + ioam/export/ioam_export_test.c \ + ioam/export/ioam_export_plugin.api.h + +vppapitestplugins_LTLIBRARIES += ioam_export_test_plugin.la +vppplugins_LTLIBRARIES += ioam_export_plugin.la + +######################################## +# iOAM Trace +######################################## +libioam_trace_plugin_la_SOURCES = \ + ioam/lib-trace/trace_util.c \ + ioam/encap/ip6_ioam_trace.c \ + ioam/lib-trace/trace_util.h \ + ioam/lib-trace/trace_api.c + +noinst_HEADERS += \ + ioam/export/ioam_export_all_api_h.h \ + ioam/lib-trace/trace_all_api_h.h \ + ioam/lib-trace/trace_msg_enum.h \ + ioam/lib-trace/trace.api.h \ + ioam/lib-trace/trace_util.h + +API_FILES += ioam/lib-trace/trace.api + +ioam_trace_test_plugin_la_SOURCES = \ + ioam/lib-trace/trace_test.c \ + ioam/lib-trace/trace_plugin.api.h + +vppapitestplugins_LTLIBRARIES += ioam_trace_test_plugin.la +vppplugins_LTLIBRARIES += libioam_trace_plugin.la + +######################################## +# VxLAN-GPE +######################################## +libioam_vxlan_gpe_plugin_la_SOURCES = \ + ioam/lib-vxlan-gpe/ioam_encap.c \ + ioam/lib-vxlan-gpe/ioam_decap.c \ + ioam/lib-vxlan-gpe/ioam_transit.c \ + ioam/lib-vxlan-gpe/ioam_pop.c \ + ioam/lib-vxlan-gpe/vxlan_gpe_api.c \ + ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c \ + ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c \ + ioam/export-vxlan-gpe/vxlan_gpe_node.c \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h\ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c + +noinst_HEADERS += \ + ioam/export/ioam_export_all_api_h.h \ + ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h \ + ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h \ + ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h \ + ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h \ + ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h \ + ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h + +API_FILES += ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api +API_FILES += ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api + +ioam_vxlan_gpe_test_plugin_la_SOURCES = \ + ioam/lib-vxlan-gpe/vxlan_gpe_test.c \ + ioam/lib-vxlan-gpe/vxlan_gpe_plugin.api.h + +vppapitestplugins_LTLIBRARIES += ioam_vxlan_gpe_test_plugin.la +vppplugins_LTLIBRARIES += libioam_vxlan_gpe_plugin.la + +vxlan_gpe_ioam_export_test_plugin_la_SOURCES = \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c \ + ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_plugin.api.h + +vppapitestplugins_LTLIBRARIES += vxlan_gpe_ioam_export_test_plugin.la + +######################################## +# iOAM E2E plugin +######################################## + +ioam_e2e_plugin_la_SOURCES = \ + ioam/encap/ip6_ioam_e2e.c \ + ioam/encap/ip6_ioam_seqno.c \ + ioam/encap/ip6_ioam_seqno_analyse.c + +noinst_HEADERS += \ + ioam/encap/ip6_ioam_e2e.h \ + ioam/encap/ip6_ioam_seqno.h + +vppplugins_LTLIBRARIES += ioam_e2e_plugin.la + +# vi:syntax=automake diff --git a/src/plugins/ioam/dir.dox b/src/plugins/ioam/dir.dox new file mode 100644 index 00000000000..f3389b52c3e --- /dev/null +++ b/src/plugins/ioam/dir.dox @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir + @brief Inband OAM (iOAM) implementation +*/ diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.c b/src/plugins/ioam/encap/ip6_ioam_e2e.c new file mode 100644 index 00000000000..0839cdceca7 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_e2e.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/plugin/plugin.h> + +#include "ip6_ioam_e2e.h" + +ioam_e2e_main_t ioam_e2e_main; + +static u8 * ioam_e2e_trace_handler (u8 * s, + ip6_hop_by_hop_option_t *opt) +{ + ioam_e2e_option_t * e2e = (ioam_e2e_option_t *)opt; + u32 seqno = 0; + + if (e2e) + { + seqno = clib_net_to_host_u32 (e2e->e2e_data); + } + + s = format (s, "SeqNo = 0x%Lx", seqno); + return s; +} + +int +ioam_e2e_config_handler (void *data, u8 disable) +{ + int *analyse = data; + + /* Register hanlders if enabled */ + if (!disable) + { + /* If encap node register for encap handler */ + if (0 == *analyse) + { + if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE, + ioam_seqno_encap_handler, + ioam_e2e_trace_handler) < 0) + { + return (-1); + } + } + /* If analyze node then register for decap handler */ + else + { + if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE, + ioam_seqno_decap_handler) < 0) + { + return (-1); + } + } + return 0; + } + + /* UnRegister handlers */ + (void) ip6_hbh_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE); + (void) ip6_hbh_pop_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE); + return 0; +} + +int +ioam_e2e_rewrite_handler (u8 *rewrite_string, + u8 *rewrite_size) +{ + ioam_e2e_option_t *e2e_option; + + if (rewrite_string && *rewrite_size == sizeof(ioam_e2e_option_t)) + { + e2e_option = (ioam_e2e_option_t *)rewrite_string; + e2e_option->hdr.type = HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE + | HBH_OPTION_TYPE_SKIP_UNKNOWN; + e2e_option->hdr.length = sizeof (ioam_e2e_option_t) - + sizeof (ip6_hop_by_hop_option_t); + return(0); + } + return(-1); +} + +u32 +ioam_e2e_flow_handler (u32 ctx, u8 add) +{ + ioam_e2e_data_t *data; + u16 i; + + if (add) + { + pool_get(ioam_e2e_main.e2e_data, data); + data->flow_ctx = ctx; + ioam_seqno_init_bitmap(&data->seqno_data); + return ((u32) (data - ioam_e2e_main.e2e_data)); + } + + /* Delete case */ + for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++) + { + if (pool_is_free_index(ioam_e2e_main.e2e_data, i)) + continue; + + data = pool_elt_at_index(ioam_e2e_main.e2e_data, i); + if (data && (data->flow_ctx == ctx)) + { + pool_put_index(ioam_e2e_main.e2e_data, i); + return (0); + } + } + return 0; +} + +static clib_error_t * +ioam_show_e2e_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ioam_e2e_data_t *e2e_data; + u8 *s = 0; + int i; + + vec_reset_length(s); + + s = format(0, "IOAM E2E information: \n"); + for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++) + { + if (pool_is_free_index(ioam_e2e_main.e2e_data, i)) + continue; + + e2e_data = pool_elt_at_index(ioam_e2e_main.e2e_data, i); + s = format(s, "Flow name: %s\n", get_flow_name_from_flow_ctx(e2e_data->flow_ctx)); + + s = show_ioam_seqno_cmd_fn(s, + &e2e_data->seqno_data, + !IOAM_DEAP_ENABLED(e2e_data->flow_ctx)); + } + + vlib_cli_output(vm, "%v", s); + return 0; +} + + +VLIB_CLI_COMMAND (ioam_show_e2e_cmd, static) = { + .path = "show ioam e2e ", + .short_help = "show ioam e2e information", + .function = ioam_show_e2e_cmd_fn, +}; + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + clib_error_t * error = 0; + + ioam_e2e_main.vlib_main = vm; + ioam_e2e_main.vnet_main = h->vnet_main; + return error; +} + +/* + * Init handler E2E headet handling. + * Init hanlder registers encap, decap, trace and Rewrite handlers. + */ +static clib_error_t * +ioam_e2e_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init))) + { + return(error); + } + + /* + * As of now we have only PPC under E2E header. + */ + if (ip6_hbh_config_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE, + ioam_e2e_config_handler) < 0) + { + return (clib_error_create("Registration of " + "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE for rewrite failed")); + } + + if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE, + sizeof(ioam_e2e_option_t), + ioam_e2e_rewrite_handler) < 0) + { + return (clib_error_create("Registration of " + "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE for rewrite failed")); + } + + if (ip6_hbh_flow_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE, + ioam_e2e_flow_handler) < 0) + { + return (clib_error_create("Registration of " + "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE Flow handler failed")); + } + + return (0); +} + +/* + * Init function for the E2E lib. + * ip6_hop_by_hop_ioam_e2e_init gets called during init. + */ +VLIB_INIT_FUNCTION (ioam_e2e_init); diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.h b/src/plugins/ioam/encap/ip6_ioam_e2e.h new file mode 100644 index 00000000000..18f35f80c60 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_e2e.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_ip6_ioam_e2e_h__ +#define __included_ip6_ioam_e2e_h__ + +#include "ip6_ioam_seqno.h" + +typedef struct ioam_e2e_data_t_ { + u32 flow_ctx; + u32 pad; + ioam_seqno_data seqno_data; +} ioam_e2e_data_t; + +typedef struct { + ioam_e2e_data_t *e2e_data; + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} ioam_e2e_main_t; + +extern ioam_e2e_main_t ioam_e2e_main; + +static inline ioam_seqno_data * +ioam_e2ec_get_seqno_data_from_flow_ctx (u32 flow_ctx) +{ + ioam_e2e_data_t *data = NULL; + u32 index; + + index = get_flow_data_from_flow_ctx(flow_ctx, + HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE); + data = &ioam_e2e_main.e2e_data[index]; + return &(data->seqno_data); +} + +#endif /* __included_ioam_e2e_h__ */ diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.c b/src/plugins/ioam/encap/ip6_ioam_pot.c new file mode 100644 index 00000000000..05f42c91d0f --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_pot.c @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip6.h> +#include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/ip/ip6_hop_by_hop_packet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <ioam/lib-pot/pot_util.h> + +typedef CLIB_PACKED(struct { + ip6_hop_by_hop_option_t hdr; + u8 pot_type; +#define PROFILE_ID_MASK 0xF + u8 reserved_profile_id; /* 4 bits reserved, 4 bits to carry profile id */ + u64 random; + u64 cumulative; +}) ioam_pot_option_t; + +#define foreach_ip6_hop_by_hop_ioam_pot_stats \ + _(PROCESSED, "Pkts with ip6 hop-by-hop pot options") \ + _(PROFILE_MISS, "Pkts with ip6 hop-by-hop pot options but no profile set") \ + _(PASSED, "Pkts with POT in Policy") \ + _(FAILED, "Pkts with POT out of Policy") + +static char * ip6_hop_by_hop_ioam_pot_stats_strings[] = { +#define _(sym,string) string, + foreach_ip6_hop_by_hop_ioam_pot_stats +#undef _ +}; + +typedef enum { +#define _(sym,str) IP6_IOAM_POT_##sym, + foreach_ip6_hop_by_hop_ioam_pot_stats +#undef _ + IP6_IOAM_POT_N_STATS, +} ip6_ioam_pot_stats_t; + +typedef struct { + /* stats */ + u64 counters[ARRAY_LEN(ip6_hop_by_hop_ioam_pot_stats_strings)]; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} ip6_hop_by_hop_ioam_pot_main_t; + +ip6_hop_by_hop_ioam_pot_main_t ip6_hop_by_hop_ioam_pot_main; + +always_inline void +ip6_ioam_stats_increment_counter (u32 counter_index, u64 increment) +{ + ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main; + + hm->counters[counter_index] += increment; +} + + +static u8 * format_ioam_pot (u8 * s, va_list * args) +{ + ioam_pot_option_t * pot0 = va_arg (*args, ioam_pot_option_t *); + u64 random, cumulative; + random = cumulative = 0; + if (pot0) + { + random = clib_net_to_host_u64 (pot0->random); + cumulative = clib_net_to_host_u64 (pot0->cumulative); + } + + s = format (s, "random = 0x%Lx, Cumulative = 0x%Lx, Index = 0x%x", + random, cumulative, pot0 ? pot0->reserved_profile_id : ~0); + return s; +} + +u8 * +ip6_hbh_ioam_proof_of_transit_trace_handler (u8 *s, ip6_hop_by_hop_option_t *opt) +{ + ioam_pot_option_t *pot; + + s = format (s, " POT opt present\n"); + pot = (ioam_pot_option_t *) opt; + s = format (s, " %U\n", format_ioam_pot, pot); + return (s); +} + +int +ip6_hbh_ioam_proof_of_transit_handler (vlib_buffer_t *b, + ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt0) +{ + ioam_pot_option_t * pot0; + u64 random = 0, cumulative = 0; + int rv = 0; + u8 pot_profile_index; + pot_profile *pot_profile = 0, *new_profile = 0; + u8 pot_encap = 0; + + pot0 = (ioam_pot_option_t *) opt0; + pot_encap = (pot0->random == 0); + pot_profile_index = pot_profile_get_active_id(); + pot_profile = pot_profile_get_active(); + if (pot_encap && PREDICT_FALSE(!pot_profile)) + { + ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1); + return(-1); + } + if (pot_encap) + { + pot0->reserved_profile_id = + pot_profile_index & PROFILE_ID_MASK; + pot_profile_incr_usage_stats(pot_profile); + } + else + { /* Non encap node */ + if (PREDICT_FALSE(pot0->reserved_profile_id != + pot_profile_index || pot_profile == 0)) + { + /* New profile announced by encap node. */ + new_profile = + pot_profile_find(pot0->reserved_profile_id); + if (PREDICT_FALSE(new_profile == 0 || + new_profile->valid == 0)) + { + ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1); + return(-1); + } + else + { + pot_profile_index = pot0->reserved_profile_id; + pot_profile = new_profile; + pot_profile_set_active(pot_profile_index); + pot_profile_reset_usage_stats(pot_profile); + } + } + pot_profile_incr_usage_stats(pot_profile); + } + + if (pot0->random == 0) + { + pot0->random = clib_host_to_net_u64(pot_generate_random(pot_profile)); + pot0->cumulative = 0; + } + random = clib_net_to_host_u64(pot0->random); + cumulative = clib_net_to_host_u64(pot0->cumulative); + pot0->cumulative = clib_host_to_net_u64( + pot_update_cumulative(pot_profile, + cumulative, + random)); + ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROCESSED, 1); + + return (rv); +} + +int +ip6_hbh_ioam_proof_of_transit_pop_handler (vlib_buffer_t *b, ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt0) +{ + ioam_pot_option_t * pot0; + u64 random = 0; + u64 cumulative = 0; + int rv = 0; + pot_profile *pot_profile = 0; + u8 result = 0; + + pot0 = (ioam_pot_option_t *) opt0; + random = clib_net_to_host_u64(pot0->random); + cumulative = clib_net_to_host_u64(pot0->cumulative); + pot_profile = pot_profile_get_active(); + result = pot_validate (pot_profile, + cumulative, random); + + if (result == 1) + { + ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PASSED, 1); + } + else + { + ip6_ioam_stats_increment_counter (IP6_IOAM_POT_FAILED, 1); + } + return (rv); +} + +int ip6_hop_by_hop_ioam_pot_rewrite_handler (u8 *rewrite_string, u8 *rewrite_size) +{ + ioam_pot_option_t * pot_option; + if (rewrite_string && *rewrite_size == sizeof(ioam_pot_option_t)) + { + pot_option = (ioam_pot_option_t *)rewrite_string; + pot_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT + | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; + pot_option->hdr.length = sizeof (ioam_pot_option_t) - + sizeof (ip6_hop_by_hop_option_t); + return(0); + } + return(-1); +} + +static clib_error_t * +ip6_show_ioam_pot_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main; + u8 *s = 0; + int i = 0; + + for ( i = 0; i < IP6_IOAM_POT_N_STATS; i++) + { + s = format(s, " %s - %lu\n", ip6_hop_by_hop_ioam_pot_stats_strings[i], + hm->counters[i]); + } + + vlib_cli_output(vm, "%v", s); + vec_free(s); + return 0; +} + + +VLIB_CLI_COMMAND (ip6_show_ioam_pot_cmd, static) = { + .path = "show ioam pot", + .short_help = "iOAM pot statistics", + .function = ip6_show_ioam_pot_cmd_fn, +}; + + +static clib_error_t * +ip6_hop_by_hop_ioam_pot_init (vlib_main_t * vm) +{ + ip6_hop_by_hop_ioam_pot_main_t * hm = &ip6_hop_by_hop_ioam_pot_main; + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init))) + return(error); + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main(); + memset(hm->counters, 0, sizeof(hm->counters)); + + if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, ip6_hbh_ioam_proof_of_transit_handler, + ip6_hbh_ioam_proof_of_transit_trace_handler) < 0) + return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT failed")); + + if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, + sizeof(ioam_pot_option_t), + ip6_hop_by_hop_ioam_pot_rewrite_handler) < 0) + return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT for rewrite failed")); + + if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, + ip6_hbh_ioam_proof_of_transit_pop_handler) < 0) + return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT POP failed")); + + return (0); +} + +VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_pot_init); + + diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.c b/src/plugins/ioam/encap/ip6_ioam_seqno.c new file mode 100644 index 00000000000..0b4d4192975 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_seqno.c @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include "ip6_ioam_seqno.h" +#include "ip6_ioam_e2e.h" + +ioam_seqno_data_main_t ioam_seqno_main; + +void ioam_seqno_init_bitmap (ioam_seqno_data *data) +{ + seqno_bitmap *bitmap = &data->seqno_rx.bitmap; + bitmap->window_size = SEQNO_WINDOW_SIZE; + bitmap->array_size = SEQNO_WINDOW_ARRAY_SIZE; + bitmap->mask = 32 * SEQNO_WINDOW_ARRAY_SIZE - 1; + bitmap->array[0] = 0x00000000;/* pretend we haven seen sequence numbers 0*/ + bitmap->highest = 0; + + data->seq_num = 0; + return ; +} + +/* + * This Routine gets called from IPv6 hop-by-hop option handling. + * Only if we are encap node, then add PPC data. + * On a Transit(MID) node we dont do anything with E2E headers. + * On decap node decap is handled by seperate function. + */ +int +ioam_seqno_encap_handler (vlib_buffer_t *b, ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt) +{ + u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index; + ioam_e2e_option_t * e2e; + int rv = 0; + ioam_seqno_data *data; + + data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index); + e2e = (ioam_e2e_option_t *) opt; + e2e->e2e_data = clib_host_to_net_u32(++data->seq_num); + + return (rv); +} + +/* + * This Routine gets called on POP/Decap node. + */ +int +ioam_seqno_decap_handler (vlib_buffer_t *b, ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt) +{ + u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index; + ioam_e2e_option_t * e2e; + int rv = 0; + ioam_seqno_data *data; + + data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index); + e2e = (ioam_e2e_option_t *) opt; + ioam_analyze_seqno(&data->seqno_rx, (u64) clib_net_to_host_u32(e2e->e2e_data)); + + return (rv); +} + +u8 * +show_ioam_seqno_cmd_fn (u8 *s, ioam_seqno_data *seqno_data, u8 enc) +{ + seqno_rx_info *rx; + + s = format(s, "SeqNo Data:\n"); + if (enc) + { + s = format(s, " Current Seq. Number : %llu\n", seqno_data->seq_num); + } + else + { + rx = &seqno_data->seqno_rx; + s = format(s, " Highest Seq. Number : %llu\n", rx->bitmap.highest); + s = format(s, " Packets received : %llu\n", rx->rx_packets); + s = format(s, " Lost packets : %llu\n", rx->lost_packets); + s = format(s, " Reordered packets : %llu\n", rx->reordered_packets); + s = format(s, " Duplicate packets : %llu\n", rx->dup_packets); + } + + format(s, "\n"); + return s; +} diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.h b/src/plugins/ioam/encap/ip6_ioam_seqno.h new file mode 100644 index 00000000000..13a84db0d71 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_seqno.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_ip6_ioam_seqno_h__ +#define __included_ip6_ioam_seqno_h__ + +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/ip6_hop_by_hop.h> + +#define SEQ_CHECK_VALUE 0x80000000 /* for seq number wraparound detection */ + +#define SEQNO_WINDOW_SIZE 2048 +#define SEQNO_WINDOW_ARRAY_SIZE 64 + +typedef struct seqno_bitmap_ { + u32 window_size; + u32 array_size; + u32 mask; + u32 pad; + u64 highest; + u64 array[SEQNO_WINDOW_ARRAY_SIZE]; /* Will be alloc to array_size */ +} seqno_bitmap; + +typedef struct seqno_rx_info_ { + u64 rx_packets; + u64 lost_packets; + u64 reordered_packets; + u64 dup_packets; + seqno_bitmap bitmap; +} seqno_rx_info; + +/* This structure is 64-byte aligned */ +typedef struct ioam_seqno_data_ { + union { + u32 seq_num; /* Useful only for encap node */ + seqno_rx_info seqno_rx; + }; +} ioam_seqno_data; + +typedef struct ioam_seqno_data_main_t_ { + ioam_seqno_data *seqno_data; +} ioam_seqno_data_main_t; + +void ioam_seqno_init_bitmap(ioam_seqno_data *data); + +int ioam_seqno_encap_handler(vlib_buffer_t *b, ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt); + +int +ioam_seqno_decap_handler(vlib_buffer_t *b, ip6_header_t *ip, + ip6_hop_by_hop_option_t *opt); + +void ioam_analyze_seqno(seqno_rx_info *ppc_rx, u64 seqno); + +u8 * +show_ioam_seqno_cmd_fn(u8 *s, ioam_seqno_data *seqno_data, u8 enc); + +#endif diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c b/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c new file mode 100644 index 00000000000..4638871c224 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include "ip6_ioam_seqno.h" + +static inline void BIT_SET (u64 *p, u32 n) +{ + p[ n>>5 ] |= (1 << (n&31)); +} + +static inline int BIT_TEST (u64 *p, u32 n) +{ + return p[ n>>5 ] & (1 << (n&31)); +} + +static void BIT_CLEAR (u64 *p, u64 start, int num_bits, u32 mask) +{ + int n, t; + int start_index = (start >> 5); + int mask_index = (mask >> 5); + + start_index &= mask_index; + if (start & 0x1f) + { + int start_bit = (start & 0x1f); + + n = (1 << start_bit)-1; + t = start_bit + num_bits; + if (t < 32) + { + n |= ~((1 << t)-1); + p[ start_index ] &= n; + return; + } + p[ start_index ] &= n; + start_index = (start_index + 1) & mask_index; + num_bits -= (32 - start_bit); + } + while (num_bits >= 32) + { + p[ start_index ] = 0; + start_index = (start_index + 1) & mask_index; + num_bits -= 32; + } + n = ~((1 << num_bits) - 1); + p[ start_index ] &= n; +} + +static inline u8 seqno_check_wraparound(u32 a, u32 b) +{ + if ((a != b) && (a > b) && ((a - b) > SEQ_CHECK_VALUE)) + { + return 1; + } + return 0; +} + +/* + * Function to analyze the PPC value recevied. + * - Updates the bitmap with received sequence number + * - counts the received/lost/duplicate/reordered packets + */ +void ioam_analyze_seqno (seqno_rx_info *seqno_rx, u64 seqno) +{ + int diff; + static int peer_dead_count; + seqno_bitmap *bitmap = &seqno_rx->bitmap; + + seqno_rx->rx_packets++; + + if (seqno > bitmap->highest) + { /* new larger sequence number */ + peer_dead_count = 0; + diff = seqno - bitmap->highest; + if (diff < bitmap->window_size) + { + if (diff > 1) + { /* diff==1 is *such* a common case it's a win to optimize it */ + BIT_CLEAR(bitmap->array, bitmap->highest+1, diff-1, bitmap->mask); + seqno_rx->lost_packets += diff -1; + } + } + else + { + seqno_rx->lost_packets += diff -1; + memset( bitmap->array, 0, bitmap->array_size * sizeof(u64) ); + } + BIT_SET(bitmap->array, seqno & bitmap->mask); + bitmap->highest = seqno; + return; + } + + /* we've seen a bigger seq number before */ + diff = bitmap->highest - seqno; + if (diff >= bitmap->window_size) + { + if (seqno_check_wraparound(bitmap->highest, seqno)) + { + memset( bitmap->array, 0, bitmap->array_size * sizeof(u64)); + BIT_SET(bitmap->array, seqno & bitmap->mask); + bitmap->highest = seqno; + return; + } + else + { + peer_dead_count++; + if (peer_dead_count > 25) + { + peer_dead_count = 0; + memset( bitmap->array, 0, bitmap->array_size * sizeof(u64) ); + BIT_SET(bitmap->array, seqno & bitmap->mask); + bitmap->highest = seqno; + } + //ppc_rx->reordered_packets++; + } + return; + } + + if (BIT_TEST(bitmap->array, seqno & bitmap->mask)) + { + seqno_rx->dup_packets++; + return; /* Already seen */ + } + seqno_rx->reordered_packets++; + seqno_rx->lost_packets--; + BIT_SET(bitmap->array, seqno & bitmap->mask); + return; +} diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c new file mode 100644 index 00000000000..e63db6e4ec5 --- /dev/null +++ b/src/plugins/ioam/encap/ip6_ioam_trace.c @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip6.h> +#include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/ip/ip6_hop_by_hop_packet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <ioam/lib-trace/trace_util.h> + +/* Timestamp precision multipliers for seconds, milliseconds, microseconds + * and nanoseconds respectively. + */ +static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 }; + +typedef union +{ + u64 as_u64; + u32 as_u32[2]; +} time_u64_t; + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct { + ip6_hop_by_hop_option_t hdr; + u8 ioam_trace_type; + u8 data_list_elts_left; + u32 elts[0]; /* Variable type. So keep it generic */ +}) ioam_trace_option_t; +/* *INDENT-ON* */ + + +extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main; +extern ip6_main_t ip6_main; + +#define foreach_ip6_hop_by_hop_ioam_trace_stats \ + _(PROCESSED, "Pkts with ip6 hop-by-hop trace options") \ + _(PROFILE_MISS, "Pkts with ip6 hop-by-hop trace options but no profile set") \ + _(UPDATED, "Pkts with trace updated") \ + _(FULL, "Pkts with trace options but no space") + +static char *ip6_hop_by_hop_ioam_trace_stats_strings[] = { +#define _(sym,string) string, + foreach_ip6_hop_by_hop_ioam_trace_stats +#undef _ +}; + +typedef enum +{ +#define _(sym,str) IP6_IOAM_TRACE_##sym, + foreach_ip6_hop_by_hop_ioam_trace_stats +#undef _ + IP6_IOAM_TRACE_N_STATS, +} ip6_ioam_trace_stats_t; + + +typedef struct +{ + /* stats */ + u64 counters[ARRAY_LEN (ip6_hop_by_hop_ioam_trace_stats_strings)]; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} ip6_hop_by_hop_ioam_trace_main_t; + +ip6_hop_by_hop_ioam_trace_main_t ip6_hop_by_hop_ioam_trace_main; + +always_inline void +ip6_ioam_trace_stats_increment_counter (u32 counter_index, u64 increment) +{ + ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main; + + hm->counters[counter_index] += increment; +} + + +static u8 * +format_ioam_data_list_element (u8 * s, va_list * args) +{ + u32 *elt = va_arg (*args, u32 *); + u8 *trace_type_p = va_arg (*args, u8 *); + u8 trace_type = *trace_type_p; + + + if (trace_type & BIT_TTL_NODEID) + { + u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ttl 0x%x node id 0x%x ", + ttl_node_id_host_byte_order >> 24, + ttl_node_id_host_byte_order & 0x00FFFFFF); + + elt++; + } + + if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE) + { + u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ingress 0x%x egress 0x%x ", + ingress_host_byte_order >> 16, + ingress_host_byte_order & 0xFFFF); + elt++; + } + + if (trace_type & BIT_TIMESTAMP) + { + u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ts 0x%x \n", ts_in_host_byte_order); + elt++; + } + + if (trace_type & BIT_APPDATA) + { + u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "app 0x%x ", appdata_in_host_byte_order); + elt++; + } + + return s; +} + + +int +ip6_ioam_trace_get_sizeof_handler (u32 * result) +{ + u16 size = 0; + u8 trace_data_size = 0; + trace_profile *profile = NULL; + + *result = 0; + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1); + return (-1); + } + + trace_data_size = fetch_trace_data_size (profile->trace_type); + if (PREDICT_FALSE (trace_data_size == 0)) + return VNET_API_ERROR_INVALID_VALUE; + + if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254)) + return VNET_API_ERROR_INVALID_VALUE; + + size += + sizeof (ioam_trace_option_t) + (profile->num_elts * trace_data_size); + *result = size; + + return 0; +} + + + +int +ip6_hop_by_hop_ioam_trace_rewrite_handler (u8 * rewrite_string, + u8 * rewrite_size) +{ + ioam_trace_option_t *trace_option = NULL; + u8 trace_data_size = 0; + u8 trace_option_elts = 0; + trace_profile *profile = NULL; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1); + return (-1); + } + + if (PREDICT_FALSE (!rewrite_string)) + return -1; + + trace_option_elts = profile->num_elts; + trace_data_size = fetch_trace_data_size (profile->trace_type); + trace_option = (ioam_trace_option_t *) rewrite_string; + trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST | + HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE; + trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ + + trace_option_elts * trace_data_size; + trace_option->ioam_trace_type = profile->trace_type & TRACE_TYPE_MASK; + trace_option->data_list_elts_left = trace_option_elts; + *rewrite_size = + sizeof (ioam_trace_option_t) + (trace_option_elts * trace_data_size); + + return 0; +} + + +int +ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t * b, ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt) +{ + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u8 elt_index = 0; + ioam_trace_option_t *trace = (ioam_trace_option_t *) opt; + u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); + time_u64_t time_u64; + u32 *elt; + int rv = 0; + trace_profile *profile = NULL; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1); + return (-1); + } + + + time_u64.as_u64 = 0; + + if (PREDICT_TRUE (trace->data_list_elts_left)) + { + trace->data_list_elts_left--; + /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes + * to skip to this node's location. + */ + elt_index = + trace->data_list_elts_left * + fetch_trace_data_size (trace->ioam_trace_type) / 4; + elt = &trace->elts[elt_index]; + if (trace->ioam_trace_type & BIT_TTL_NODEID) + { + *elt = + clib_host_to_net_u32 ((ip->hop_limit << 24) | profile->node_id); + elt++; + } + + if (trace->ioam_trace_type & BIT_ING_INTERFACE) + { + *elt = + (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 | + (adj->rewrite_header.sw_if_index & 0xFFFF); + *elt = clib_host_to_net_u32 (*elt); + elt++; + } + + if (trace->ioam_trace_type & BIT_TIMESTAMP) + { + /* Send least significant 32 bits */ + f64 time_f64 = + (f64) (((f64) hm->unix_time_0) + + (vlib_time_now (hm->vlib_main) - hm->vlib_time_0)); + + time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp]; + *elt = clib_host_to_net_u32 (time_u64.as_u32[0]); + elt++; + } + + if (trace->ioam_trace_type & BIT_APPDATA) + { + /* $$$ set elt0->app_data */ + *elt = clib_host_to_net_u32 (profile->app_data); + elt++; + } + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_UPDATED, 1); + } + else + { + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_FULL, 1); + } + return (rv); +} + +u8 * +ip6_hbh_ioam_trace_data_list_trace_handler (u8 * s, + ip6_hop_by_hop_option_t * opt) +{ + ioam_trace_option_t *trace; + u8 trace_data_size_in_words = 0; + u32 *elt; + int elt_index = 0; + + trace = (ioam_trace_option_t *) opt; + s = + format (s, " Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type, + trace->data_list_elts_left); + trace_data_size_in_words = + fetch_trace_data_size (trace->ioam_trace_type) / 4; + elt = &trace->elts[0]; + while ((u8 *) elt < ((u8 *) (&trace->elts[0]) + trace->hdr.length - 2 + /* -2 accounts for ioam_trace_type,elts_left */ )) + { + s = format (s, " [%d] %U\n", elt_index, + format_ioam_data_list_element, + elt, &trace->ioam_trace_type); + elt_index++; + elt += trace_data_size_in_words; + } + return (s); +} + + +static clib_error_t * +ip6_show_ioam_trace_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main; + u8 *s = 0; + int i = 0; + + for (i = 0; i < IP6_IOAM_TRACE_N_STATS; i++) + { + s = + format (s, " %s - %lu\n", ip6_hop_by_hop_ioam_trace_stats_strings[i], + hm->counters[i]); + } + + vlib_cli_output (vm, "%v", s); + vec_free (s); + return 0; +} + + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = { + .path = "show ioam trace", + .short_help = "iOAM trace statistics", + .function = ip6_show_ioam_trace_cmd_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm) +{ + ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main; + clib_error_t *error; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return (error); + + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init))) + return (error); + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main (); + memset (hm->counters, 0, sizeof (hm->counters)); + + + if (ip6_hbh_register_option + (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST, + ip6_hbh_ioam_trace_data_list_handler, + ip6_hbh_ioam_trace_data_list_trace_handler) < 0) + return (clib_error_create + ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST failed")); + + + if (ip6_hbh_add_register_option (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST, + sizeof (ioam_trace_option_t), + ip6_hop_by_hop_ioam_trace_rewrite_handler) + < 0) + return (clib_error_create + ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST for rewrite failed")); + + + return (0); +} + +int +ip6_trace_profile_cleanup (void) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = 0; + + return 0; + +} + + +int +ip6_trace_profile_setup (void) +{ + u32 trace_size = 0; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + trace_profile *profile = NULL; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1); + return (-1); + } + + + if (ip6_ioam_trace_get_sizeof_handler (&trace_size) < 0) + return (-1); + + hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = trace_size; + + return (0); +} + + +VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_trace_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h new file mode 100644 index 00000000000..fbd86180656 --- /dev/null +++ b/src/plugins/ioam/export-common/ioam_export.h @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_ioam_export_h__ +#define __included_ioam_export_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/udp.h> +#include <vnet/flow/ipfix_packet.h> + +#include <vppinfra/pool.h> +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <vlib/threads.h> + +typedef struct ioam_export_buffer +{ + /* Allocated buffer */ + u32 buffer_index; + u64 touched_at; + u8 records_in_this_buffer; +} ioam_export_buffer_t; + + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + + /* TODO: to support multiple collectors all this has to be grouped and create a vector here */ + u8 *record_header; + u32 sequence_number; + u32 domain_id; + + /* ipfix collector, our ip address */ + ip4_address_t ipfix_collector; + ip4_address_t src_address; + + /* Pool of ioam_export_buffer_t */ + ioam_export_buffer_t *buffer_pool; + /* Vector of per thread ioam_export_buffer_t to buffer pool index */ + u32 *buffer_per_thread; + /* Lock per thread to swap buffers between worker and timer process */ + volatile u32 **lockp; + + /* time scale transform */ + u32 unix_time_0; + f64 vlib_time_0; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + u32 ip4_lookup_node_index; + + uword my_hbh_slot; + u32 export_process_node_index; +} ioam_export_main_t; + +ioam_export_main_t ioam_export_main; +ioam_export_main_t vxlan_gpe_ioam_export_main; + +extern vlib_node_registration_t export_node; + +#define DEFAULT_EXPORT_SIZE (3 * CLIB_CACHE_LINE_BYTES) +/* + * Number of records in a buffer + * ~(MTU (1500) - [ip hdr(40) + UDP(8) + ipfix (24)]) / DEFAULT_EXPORT_SIZE + */ +#define DEFAULT_EXPORT_RECORDS 7 + +always_inline ioam_export_buffer_t * +ioam_export_get_my_buffer (ioam_export_main_t * em, u32 thread_id) +{ + + if (vec_len (em->buffer_per_thread) > thread_id) + return (pool_elt_at_index + (em->buffer_pool, em->buffer_per_thread[thread_id])); + return (0); +} + +inline static int +ioam_export_buffer_add_header (ioam_export_main_t * em, vlib_buffer_t * b0) +{ + clib_memcpy (b0->data, em->record_header, vec_len (em->record_header)); + b0->current_data = 0; + b0->current_length = vec_len (em->record_header); + b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + return (1); +} + +inline static int +ioam_export_init_buffer (ioam_export_main_t * em, vlib_main_t * vm, + ioam_export_buffer_t * eb) +{ + vlib_buffer_t *b = 0; + + if (!eb) + return (-1); + /* TODO: Perhaps buffer init from template here */ + if (vlib_buffer_alloc (vm, &(eb->buffer_index), 1) != 1) + return (-2); + eb->records_in_this_buffer = 0; + eb->touched_at = vlib_time_now (vm); + b = vlib_get_buffer (vm, eb->buffer_index); + (void) ioam_export_buffer_add_header (em, b); + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + return (1); +} + +inline static void +ioam_export_thread_buffer_free (ioam_export_main_t * em) +{ + vlib_main_t *vm = em->vlib_main; + ioam_export_buffer_t *eb = 0; + int i; + for (i = 0; i < vec_len (em->buffer_per_thread); i++) + { + eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]); + if (eb) + vlib_buffer_free (vm, &(eb->buffer_index), 1); + } + for (i = 0; i < vec_len (em->lockp); i++) + clib_mem_free ((void *) em->lockp[i]); + vec_free (em->buffer_per_thread); + pool_free (em->buffer_pool); + vec_free (em->lockp); + em->buffer_per_thread = 0; + em->buffer_pool = 0; + em->lockp = 0; +} + +inline static int +ioam_export_thread_buffer_init (ioam_export_main_t * em, vlib_main_t * vm) +{ + int no_of_threads = vec_len (vlib_worker_threads); + int i; + ioam_export_buffer_t *eb = 0; + vlib_node_t *ip4_lookup_node; + + pool_alloc_aligned (em->buffer_pool, + no_of_threads - 1, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (em->buffer_per_thread, + no_of_threads - 1, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (em->lockp, no_of_threads - 1, CLIB_CACHE_LINE_BYTES); + ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup"); + em->ip4_lookup_node_index = ip4_lookup_node->index; + if (!em->buffer_per_thread || !em->buffer_pool || !em->lockp) + { + return (-1); + } + for (i = 0; i < no_of_threads; i++) + { + eb = 0; + pool_get_aligned (em->buffer_pool, eb, CLIB_CACHE_LINE_BYTES); + memset (eb, 0, sizeof (*eb)); + em->buffer_per_thread[i] = eb - em->buffer_pool; + if (ioam_export_init_buffer (em, vm, eb) != 1) + { + ioam_export_thread_buffer_free (em); + return (-2); + } + em->lockp[i] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) em->lockp[i], 0, CLIB_CACHE_LINE_BYTES); + } + return (1); +} + +#define IPFIX_IOAM_EXPORT_ID 272 + +/* Used to build the rewrite */ +/* data set packet */ +typedef struct +{ + ipfix_message_header_t h; + ipfix_set_header_t s; +} ipfix_data_packet_t; + +typedef struct +{ + ip4_header_t ip4; + udp_header_t udp; + ipfix_data_packet_t ipfix; +} ip4_ipfix_data_packet_t; + + +inline static void +ioam_export_header_cleanup (ioam_export_main_t * em, + ip4_address_t * collector_address, + ip4_address_t * src_address) +{ + vec_free (em->record_header); + em->record_header = 0; +} + +inline static int +ioam_export_header_create (ioam_export_main_t * em, + ip4_address_t * collector_address, + ip4_address_t * src_address) +{ + ip4_header_t *ip; + udp_header_t *udp; + ipfix_message_header_t *h; + ipfix_set_header_t *s; + u8 *rewrite = 0; + ip4_ipfix_data_packet_t *tp; + + + /* allocate rewrite space */ + vec_validate_aligned (rewrite, + sizeof (ip4_ipfix_data_packet_t) - 1, + CLIB_CACHE_LINE_BYTES); + + tp = (ip4_ipfix_data_packet_t *) rewrite; + ip = (ip4_header_t *) & tp->ip4; + udp = (udp_header_t *) (ip + 1); + h = (ipfix_message_header_t *) (udp + 1); + s = (ipfix_set_header_t *) (h + 1); + + ip->ip_version_and_header_length = 0x45; + ip->ttl = 254; + ip->protocol = IP_PROTOCOL_UDP; + ip->src_address.as_u32 = src_address->as_u32; + ip->dst_address.as_u32 = collector_address->as_u32; + udp->src_port = clib_host_to_net_u16 (4939 /* $$FIXME */ ); + udp->dst_port = clib_host_to_net_u16 (4939); + /* FIXUP: UDP length */ + udp->length = clib_host_to_net_u16 (vec_len (rewrite) + + (DEFAULT_EXPORT_RECORDS * + DEFAULT_EXPORT_SIZE) - sizeof (*ip)); + + /* FIXUP: message header export_time */ + /* FIXUP: message header sequence_number */ + h->domain_id = clib_host_to_net_u32 (em->domain_id); + + /*FIXUP: Setid length in octets if records exported are not default */ + s->set_id_length = ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID, + (sizeof (*s) + + (DEFAULT_EXPORT_RECORDS * + DEFAULT_EXPORT_SIZE))); + + /* FIXUP: h version and length length in octets if records exported are not default */ + h->version_length = version_length (sizeof (*h) + + (sizeof (*s) + + (DEFAULT_EXPORT_RECORDS * + DEFAULT_EXPORT_SIZE))); + + /* FIXUP: ip length if records exported are not default */ + /* FIXUP: ip checksum if records exported are not default */ + ip->length = clib_host_to_net_u16 (vec_len (rewrite) + + (DEFAULT_EXPORT_RECORDS * + DEFAULT_EXPORT_SIZE)); + ip->checksum = ip4_header_checksum (ip); + _vec_len (rewrite) = sizeof (ip4_ipfix_data_packet_t); + em->record_header = rewrite; + return (1); +} + +inline static int +ioam_export_send_buffer (ioam_export_main_t * em, vlib_main_t * vm, + ioam_export_buffer_t * eb) +{ + ip4_header_t *ip; + udp_header_t *udp; + ipfix_message_header_t *h; + ipfix_set_header_t *s; + ip4_ipfix_data_packet_t *tp; + vlib_buffer_t *b0; + u16 new_l0, old_l0; + ip_csum_t sum0; + vlib_frame_t *nf = 0; + u32 *to_next; + + b0 = vlib_get_buffer (vm, eb->buffer_index); + tp = vlib_buffer_get_current (b0); + ip = (ip4_header_t *) & tp->ip4; + udp = (udp_header_t *) (ip + 1); + h = (ipfix_message_header_t *) (udp + 1); + s = (ipfix_set_header_t *) (h + 1); + + /* FIXUP: message header export_time */ + h->export_time = clib_host_to_net_u32 ((u32) + (((f64) em->unix_time_0) + + (vlib_time_now (em->vlib_main) - + em->vlib_time_0))); + + /* FIXUP: message header sequence_number */ + h->sequence_number = clib_host_to_net_u32 (em->sequence_number++); + + /* FIXUP: lengths if different from default */ + if (PREDICT_FALSE (eb->records_in_this_buffer != DEFAULT_EXPORT_RECORDS)) + { + s->set_id_length = + ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID /* set_id */ , + b0->current_length - (sizeof (*ip) + + sizeof (*udp) + + sizeof (*h))); + h->version_length = + version_length (b0->current_length - (sizeof (*ip) + sizeof (*udp))); + sum0 = ip->checksum; + old_l0 = ip->length; + new_l0 = clib_host_to_net_u16 ((u16) b0->current_length); + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */ ); + ip->checksum = ip_csum_fold (sum0); + ip->length = new_l0; + udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip)); + } + + /* Enqueue pkts to ip4-lookup */ + + nf = vlib_get_frame_to_node (vm, em->ip4_lookup_node_index); + nf->n_vectors = 0; + to_next = vlib_frame_vector_args (nf); + nf->n_vectors = 1; + to_next[0] = eb->buffer_index; + vlib_put_frame_to_node (vm, em->ip4_lookup_node_index, nf); + return (1); + +} + +#define EXPORT_TIMEOUT (20.0) +#define THREAD_PERIOD (30.0) +inline static uword +ioam_export_process_common (ioam_export_main_t * em, vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f, + u32 index) +{ + f64 now; + f64 timeout = 30.0; + uword event_type; + uword *event_data = 0; + int i; + ioam_export_buffer_t *eb = 0, *new_eb = 0; + u32 *vec_buffer_indices = 0; + u32 *vec_buffer_to_be_sent = 0; + u32 *thread_index = 0; + u32 new_pool_index = 0; + + em->export_process_node_index = index; + /* Wait for Godot... */ + vlib_process_wait_for_event_or_clock (vm, 1e9); + event_type = vlib_process_get_events (vm, &event_data); + if (event_type != 1) + clib_warning ("bogus kickoff event received, %d", event_type); + vec_reset_length (event_data); + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case 2: /* Stop and Wait for kickoff again */ + timeout = 1e9; + break; + case 1: /* kickoff : Check for unsent buffers */ + timeout = THREAD_PERIOD; + break; + case ~0: /* timeout */ + break; + } + vec_reset_length (event_data); + now = vlib_time_now (vm); + /* + * Create buffers for threads that are not active enough + * to send out the export records + */ + for (i = 0; i < vec_len (em->buffer_per_thread); i++) + { + /* If the worker thread is processing export records ignore further checks */ + if (*em->lockp[i] == 1) + continue; + eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]); + if (eb->records_in_this_buffer > 0 + && now > (eb->touched_at + EXPORT_TIMEOUT)) + { + pool_get_aligned (em->buffer_pool, new_eb, + CLIB_CACHE_LINE_BYTES); + memset (new_eb, 0, sizeof (*new_eb)); + if (ioam_export_init_buffer (em, vm, new_eb) == 1) + { + new_pool_index = new_eb - em->buffer_pool; + vec_add (vec_buffer_indices, &new_pool_index, 1); + vec_add (vec_buffer_to_be_sent, &em->buffer_per_thread[i], + 1); + vec_add (thread_index, &i, 1); + } + else + { + pool_put (em->buffer_pool, new_eb); + /*Give up */ + goto CLEANUP; + } + } + } + if (vec_len (thread_index) != 0) + { + /* + * Now swap the buffers out + */ + for (i = 0; i < vec_len (thread_index); i++) + { + while (__sync_lock_test_and_set (em->lockp[thread_index[i]], 1)) + ; + em->buffer_per_thread[thread_index[i]] = + vec_pop (vec_buffer_indices); + *em->lockp[thread_index[i]] = 0; + } + + /* Send the buffers */ + for (i = 0; i < vec_len (vec_buffer_to_be_sent); i++) + { + eb = + pool_elt_at_index (em->buffer_pool, vec_buffer_to_be_sent[i]); + ioam_export_send_buffer (em, vm, eb); + pool_put (em->buffer_pool, eb); + } + } + + CLEANUP: + /* Free any leftover/unused buffers and everything that was allocated */ + for (i = 0; i < vec_len (vec_buffer_indices); i++) + { + new_eb = pool_elt_at_index (em->buffer_pool, vec_buffer_indices[i]); + vlib_buffer_free (vm, &new_eb->buffer_index, 1); + pool_put (em->buffer_pool, new_eb); + } + vec_free (vec_buffer_indices); + vec_free (vec_buffer_to_be_sent); + vec_free (thread_index); + } + return 0; /* not so much */ +} + +#define ioam_export_node_common(EM, VM, N, F, HTYPE, L, V, NEXT) \ +do { \ + u32 n_left_from, *from, *to_next; \ + export_next_t next_index; \ + u32 pkts_recorded = 0; \ + ioam_export_buffer_t *my_buf = 0; \ + vlib_buffer_t *eb0 = 0; \ + u32 ebi0 = 0; \ + from = vlib_frame_vector_args (F); \ + n_left_from = (F)->n_vectors; \ + next_index = (N)->cached_next_index; \ + while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1)); \ + my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index); \ + my_buf->touched_at = vlib_time_now (VM); \ + while (n_left_from > 0) \ + { \ + u32 n_left_to_next; \ + vlib_get_next_frame (VM, N, next_index, to_next, n_left_to_next); \ + while (n_left_from >= 4 && n_left_to_next >= 2) \ + { \ + u32 next0 = NEXT; \ + u32 next1 = NEXT; \ + u32 bi0, bi1; \ + HTYPE *ip0, *ip1; \ + vlib_buffer_t *p0, *p1; \ + u32 ip_len0, ip_len1; \ + { \ + vlib_buffer_t *p2, *p3; \ + p2 = vlib_get_buffer (VM, from[2]); \ + p3 = vlib_get_buffer (VM, from[3]); \ + vlib_prefetch_buffer_header (p2, LOAD); \ + vlib_prefetch_buffer_header (p3, LOAD); \ + CLIB_PREFETCH (p2->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \ + CLIB_PREFETCH (p3->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \ + } \ + to_next[0] = bi0 = from[0]; \ + to_next[1] = bi1 = from[1]; \ + from += 2; \ + to_next += 2; \ + n_left_from -= 2; \ + n_left_to_next -= 2; \ + p0 = vlib_get_buffer (VM, bi0); \ + p1 = vlib_get_buffer (VM, bi1); \ + ip0 = vlib_buffer_get_current (p0); \ + ip1 = vlib_buffer_get_current (p1); \ + ip_len0 = \ + clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \ + ip_len1 = \ + clib_net_to_host_u16 (ip1->L) + sizeof (HTYPE); \ + ebi0 = my_buf->buffer_index; \ + eb0 = vlib_get_buffer (VM, ebi0); \ + if (PREDICT_FALSE (eb0 == 0)) \ + goto NO_BUFFER1; \ + ip_len0 = \ + ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \ + ip_len1 = \ + ip_len1 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len1; \ + copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \ + eb0->current_length += DEFAULT_EXPORT_SIZE; \ + my_buf->records_in_this_buffer++; \ + if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \ + { \ + ioam_export_send_buffer (EM, VM, my_buf); \ + ioam_export_init_buffer (EM, VM, my_buf); \ + } \ + ebi0 = my_buf->buffer_index; \ + eb0 = vlib_get_buffer (VM, ebi0); \ + if (PREDICT_FALSE (eb0 == 0)) \ + goto NO_BUFFER1; \ + copy3cachelines (eb0->data + eb0->current_length, ip1, ip_len1); \ + eb0->current_length += DEFAULT_EXPORT_SIZE; \ + my_buf->records_in_this_buffer++; \ + if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \ + { \ + ioam_export_send_buffer (EM, VM, my_buf); \ + ioam_export_init_buffer (EM, VM, my_buf); \ + } \ + pkts_recorded += 2; \ + if (PREDICT_FALSE (((node)->flags & VLIB_NODE_FLAG_TRACE))) \ + { \ + if (p0->flags & VLIB_BUFFER_IS_TRACED) \ + { \ + export_trace_t *t = \ + vlib_add_trace (VM, node, p0, sizeof (*t)); \ + t->flow_label = \ + clib_net_to_host_u32 (ip0->V); \ + t->next_index = next0; \ + } \ + if (p1->flags & VLIB_BUFFER_IS_TRACED) \ + { \ + export_trace_t *t = \ + vlib_add_trace (VM, N, p1, sizeof (*t)); \ + t->flow_label = \ + clib_net_to_host_u32 (ip1->V); \ + t->next_index = next1; \ + } \ + } \ + NO_BUFFER1: \ + vlib_validate_buffer_enqueue_x2 (VM, N, next_index, \ + to_next, n_left_to_next, \ + bi0, bi1, next0, next1); \ + } \ + while (n_left_from > 0 && n_left_to_next > 0) \ + { \ + u32 bi0; \ + vlib_buffer_t *p0; \ + u32 next0 = NEXT; \ + HTYPE *ip0; \ + u32 ip_len0; \ + bi0 = from[0]; \ + to_next[0] = bi0; \ + from += 1; \ + to_next += 1; \ + n_left_from -= 1; \ + n_left_to_next -= 1; \ + p0 = vlib_get_buffer (VM, bi0); \ + ip0 = vlib_buffer_get_current (p0); \ + ip_len0 = \ + clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \ + ebi0 = my_buf->buffer_index; \ + eb0 = vlib_get_buffer (VM, ebi0); \ + if (PREDICT_FALSE (eb0 == 0)) \ + goto NO_BUFFER; \ + ip_len0 = \ + ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \ + copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \ + eb0->current_length += DEFAULT_EXPORT_SIZE; \ + my_buf->records_in_this_buffer++; \ + if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \ + { \ + ioam_export_send_buffer (EM, VM, my_buf); \ + ioam_export_init_buffer (EM, VM, my_buf); \ + } \ + if (PREDICT_FALSE (((N)->flags & VLIB_NODE_FLAG_TRACE) \ + && (p0->flags & VLIB_BUFFER_IS_TRACED))) \ + { \ + export_trace_t *t = vlib_add_trace (VM, (N), p0, sizeof (*t)); \ + t->flow_label = \ + clib_net_to_host_u32 (ip0->V); \ + t->next_index = next0; \ + } \ + pkts_recorded += 1; \ + NO_BUFFER: \ + vlib_validate_buffer_enqueue_x1 (VM, N, next_index, \ + to_next, n_left_to_next, \ + bi0, next0); \ + } \ + vlib_put_next_frame (VM, N, next_index, n_left_to_next); \ + } \ + vlib_node_increment_counter (VM, export_node.index, \ + EXPORT_ERROR_RECORDED, pkts_recorded); \ + *(EM)->lockp[(VM)->cpu_index] = 0; \ +} while(0) + +#endif /* __included_ioam_export_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api new file mode 100644 index 00000000000..7b17c3f7a32 --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api @@ -0,0 +1,42 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Define a simple binary API to control the feature */ + +define vxlan_gpe_ioam_export_enable_disable { + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + + /* Enable / disable the feature */ + u8 is_disable; + + /* Collector ip address */ + u8 collector_address[4]; + u8 src_address[4]; + + /* Src ip address */ +}; + +define vxlan_gpe_ioam_export_enable_disable_reply { + /* From the request */ + u32 context; + + /* Return value, zero means all OK */ + i32 retval; +};
\ No newline at end of file diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c new file mode 100644 index 00000000000..bab8d977062 --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * vxlan_gpe_ioam_export.c - ioam export API / debug CLI handling + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <ioam/export-common/ioam_export.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> + +/* define message IDs */ +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* List of message types that this plugin understands */ + + +#define foreach_vxlan_gpe_ioam_export_plugin_api_msg \ +_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE, vxlan_gpe_ioam_export_enable_disable) + +extern void vxlan_gpe_set_next_override (uword next); +/* Action function shared between message handler and debug CLI */ +int +vxlan_gpe_ioam_export_enable_disable (ioam_export_main_t * em, + u8 is_disable, + ip4_address_t * collector_address, + ip4_address_t * src_address) +{ + vlib_main_t *vm = em->vlib_main; + u32 node_index = export_node.index; + vlib_node_t *vxlan_gpe_decap_ioam_node = NULL; + + if (is_disable == 0) + { + if (em->my_hbh_slot == ~0) + { + /* Hook this export node to vxlan-gpe-decap-ioam-v4 */ + vxlan_gpe_decap_ioam_node = + vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-decap-ioam-v4"); + if (!vxlan_gpe_decap_ioam_node) + { + /* node does not exist give up */ + return (-1); + } + em->my_hbh_slot = + vlib_node_add_next (vm, vxlan_gpe_decap_ioam_node->index, + node_index); + } + if (1 == ioam_export_header_create (em, collector_address, src_address)) + { + ioam_export_thread_buffer_init (em, vm); + vxlan_gpe_set_next_override (em->my_hbh_slot); + /* Turn on the export buffer check process */ + vlib_process_signal_event (vm, em->export_process_node_index, 1, 0); + + } + else + { + return (-2); + } + } + else + { + vxlan_gpe_set_next_override (VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP); + ioam_export_header_cleanup (em, collector_address, src_address); + ioam_export_thread_buffer_free (em); + /* Turn off the export buffer check process */ + vlib_process_signal_event (vm, em->export_process_node_index, 2, 0); + + } + + return 0; +} + +/* API message handler */ +static void vl_api_vxlan_gpe_ioam_export_enable_disable_t_handler + (vl_api_vxlan_gpe_ioam_export_enable_disable_t * mp) +{ + vl_api_vxlan_gpe_ioam_export_enable_disable_reply_t *rmp; + ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main; + int rv; + + rv = vxlan_gpe_ioam_export_enable_disable (sm, (int) (mp->is_disable), + (ip4_address_t *) + mp->collector_address, + (ip4_address_t *) + mp->src_address); + + REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY); +} /* API message handler */ + + + +/* Set up the API message handling tables */ +static clib_error_t * +vxlan_gpe_ioam_export_plugin_api_hookup (vlib_main_t * vm) +{ + ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vxlan_gpe_ioam_export_plugin_api_msg; +#undef _ + + return 0; +} + + +static clib_error_t * +set_vxlan_gpe_ioam_export_ipfix_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ioam_export_main_t *em = &vxlan_gpe_ioam_export_main; + ip4_address_t collector, src; + u8 is_disable = 0; + + collector.as_u32 = 0; + src.as_u32 = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "collector %U", unformat_ip4_address, &collector)) + ; + else if (unformat (input, "src %U", unformat_ip4_address, &src)) + ; + else if (unformat (input, "disable")) + is_disable = 1; + else + break; + } + + if (collector.as_u32 == 0) + return clib_error_return (0, "collector address required"); + + if (src.as_u32 == 0) + return clib_error_return (0, "src address required"); + + em->ipfix_collector.as_u32 = collector.as_u32; + em->src_address.as_u32 = src.as_u32; + + vlib_cli_output (vm, "Collector %U, src address %U", + format_ip4_address, &em->ipfix_collector, + format_ip4_address, &em->src_address); + + /* Turn on the export timer process */ + // vlib_process_signal_event (vm, flow_report_process_node.index, + //1, 0); + if (0 != + vxlan_gpe_ioam_export_enable_disable (em, is_disable, &collector, &src)) + { + return clib_error_return (0, "Unable to set ioam vxlan-gpe export"); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_vxlan_gpe_ioam_ipfix_command, static) = +{ +.path = "set vxlan-gpe-ioam export ipfix", +.short_help = "set vxlan-gpe-ioam export ipfix collector <ip4-address> src <ip4-address>", +.function = set_vxlan_gpe_ioam_export_ipfix_command_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +vxlan_gpe_ioam_export_init (vlib_main_t * vm) +{ + ioam_export_main_t *em = &vxlan_gpe_ioam_export_main; + clib_error_t *error = 0; + u8 *name; + + name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + em->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + em->unix_time_0 = (u32) time (0); /* Store starting time */ + em->vlib_time_0 = vlib_time_now (vm); + + error = vxlan_gpe_ioam_export_plugin_api_hookup (vm); + em->my_hbh_slot = ~0; + em->vlib_main = vm; + em->vnet_main = vnet_get_main (); + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (vxlan_gpe_ioam_export_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h new file mode 100644 index 00000000000..6d93f09341a --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h> diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h new file mode 100644 index 00000000000..cc5698de334 --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vxlan_gpe_ioam_export_msg_enum_h +#define included_vxlan_gpe_ioam_export_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_vxlan_gpe_ioam_export_msg_enum_h */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c new file mode 100644 index 00000000000..494263d9a1a --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * vxlan_gpe_ioam_export_test.c - test harness plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> + + +/* Declare message IDs */ +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h> +#undef vl_api_version + + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} export_test_main_t; + +export_test_main_t export_test_main; + +#define foreach_standard_reply_retval_handler \ +_(vxlan_gpe_ioam_export_enable_disable_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = export_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY, vxlan_gpe_ioam_export_enable_disable_reply) + + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int +api_vxlan_gpe_ioam_export_enable_disable (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + unformat_input_t *i = vam->input; + f64 timeout; + int is_disable = 0; + vl_api_vxlan_gpe_ioam_export_enable_disable_t *mp; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "disable")) + is_disable = 1; + else + break; + } + + /* Construct the API message */ + M (VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE, + vxlan_gpe_ioam_export_enable_disable); + mp->is_disable = is_disable; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(vxlan_gpe_ioam_export_enable_disable, "<intfc> [disable]") + +void +vat_api_hookup (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + u8 *name; + + sm->vat_main = vam; + + name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~ 0) + vat_api_hookup (vam); + + vec_free (name); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c new file mode 100644 index 00000000000..58508ebf10e --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ioam_export_thread.c + */ +#include <vnet/api_errno.h> +#include <vppinfra/pool.h> +#include <ioam/export-common/ioam_export.h> + +static vlib_node_registration_t vxlan_gpe_ioam_export_process_node; + +static uword +vxlan_gpe_ioam_export_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + return (ioam_export_process_common (&vxlan_gpe_ioam_export_main, + vm, rt, f, + vxlan_gpe_ioam_export_process_node.index)); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan_gpe_ioam_export_process_node, static) = +{ + .function = vxlan_gpe_ioam_export_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vxlan-gpe-ioam-export-process", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c new file mode 100644 index 00000000000..722c2b061c0 --- /dev/null +++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <ioam/export-common/ioam_export.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> + +typedef struct +{ + u32 next_index; + u32 flow_label; +} export_trace_t; + +/* packet trace format function */ +static u8 * +format_export_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + export_trace_t *t = va_arg (*args, export_trace_t *); + + s = format (s, "EXPORT: flow_label %d, next index %d", + t->flow_label, t->next_index); + return s; +} + +vlib_node_registration_t export_node; + +#define foreach_export_error \ +_(RECORDED, "Packets recorded for export") + +typedef enum +{ +#define _(sym,str) EXPORT_ERROR_##sym, + foreach_export_error +#undef _ + EXPORT_N_ERROR, +} export_error_t; + +static char *export_error_strings[] = { +#define _(sym,string) string, + foreach_export_error +#undef _ +}; + +typedef enum +{ + EXPORT_NEXT_VXLAN_GPE_INPUT, + EXPORT_N_NEXT, +} export_next_t; + +always_inline void +copy3cachelines (void *dst, const void *src, size_t n) +{ +#if 0 + if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE)) + { + /* Copy only the first 1/2 cache lines whatever is available */ + if (n >= 64) + clib_mov64 ((u8 *) dst, (const u8 *) src); + if (n >= 128) + clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64); + return; + } + clib_mov64 ((u8 *) dst, (const u8 *) src); + clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64); + clib_mov64 ((u8 *) dst + 128, (const u8 *) src + 128); +#endif +#if 1 + + u64 *copy_dst, *copy_src; + int i; + copy_dst = (u64 *) dst; + copy_src = (u64 *) src; + if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE)) + { + for (i = 0; i < n / 64; i++) + { + copy_dst[0] = copy_src[0]; + copy_dst[1] = copy_src[1]; + copy_dst[2] = copy_src[2]; + copy_dst[3] = copy_src[3]; + copy_dst[4] = copy_src[4]; + copy_dst[5] = copy_src[5]; + copy_dst[6] = copy_src[6]; + copy_dst[7] = copy_src[7]; + copy_dst += 8; + copy_src += 8; + } + return; + } + for (i = 0; i < 3; i++) + { + copy_dst[0] = copy_src[0]; + copy_dst[1] = copy_src[1]; + copy_dst[2] = copy_src[2]; + copy_dst[3] = copy_src[3]; + copy_dst[4] = copy_src[4]; + copy_dst[5] = copy_src[5]; + copy_dst[6] = copy_src[6]; + copy_dst[7] = copy_src[7]; + copy_dst += 8; + copy_src += 8; + } +#endif +} + + +static uword +vxlan_gpe_export_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + ioam_export_main_t *em = &vxlan_gpe_ioam_export_main; + ioam_export_node_common (em, vm, node, frame, ip4_header_t, length, + ip_version_and_header_length, + EXPORT_NEXT_VXLAN_GPE_INPUT); + return frame->n_vectors; +} + +/* + * Node for VXLAN-GPE export + */ +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (export_node) = +{ + .function = vxlan_gpe_export_node_fn, + .name = "vxlan-gpe-ioam-export", + .vector_size = sizeof (u32), + .format_trace = format_export_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (export_error_strings), + .error_strings = export_error_strings, + .n_next_nodes = EXPORT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + {[EXPORT_NEXT_VXLAN_GPE_INPUT] = "vxlan-gpe-pop-ioam-v4"}, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api new file mode 100644 index 00000000000..f22d9fc8ebe --- /dev/null +++ b/src/plugins/ioam/export/ioam_export.api @@ -0,0 +1,42 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Define a simple binary API to control the feature */ + +define ioam_export_ip6_enable_disable { + /* Client identifier, set from api_main.my_client_index */ + u32 client_index; + + /* Arbitrary context, so client can match reply to request */ + u32 context; + + /* Enable / disable the feature */ + u8 is_disable; + + /* Collector ip address */ + u8 collector_address[4]; + u8 src_address[4]; + + /* Src ip address */ +}; + +define ioam_export_ip6_enable_disable_reply { + /* From the request */ + u32 context; + + /* Return value, zero means all OK */ + i32 retval; +}; diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c new file mode 100644 index 00000000000..b122e445b28 --- /dev/null +++ b/src/plugins/ioam/export/ioam_export.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * ioam_export.c - ioam export API / debug CLI handling + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <ioam/export-common/ioam_export.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vnet/ip/ip6_hop_by_hop.h> + + +/* define message IDs */ +#include <ioam/export/ioam_export_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* List of message types that this plugin understands */ + +#define foreach_ioam_export_plugin_api_msg \ +_(IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable) + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + ioam_export_main_t *em = &ioam_export_main; + clib_error_t *error = 0; + + em->vlib_main = vm; + em->vnet_main = h->vnet_main; + + return error; +} + +/* Action function shared between message handler and debug CLI */ + +int +ioam_export_ip6_enable_disable (ioam_export_main_t * em, + u8 is_disable, + ip4_address_t * collector_address, + ip4_address_t * src_address) +{ + vlib_main_t *vm = em->vlib_main; + + if (is_disable == 0) + { + if (1 == ioam_export_header_create (em, collector_address, src_address)) + { + ioam_export_thread_buffer_init (em, vm); + ip6_hbh_set_next_override (em->my_hbh_slot); + /* Turn on the export buffer check process */ + vlib_process_signal_event (vm, em->export_process_node_index, 1, 0); + + } + else + { + return (-2); + } + } + else + { + ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP); + ioam_export_header_cleanup (em, collector_address, src_address); + ioam_export_thread_buffer_free (em); + /* Turn off the export buffer check process */ + vlib_process_signal_event (vm, em->export_process_node_index, 2, 0); + + } + + return 0; +} + +/* API message handler */ +static void vl_api_ioam_export_ip6_enable_disable_t_handler + (vl_api_ioam_export_ip6_enable_disable_t * mp) +{ + vl_api_ioam_export_ip6_enable_disable_reply_t *rmp; + ioam_export_main_t *sm = &ioam_export_main; + int rv; + + rv = ioam_export_ip6_enable_disable (sm, (int) (mp->is_disable), + (ip4_address_t *) + mp->collector_address, + (ip4_address_t *) mp->src_address); + + REPLY_MACRO (VL_API_IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY); +} + +/* Set up the API message handling tables */ +static clib_error_t * +ioam_export_plugin_api_hookup (vlib_main_t * vm) +{ + ioam_export_main_t *sm = &ioam_export_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_ioam_export_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (ioam_export_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_ioam_export; +#undef _ +} + +static clib_error_t * +set_ioam_export_ipfix_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ioam_export_main_t *em = &ioam_export_main; + ip4_address_t collector, src; + u8 is_disable = 0; + + collector.as_u32 = 0; + src.as_u32 = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "collector %U", unformat_ip4_address, &collector)) + ; + else if (unformat (input, "src %U", unformat_ip4_address, &src)) + ; + else if (unformat (input, "disable")) + is_disable = 1; + else + break; + } + + if (collector.as_u32 == 0) + return clib_error_return (0, "collector address required"); + + if (src.as_u32 == 0) + return clib_error_return (0, "src address required"); + + em->ipfix_collector.as_u32 = collector.as_u32; + em->src_address.as_u32 = src.as_u32; + + vlib_cli_output (vm, "Collector %U, src address %U", + format_ip4_address, &em->ipfix_collector, + format_ip4_address, &em->src_address); + + /* Turn on the export timer process */ + // vlib_process_signal_event (vm, flow_report_process_node.index, + //1, 0); + ioam_export_ip6_enable_disable (em, is_disable, &collector, &src); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ipfix_command, static) = +{ +.path = "set ioam export ipfix",.short_help = + "set ioam export ipfix collector <ip4-address> src <ip4-address>",. + function = set_ioam_export_ipfix_command_fn,}; +/* *INDENT-ON* */ + + +static clib_error_t * +ioam_export_init (vlib_main_t * vm) +{ + ioam_export_main_t *em = &ioam_export_main; + clib_error_t *error = 0; + u8 *name; + u32 node_index = export_node.index; + vlib_node_t *ip6_hbyh_node = NULL; + + name = format (0, "ioam_export_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + em->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + em->unix_time_0 = (u32) time (0); /* Store starting time */ + em->vlib_time_0 = vlib_time_now (vm); + + error = ioam_export_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (em, &api_main); + + /* Hook this export node to ip6-hop-by-hop */ + ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop"); + em->my_hbh_slot = vlib_node_add_next (vm, ip6_hbyh_node->index, node_index); + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (ioam_export_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/export/ioam_export_all_api_h.h b/src/plugins/ioam/export/ioam_export_all_api_h.h new file mode 100644 index 00000000000..bc4368f2acb --- /dev/null +++ b/src/plugins/ioam/export/ioam_export_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <ioam/export/ioam_export.api.h> diff --git a/src/plugins/ioam/export/ioam_export_msg_enum.h b/src/plugins/ioam/export/ioam_export_msg_enum.h new file mode 100644 index 00000000000..c2de798893c --- /dev/null +++ b/src/plugins/ioam/export/ioam_export_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_ioam_export_msg_enum_h +#define included_ioam_export_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <ioam/export/ioam_export_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_ioam_export_msg_enum_h */ diff --git a/src/plugins/ioam/export/ioam_export_test.c b/src/plugins/ioam/export/ioam_export_test.c new file mode 100644 index 00000000000..f991fc0c795 --- /dev/null +++ b/src/plugins/ioam/export/ioam_export_test.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * ioam_export_test.c - test harness plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> + + +/* Declare message IDs */ +#include <ioam/export/ioam_export_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/export/ioam_export_all_api_h.h> +#undef vl_api_version + + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} export_test_main_t; + +export_test_main_t export_test_main; + +#define foreach_standard_reply_retval_handler \ +_(ioam_export_ip6_enable_disable_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = export_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY, ioam_export_ip6_enable_disable_reply) + + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int +api_ioam_export_ip6_enable_disable (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + unformat_input_t *i = vam->input; + f64 timeout; + int is_disable = 0; + vl_api_ioam_export_ip6_enable_disable_t *mp; + + /* Parse args required to build the message */ + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "disable")) + is_disable = 1; + else + break; + } + + /* Construct the API message */ + M (IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable); + mp->is_disable = is_disable; + + /* send it... */ + S; + + /* Wait for a reply... */ + W; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(ioam_export_ip6_enable_disable, "<intfc> [disable]") + +void +vat_api_hookup (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + export_test_main_t *sm = &export_test_main; + u8 *name; + + sm->vat_main = vam; + + name = format (0, "ioam_export_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~ 0) + vat_api_hookup (vam); + + vec_free (name); + + return 0; +} diff --git a/src/plugins/ioam/export/ioam_export_thread.c b/src/plugins/ioam/export/ioam_export_thread.c new file mode 100644 index 00000000000..d2eb200936a --- /dev/null +++ b/src/plugins/ioam/export/ioam_export_thread.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ioam_export_thread.c + */ +#include <vnet/api_errno.h> +#include <vppinfra/pool.h> +#include <ioam/export-common/ioam_export.h> + +static vlib_node_registration_t ioam_export_process_node; + +static uword +ioam_export_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + return (ioam_export_process_common(&ioam_export_main, + vm, rt, f, + ioam_export_process_node.index)); +} + +VLIB_REGISTER_NODE (ioam_export_process_node, static) = +{ + .function = ioam_export_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "ioam-export-process", +}; diff --git a/src/plugins/ioam/export/node.c b/src/plugins/ioam/export/node.c new file mode 100644 index 00000000000..19f143dfbf3 --- /dev/null +++ b/src/plugins/ioam/export/node.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> +#include <ioam/export-common/ioam_export.h> + +typedef struct +{ + u32 next_index; + u32 flow_label; +} export_trace_t; + +/* packet trace format function */ +static u8 * +format_export_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + export_trace_t *t = va_arg (*args, export_trace_t *); + + s = format (s, "EXPORT: flow_label %d, next index %d", + t->flow_label, t->next_index); + return s; +} + +vlib_node_registration_t export_node; + +#define foreach_export_error \ +_(RECORDED, "Packets recorded for export") + +typedef enum +{ +#define _(sym,str) EXPORT_ERROR_##sym, + foreach_export_error +#undef _ + EXPORT_N_ERROR, +} export_error_t; + +static char *export_error_strings[] = { +#define _(sym,string) string, + foreach_export_error +#undef _ +}; + +typedef enum +{ + EXPORT_NEXT_POP_HBYH, + EXPORT_N_NEXT, +} export_next_t; + +always_inline void +copy3cachelines (void *dst, const void *src, size_t n) +{ +#if 0 + if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE)) + { + /* Copy only the first 1/2 cache lines whatever is available */ + if (n >= 64) + clib_mov64 ((u8 *) dst, (const u8 *) src); + if (n >= 128) + clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64); + return; + } + clib_mov64 ((u8 *) dst, (const u8 *) src); + clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64); + clib_mov64 ((u8 *) dst + 128, (const u8 *) src + 128); +#endif +#if 1 + + u64 *copy_dst, *copy_src; + int i; + copy_dst = (u64 *) dst; + copy_src = (u64 *) src; + if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE)) + { + for (i = 0; i < n / 64; i++) + { + copy_dst[0] = copy_src[0]; + copy_dst[1] = copy_src[1]; + copy_dst[2] = copy_src[2]; + copy_dst[3] = copy_src[3]; + copy_dst[4] = copy_src[4]; + copy_dst[5] = copy_src[5]; + copy_dst[6] = copy_src[6]; + copy_dst[7] = copy_src[7]; + copy_dst += 8; + copy_src += 8; + } + return; + } + for (i = 0; i < 3; i++) + { + copy_dst[0] = copy_src[0]; + copy_dst[1] = copy_src[1]; + copy_dst[2] = copy_src[2]; + copy_dst[3] = copy_src[3]; + copy_dst[4] = copy_src[4]; + copy_dst[5] = copy_src[5]; + copy_dst[6] = copy_src[6]; + copy_dst[7] = copy_src[7]; + copy_dst += 8; + copy_src += 8; + } +#endif +} + +static uword +ip6_export_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + ioam_export_main_t *em = &ioam_export_main; + ioam_export_node_common(em, vm, node, frame, ip6_header_t, payload_length, + ip_version_traffic_class_and_flow_label, + EXPORT_NEXT_POP_HBYH); + return frame->n_vectors; +} + +/* + * Node for IP6 export + */ +VLIB_REGISTER_NODE (export_node) = +{ + .function = ip6_export_node_fn, + .name = "ip6-export", + .vector_size = sizeof (u32), + .format_trace = format_export_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (export_error_strings), + .error_strings = export_error_strings, + .n_next_nodes = EXPORT_N_NEXT, + /* edit / add dispositions here */ + .next_nodes = + { + [EXPORT_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop" + }, +}; diff --git a/src/plugins/ioam/ioam_plugin_doc.md b/src/plugins/ioam/ioam_plugin_doc.md new file mode 100644 index 00000000000..343abcf73d8 --- /dev/null +++ b/src/plugins/ioam/ioam_plugin_doc.md @@ -0,0 +1,464 @@ +## VPP Inband OAM (iOAM) {#ioam_plugin_doc} + +In-band OAM (iOAM) is an implementation study to record operational +information in the packet while the packet traverses a path between +two points in the network. + +Overview of iOAM can be found in [iOAM-Devnet] page. +The following IETF drafts detail the motivation and mechanism for +recording operational information: + - [iOAM-ietf-requirements] - Describes motivation and usecases for iOAM + - [iOAM-ietf-data] - Describes data records that can be collected using iOAM + - [iOAM-ietf-transport] - Lists out the transport protocols + and mechanism to carry iOAM data records + - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT) + and mechanisms to operationalize the idea + +## Terminology +In-band OAM is expected to be deployed in a specific domain rather +than on the overall Internet. The part of the network which employs in-band OAM +is referred to as **"in-band OAM-domain"**. + +In-band OAM data is added to a packet on entering the in-band OAM-domain +and is removed from the packet when exiting the domain. +Within the in-band OAM-domain, network nodes that the packet traverses +may update the in-band OAM data records. + +- The node which adds in-band OAM data to the packet is called the +**"in-band OAM encapsulating node"**. + +- The node which removes the in-band OAM data is referred to as the +**"in-band OAM decapsulating node"**. + +- Nodes within the domain which are aware of in-band OAM data and read +and/or write or process the in-band OAM data are called +**"in-band OAM transit nodes"**. + +## Features supported in the current release +VPP can function as in-band OAM encapsulating, transit and decapsulating node. +In this version of VPP in-band OAM data is transported as options in an +IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled +for IPv6 traffic. + +The following iOAM features are supported: + +- **In-band OAM Tracing** : In-band OAM supports multiple data records to be +recorded in the packet as the packet traverses the network. +These data records offer insights into the operational behavior of the network. +The following information can be collected in the tracing +data from the nodes a packet traverses: + - Node ID + - Ingress interface ID + - Egress interface ID + - Timestamp + - Pre-configured application data + +- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is +added to every packet for verifying that a packet traverses a specific +set of nodes. +In-band OAM data is updated at every node that is enabled with iOAM +proof of transit and is used to verify whether a packet traversed +all the specified nodes. When the verifier receives each packet, +it can validate whether the packet traversed the specified nodes. + + +## Configuration +Configuring iOAM involves: +- Selecting the packets for which iOAM data must be inserted, updated or removed + - Selection of packets for iOAM data insertion on iOAM encapsulating node. + Selection of packets is done by 5-tuple based classification + - Selection of packets for updating iOAM data is implicitly done on the + presence of iOAM options in the packet + - Selection of packets for removing the iOAM data is done on 5-tuple + based classification +- The kind of data to be collected + - Tracing data + - Proof of transit +- Additional details for processing iOAM data to be collected + - For trace data - trace type, number of nodes to be recorded in the trace, + time stamp precision, etc. + - For POT data - configuration of POT profile required to process the POT data + +The CLI for configuring iOAM is explained here followed by detailed steps +and examples to deploy iOAM on VPP as an encapsulating, transit or +decapsulating iOAM node in the subsequent sub-sections. + +VPP iOAM configuration for enabling trace and POT is as follows: + + set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19> + trace-elts <number of trace elements> trace-tsp <0|1|2|3> + node-id <node ID in hex> app-data <application data in hex> [pot] + +A description of each of the options of the CLI follows: +- trace-type : An entry in the "Node data List" array of the trace option +can have different formats, following the needs of the a deployment. +For example: Some deployments might only be interested +in recording the node identifiers, whereas others might be interested +in recording node identifier and timestamp. +The following types are currently supported: + - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits), + ingress and egress interface IDs (16 bits each), timestamp (32 bits), + application data (32 bits) + - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits), + ingress and egress interface IDs (16 bits each) + - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits), + timestamp (32 bits) + - 0x11: Node data to include hop limit (8 bits), node ID (24 bits), + application data (32 bits) + - 0x19: Node data to include hop limit (8 bits), node ID (24 bits), + timestamp (32 bits), application data (32 bits) +- trace-elts : Defines the length of the node data array in the trace option. +- trace-tsp : Defines the timestamp precision to use with the enumerated value + for precision as follows: + - 0 : 32bits timestamp in seconds + - 1 : 32bits timestamp in milliseconds + - 2 : 32bits timestamp in microseconds + - 3 : 32bits timestamp in nanoseconds +- node-id : Unique identifier for the node, included in the node ID + field of the node data in trace option. +- app-data : The value configured here is included as is in +application data field of node data in trace option. +- pot : Enables POT option to be included in the iOAM options. + +### Trace configuration + +#### On in-band OAM encapsulating node + - **Configure classifier and apply ACL** to select packets for + iOAM data insertion + - Example to enable iOAM data insertion for all the packets + towards IPv6 address db06::06: + + vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst + + vpp# classify session acl-hit-next node ip6-add-hop-by-hop + table-index 0 match l3 ip6 dst db06::06 + + vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0 + + - **Enable tracing** : Specify node ID, maximum number of nodes for which + trace data should be recorded, type of data to be included for recording, + optionally application data to be included + - Example to enable tracing with a maximum of 4 nodes recorded + and the data to be recorded to include - hop limit, node id, + ingress and egress interface IDs, timestamp (millisecond precision), + application data (0x1234): + + + vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1 + node-id 0x1 app-data 0x1234 + + + +#### On in-band OAM transit node +- The transit node requires trace type, timestamp precision, node ID and +optionally application data to be configured, +to update its node data in the trace option. + +Example: + + vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1 + node-id 0x2 app-data 0x1234 + +#### On the In-band OAM decapsulating node +- The decapsulating node similar to encapsulating node requires +**classification** of the packets to remove iOAM data from. + - Example to decapsulate iOAM data for packets towards + db06::06, configure classifier and enable it as an ACL as follows: + + + vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst + + vpp# classify session acl-hit-next node ip6-lookup table-index 0 + match l3 ip6 dst db06::06 opaque-index 100 + + vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0 + + +- Decapsulating node requires trace type, timestamp precision, +node ID and optionally application data to be configured, +to update its node data in the trace option before it is decapsulated. + +Example: + + vpp# set ioam rewrite trace-type 0x1f trace-elts 4 + trace-tsp 1 node-id 0x3 app-data 0x1234 + + +### Proof of Transit configuration + +For details on proof-of-transit, +see the IETF draft [iOAM-ietf-proof-of-transit]. +To enable Proof of Transit all the nodes that participate +and hence are verified for transit need a proof of transit profile. +A script to generate a proof of transit profile as per the mechanism +described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet]. + +The Proof of transit mechanism implemented here is based on +Shamir's Secret Sharing algorithm. +The overall algorithm uses two polynomials +POLY-1 and POLY-2. The degree of polynomials depends on number of nodes +to be verified for transit. +POLY-1 is secret and constant. Each node gets a point on POLY-1 +at setup-time and keeps it secret. +POLY-2 is public, random and per packet. +Each node is assigned a point on POLY-1 and POLY-2 with the same x index. +Each node derives its point on POLY-2 each time a packet arrives at it. +A node then contributes its points on POLY-1 and POLY-2 to construct +POLY-3 (POLY-3 = POLY-1 + POLY-2) using lagrange extrapolation and +forwards it towards the verifier by updating POT data in the packet. +The verifier constructs POLY-3 from the accumulated value from all the nodes +and its own points on POLY-1 and POLY-2 and verifies whether +POLY-3 = POLY-1 + POLY-2. Only the verifier knows POLY-1. +The solution leverages finite field arithmetic in a field of size "prime number" +for reasons explained in description of Shamir's secret sharing algorithm. + +Here is an explanation of POT profile list and profile configuration CLI to +realize the above mechanism. +It is best to use the script provided at [iOAM-Devnet] to generate +this configuration. +- **Create POT profile** : set pot profile name <string> id [0-1] +[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64 +lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64] + - name : Profile list name. + - id : Profile id, it can be 0 or 1. + A maximum of two profiles can be configured per profile list. + - validator-key : Secret key configured only on the + verifier/decapsulating node used to compare and verify proof of transit. + - prime-number : Prime number for finite field arithmetic as required by the + proof of transit mechanism. + - secret_share : Unique point for each node on the secret polynomial POLY-1. + - lpc : Lagrange Polynomial Constant(LPC) calculated per node based on + its point (x value used for evaluating the points on the polynomial) + on the polynomial used in lagrange extrapolation + for reconstructing polynomial (POLY-3). + - polynomial2 : Is the pre-evaluated value of the point on + 2nd polynomial(POLY-2). This is unique for each node. + It is pre-evaluated for all the coefficients of POLY-2 except + for the constant part of the polynomial that changes per packet + and is received as part of the POT data in the packet. + - bits-in-random : To control the size of the random number to be + generated. This number has to match the other numbers generated and used + in the profile as per the algorithm. + +- **Set a configured profile as active/in-use** : +set pot profile-active name <string> ID [0-1] + - name : Name of the profile list to be used for computing + POT data per packet. + - ID : Identifier of the profile within the list to be used. + +#### On In-band OAM encapsulating node + - Configure the classifier and apply ACL to select packets for iOAM data insertion. + - Example to enable iOAM data insertion for all the packet towards + IPv6 address db06::06 - + + + vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst + + vpp# classify session acl-hit-next node + ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06 + + vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0 + + + - Configure the proof of transit profile list with profiles. +Each profile list referred to by a name can contain 2 profiles, +only one is in use for updating proof of transit data at any time. + - Example profile list example with a profile generated from the + script to verify transit through 3 nodes is: + + + vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685 + secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682 + polynomial2 0xffb543d4a9c bits-in-random 63 + + - Enable one of the profiles from the configured profile list as active + so that is will be used for calculating proof of transit + +Example enable profile ID 0 from profile list example configured above: + + + vpp# set pot profile-active name example ID 0 + + + - Enable POT option to be inserted + + + vpp# set ioam rewrite pot + + +#### On in-band OAM transit node + - Configure the proof of transit profile list with profiles for transit node. +Example: + + + vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685 + secret_share 0x564cdbdec4eb625d lpc 0x1 + polynomial2 0x23f3a227186a bits-in-random 63 + +#### On in-band OAM decapsulating node / verifier +- The decapsulating node, similar to the encapsulating node requires +classification of the packets to remove iOAM data from. + - Example to decapsulate iOAM data for packets towards db06::06 + configure classifier and enable it as an ACL as follows: + + + vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst + + vpp# classify session acl-hit-next node ip6-lookup table-index 0 + match l3 ip6 dst db06::06 opaque-index 100 + + vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0 + +- To update and verify the proof of transit, POT profile list should be configured. + - Example POT profile list configured as follows: + + vpp# set pot profile name example id 0 validate-key 0x7fff0000fa88465d + prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3 + polynomial2 0x3ff738597ce bits-in-random 63 + +## Operational data + +Following CLIs are available to check iOAM operation: +- To check iOAM configuration that are effective use "show ioam summary" + +Example: + + vpp# show ioam summary + REWRITE FLOW CONFIGS - Not configured + HOP BY HOP OPTIONS - TRACE CONFIG - + Trace Type : 0x1f (31) + Trace timestamp precision : 1 (Milliseconds) + Num of trace nodes : 4 + Node-id : 0x2 (2) + App Data : 0x1234 (4660) + POT OPTION - 1 (Enabled) + Try 'show ioam pot and show pot profile' for more information + +- To find statistics about packets for which iOAM options were +added (encapsulating node) and removed (decapsulating node) execute +*show errors* + +Example on encapsulating node: + + + vpp# show error + Count Node Reason + 1208804706 ip6-inacl input ACL hits + 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options + +Example on decapsulating node: + + vpp# show error + Count Node Reason + 69508569 ip6-inacl input ACL hits + 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options + +- To check the POT profiles use "show pot profile" + +Example: + + vpp# show pot profile + Profile list in use : example + POT Profile at index: 0 + ID : 0 + Validator : False (0) + Secret share : 0x564cdbdec4eb625d (6218586935324795485) + Prime number : 0x7fff0000fa884685 (9223090566081300101) + 2nd polynomial(eval) : 0x23f3a227186a (39529304496234) + LPC : 0x1 (1) + Bit mask : 0x7fffffffffffffff (9223372036854775807) + Profile index in use: 0 + Pkts passed : 0x36 (54) + +- To get statistics of POT for packets use "show ioam pot" + +Example at encapsulating or transit node: + + vpp# show ioam pot + Pkts with ip6 hop-by-hop POT options - 54 + Pkts with ip6 hop-by-hop POT options but no profile set - 0 + Pkts with POT in Policy - 0 + Pkts with POT out of Policy - 0 + + +Example at decapsulating/verification node: + + + vpp# show ioam pot + Pkts with ip6 hop-by-hop POT options - 54 + Pkts with ip6 hop-by-hop POT options but no profile set - 0 + Pkts with POT in Policy - 54 + Pkts with POT out of Policy - 0 + +- Tracing - enable trace of IPv6 packets to view the data inserted and +collected. + +Example when the nodes are receiving data over a DPDK interface: +Enable tracing using "trace add dpdk-input 20" and +execute "show trace" to view the iOAM data collected: + + + vpp# trace add dpdk-input 20 + + vpp# show trace + + ------------------- Start of thread 0 vpp_main ------------------- + + Packet 1 + + 00:00:19:294697: dpdk-input + GigabitEthernetb/0/0 rx queue 0 + buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0 + PKT MBUF: port 0, nb_segs 1, pkt_len 214 + buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00 + packet_type 0x0 + IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55 + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + tos 0x00, flow label 0x0, hop limit 63, payload length 160 + 00:00:19:294737: ethernet-input + IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55 + 00:00:19:294753: ip6-input + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + tos 0x00, flow label 0x0, hop limit 63, payload length 160 + 00:00:19:294757: ip6-lookup + fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000 + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + tos 0x00, flow label 0x0, hop limit 63, payload length 160 + 00:00:19:294802: ip6-hop-by-hop + IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left + [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0 + app 0x0 + [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213 + app 0x1234 + [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204 + app 0x1234 + [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200 + app 0x1234 + POT opt present + random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0 + 00:00:19:294810: ip6-rewrite + tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0 + IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000 + IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + tos 0x00, flow label 0x0, hop limit 62, payload length 160 + 00:00:19:294814: GigabitEthernetb/0/0-output + GigabitEthernetb/0/0 + IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + tos 0x00, flow label 0x0, hop limit 62, payload length 160 + 00:00:19:294820: GigabitEthernetb/0/0-tx + GigabitEthernetb/0/0 tx queue 0 + buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0 + IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 + + IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6 + + tos 0x00, flow label 0x0, hop limit 62, payload length 160 + + +[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM> +[iOAM-ietf-requirements]:<https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01> +[iOAM-ietf-transport]:<https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01> +[iOAM-ietf-data]:<https://tools.ietf.org/html/draft-brockners-inband-oam-data-01> +[iOAM-ietf-proof-of-transit]:<https://tools.ietf.org/html/draft-brockners-proof-of-transit-01> diff --git a/src/plugins/ioam/lib-pot/math64.h b/src/plugins/ioam/lib-pot/math64.h new file mode 100644 index 00000000000..4c608a37de4 --- /dev/null +++ b/src/plugins/ioam/lib-pot/math64.h @@ -0,0 +1,159 @@ +/* + * math64.h provides the 64 bit unsigned integer add, multiply followed by modulo operation + * The linux/math64.h provides divide and multiply 64 bit integers but: + * 1. multiply: mul_u64_u64_shr - only returns 64 bits of the result and has to be called + * twice to get the complete 128 bits of the result. + * 2. Modulo operation of the result of addition and multiplication of u64 that may result + * in integers > 64 bits is not supported + * Hence this header to combine add/multiply followed by modulo of u64 integrers + * always resulting in u64. + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef include_vnet_math64_h +#define include_vnet_math64_h +#include <stdint.h> + +/* + * multiplies and returns result in hi and lo + */ +static inline void mul64by64(u64 a, u64 b, u64 * hi, u64 * lo) +{ + u64 a_lo = (u64) (uint32_t) a; + u64 a_hi = a >> 32; + u64 b_lo = (u64) (u32) b; + u64 b_hi = b >> 32; + + u64 p0 = a_lo * b_lo; + u64 p1 = a_lo * b_hi; + u64 p2 = a_hi * b_lo; + u64 p3 = a_hi * b_hi; + + u32 cy = (u32) (((p0 >> 32) + (u32) p1 + (u32) p2) >> 32); + + *lo = p0 + (p1 << 32) + (p2 << 32); + *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy; + return; +} + +#define TWO64 18446744073709551616.0 + +static inline u64 mod128by64(u64 x, u64 y, u64 m, double di) +{ + u64 q1, q2, q; + u64 p1, p0; + double dq; + + /* calculate quotient first pass 53 bits */ + dq = (TWO64 * (double)x + (double)y) * di; + + if (dq >= TWO64) + q1 = 0xfffffffffffff800L; + else + q1 = dq; + + /* q1 * m to compare the product to the dividend. */ + mul64by64(q1, m, &p1, &p0); + + /* Adjust quotient. is it > actual result: */ + if (x < p1 || (x == p1 && y < p0)) + { + /* q1 > quotient. calculate abs remainder */ + x = p1 - (x + (p0 < y)); + y = p0 - y; + + /* use the remainder as new dividend to adjust quotient */ + q2 = (u64) ((TWO64 * (double)x + (double)y) * di); + mul64by64(q2, m, &p1, &p0); + + q = q1 - q2; + if (x < p1 || (x == p1 && y <= p0)) + { + y = p0 - y; + } + else + { + y = p0 - y; + y += m; + q--; + } + } + else + { + x = x - (p1 + (y < p0)); + y = y - p0; + + q2 = (u64) ((TWO64 * (double)x + (double)y) * di); + mul64by64(q2, m, &p1, &p0); + + q = q1 + q2; + if (x < p1 || (x == p1 && y < p0)) + { + y = y - p0; + y += m; + q--; + } + else + { + y = y - p0; + if (y >= m) + { + y -= m; + q++; + } + } + } + + return y; +} + +/* + * returns a % p + */ +static inline u64 mod64by64(u64 a, u64 p, u64 primeinv) +{ + return (mod128by64(0, a, p, primeinv)); +} + +static inline void add64(u64 a, u64 b, u64 * whi, u64 * wlo) +{ + *wlo = a + b; + if (*wlo < a) + *whi = 1; + +} + +/* + * returns (a + b)%p + */ +static inline u64 add64_mod(u64 a, u64 b, u64 p, double pi) +{ + u64 shi = 0, slo = 0; + + add64(a, b, &shi, &slo); + return (mod128by64(shi, slo, p, pi)); +} + +/* + * returns (ab) % p + */ +static inline u64 mul64_mod(u64 a, u64 b, u64 p, double pi) +{ + u64 phi = 0, plo = 0; + + mul64by64(a, b, &phi, &plo); + return (mod128by64(phi, plo, p, pi)); +} + +#endif diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api new file mode 100644 index 00000000000..fa2fc126b7e --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot.api @@ -0,0 +1,133 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** \brief Proof of Transit(POT): Set POT profile + @param id - id of the profile + @param validator - True/False to indicate if this is verifier + @param secret_key - Verification key + @param secret_share - Share of the 1st polynomial + @param prime - Prime number used for modulo operation + @param max_bits - Max bits to be used for Random number generation + @param lpc - Lagrange basis polynomial + @param polynomial_public - pre-evaluated public polynomial + @param list_name_len - length of the name of this profile list + @param list_name - name of this profile list +*/ +define pot_profile_add { + u32 client_index; + u32 context; + u8 id; + u8 validator; + u64 secret_key; + u64 secret_share; + u64 prime; + u8 max_bits; + u64 lpc; + u64 polynomial_public; + u8 list_name_len; + u8 list_name[0]; +}; + +/** \brief Proof of Transit profile add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define pot_profile_add_reply { + u32 context; + i32 retval; +}; + + +/** \brief Proof of Transit(POT): Activate POT profile in the list + @param id - id of the profile + @param list_name_len - length of the name of this profile list + @param list_name - name of this profile list +*/ +define pot_profile_activate { + u32 client_index; + u32 context; + u8 id; + u8 list_name_len; + u8 list_name[0]; +}; + +/** \brief Proof of Transit profile activate response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define pot_profile_activate_reply { + u32 context; + i32 retval; +}; + +/** \brief Delete POT Profile + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param list_name_len - length of the name of the profile list + @param list_name - name of profile list to delete +*/ +define pot_profile_del { + u32 client_index; + u32 context; + u8 list_name_len; + u8 list_name[0]; +}; + +/** \brief Proof of Transit profile add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define pot_profile_del_reply { + u32 context; + i32 retval; +}; + +/** \brief Show POT Profiles + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param id - id of the profile +*/ +define pot_profile_show_config_dump { + u32 client_index; + u32 context; + u8 id; +}; + +/** \brief Show POT profile reply + @param id - id of the profile + @param validator - True/False to indicate if this is verifier + @param secret_key - Verification key + @param secret_share - Share of the 1st polynomial + @param prime - Prime number used for modulo operation + @param max_bits - Max bits to be used for Random number generation + @param lpc - Lagrange basis polynomial + @param polynomial_public - pre-evaluated public polynomial + @param list_name_len - length of the name of this profile list + @param list_name - name of this profile list +*/ +define pot_profile_show_config_details { + u32 context; + i32 retval; + u8 id; + u8 validator; + u64 secret_key; + u64 secret_share; + u64 prime; + u64 bit_mask; + u64 lpc; + u64 polynomial_public; +}; diff --git a/src/plugins/ioam/lib-pot/pot_all_api_h.h b/src/plugins/ioam/lib-pot/pot_all_api_h.h new file mode 100644 index 00000000000..63967c45444 --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <ioam/lib-pot/pot.api.h> diff --git a/src/plugins/ioam/lib-pot/pot_api.c b/src/plugins/ioam/lib-pot/pot_api.c new file mode 100644 index 00000000000..d3af7b4036a --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_api.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * pot_api.c - Proof of Transit related APIs to create + * and maintain profiles + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <ioam/lib-pot/pot_util.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +/* define message IDs */ +#include <ioam/lib-pot/pot_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + q = vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +/* List of message types that this plugin understands */ + +#define foreach_pot_plugin_api_msg \ +_(POT_PROFILE_ADD, pot_profile_add) \ +_(POT_PROFILE_ACTIVATE, pot_profile_activate) \ +_(POT_PROFILE_DEL, pot_profile_del) \ +_(POT_PROFILE_SHOW_CONFIG_DUMP, pot_profile_show_config_dump) \ + +static void vl_api_pot_profile_add_t_handler +(vl_api_pot_profile_add_t *mp) +{ + pot_main_t * sm = &pot_main; + int rv = 0; + vl_api_pot_profile_add_reply_t * rmp; + u8 id; + pot_profile *profile = NULL; + u8 *name = 0; + + if (mp->list_name_len) + name = format(0, "%s", mp->list_name); + + pot_profile_list_init(name); + id = mp->id; + profile = pot_profile_find(id); + if (profile) { + rv = pot_profile_create(profile, + clib_net_to_host_u64(mp->prime), + clib_net_to_host_u64(mp->polynomial_public), + clib_net_to_host_u64(mp->lpc), + clib_net_to_host_u64(mp->secret_share)); + if (rv != 0) + goto ERROROUT; + if (1 == mp->validator) + (void)pot_set_validator(profile, clib_net_to_host_u64(mp->secret_key)); + (void)pot_profile_set_bit_mask(profile, mp->max_bits); + } else { + rv = -3; + } + ERROROUT: + vec_free(name); + REPLY_MACRO(VL_API_POT_PROFILE_ADD_REPLY); +} + +static void send_pot_profile_details(vl_api_pot_profile_show_config_dump_t *mp, u8 id) +{ + vl_api_pot_profile_show_config_details_t * rmp; + pot_main_t * sm = &pot_main; + pot_profile *profile = pot_profile_find(id); + int rv = 0; + if(profile){ + REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS, + rmp->id=id; + rmp->validator=profile->validator; + rmp->secret_key=clib_host_to_net_u64(profile->secret_key); + rmp->secret_share=clib_host_to_net_u64(profile->secret_share); + rmp->prime=clib_host_to_net_u64(profile->prime); + rmp->bit_mask=clib_host_to_net_u64(profile->bit_mask); + rmp->lpc=clib_host_to_net_u64(profile->lpc); + rmp->polynomial_public=clib_host_to_net_u64(profile->poly_pre_eval); + ); + } + else{ + REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS, + rmp->id=id; + rmp->validator=0; + rmp->secret_key=0; + rmp->secret_share=0; + rmp->prime=0; + rmp->bit_mask=0; + rmp->lpc=0; + rmp->polynomial_public=0; + ); + } +} + +static void vl_api_pot_profile_show_config_dump_t_handler +(vl_api_pot_profile_show_config_dump_t *mp) +{ + u8 id = mp->id; + u8 dump_call_id = ~0; + if(dump_call_id==id){ + for(id=0;id<MAX_POT_PROFILES;id++) + send_pot_profile_details(mp,id); + } + else + send_pot_profile_details(mp,id); +} + +static void vl_api_pot_profile_activate_t_handler +(vl_api_pot_profile_activate_t *mp) +{ + pot_main_t * sm = &pot_main; + int rv = 0; + vl_api_pot_profile_add_reply_t * rmp; + u8 id; + u8 *name = NULL; + + if (mp->list_name_len) + name = format(0, "%s", mp->list_name); + if (!pot_profile_list_is_enabled(name)) { + rv = -1; + } else { + id = mp->id; + rv = pot_profile_set_active(id); + } + + vec_free(name); + REPLY_MACRO(VL_API_POT_PROFILE_ACTIVATE_REPLY); +} + + +static void vl_api_pot_profile_del_t_handler +(vl_api_pot_profile_del_t *mp) +{ + pot_main_t * sm = &pot_main; + int rv = 0; + vl_api_pot_profile_del_reply_t * rmp; + + clear_pot_profiles(); + + REPLY_MACRO(VL_API_POT_PROFILE_DEL_REPLY); +} + + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + pot_main_t * sm = &pot_main; + clib_error_t * error = 0; + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + return error; +} + +/* Set up the API message handling tables */ +static clib_error_t * +pot_plugin_api_hookup (vlib_main_t *vm) +{ + pot_main_t * sm = &pot_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_pot_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (pot_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_pot; +#undef _ +} + +static clib_error_t * pot_init (vlib_main_t * vm) +{ + pot_main_t * sm = &pot_main; + clib_error_t * error = 0; + u8 * name; + + bzero(sm, sizeof(pot_main)); + (void)pot_util_init(); + name = format (0, "ioam_pot_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + error = pot_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (sm, &api_main); + + vec_free(name); + + return error; +} + +VLIB_INIT_FUNCTION (pot_init); diff --git a/src/plugins/ioam/lib-pot/pot_msg_enum.h b/src/plugins/ioam/lib-pot/pot_msg_enum.h new file mode 100644 index 00000000000..a4a88bed20f --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_pot_msg_enum_h +#define included_pot_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <ioam/lib-pot/pot_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_pot_msg_enum_h */ diff --git a/src/plugins/ioam/lib-pot/pot_test.c b/src/plugins/ioam/lib-pot/pot_test.c new file mode 100644 index 00000000000..2e87023896e --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_test.c @@ -0,0 +1,365 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * pot_test.c - test harness for pot plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> + +/* Declare message IDs */ +#include <ioam/lib-pot/pot_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-pot/pot_all_api_h.h> +#undef vl_api_version + + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} pot_test_main_t; + +pot_test_main_t pot_test_main; + +#define foreach_standard_reply_retval_handler \ +_(pot_profile_add_reply) \ +_(pot_profile_activate_reply) \ +_(pot_profile_del_reply) + +#define foreach_custom_reply_retval_handler \ +_(pot_profile_show_config_details, \ + errmsg(" ID:%d\n",mp->id); \ + errmsg(" Validator:%d\n",mp->validator); \ + errmsg(" secret_key:%Lx\n",clib_net_to_host_u64(mp->secret_key)); \ + errmsg(" secret_share:%Lx\n",clib_net_to_host_u64(mp->secret_share)); \ + errmsg(" prime:%Lx\n",clib_net_to_host_u64(mp->prime)); \ + errmsg(" bitmask:%Lx\n",clib_net_to_host_u64(mp->bit_mask)); \ + errmsg(" lpc:%Lx\n",clib_net_to_host_u64(mp->lpc)); \ + errmsg(" public poly:%Lx\n",clib_net_to_host_u64(mp->polynomial_public)); \ + ) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = pot_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +#define _(n,body) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = pot_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + do{body;}while(0); \ + } +foreach_custom_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(POT_PROFILE_ADD_REPLY, pot_profile_add_reply) \ +_(POT_PROFILE_ACTIVATE_REPLY, pot_profile_activate_reply) \ +_(POT_PROFILE_DEL_REPLY, pot_profile_del_reply) \ +_(POT_PROFILE_SHOW_CONFIG_DETAILS, pot_profile_show_config_details) + + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + + +static int api_pot_profile_add (vat_main_t *vam) +{ +#define MAX_BITS 64 + pot_test_main_t * sm = &pot_test_main; + unformat_input_t *input = vam->input; + vl_api_pot_profile_add_t *mp; + u8 *name = NULL; + u64 prime = 0; + u64 secret_share = 0; + u64 secret_key = 0; + u32 bits = MAX_BITS; + u64 lpc = 0, poly2 = 0; + f64 timeout; + u8 id = 0; + int rv = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(input, "name %s", &name)) + ; + else if(unformat(input, "id %d", &id)) + ; + else if (unformat(input, "validator-key 0x%Lx", &secret_key)) + ; + else if (unformat(input, "prime-number 0x%Lx", &prime)) + ; + else if (unformat(input, "secret-share 0x%Lx", &secret_share)) + ; + else if (unformat(input, "polynomial-public 0x%Lx", &poly2)) + ; + else if (unformat(input, "lpc 0x%Lx", &lpc)) + ; + else if (unformat(input, "bits-in-random %u", &bits)) + { + if (bits > MAX_BITS) + bits = MAX_BITS; + } + else + break; + } + + if (!name) + { + errmsg ("name required\n"); + rv = -99; + goto OUT; + } + + M2(POT_PROFILE_ADD, pot_profile_add, vec_len(name)); + + mp->list_name_len = vec_len(name); + clib_memcpy(mp->list_name, name, mp->list_name_len); + mp->secret_share = clib_host_to_net_u64(secret_share); + mp->polynomial_public = clib_host_to_net_u64(poly2); + mp->lpc = clib_host_to_net_u64(lpc); + mp->prime = clib_host_to_net_u64(prime); + if (secret_key != 0) + { + mp->secret_key = clib_host_to_net_u64(secret_key); + mp->validator = 1; + } + else + { + mp->validator = 0; + } + mp->id = id; + mp->max_bits = bits; + + S; W; + +OUT: + vec_free(name); + return(rv); +} + +static int api_pot_profile_activate (vat_main_t *vam) +{ +#define MAX_BITS 64 + pot_test_main_t * sm = &pot_test_main; + unformat_input_t *input = vam->input; + vl_api_pot_profile_activate_t *mp; + u8 *name = NULL; + u8 id = 0; + int rv = 0; + f64 timeout; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(input, "name %s", &name)) + ; + else if(unformat(input, "id %d", &id)) + ; + else + break; + } + + if (!name) + { + errmsg ("name required\n"); + rv = -99; + goto OUT; + } + + M2(POT_PROFILE_ACTIVATE, pot_profile_activate, vec_len(name)); + + mp->list_name_len = vec_len(name); + clib_memcpy(mp->list_name, name, mp->list_name_len); + mp->id = id; + + S; W; + +OUT: + vec_free(name); + return(rv); +} + + +static int api_pot_profile_del (vat_main_t *vam) +{ + pot_test_main_t * sm = &pot_test_main; + vl_api_pot_profile_del_t *mp; + f64 timeout; + + M(POT_PROFILE_DEL, pot_profile_del); + mp->list_name_len = 0; + S; W; + return 0; +} + +static int api_pot_profile_show_config_dump (vat_main_t *vam) +{ + pot_test_main_t * sm = &pot_test_main; + unformat_input_t *input = vam->input; + vl_api_pot_profile_show_config_dump_t *mp; + f64 timeout; + u8 id = 0; + + while(unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if(unformat(input,"id %d",&id)); + else + break; + } + M(POT_PROFILE_SHOW_CONFIG_DUMP, pot_profile_show_config_dump); + + mp->id = id; + + S; W; + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(pot_profile_add, "name <name> id [0-1] " \ + "prime-number <0xu64> bits-in-random [0-64] " \ + "secret-share <0xu64> lpc <0xu64> polynomial-public <0xu64> " \ + "[validator-key <0xu64>] [validity <0xu64>]") \ +_(pot_profile_activate, "name <name> id [0-1] ") \ +_(pot_profile_del, "[id <nn>]") \ +_(pot_profile_show_config_dump, "id [0-1]") + +void vat_api_hookup (vat_main_t *vam) +{ + pot_test_main_t * sm = &pot_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + pot_test_main_t * sm = &pot_test_main; + u8 * name; + + sm->vat_main = vam; + + name = format (0, "ioam_pot_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/ioam/lib-pot/pot_util.c b/src/plugins/ioam/lib-pot/pot_util.c new file mode 100644 index 00000000000..a253ad4130f --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_util.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vnet.h> +#include <stdint.h> +#include <time.h> +#include <string.h> +#include <vppinfra/mem.h> +#include "math64.h" +#include "pot_util.h" + +pot_main_t pot_main; + +static void pot_profile_cleanup(pot_profile *profile); + +static void pot_main_profiles_reset (void) +{ + pot_main_t *sm = &pot_main; + int i = 0; + + for (i = 0; i < MAX_POT_PROFILES; i++) + { + pot_profile_cleanup(&(sm->profile_list[i])); + } + sm->active_profile_id = 0; + if (sm->profile_list_name) + vec_free(sm->profile_list_name); + sm->profile_list_name = NULL; +} + +int pot_util_init (void) +{ + pot_main_profiles_reset(); + + return(0); +} + +static void pot_profile_init(pot_profile * new, u8 id) +{ + if (new) + { + memset(new, 0, sizeof(pot_profile)); + new->id = id; + } +} + +pot_profile *pot_profile_find(u8 id) +{ + pot_main_t *sm = &pot_main; + + if (id < MAX_POT_PROFILES) + { + return (&(sm->profile_list[id])); + } + return (NULL); +} +static int pot_profile_name_equal (u8 *name0, u8 *name1) +{ + int len0, len1; + + len0 = vec_len (name0); + len1 = vec_len (name1); + if (len0 != len1) + return(0); + return (0==strncmp ((char *) name0, (char *)name1, len0)); +} + +int pot_profile_list_is_enabled (u8 *name) +{ + pot_main_t *sm = &pot_main; + return (pot_profile_name_equal(sm->profile_list_name, name)); +} + +void pot_profile_list_init(u8 * profile_list_name) +{ + pot_main_t *sm = &pot_main; + int i = 0; + + /* If it is the same profile list skip reset */ + if (pot_profile_name_equal(sm->profile_list_name, profile_list_name)) + { + return; + } + + pot_main_profiles_reset(); + if (vec_len(profile_list_name)) + sm->profile_list_name = (u8 *)vec_dup(profile_list_name); + else + sm->profile_list_name = 0; + sm->active_profile_id = 0; + + for (i = 0; i < MAX_POT_PROFILES; i++) + { + pot_profile_init(&(sm->profile_list[i]), i); + } +} + +static void pot_profile_cleanup(pot_profile * profile) +{ + u16 id = profile->id; + + memset(profile, 0, sizeof(pot_profile)); + profile->id = id; /* Restore id alone */ +} + +int pot_profile_create(pot_profile * profile, u64 prime, + u64 poly2, u64 lpc, u64 secret_share) +{ + if (profile && !profile->in_use) + { + pot_profile_cleanup(profile); + profile->prime = prime; + profile->primeinv = 1.0 / prime; + profile->lpc = lpc; + profile->poly_pre_eval = poly2; + profile->secret_share = secret_share; + profile->total_pkts_using_this_profile = 0; + profile->valid = 1; + return(0); + } + + return(-1); +} + +int pot_set_validator(pot_profile * profile, u64 key) +{ + if (profile && !profile->in_use) + { + profile->validator = 1; + profile->secret_key = key; + return(0); + } + return(-1); +} + +always_inline u64 pot_update_cumulative_inline(u64 cumulative, u64 random, + u64 secret_share, u64 prime, u64 lpc, u64 pre_split, double prime_inv) +{ + u64 share_random = 0; + u64 cumulative_new = 0; + + /* + * calculate split share for random + */ + share_random = add64_mod(pre_split, random, prime, prime_inv); + + /* + * lpc * (share_secret + share_random) + */ + share_random = add64_mod(share_random, secret_share, prime, prime_inv); + share_random = mul64_mod(share_random, lpc, prime, prime_inv); + + cumulative_new = add64_mod(cumulative, share_random, prime, prime_inv); + + return (cumulative_new); +} + +u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random) +{ + if (profile && profile->valid != 0) + { + return (pot_update_cumulative_inline(cumulative, random, profile->secret_share, + profile->prime, profile->lpc, profile->poly_pre_eval, + profile->primeinv)); + } + return (0); +} + +always_inline u8 pot_validate_inline(u64 secret, u64 prime, double prime_inv, + u64 cumulative, u64 random) +{ + if (cumulative == (random + secret)) + { + return (1); + } + else if (cumulative == add64_mod(random, secret, prime, prime_inv)) + { + return (1); + } + return (0); +} + +/* + * return True if the cumulative matches secret from a profile + */ +u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random) +{ + if (profile && profile->validator) + { + return (pot_validate_inline(profile->secret_key, profile->prime, + profile->primeinv, cumulative, random)); + } + return (0); +} + +/* + * Utility function to get random number per pack + */ +u64 pot_generate_random(pot_profile * profile) +{ + u64 random = 0; + int32_t second_half; + static u32 seed = 0; + + if (PREDICT_FALSE(!seed)) + seed = random_default_seed(); + + /* + * Upper 4 bytes seconds + */ + random = (u64) time(NULL); + + random &= 0xffffffff; + random = random << 32; + /* + * Lower 4 bytes random number + */ + second_half = random_u32(&seed); + + random |= second_half; + + if (PREDICT_TRUE(profile != NULL)) + { + random &= profile->bit_mask; + } + return (random); +} + +int pot_profile_set_bit_mask(pot_profile * profile, u16 bits) +{ + int sizeInBits; + + if (profile && !profile->in_use) + { + sizeInBits = sizeof(profile->bit_mask) * 8; + profile->bit_mask = + (bits >= + sizeInBits ? (u64) - 1 : (u64) ((u64) 1 << (u64) bits) - 1); + return(0); + } + return(-1); +} + +clib_error_t *clear_pot_profile_command_fn(vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + + pot_main_profiles_reset(); + + return 0; +} + +void clear_pot_profiles() +{ + clear_pot_profile_command_fn(0, 0, 0); +} + +VLIB_CLI_COMMAND(clear_pot_profile_command) = +{ +.path = "clear pot profile", +.short_help = "clear pot profile [<index>|all]", +.function = clear_pot_profile_command_fn, +}; + +static clib_error_t *set_pot_profile_command_fn(vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u64 prime; + u64 secret_share; + u64 secret_key; + u8 validator = 0; + u32 profile_id = ~0; + u32 bits; + u64 lpc = 0, poly2 = 0; + pot_profile *profile = NULL; + u8 *profile_list_name = NULL; + + bits = MAX_BITS; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(input, "name %s", + &profile_list_name)); + else if (unformat(input, "id %d", &profile_id)) + ; + else if (unformat(input, "validate-key 0x%Lx", &secret_key)) + validator = 1; + else if (unformat(input, "prime-number 0x%Lx", &prime)) + ; + else if (unformat(input, "secret_share 0x%Lx", &secret_share)) + ; + else if (unformat(input, "polynomial2 0x%Lx", &poly2)) + ; + else if (unformat(input, "lpc 0x%Lx", &lpc)) + ; + else if (unformat(input, "bits-in-random %d", &bits)) + { + if (bits > MAX_BITS) + bits = MAX_BITS; + } + else + break; + } + if (profile_list_name == 0) + { + return clib_error_return(0, "Name cannot be null"); + } + pot_profile_list_init(profile_list_name); + profile = pot_profile_find(profile_id); + + if (profile) + { + pot_profile_create(profile, prime, poly2, lpc, secret_share); + if (validator) + pot_set_validator(profile, secret_key); + pot_profile_set_bit_mask(profile, bits); + } + vec_free(profile_list_name); + return 0; +} + +VLIB_CLI_COMMAND(set_pot_profile_command) = +{ +.path = "set pot profile", +.short_help = "set pot profile name <string> id [0-1] [validator-key 0xu64] \ + prime-number 0xu64 secret_share 0xu64 lpc 0xu64 \ + polynomial2 0xu64 bits-in-random [0-64] ", +.function = set_pot_profile_command_fn, +}; + +static clib_error_t *set_pot_profile_activate_command_fn(vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + pot_main_t *sm = &pot_main; + u8 *profile_list_name = NULL; + u32 id = 0; + clib_error_t *result = NULL; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(input, "name %s", + &profile_list_name)); + else if (unformat(input, "id %d", &id)) + ; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + if (profile_list_name == 0) + { + return clib_error_return(0, "Name cannot be null"); + } + + if (!pot_profile_list_is_enabled(profile_list_name)) { + result = clib_error_return(0, "%s list is not enabled, profile in use %s", + profile_list_name, sm->profile_list_name); + } else if (0 != pot_profile_set_active((u8)id)) { + result = clib_error_return(0, "Profile %d not defined in %s", + id, sm->profile_list_name); + } + vec_free(profile_list_name); + return result; +} + +VLIB_CLI_COMMAND(set_pot_profile_activate_command) = +{ +.path = "set pot profile-active", +.short_help = "set pot profile-active name <string> id [0-1]", +.function = set_pot_profile_activate_command_fn, +}; + +static clib_error_t *show_pot_profile_command_fn(vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + pot_main_t *sm = &pot_main; + pot_profile *p = NULL; + u16 i; + u8 *s = 0; + + if (vec_len(sm->profile_list_name) == 0) + { + s = format(s, "POT Profiles not configured\n"); + vlib_cli_output(vm, "%v", s); + return 0; + } + s = format(s, "Profile list in use : %s\n",sm->profile_list_name); + for (i = 0; i < MAX_POT_PROFILES; i++) + { + p = pot_profile_find(i); + if (p->valid == 0) + continue; + s = format(s, "POT Profile at index: %d\n", i); + s = format(s, " Id : %d\n", p->id); + s = format(s, " Validator : %s (%d)\n", + (p->validator) ? "True" : "False", p->validator); + if (p->validator == 1) + s = format(s, " Secret key : 0x%Lx (%Ld)\n", + p->secret_key, p->secret_key); + s = format(s, " Secret share : 0x%Lx (%Ld)\n", + p->secret_share, p->secret_share); + s = format(s, " Prime number : 0x%Lx (%Ld)\n", + p->prime, p->prime); + s = format(s, "2nd polynomial(eval) : 0x%Lx (%Ld)\n", + p->poly_pre_eval, p->poly_pre_eval); + s = format(s, " LPC : 0x%Lx (%Ld)\n", p->lpc, p->lpc); + + s = format(s, " Bit mask : 0x%Lx (%Ld)\n", + p->bit_mask, p->bit_mask); + } + + p = pot_profile_find(sm->active_profile_id); + + if (p && p->valid && p->in_use) { + s = format(s, "\nProfile index in use: %d\n", sm->active_profile_id); + s = format(s, "Pkts passed : 0x%Lx (%Ld)\n", + p->total_pkts_using_this_profile, + p->total_pkts_using_this_profile); + if (pot_is_decap(p)) + s = format(s, " This is Decap node. \n"); + } else { + s = format(s, "\nProfile index in use: None\n"); + } + vlib_cli_output(vm, "%v", s); + vec_free(s); + + return 0; +} + +VLIB_CLI_COMMAND(show_pot_profile_command) = +{ +.path = "show pot profile", +.short_help = "show pot profile", +.function = show_pot_profile_command_fn, +}; diff --git a/src/plugins/ioam/lib-pot/pot_util.h b/src/plugins/ioam/lib-pot/pot_util.h new file mode 100644 index 00000000000..9df31fae0df --- /dev/null +++ b/src/plugins/ioam/lib-pot/pot_util.h @@ -0,0 +1,195 @@ +/* + * pot_util.h -- Proof Of Transit Utility Header + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef include_vnet_pot_util_h +#define include_vnet_pot_util_h + +#include <vnet/ip/ip6_hop_by_hop.h> +#define debug_ioam debug_ioam_fn +/* Dont change this size 256. This is there across multiple components */ +#define PATH_NAME_SIZE 256 + +/* Ring size. this should be same as the one in ODL. Do not change this + without change in ODL. */ +#define MAX_POT_PROFILES 2 + +/** + * Usage: + * + * On any node that participates in Proof of Transit: + * + * Step 1: Initialize this library by calling pot_init() + * Step 2: Setup a proof of transit profile that contains all the parameters needed to compute cumulative: + * Call these functions: + * pot_profile_find + * pot_profile_create + * pot_profile_set_bit_mask - To setup how large we want the numbers used in the computation and random number <= 64 bits + * Step 2a: For validator do this: + * pot_set_validator + * Step 2b: On initial node enable the profile to be used: + * pot_profile_set_active / pot_profile_get_active will return the profile + * Step 3a: At the initial node to generate Random number that will be read by all other nodes: + * pot_generate_random + * Step 3b: At all nodes including initial and verifier call this to compute cumulative: + * pot_update_cumulative + * Step 4: At the verifier: + * pot_validate + * + */ + +typedef struct pot_profile_ +{ + u8 id : 1; + u8 valid : 1; + u8 in_use : 1; + u64 random; + u8 validator; + u64 secret_key; + u64 secret_share; + u64 prime; + u64 lpc; + u64 poly_pre_eval; + u64 bit_mask; + u64 limit; + double primeinv; + u64 total_pkts_using_this_profile; +} pot_profile; + +typedef struct { + /* Name of the default profile list in use*/ + u8 *profile_list_name; + pot_profile profile_list[MAX_POT_PROFILES]; + /* number of profiles in the list */ + u8 active_profile_id : 1; + + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} pot_main_t; + +extern pot_main_t pot_main; + +/* + * Initialize proof of transit + */ +int pot_util_init(void); +void pot_profile_list_init(u8 * name); + + +/* + * Find a pot profile by ID + */ +pot_profile *pot_profile_find(u8 id); + +static inline u16 pot_profile_get_id(pot_profile * profile) +{ + if (profile) + { + return (profile->id); + } + return (0); +} + +/* setup and clean up profile */ +int pot_profile_create(pot_profile * profile, u64 prime, + u64 poly2, u64 lpc, u64 secret_share); +/* + * Setup profile as a validator + */ +int pot_set_validator(pot_profile * profile, u64 key); + +/* + * Setup max bits to be used for random number generation + */ +#define MAX_BITS 64 +int pot_profile_set_bit_mask(pot_profile * profile, u16 bits); + +/* + * Given a random and cumulative compute the new cumulative for a given profile + */ +u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random); + +/* + * return True if the cumulative matches secret from a profile + */ +u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random); + +/* + * Utility function to get random number per pack + */ +u64 pot_generate_random(pot_profile * profile); + + +extern void clear_pot_profiles(); +extern int pot_profile_list_is_enabled(u8 *name); + +static inline u8 pot_is_decap(pot_profile * p) +{ + return (p->validator == 1); +} + +static inline int pot_profile_set_active (u8 id) +{ + pot_main_t *sm = &pot_main; + pot_profile *profile = NULL; + pot_profile *current_active_prof = NULL; + + current_active_prof = pot_profile_find(sm->active_profile_id); + profile = pot_profile_find(id); + if (profile && profile->valid) { + sm->active_profile_id = id; + current_active_prof->in_use = 0; + profile->in_use = 1; + return(0); + } + return(-1); +} +static inline u8 pot_profile_get_active_id (void) +{ + pot_main_t *sm = &pot_main; + return (sm->active_profile_id); +} + +static inline pot_profile * pot_profile_get_active (void) +{ + pot_main_t *sm = &pot_main; + pot_profile *profile = NULL; + profile = pot_profile_find(sm->active_profile_id); + if (profile && profile->in_use) + return(profile); + return (NULL); +} + +static inline void pot_profile_reset_usage_stats (pot_profile *pow) +{ + if (pow) { + pow->total_pkts_using_this_profile = 0; + } +} + +static inline void pot_profile_incr_usage_stats (pot_profile *pow) +{ + if (pow) { + pow->total_pkts_using_this_profile++; + } +} + + +#endif diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api new file mode 100644 index 00000000000..cb9583256f6 --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace.api @@ -0,0 +1,92 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** \brief iOAM6 Trace - Set the iOAM6 trace profile + @param trace_type - Type of trace requested + @param num_elts - Number of trace elements to be inserted + @param node_id - Trace Node ID + @param trace_tsp- Timestamp resolution + @param app_data - Application specific opaque +*/ +define trace_profile_add { + u32 client_index; + u32 context; + u8 trace_type; + u8 num_elts; + u8 trace_tsp; + u32 node_id; + u32 app_data; +}; + +/** \brief Trace profile add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define trace_profile_add_reply { + u32 context; + i32 retval; +}; + + + +/** \brief Delete trace Profile + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define trace_profile_del { + u32 client_index; + u32 context; +}; + +/** \brief Trace profile add / del response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define trace_profile_del_reply { + u32 context; + i32 retval; +}; + + + +/** \brief Show trace Profile + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define trace_profile_show_config { + u32 client_index; + u32 context; +}; + +/** \brief Show trace config response + @param context - sender context, to match reply w/ request + @param retval - return value for request + @param trace_type - Type of trace requested + @param num_elts - Number of trace elements to be inserted + @param node_id - Trace Node ID + @param trace_tsp- Timestamp resolution + @param app_data - Application specific opaque +*/ +define trace_profile_show_config_reply { + u32 context; + i32 retval; + u8 trace_type; + u8 num_elts; + u8 trace_tsp; + u32 node_id; + u32 app_data; +}; diff --git a/src/plugins/ioam/lib-trace/trace_all_api_h.h b/src/plugins/ioam/lib-trace/trace_all_api_h.h new file mode 100644 index 00000000000..223f95450aa --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <ioam/lib-trace/trace.api.h> diff --git a/src/plugins/ioam/lib-trace/trace_api.c b/src/plugins/ioam/lib-trace/trace_api.c new file mode 100644 index 00000000000..7e0d708e155 --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_api.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * trace_api.c - iOAM Trace related APIs to create + * and maintain profiles + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <ioam/lib-trace/trace_util.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +/* define message IDs */ +#include <ioam/lib-trace/trace_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define TRACE_REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +/* *INDENT-OFF* */ +#define TRACE_REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + q = vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); +/* *INDENT-ON* */ + +/* List of message types that this plugin understands */ + +#define foreach_trace_plugin_api_msg \ +_(TRACE_PROFILE_ADD, trace_profile_add) \ +_(TRACE_PROFILE_DEL, trace_profile_del) \ +_(TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config) + +static void vl_api_trace_profile_add_t_handler + (vl_api_trace_profile_add_t * mp) +{ + trace_main_t *sm = &trace_main; + int rv = 0; + vl_api_trace_profile_add_reply_t *rmp; + trace_profile *profile = NULL; + + profile = trace_profile_find (); + if (profile) + { + rv = + trace_profile_create (profile, mp->trace_type, mp->num_elts, + mp->trace_tsp, ntohl (mp->node_id), + ntohl (mp->app_data)); + if (rv != 0) + goto ERROROUT; + } + else + { + rv = -3; + } +ERROROUT: + TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_ADD_REPLY); +} + + +static void vl_api_trace_profile_del_t_handler + (vl_api_trace_profile_del_t * mp) +{ + trace_main_t *sm = &trace_main; + int rv = 0; + vl_api_trace_profile_del_reply_t *rmp; + + clear_trace_profiles (); + + TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_DEL_REPLY); +} + +static void vl_api_trace_profile_show_config_t_handler + (vl_api_trace_profile_show_config_t * mp) +{ + trace_main_t *sm = &trace_main; + vl_api_trace_profile_show_config_reply_t *rmp; + int rv = 0; + trace_profile *profile = trace_profile_find (); + if (profile->valid) + { + TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY, + rmp->trace_type = profile->trace_type; + rmp->num_elts = profile->num_elts; + rmp->trace_tsp = profile->trace_tsp; + rmp->node_id = htonl (profile->node_id); + rmp->app_data = htonl (profile->app_data); + ); + } + else + { + TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY, + rmp->trace_type = 0; + rmp->num_elts = 0; rmp->trace_tsp = 0; + rmp->node_id = 0; rmp->app_data = 0; + ); + } +} + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + trace_main_t *sm = &trace_main; + clib_error_t *error = 0; + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + return error; +} + +/* Set up the API message handling tables */ +static clib_error_t * +trace_plugin_api_hookup (vlib_main_t * vm) +{ + trace_main_t *sm = &trace_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_trace_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (trace_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_trace; +#undef _ +} + +static clib_error_t * +trace_init (vlib_main_t * vm) +{ + trace_main_t *sm = &trace_main; + clib_error_t *error = 0; + u8 *name; + + bzero (sm, sizeof (trace_main)); + (void) trace_util_init (); + name = format (0, "ioam_trace_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + error = trace_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (sm, &api_main); + + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (trace_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-trace/trace_msg_enum.h b/src/plugins/ioam/lib-trace/trace_msg_enum.h new file mode 100644 index 00000000000..78c35665f3c --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_trace_msg_enum_h +#define included_trace_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <ioam/lib-trace/trace_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_trace_msg_enum_h */ diff --git a/src/plugins/ioam/lib-trace/trace_test.c b/src/plugins/ioam/lib-trace/trace_test.c new file mode 100644 index 00000000000..111dd461b5b --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_test.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * trace_test.c - test harness for trace plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> + +/* Declare message IDs */ +#include <ioam/lib-trace/trace_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-trace/trace_all_api_h.h> +#undef vl_api_version + + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} trace_test_main_t; + +trace_test_main_t trace_test_main; + +#define foreach_standard_reply_retval_handler \ +_(trace_profile_add_reply) \ +_(trace_profile_del_reply) + +#define foreach_custom_reply_handler \ +_(trace_profile_show_config_reply, \ + if(mp->trace_type) \ + { \ + errmsg(" Trace Type : 0x%x (%d)\n",mp->trace_type, mp->trace_type); \ + errmsg(" Trace timestamp precision : %d \n",mp->trace_tsp); \ + errmsg(" Node Id : 0x%x (%d)\n",htonl(mp->node_id), htonl(mp->node_id)); \ + errmsg(" App Data : 0x%x (%d)\n",htonl(mp->app_data), htonl(mp->app_data)); \ + } \ + else errmsg("No valid trace profile configuration found\n");) +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = trace_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +#define _(n,body) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = trace_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + if(retval>=0)do{body;} while(0); \ + else errmsg("Error, retval: %d",retval); \ + } +foreach_custom_reply_handler; +#undef _ +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(TRACE_PROFILE_ADD_REPLY, trace_profile_add_reply) \ +_(TRACE_PROFILE_DEL_REPLY, trace_profile_del_reply) \ +_(TRACE_PROFILE_SHOW_CONFIG_REPLY, trace_profile_show_config_reply) + + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + + +static int +api_trace_profile_add (vat_main_t * vam) +{ + trace_test_main_t *sm = &trace_test_main; + unformat_input_t *input = vam->input; + vl_api_trace_profile_add_t *mp; + u8 trace_type = 0; + u8 num_elts = 0; + int rv = 0; + u32 node_id = 0; + u32 app_data = 0; + u8 trace_tsp = 0; + f64 timeout; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace-type 0x%x", &trace_type)) + ; + else if (unformat (input, "trace-elts %d", &num_elts)) + ; + else if (unformat (input, "trace-tsp %d", &trace_tsp)) + ; + else if (unformat (input, "node-id 0x%x", &node_id)) + ; + else if (unformat (input, "app-data 0x%x", &app_data)) + ; + + else + break; + } + + + M (TRACE_PROFILE_ADD, trace_profile_add); + + mp->trace_type = trace_type; + mp->trace_tsp = trace_tsp; + mp->node_id = htonl (node_id); + mp->app_data = htonl (app_data); + mp->num_elts = num_elts; + + S; + W; + + return (rv); +} + + + +static int +api_trace_profile_del (vat_main_t * vam) +{ + trace_test_main_t *sm = &trace_test_main; + vl_api_trace_profile_del_t *mp; + f64 timeout; + + M (TRACE_PROFILE_DEL, trace_profile_del); + S; + W; + return 0; +} + +static int +api_trace_profile_show_config (vat_main_t * vam) +{ + trace_test_main_t *sm = &trace_test_main; + vl_api_trace_profile_show_config_t *mp; + f64 timeout; + M (TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config); + S; + W; + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(trace_profile_add, ""\ + "trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex>") \ +_(trace_profile_del, "[id <nn>]") \ +_(trace_profile_show_config, "[id <nn>]") + + +void +vat_api_hookup (vat_main_t * vam) +{ + trace_test_main_t *sm = &trace_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + trace_test_main_t *sm = &trace_test_main; + u8 *name; + + sm->vat_main = vam; + + name = format (0, "ioam_trace_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~ 0) + vat_api_hookup (vam); + + vec_free (name); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-trace/trace_util.c b/src/plugins/ioam/lib-trace/trace_util.c new file mode 100644 index 00000000000..5c7f1eefd9c --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_util.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vnet.h> +#include <stdint.h> +#include <time.h> +#include <string.h> +#include <vppinfra/mem.h> +#include "trace_util.h" + +trace_main_t trace_main; + +static int +trace_profile_cleanup (trace_profile * profile) +{ + + memset (profile, 0, sizeof (trace_profile)); + profile->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */ + ip6_trace_profile_cleanup (); /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */ + return 0; + +} + +static int +trace_main_profiles_reset (void) +{ + int rv; + + trace_main_t *sm = &trace_main; + rv = trace_profile_cleanup (&(sm->profile)); + return (rv); +} + +int +trace_util_init (void) +{ + int rv; + + rv = trace_main_profiles_reset (); + return (rv); +} + + +int +trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts, + u32 trace_tsp, u32 node_id, u32 app_data) +{ + + if (!trace_type || !num_elts || !(node_id)) + { + return (-1); + } + if (profile && !profile->valid) + { + //rv = trace_profile_cleanup (profile); + profile->trace_type = trace_type; + profile->num_elts = num_elts; + profile->trace_tsp = trace_tsp; + profile->node_id = node_id; + profile->app_data = app_data; + profile->valid = 1; + + /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */ + ip6_trace_profile_setup (); + return (0); + } + + return (-1); +} + + + +clib_error_t * +clear_trace_profile_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + + trace_main_profiles_reset (); + return 0; +} + +void +clear_trace_profiles (void) +{ + clear_trace_profile_command_fn (0, 0, 0); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND(clear_trace_profile_command) = +{ +.path = "clear ioam-trace profile", +.short_help = "clear ioam-trace profile [<index>|all]", +.function = clear_trace_profile_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_trace_profile_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u8 trace_type = 0; + u8 num_elts = 0; + u32 node_id = 0; + u32 app_data = 0; + u32 trace_tsp = 0; + trace_profile *profile = NULL; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace-type 0x%x", &trace_type)); + else if (unformat (input, "trace-elts %d", &num_elts)); + else if (unformat (input, "trace-tsp %d", &trace_tsp)); + else if (unformat (input, "node-id 0x%x", &node_id)); + else if (unformat (input, "app-data 0x%x", &app_data)); + else + break; + } + profile = trace_profile_find (); + if (profile) + { + trace_profile_create (profile, trace_type, num_elts, trace_tsp, + node_id, app_data); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_trace_profile_command, static) = +{ +.path = "set ioam-trace profile", +.short_help = "set ioam-trace \ + trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> \ + node-id <node id in hex> app-data <app_data in hex>", +.function = set_trace_profile_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_trace_profile_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + trace_profile *p = NULL; + u8 *s = 0; + p = trace_profile_find (); + if (!(p && p->valid)) + { + s = format (s, "\nTrace configuration not valid\n"); + vlib_cli_output (vm, "%v", s); + vec_free (s); + return 0; + } + s = format (s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n"); + s = format (s, " Trace Type : 0x%x (%d)\n", + p->trace_type, p->trace_type); + s = + format (s, " Trace timestamp precision : %d (%s)\n", + p->trace_tsp, + (p->trace_tsp == + TSP_SECONDS) ? "Seconds" : ((p->trace_tsp == + TSP_MILLISECONDS) ? + "Milliseconds" + : (((p->trace_tsp == + TSP_MICROSECONDS) ? + "Microseconds" : + "Nanoseconds")))); + s = format (s, " Num of trace nodes : %d\n", p->num_elts); + s = + format (s, " Node-id : 0x%x (%d)\n", + p->node_id, p->node_id); + s = + format (s, " App Data : 0x%x (%d)\n", + p->app_data, p->app_data); + vlib_cli_output (vm, "%v", s); + vec_free (s); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_trace_profile_command, static) = +{ +.path = "show ioam-trace profile", +.short_help = "show ioam-trace profile", +.function = show_trace_profile_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h new file mode 100644 index 00000000000..556f07ee3f1 --- /dev/null +++ b/src/plugins/ioam/lib-trace/trace_util.h @@ -0,0 +1,247 @@ +/* + * trace_util.h -- Trace Profile Utility header + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef include_vnet_trace_util_h +#define include_vnet_trace_util_h + +#define debug_ioam debug_ioam_fn + + +/** + * Usage: + * + * On any node that participates in iOAM Trace. + * + * Step 1: Initialize this library by calling trace_init() + * Step 2: Setup a trace profile that contains all the parameters needed to compute cumulative: + * Call these functions: + * trace_profile_find + * trace_profile_create + * Step 2a: On initial node enable the profile to be used: + * trace_profile_set_active / trace_profile_get_active will return the profile + * Step 4: TBD + * trace_validate + * + */ + +typedef struct trace_profile_ +{ + u8 valid:1; + u8 trace_type; + u8 num_elts; + /* Configured node-id */ + u32 node_id; + u32 app_data; + u32 trace_tsp; +} trace_profile; + +typedef struct +{ + /* Name of the default profile list in use */ + trace_profile profile; + + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} trace_main_t; + +extern trace_main_t trace_main; + +/* + * Initialize Trace profile + */ +int trace_util_init (void); + + +/* + * Find a trace profile + */ + +always_inline trace_profile * +trace_profile_find (void) +{ + trace_main_t *sm = &trace_main; + + return (&(sm->profile)); +} + + +/* setup and clean up profile */ +int trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts, + u32 trace_tsp, u32 node_id, u32 app_data); + +void clear_trace_profiles (void); + + + +#define BIT_TTL_NODEID (1<<0) +#define BIT_ING_INTERFACE (1<<1) +#define BIT_EGR_INTERFACE (1<<2) +#define BIT_TIMESTAMP (1<<3) +#define BIT_APPDATA (1<<4) +#define TRACE_TYPE_MASK 0x1F /* Mask of all above bits */ + +/* + 0x00011111 iOAM-trace-type is 0x00011111 then the format of node + data is: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hop_Lim | node_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ingress_if_id | egress_if_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + timestamp + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | app_data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +*/ +#define TRACE_TYPE_IF_TS_APP 0x1f +typedef struct +{ + u32 ttl_node_id; + u16 ingress_if; + u16 egress_if; + u32 timestamp; + u32 app_data; +} ioam_trace_if_ts_app_t; + +/* + 0x00000111 iOAM-trace-type is 0x00000111 then the format is: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hop_Lim | node_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ingress_if_id | egress_if_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +*/ + +#define TRACE_TYPE_IF 0x03 +typedef struct +{ + u32 ttl_node_id; + u16 ingress_if; + u16 egress_if; +} ioam_trace_if_t; + +/* + 0x00001001 iOAM-trace-type is 0x00001001 then the format is: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hop_Lim | node_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + timestamp + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +*/ + +#define TRACE_TYPE_TS 0x09 +typedef struct +{ + u32 ttl_node_id; + u32 timestamp; +} ioam_trace_ts_t; + +/* + 0x00010001 iOAM-trace-type is 0x00010001 then the format is: + + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hop_Lim | node_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | app_data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +*/ + + +#define TRACE_TYPE_APP 0x11 +typedef struct +{ + u32 ttl_node_id; + u32 app_data; +} ioam_trace_app_t; + +/* + + 0x00011001 iOAM-trace-type is 0x00011001 then the format is: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hop_Lim | node_id | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + timestamp + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | app_data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ + +#define TRACE_TYPE_TS_APP 0x19 +typedef struct +{ + u32 ttl_node_id; + u32 timestamp; + u32 app_data; +} ioam_trace_ts_app_t; + + + +static inline u8 +fetch_trace_data_size (u8 trace_type) +{ + u8 trace_data_size = 0; + + if (trace_type == TRACE_TYPE_IF_TS_APP) + trace_data_size = sizeof (ioam_trace_if_ts_app_t); + else if (trace_type == TRACE_TYPE_IF) + trace_data_size = sizeof (ioam_trace_if_t); + else if (trace_type == TRACE_TYPE_TS) + trace_data_size = sizeof (ioam_trace_ts_t); + else if (trace_type == TRACE_TYPE_APP) + trace_data_size = sizeof (ioam_trace_app_t); + else if (trace_type == TRACE_TYPE_TS_APP) + trace_data_size = sizeof (ioam_trace_ts_app_t); + + return trace_data_size; +} + +int ioam_trace_get_sizeof_handler (u32 * result); +int ip6_trace_profile_setup (void); +int ip6_trace_profile_cleanup (void); + +#define TSP_SECONDS 0 +#define TSP_MILLISECONDS 1 +#define TSP_MICROSECONDS 2 +#define TSP_NANOSECONDS 3 + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c new file mode 100644 index 00000000000..fd3086571eb --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> + +/* Statistics (not really errors) */ +#define foreach_vxlan_gpe_decap_ioam_v4_error \ +_(DECAPSULATED, "good packets decapsulated") + +static char *vxlan_gpe_decap_ioam_v4_error_strings[] = { +#define _(sym,string) string, + foreach_vxlan_gpe_decap_ioam_v4_error +#undef _ +}; + +typedef enum +{ +#define _(sym,str) VXLAN_GPE_DECAP_IOAM_V4_ERROR_##sym, + foreach_vxlan_gpe_decap_ioam_v4_error +#undef _ + VXLAN_GPE_DECAP_IOAM_V4_N_ERROR, +} vxlan_gpe_decap_ioam_v4_error_t; + + +always_inline void +vxlan_gpe_decap_ioam_v4_two_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vxlan_gpe_main_t * ngm, + vlib_buffer_t * b0, vlib_buffer_t * b1, + u32 * next0, u32 * next1) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + next0[0] = next1[0] = hm->decap_v4_next_override; + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, &next0[0], + VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, &next0[1], + VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); +} + + + +static uword +vxlan_gpe_decap_ioam (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ipv6) +{ + u32 n_left_from, next_index, *from, *to_next; + vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + + next0 = next1 = hm->decap_v4_next_override; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + + vlib_buffer_advance (b0, + -(word) (sizeof (udp_header_t) + + sizeof (ip4_header_t) + + sizeof (vxlan_gpe_header_t))); + vlib_buffer_advance (b1, + -(word) (sizeof (udp_header_t) + + sizeof (ip4_header_t) + + sizeof (vxlan_gpe_header_t))); + + vxlan_gpe_decap_ioam_v4_two_inline (vm, node, ngm, b0, b1, + &next0, &next1); + + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof (*tr)); + } + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = hm->decap_v4_next_override; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + + vlib_buffer_advance (b0, + -(word) (sizeof (udp_header_t) + + sizeof (ip4_header_t) + + sizeof (vxlan_gpe_header_t))); + + next0 = hm->decap_v4_next_override; + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, + &next0, + VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof (*tr)); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + + +static uword +vxlan_gpe_decap_ioam_v4 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return vxlan_gpe_decap_ioam (vm, node, from_frame, 0); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = { + .function = vxlan_gpe_decap_ioam_v4, + .name = "vxlan-gpe-decap-ioam-v4", + .vector_size = sizeof (u32), + .format_trace = format_vxlan_gpe_ioam_v4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vxlan_gpe_decap_ioam_v4_error_strings), + .error_strings = vxlan_gpe_decap_ioam_v4_error_strings, + + .n_next_nodes = VXLAN_GPE_DECAP_IOAM_V4_N_NEXT, + + .next_nodes = { + [VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP] = "vxlan-gpe-pop-ioam-v4", + [VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c new file mode 100644 index 00000000000..4b18bfea533 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> + +/* Statistics (not really errors) */ +#define foreach_vxlan_gpe_encap_ioam_v4_error \ +_(ENCAPSULATED, "good packets encapsulated") + +static char *vxlan_gpe_encap_ioam_v4_error_strings[] = { +#define _(sym,string) string, + foreach_vxlan_gpe_encap_ioam_v4_error +#undef _ +}; + +typedef enum +{ +#define _(sym,str) VXLAN_GPE_ENCAP_IOAM_V4_ERROR_##sym, + foreach_vxlan_gpe_encap_ioam_v4_error +#undef _ + VXLAN_GPE_ENCAP_IOAM_V4_N_ERROR, +} vxlan_gpe_encap_ioam_v4_error_t; + +typedef enum +{ + VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP, + VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP, + VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT +} vxlan_gpe_encap_ioam_v4_next_t; + + +always_inline void +vxlan_gpe_encap_ioam_v4_two_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vxlan_gpe_main_t * ngm, + vlib_buffer_t * b0, vlib_buffer_t * b1, + u32 * next0, u32 * next1) +{ + next0[0] = next1[0] = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP; + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, next0, + VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, next1, + VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); +} + + +static uword +vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + + next0 = next1 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vxlan_gpe_encap_ioam_v4_two_inline (vm, node, ngm, b0, b1, + &next0, &next1); + + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, + &next0, + VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP, + 0 /* use_adj */ ); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0, + sizeof (*tr)); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = { + .function = vxlan_gpe_encap_ioam_v4, + .name = "vxlan-gpe-encap-ioam-v4", + .vector_size = sizeof (u32), + .format_trace = format_vxlan_gpe_ioam_v4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vxlan_gpe_encap_ioam_v4_error_strings), + .error_strings = vxlan_gpe_encap_ioam_v4_error_strings, + + .n_next_nodes = VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT, + + .next_nodes = { + [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP] = "ip4-lookup", + [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c new file mode 100644 index 00000000000..55c33b144a1 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> + +/* Statistics (not really errors) */ +#define foreach_vxlan_gpe_pop_ioam_v4_error \ +_(POPPED, "good packets popped") + +static char *vxlan_gpe_pop_ioam_v4_error_strings[] = { +#define _(sym,string) string, + foreach_vxlan_gpe_pop_ioam_v4_error +#undef _ +}; + +typedef enum +{ +#define _(sym,str) VXLAN_GPE_POP_IOAM_V4_ERROR_##sym, + foreach_vxlan_gpe_pop_ioam_v4_error +#undef _ + VXLAN_GPE_POP_IOAM_V4_N_ERROR, +} vxlan_gpe_pop_ioam_v4_error_t; + +typedef struct +{ + ioam_trace_t fmt_trace; +} vxlan_gpe_pop_ioam_v4_trace_t; + + +u8 * +format_vxlan_gpe_pop_ioam_v4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vxlan_gpe_pop_ioam_v4_trace_t *t1 + = va_arg (*args, vxlan_gpe_pop_ioam_v4_trace_t *); + ioam_trace_t *t = &(t1->fmt_trace); + vxlan_gpe_ioam_option_t *fmt_trace0; + vxlan_gpe_ioam_option_t *opt0, *limit0; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + u8 type0; + + fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data; + + s = format (s, "VXLAN_GPE_IOAM_POP: next_index %d len %d traced %d", + t->next_index, fmt_trace0->length, t->trace_len); + + opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1); + limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) fmt_trace0) + t->trace_len; + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad, just stop */ + opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1; + break; + + default: + if (hm->trace[type0]) + { + s = (*hm->trace[type0]) (s, opt0); + } + else + { + s = + format (s, "\n unrecognized option %d length %d", type0, + opt0->length); + } + opt0 = + (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (vxlan_gpe_ioam_option_t)); + break; + } + } + + return s; +} + +always_inline void +vxlan_gpe_ioam_pop_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_buffer_t * b0) +{ + ip4_header_t *ip0; + udp_header_t *udp_hdr0; + vxlan_gpe_header_t *gpe_hdr0; + vxlan_gpe_ioam_hdr_t *gpe_ioam0; + + ip0 = vlib_buffer_get_current (b0); + + udp_hdr0 = (udp_header_t *) (ip0 + 1); + gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1); + gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1); + + /* Pop the iOAM data */ + vlib_buffer_advance (b0, + (word) (sizeof (udp_header_t) + + sizeof (ip4_header_t) + + sizeof (vxlan_gpe_header_t) + + gpe_ioam0->length)); + + return; +} + + + +always_inline void +vxlan_gpe_pop_ioam_v4_one_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vxlan_gpe_main_t * ngm, + vlib_buffer_t * b0, u32 * next0) +{ + CLIB_UNUSED (ip4_header_t * ip0); + CLIB_UNUSED (udp_header_t * udp_hdr0); + CLIB_UNUSED (vxlan_gpe_header_t * gpe_hdr0); + CLIB_UNUSED (vxlan_gpe_ioam_hdr_t * gpe_ioam0); + CLIB_UNUSED (vxlan_gpe_ioam_option_t * opt0); + CLIB_UNUSED (vxlan_gpe_ioam_option_t * limit0); + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + + /* Pop the iOAM header */ + ip0 = vlib_buffer_get_current (b0); + udp_hdr0 = (udp_header_t *) (ip0 + 1); + gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1); + gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1); + opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1); + limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length); + + /* + * Basic validity checks + */ + if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length)) + { + next0[0] = VXLAN_GPE_INPUT_NEXT_DROP; + goto trace00; + } + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + u8 type0; + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->pop_options[type0]) + { + if ((*hm->pop_options[type0]) (ip0, opt0) < 0) + { + next0[0] = VXLAN_GPE_INPUT_NEXT_DROP; + goto trace00; + } + } + break; + } + opt0 = + (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (vxlan_gpe_ioam_hdr_t)); + } + + + next0[0] = + (gpe_ioam0->protocol < VXLAN_GPE_PROTOCOL_MAX) ? + ngm-> + decap_next_node_list[gpe_ioam0->protocol] : VXLAN_GPE_INPUT_NEXT_DROP; + +trace00: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gpe_pop_ioam_v4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + u32 trace_len = gpe_ioam0->length; + t->fmt_trace.next_index = next0[0]; + /* Capture the h-b-h option verbatim */ + trace_len = + trace_len < + ARRAY_LEN (t->fmt_trace. + option_data) ? trace_len : ARRAY_LEN (t->fmt_trace. + option_data); + t->fmt_trace.trace_len = trace_len; + clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len); + } + + /* Remove the iOAM header inside the VxLAN-GPE header */ + vxlan_gpe_ioam_pop_v4 (vm, node, b0); + return; +} + +always_inline void +vxlan_gpe_pop_ioam_v4_two_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vxlan_gpe_main_t * ngm, + vlib_buffer_t * b0, vlib_buffer_t * b1, + u32 * next0, u32 * next1) +{ + + vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, next0); + vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b1, next1); +} + + + +static uword +vxlan_gpe_pop_ioam (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, u8 is_ipv6) +{ + u32 n_left_from, next_index, *from, *to_next; + vxlan_gpe_main_t *ngm = &vxlan_gpe_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + vxlan_gpe_pop_ioam_v4_two_inline (vm, node, ngm, b0, b1, &next0, + &next1); + + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, &next0); + + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + + +static uword +vxlan_gpe_pop_ioam_v4 (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return vxlan_gpe_pop_ioam (vm, node, from_frame, 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = { + .function = vxlan_gpe_pop_ioam_v4, + .name = "vxlan-gpe-pop-ioam-v4", + .vector_size = sizeof (u32), + .format_trace = format_vxlan_gpe_pop_ioam_v4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vxlan_gpe_pop_ioam_v4_error_strings), + .error_strings = vxlan_gpe_pop_ioam_v4_error_strings, + + .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT, + + .next_nodes = { +#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n, + foreach_vxlan_gpe_input_next +#undef _ + }, +}; +/* *INDENT-ON* */ + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c new file mode 100644 index 00000000000..b42c357c79b --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c @@ -0,0 +1,188 @@ + /* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/udp.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry.h> + +/* Statistics (not really errors) */ +#define foreach_vxlan_gpe_transit_ioam_error \ +_(ENCAPSULATED, "good packets encapsulated") + +static char *vxlan_gpe_transit_ioam_error_strings[] = { +#define _(sym,string) string, + foreach_vxlan_gpe_transit_ioam_error +#undef _ +}; + +typedef enum +{ +#define _(sym,str) VXLAN_GPE_TRANSIT_IOAM_ERROR_##sym, + foreach_vxlan_gpe_transit_ioam_error +#undef _ + VXLAN_GPE_TRANSIT_IOAM_N_ERROR, +} vxlan_gpe_transit_ioam_error_t; + +typedef enum +{ + VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT, + VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP, + VXLAN_GPE_TRANSIT_IOAM_N_NEXT +} vxlan_gpe_transit_ioam_next_t; + + +/* *INDENT-OFF* */ +VNET_FEATURE_INIT (vxlan_gpe_transit_ioam, static) = +{ + .arc_name = "ip4-output", + .node_name = "vxlan-gpe-transit-ioam", + .runs_before = VNET_FEATURES ("interface-output"), +}; +/* *INDENT-ON* */ + +static uword +vxlan_gpe_transit_ioam (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0 = VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + ip4_header_t *ip0; + u32 iph_offset = 0; + + b0 = vlib_get_buffer (vm, bi0); + iph_offset = vnet_buffer (b0)->ip.save_rewrite_length; + ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + + iph_offset); + + /* just forward non ipv4 packets */ + if (PREDICT_FALSE + ((ip0->ip_version_and_header_length & 0xF0) == 0x40)) + { + /* ipv4 packets */ + udp_header_t *udp_hdr0 = (udp_header_t *) (ip0 + 1); + if (PREDICT_FALSE + ((ip0->protocol == IP_PROTOCOL_UDP) && + (clib_net_to_host_u16 (udp_hdr0->dst_port) == + UDP_DST_PORT_vxlan_gpe))) + { + + /* Check the iOAM header */ + vxlan_gpe_header_t *gpe_hdr0 = + (vxlan_gpe_header_t *) (udp_hdr0 + 1); + + if (PREDICT_FALSE + (gpe_hdr0->protocol == VXLAN_GPE_PROTOCOL_IOAM)) + { + uword *t = NULL; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + fib_prefix_t key4; + memset (&key4, 0, sizeof (key4)); + key4.fp_proto = FIB_PROTOCOL_IP4; + key4.fp_addr.ip4.as_u32 = ip0->dst_address.as_u32; + t = hash_get_mem (hm->dst_by_ip4, &key4); + if (t) + { + + + vlib_buffer_advance (b0, + (word) (sizeof + (ethernet_header_t))); + vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, + b0, + &next0, + VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP, + 1 + /* use_adj */ + ); + vlib_buffer_advance (b0, + -(word) (sizeof + (ethernet_header_t))); + } + } + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = { + .function = vxlan_gpe_transit_ioam, + .name = "vxlan-gpe-transit-ioam", + .vector_size = sizeof (u32), + .format_trace = format_vxlan_gpe_ioam_v4_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(vxlan_gpe_transit_ioam_error_strings), + .error_strings = vxlan_gpe_transit_ioam_error_strings, + + .n_next_nodes = VXLAN_GPE_TRANSIT_IOAM_N_NEXT, + + .next_nodes = { + [VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT] = "interface-output", + [VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP] = "error-drop", + }, + +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api new file mode 100644 index 00000000000..056529a4e8a --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api @@ -0,0 +1,181 @@ +/* Hey Emacs use -*- mode: C -*- */ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VxLAN-GPE + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param id - profile id + @param trace_ppc - Trace PPC (none/encap/decap) + @param pow_enable - Proof of Work enabled or not flag + @param trace_enable - iOAM Trace enabled or not flag + +*/ +define vxlan_gpe_ioam_enable { + u32 client_index; + u32 context; + u16 id; + u8 trace_ppc; + u8 pow_enable; + u8 trace_enable; +}; + +/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define vxlan_gpe_ioam_enable_reply { + u32 context; + i32 retval; +}; + + +/** \brief iOAM for VxLAN-GPE disable + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param id - profile id +*/ +define vxlan_gpe_ioam_disable +{ + u32 client_index; + u32 context; + u16 id; +}; + +/** \brief vxlan_gpe_ioam disable response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define vxlan_gpe_ioam_disable_reply +{ + u32 context; + i32 retval; +}; + +/** \brief Enable iOAM for a VNI (VXLAN-GPE) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vni - VXLAN-GPE VNI + @param local - IPv4/6 Address of the local VTEP + @param remote - IPv4/6 Address of the remote VTEP + +*/ +define vxlan_gpe_ioam_vni_enable { + u32 client_index; + u32 context; + u32 vni; + u8 local[16]; + u8 remote[16]; + u8 is_ipv6; +}; + +/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return value for request + +*/ +define vxlan_gpe_ioam_vni_enable_reply { + u32 client_index; + u32 context; + i32 retval; +}; + +/** \brief Disable iOAM for a VNI (VXLAN-GPE) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vni - VXLAN-GPE VNI + @param local - IPv4/6 Address of the local VTEP + @param remote - IPv4/6 Address of the remote VTEP + +*/ +define vxlan_gpe_ioam_vni_disable { + u32 client_index; + u32 context; + u32 vni; + u8 local[16]; + u8 remote[16]; + u8 is_ipv6; +}; + +/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return value for request + +*/ +define vxlan_gpe_ioam_vni_disable_reply { + u32 client_index; + u32 context; + i32 retval; +}; + + +/** \brief Enable iOAM for a VXLAN-GPE transit + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param dst_addr - IPv4/6 Address of the local VTEP + @param outer_fib_index- FIB index + +*/ +define vxlan_gpe_ioam_transit_enable { + u32 client_index; + u32 context; + u32 outer_fib_index; + u8 dst_addr[16]; + u8 is_ipv6; +}; + +/** \brief Reply to enable iOAM for VXLAN-GPE transit + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return value for request + +*/ +define vxlan_gpe_ioam_transit_enable_reply { + u32 client_index; + u32 context; + i32 retval; +}; + +/** \brief Disable iOAM for VXLAN-GPE transit + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param dst_addr - IPv4/6 Address of the local VTEP + @param outer_fib_index- FIB index + +*/ +define vxlan_gpe_ioam_transit_disable { + u32 client_index; + u32 context; + u32 outer_fib_index; + u8 dst_addr[16]; + u8 is_ipv6; +}; + +/** \brief Reply to disable iOAM for VXLAN-GPE transit + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return value for request + +*/ +define vxlan_gpe_ioam_transit_disable_reply { + u32 client_index; + u32 context; + i32 retval; +}; + + diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h new file mode 100644 index 00000000000..06fa0d2cd34 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h> diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c new file mode 100644 index 00000000000..68752365f82 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c @@ -0,0 +1,378 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * vxlan_gpe_api.c - iOAM VxLAN-GPE related APIs to create + * and maintain profiles + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +/* define message IDs */ +#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_printfun + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_api_version + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define VXLAN_GPE_REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +/* *INDENT-OFF* */ +#define VXLAN_GPE_REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q; \ + rv = vl_msg_api_pd_handler (mp, rv); \ + q = vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); +/* *INDENT-ON* */ + +/* List of message types that this plugin understands */ + +#define foreach_vxlan_gpe_plugin_api_msg \ +_(VXLAN_GPE_IOAM_ENABLE, vxlan_gpe_ioam_enable) \ +_(VXLAN_GPE_IOAM_DISABLE, vxlan_gpe_ioam_disable) \ +_(VXLAN_GPE_IOAM_VNI_ENABLE, vxlan_gpe_ioam_vni_enable) \ +_(VXLAN_GPE_IOAM_VNI_DISABLE, vxlan_gpe_ioam_vni_disable) \ +_(VXLAN_GPE_IOAM_TRANSIT_ENABLE, vxlan_gpe_ioam_transit_enable) \ +_(VXLAN_GPE_IOAM_TRANSIT_DISABLE, vxlan_gpe_ioam_transit_disable) \ + + +static void vl_api_vxlan_gpe_ioam_enable_t_handler + (vl_api_vxlan_gpe_ioam_enable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_enable_reply_t *rmp; + clib_error_t *error; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + + /* Ignoring the profile id as currently a single profile + * is supported */ + error = + vxlan_gpe_ioam_enable (mp->trace_enable, mp->pow_enable, mp->trace_ppc); + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_ENABLE_REPLY); +} + +static void vl_api_vxlan_gpe_ioam_disable_t_handler + (vl_api_vxlan_gpe_ioam_disable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_disable_reply_t *rmp; + clib_error_t *error; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + + /* Ignoring the profile id as currently a single profile + * is supported */ + error = vxlan_gpe_ioam_disable (0, 0, 0); + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_DISABLE_REPLY); +} + +static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler + (vl_api_vxlan_gpe_ioam_vni_enable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp; + clib_error_t *error; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan4_gpe_tunnel_key_t key4; + uword *p = NULL; + vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_tunnel_t *t = 0; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + u32 vni; + + + if (!mp->is_ipv6) + { + clib_memcpy (&key4.local, &mp->local, sizeof (key4.local)); + clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote)); + vni = clib_net_to_host_u32 (mp->vni); + key4.vni = clib_host_to_net_u32 (vni << 8); + key4.pad = 0; + + p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4); + } + else + { + return; + } + + if (!p) + return; + + t = pool_elt_at_index (gm->tunnels, p[0]); + + error = vxlan_gpe_ioam_set (t, hm->has_trace_option, + hm->has_pot_option, + hm->has_ppc_option, mp->is_ipv6); + + + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_ENABLE_REPLY); +} + + +static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler + (vl_api_vxlan_gpe_ioam_vni_disable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp; + clib_error_t *error; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + vxlan4_gpe_tunnel_key_t key4; + uword *p = NULL; + vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_tunnel_t *t = 0; + u32 vni; + + + if (!mp->is_ipv6) + { + clib_memcpy (&key4.local, &mp->local, sizeof (key4.local)); + clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote)); + vni = clib_net_to_host_u32 (mp->vni); + key4.vni = clib_host_to_net_u32 (vni << 8); + key4.pad = 0; + + p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4); + } + else + { + return; + } + + if (!p) + return; + + t = pool_elt_at_index (gm->tunnels, p[0]); + + error = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0); + + + if (error) + { + clib_error_report (error); + rv = clib_error_get_code (error); + } + + + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_DISABLE_REPLY); +} + +static void vl_api_vxlan_gpe_ioam_transit_enable_t_handler + (vl_api_vxlan_gpe_ioam_transit_enable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_transit_enable_reply_t *rmp; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + ip46_address_t dst_addr; + + memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4)); + if (!mp->is_ipv6) + { + clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4)); + } + rv = vxlan_gpe_enable_disable_ioam_for_dest (sm->vlib_main, + dst_addr, + ntohl (mp->outer_fib_index), + mp->is_ipv6 ? 0 : 1, + 1 /* is_add */ ); + + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY); +} + +static void vl_api_vxlan_gpe_ioam_transit_disable_t_handler + (vl_api_vxlan_gpe_ioam_transit_disable_t * mp) +{ + int rv = 0; + vl_api_vxlan_gpe_ioam_transit_disable_reply_t *rmp; + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + ip46_address_t dst_addr; + + memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4)); + if (!mp->is_ipv6) + { + clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4)); + } + + rv = vxlan_gpe_ioam_disable_for_dest (sm->vlib_main, + dst_addr, + ntohl (mp->outer_fib_index), + mp->is_ipv6 ? 0 : 1); + VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY); +} + + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + clib_error_t *error = 0; + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + sm->unix_time_0 = (u32) time (0); /* Store starting time */ + sm->vlib_time_0 = vlib_time_now (vm); + return error; +} + +/* Set up the API message handling tables */ +static clib_error_t * +vxlan_gpe_plugin_api_hookup (vlib_main_t * vm) +{ + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vxlan_gpe_plugin_api_msg; +#undef _ + + return 0; +} + +static clib_error_t * +vxlan_gpe_init (vlib_main_t * vm) +{ + vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main; + clib_error_t *error = 0; + u8 *name; + u32 encap_node_index = vxlan_gpe_encap_ioam_v4_node.index; + u32 decap_node_index = vxlan_gpe_decap_ioam_v4_node.index; + vlib_node_t *vxlan_gpe_encap_node = NULL; + vlib_node_t *vxlan_gpe_decap_node = NULL; + uword next_node = 0; + + name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + error = vxlan_gpe_plugin_api_hookup (vm); + + /* Hook the ioam-encap node to vxlan-gpe-encap */ + vxlan_gpe_encap_node = vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-encap"); + sm->encap_v4_next_node = + vlib_node_add_next (vm, vxlan_gpe_encap_node->index, encap_node_index); + + vxlan_gpe_decap_node = + vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input"); + next_node = + vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index); + vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node); + + vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces)); + sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword)); + + sm->dst_by_ip6 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword)); + + vxlan_gpe_ioam_interface_init (); + vec_free (name); + + return error; +} + +VLIB_INIT_FUNCTION (vxlan_gpe_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c new file mode 100644 index 00000000000..6c04d9af210 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <vnet/ip/format.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry.h> + +vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main; + +int +vxlan_gpe_ioam_set_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option, + int has_pot_option, int has_ppc_option, + u8 ipv6_set) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + u32 size; + vxlan_gpe_ioam_hdr_t *vxlan_gpe_ioam_hdr; + u8 *current; + u8 trace_data_size = 0; + u8 pot_data_size = 0; + + if (has_trace_option == 0 && has_pot_option == 0) + return -1; + + /* Work out how much space we need */ + size = sizeof (vxlan_gpe_ioam_hdr_t); + + if (has_trace_option + && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0) + { + size += sizeof (vxlan_gpe_ioam_option_t); + size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE]; + } + if (has_pot_option + && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) + { + size += sizeof (vxlan_gpe_ioam_option_t); + size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; + } + + t->rewrite_size = size; + + if (!ipv6_set) + { + vxlan4_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM, + hm->encap_v4_next_node); + vxlan_gpe_ioam_hdr = + (vxlan_gpe_ioam_hdr_t *) (t->rewrite + + sizeof (ip4_vxlan_gpe_header_t)); + } + else + { + vxlan6_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM, + VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP); + vxlan_gpe_ioam_hdr = + (vxlan_gpe_ioam_hdr_t *) (t->rewrite + + sizeof (ip6_vxlan_gpe_header_t)); + } + + + vxlan_gpe_ioam_hdr->type = VXLAN_GPE_PROTOCOL_IOAM; + /* Length of the header in octets */ + vxlan_gpe_ioam_hdr->length = size; + vxlan_gpe_ioam_hdr->protocol = t->protocol; + current = (u8 *) vxlan_gpe_ioam_hdr + sizeof (vxlan_gpe_ioam_hdr_t); + + if (has_trace_option + && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0) + { + if (0 != hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] (current, + &trace_data_size)) + return -1; + current += trace_data_size; + } + if (has_pot_option + && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) + { + pot_data_size = + hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; + if (0 == + hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] + (current, &pot_data_size)) + current += pot_data_size; + } + + return 0; +} + +int +vxlan_gpe_ioam_clear_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option, + int has_pot_option, int has_ppc_option, + u8 ipv6_set) +{ + + t->rewrite_size = 0; + + if (!ipv6_set) + { + vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP); + } + else + { + vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP); + } + + + return 0; +} + +clib_error_t * +vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t, + int has_trace_option, int has_pot_option, + int has_ppc_option, u8 ipv6_set) +{ + int rv; + rv = vxlan_gpe_ioam_clear_rewrite (t, 0, 0, 0, 0); + + if (rv == 0) + { + return (0); + } + else + { + return clib_error_return_code (0, rv, 0, + "vxlan_gpe_ioam_clear_rewrite returned %d", + rv); + } + +} + + +clib_error_t * +vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t, + int has_trace_option, int has_pot_option, + int has_ppc_option, u8 ipv6_set) +{ + int rv; + rv = vxlan_gpe_ioam_set_rewrite (t, has_trace_option, + has_pot_option, has_ppc_option, ipv6_set); + + if (rv == 0) + { + return (0); + } + else + { + return clib_error_return_code (0, rv, 0, + "vxlan_gpe_ioam_set_rewrite returned %d", + rv); + } + +} + +static void +vxlan_gpe_set_clear_output_feature_on_intf (vlib_main_t * vm, + u32 sw_if_index0, u8 is_add) +{ + + + + vnet_feature_enable_disable ("ip4-output", "vxlan-gpe-transit-ioam", + sw_if_index0, is_add, + 0 /* void *feature_config */ , + 0 /* u32 n_feature_config_bytes */ ); + return; +} + +void +vxlan_gpe_clear_output_feature_on_all_intfs (vlib_main_t * vm) +{ + vnet_sw_interface_t *si = 0; + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + + pool_foreach (si, im->sw_interfaces, ( + { + vxlan_gpe_set_clear_output_feature_on_intf + (vm, si->sw_if_index, 0); + })); + return; +} + + +extern fib_forward_chain_type_t +fib_entry_get_default_chain_type (const fib_entry_t * fib_entry); + +int +vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm, + ip46_address_t dst_addr, + u32 outer_fib_index, + u8 is_ipv4, u8 is_add) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + u32 fib_index0 = 0; + u32 sw_if_index0 = ~0; + + fib_node_index_t fei = ~0; + fib_entry_t *fib_entry; + u32 adj_index0; + ip_adjacency_t *adj0; + fib_prefix_t fib_prefix; + //fib_forward_chain_type_t fct; + load_balance_t *lb_m, *lb_b; + const dpo_id_t *dpo0, *dpo1; + u32 i, j; + //vnet_hw_interface_t *hw; + + if (is_ipv4) + { + memset (&fib_prefix, 0, sizeof (fib_prefix_t)); + fib_prefix.fp_len = 32; + fib_prefix.fp_proto = FIB_PROTOCOL_IP4; + fib_prefix.fp_addr = dst_addr; + } + else + { + return 0; + } + + fei = fib_table_lookup (fib_index0, &fib_prefix); + fib_entry = fib_entry_get (fei); + + //fct = fib_entry_get_default_chain_type (fib_entry); + + if (!dpo_id_is_valid (&fib_entry->fe_lb /*[fct] */ )) + { + return (-1); + } + + lb_m = load_balance_get (fib_entry->fe_lb /*[fct] */ .dpoi_index); + + for (i = 0; i < lb_m->lb_n_buckets; i++) + { + dpo0 = load_balance_get_bucket_i (lb_m, i); + + if (dpo0->dpoi_type == DPO_LOAD_BALANCE) + { + lb_b = load_balance_get (dpo0->dpoi_index); + + for (j = 0; j < lb_b->lb_n_buckets; j++) + { + dpo1 = load_balance_get_bucket_i (lb_b, j); + + if (dpo1->dpoi_type == DPO_ADJACENCY) + { + adj_index0 = dpo1->dpoi_index; + + if (ADJ_INDEX_INVALID == adj_index0) + { + continue; + } + + adj0 = + ip_get_adjacency (&(ip4_main.lookup_main), adj_index0); + sw_if_index0 = adj0->rewrite_header.sw_if_index; + + if (~0 == sw_if_index0) + { + continue; + } + + + if (is_add) + { + vnet_feature_enable_disable ("ip4-output", + "vxlan-gpe-transit-ioam", + sw_if_index0, is_add, 0 + /* void *feature_config */ + , 0 /* u32 n_feature_config_bytes */ + ); + + vec_validate_init_empty (hm->bool_ref_by_sw_if_index, + sw_if_index0, ~0); + hm->bool_ref_by_sw_if_index[sw_if_index0] = 1; + } + else + { + hm->bool_ref_by_sw_if_index[sw_if_index0] = ~0; + } + } + } + } + } + + if (is_ipv4) + { + + uword *t = NULL; + vxlan_gpe_ioam_dest_tunnels_t *t1; + fib_prefix_t key4, *key4_copy; + hash_pair_t *hp; + memset (&key4, 0, sizeof (key4)); + key4.fp_proto = FIB_PROTOCOL_IP4; + key4.fp_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32; + t = hash_get_mem (hm->dst_by_ip4, &key4); + if (is_add) + { + if (t) + { + return 0; + } + pool_get_aligned (hm->dst_tunnels, t1, CLIB_CACHE_LINE_BYTES); + memset (t1, 0, sizeof (*t1)); + t1->fp_proto = FIB_PROTOCOL_IP4; + t1->dst_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32; + key4_copy = clib_mem_alloc (sizeof (*key4_copy)); + clib_memcpy (key4_copy, &key4, sizeof (*key4_copy)); + hash_set_mem (hm->dst_by_ip4, key4_copy, t1 - hm->dst_tunnels); + /* + * Attach to the FIB entry for the VxLAN-GPE destination + * and become its child. The dest route will invoke a callback + * when the fib entry changes, it can be used to + * re-program the output feature on the egress interface. + */ + + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = {.ip4 = t1->dst_addr.ip4,} + }; + + t1->fib_entry_index = + fib_table_entry_special_add (outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + t1->sibling_index = + fib_entry_child_add (t1->fib_entry_index, + hm->fib_entry_type, t1 - hm->dst_tunnels); + t1->outer_fib_index = outer_fib_index; + + } + else + { + if (!t) + { + return 0; + } + t1 = pool_elt_at_index (hm->dst_tunnels, t[0]); + hp = hash_get_pair (hm->dst_by_ip4, &key4); + key4_copy = (void *) (hp->key); + hash_unset_mem (hm->dst_by_ip4, &key4); + clib_mem_free (key4_copy); + pool_put (hm->dst_tunnels, t1); + } + } + else + { + // TBD for IPv6 + } + + return 0; +} + +void +vxlan_gpe_refresh_output_feature_on_all_dest (void) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + vxlan_gpe_ioam_dest_tunnels_t *t; + u32 i; + if (pool_elts (hm->dst_tunnels) == 0) + return; + vxlan_gpe_clear_output_feature_on_all_intfs (hm->vlib_main); + i = vec_len (hm->bool_ref_by_sw_if_index); + vec_free (hm->bool_ref_by_sw_if_index); + vec_validate_init_empty (hm->bool_ref_by_sw_if_index, i, ~0); + pool_foreach (t, hm->dst_tunnels, ( + { + vxlan_gpe_enable_disable_ioam_for_dest + (hm->vlib_main, + t->dst_addr, + t->outer_fib_index, + (t->fp_proto == FIB_PROTOCOL_IP4), 1 + /* is_add */ + ); + } + )); + return; +} + +void +vxlan_gpe_clear_output_feature_on_select_intfs (void) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + u32 sw_if_index0 = 0; + for (sw_if_index0 = 0; + sw_if_index0 < vec_len (hm->bool_ref_by_sw_if_index); sw_if_index0++) + { + if (hm->bool_ref_by_sw_if_index[sw_if_index0] == 0xFF) + { + vxlan_gpe_set_clear_output_feature_on_intf + (hm->vlib_main, sw_if_index0, 0); + } + } + + return; +} + +static clib_error_t * +vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t * + vm, + unformat_input_t + * input, vlib_cli_command_t * cmd) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + ip46_address_t local, remote; + u8 local_set = 0; + u8 remote_set = 0; + u8 ipv4_set = 0; + u8 ipv6_set = 0; + u32 vni; + u8 vni_set = 0; + u8 disable = 0; + clib_error_t *rv = 0; + vxlan4_gpe_tunnel_key_t key4; + vxlan6_gpe_tunnel_key_t key6; + uword *p; + vxlan_gpe_main_t *gm = &vxlan_gpe_main; + vxlan_gpe_tunnel_t *t = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "local %U", unformat_ip4_address, &local.ip4)) + { + local_set = 1; + ipv4_set = 1; + } + else + if (unformat (input, "remote %U", unformat_ip4_address, &remote.ip4)) + { + remote_set = 1; + ipv4_set = 1; + } + else if (unformat (input, "local %U", unformat_ip6_address, &local.ip6)) + { + local_set = 1; + ipv6_set = 1; + } + else + if (unformat (input, "remote %U", unformat_ip6_address, &remote.ip6)) + { + remote_set = 1; + ipv6_set = 1; + } + else if (unformat (input, "vni %d", &vni)) + vni_set = 1; + else if (unformat (input, "disable")) + disable = 1; + else + break; + } + + if (local_set == 0) + return clib_error_return (0, "tunnel local address not specified"); + if (remote_set == 0) + return clib_error_return (0, "tunnel remote address not specified"); + if (ipv4_set && ipv6_set) + return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + if ((ipv4_set + && memcmp (&local.ip4, &remote.ip4, + sizeof (local.ip4)) == 0) || (ipv6_set + && + memcmp + (&local.ip6, + &remote.ip6, + sizeof (local.ip6)) == 0)) + return clib_error_return (0, "src and dst addresses are identical"); + if (vni_set == 0) + return clib_error_return (0, "vni not specified"); + if (!ipv6_set) + { + key4.local = local.ip4.as_u32; + key4.remote = remote.ip4.as_u32; + key4.vni = clib_host_to_net_u32 (vni << 8); + key4.pad = 0; + p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4); + } + else + { + key6.local.as_u64[0] = local.ip6.as_u64[0]; + key6.local.as_u64[1] = local.ip6.as_u64[1]; + key6.remote.as_u64[0] = remote.ip6.as_u64[0]; + key6.remote.as_u64[1] = remote.ip6.as_u64[1]; + key6.vni = clib_host_to_net_u32 (vni << 8); + p = hash_get_mem (gm->vxlan6_gpe_tunnel_by_key, &key6); + } + + if (!p) + return clib_error_return (0, "VxLAN Tunnel not found"); + t = pool_elt_at_index (gm->tunnels, p[0]); + if (!disable) + { + rv = + vxlan_gpe_ioam_set (t, hm->has_trace_option, + hm->has_pot_option, hm->has_ppc_option, ipv6_set); + } + else + { + rv = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0); + } + return rv; +} + + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_rewrite_cmd, static) = { + .path = "set vxlan-gpe-ioam", + .short_help = "set vxlan-gpe-ioam vxlan <src-ip> <dst_ip> <vnid> [disable]", + .function = vxlan_gpe_set_ioam_rewrite_command_fn, +}; +/* *INDENT-ON* */ + + + +clib_error_t * +vxlan_gpe_ioam_enable (int has_trace_option, + int has_pot_option, int has_ppc_option) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + hm->has_trace_option = has_trace_option; + hm->has_pot_option = has_pot_option; + hm->has_ppc_option = has_ppc_option; + if (hm->has_trace_option) + { + vxlan_gpe_trace_profile_setup (); + } + + return 0; +} + +clib_error_t * +vxlan_gpe_ioam_disable (int + has_trace_option, + int has_pot_option, int has_ppc_option) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + hm->has_trace_option = has_trace_option; + hm->has_pot_option = has_pot_option; + hm->has_ppc_option = has_ppc_option; + if (!hm->has_trace_option) + { + vxlan_gpe_trace_profile_cleanup (); + } + + return 0; +} + +void +vxlan_gpe_set_next_override (uword next) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + hm->decap_v4_next_override = next; + return; +} + +static clib_error_t * +vxlan_gpe_set_ioam_flags_command_fn (vlib_main_t * vm, + unformat_input_t + * input, vlib_cli_command_t * cmd) +{ + int has_trace_option = 0; + int has_pot_option = 0; + int has_ppc_option = 0; + clib_error_t *rv = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace")) + has_trace_option = 1; + else if (unformat (input, "pot")) + has_pot_option = 1; + else if (unformat (input, "ppc encap")) + has_ppc_option = PPC_ENCAP; + else if (unformat (input, "ppc decap")) + has_ppc_option = PPC_DECAP; + else if (unformat (input, "ppc none")) + has_ppc_option = PPC_NONE; + else + break; + } + + + rv = + vxlan_gpe_ioam_enable (has_trace_option, has_pot_option, has_ppc_option); + return rv; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_flags_cmd, static) = +{ +.path = "set vxlan-gpe-ioam rewrite", +.short_help = "set vxlan-gpe-ioam [trace] [pot] [ppc <encap|decap>]", +.function = vxlan_gpe_set_ioam_flags_command_fn,}; +/* *INDENT-ON* */ + + +int vxlan_gpe_ioam_disable_for_dest + (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index, + u8 ipv4_set) +{ + vxlan_gpe_ioam_dest_tunnels_t *t; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main, + dst_addr, outer_fib_index, ipv4_set, + 0); + if (pool_elts (hm->dst_tunnels) == 0) + { + vxlan_gpe_clear_output_feature_on_select_intfs (); + return 0; + } + + pool_foreach (t, hm->dst_tunnels, ( + { + vxlan_gpe_enable_disable_ioam_for_dest + (hm->vlib_main, + t->dst_addr, + t->outer_fib_index, + (t->fp_proto == + FIB_PROTOCOL_IP4), 1 /* is_add */ ); + } + )); + vxlan_gpe_clear_output_feature_on_select_intfs (); + return (0); + +} + +static clib_error_t *vxlan_gpe_set_ioam_transit_rewrite_command_fn + (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + ip46_address_t dst_addr; + u8 dst_addr_set = 0; + u8 ipv4_set = 0; + u8 ipv6_set = 0; + u8 disable = 0; + clib_error_t *rv = 0; + u32 outer_fib_index = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "dst-ip %U", unformat_ip4_address, &dst_addr.ip4)) + { + dst_addr_set = 1; + ipv4_set = 1; + } + else + if (unformat + (input, "dst-ip %U", unformat_ip6_address, &dst_addr.ip6)) + { + dst_addr_set = 1; + ipv6_set = 1; + } + else if (unformat (input, "outer-fib-index %d", &outer_fib_index)) + { + } + + else if (unformat (input, "disable")) + disable = 1; + else + break; + } + + if (dst_addr_set == 0) + return clib_error_return (0, "tunnel destination address not specified"); + if (ipv4_set && ipv6_set) + return clib_error_return (0, "both IPv4 and IPv6 addresses specified"); + if (!disable) + { + vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main, + dst_addr, outer_fib_index, + ipv4_set, 1); + } + else + { + vxlan_gpe_ioam_disable_for_dest + (vm, dst_addr, outer_fib_index, ipv4_set); + } + return rv; +} + + /* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_transit_rewrite_cmd, static) = { + .path = "set vxlan-gpe-ioam-transit", + .short_help = "set vxlan-gpe-ioam-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]", + .function = vxlan_gpe_set_ioam_transit_rewrite_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn + (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) +{ + return (vxlan_gpe_ioam_disable (0, 0, 0)); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vxlan_gpe_clear_ioam_flags_cmd, static) = +{ +.path = "clear vxlan-gpe-ioam rewrite", +.short_help = "clear vxlan-gpe-ioam rewrite", +.function = clear_vxlan_gpe_ioam_rewrite_command_fn, +}; +/* *INDENT-ON* */ + + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +vxlan_gpe_ioam_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + vxlan_gpe_refresh_output_feature_on_all_dest (); + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +vxlan_gpe_ioam_fib_node_get (fib_node_index_t index) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + return (&hm->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +vxlan_gpe_ioam_last_lock_gone (fib_node_t * node) +{ + ASSERT (0); +} + + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t vxlan_gpe_ioam_vft = { + .fnv_get = vxlan_gpe_ioam_fib_node_get, + .fnv_last_lock = vxlan_gpe_ioam_last_lock_gone, + .fnv_back_walk = vxlan_gpe_ioam_back_walk, +}; + +void +vxlan_gpe_ioam_interface_init (void) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + hm->fib_entry_type = fib_node_register_new_type (&vxlan_gpe_ioam_vft); + return; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h new file mode 100644 index 00000000000..0711b87abbe --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vxlan_gpe_ioam_h__ +#define __included_vxlan_gpe_ioam_h__ + +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> +#include <vnet/ip/ip.h> + + +typedef struct vxlan_gpe_sw_interface_ +{ + u32 sw_if_index; +} vxlan_gpe_ioam_sw_interface_t; + +typedef struct vxlan_gpe_dest_tunnels_ +{ + ip46_address_t dst_addr; + u32 fp_proto; + u32 sibling_index; + fib_node_index_t fib_entry_index; + u32 outer_fib_index; +} vxlan_gpe_ioam_dest_tunnels_t; + +typedef struct vxlan_gpe_ioam_main_ +{ + /** + * Linkage into the FIB object graph + */ + fib_node_t node; + + /* time scale transform. Joy. */ + u32 unix_time_0; + f64 vlib_time_0; + + + /* Trace option */ + u8 has_trace_option; + + /* Pot option */ + u8 has_pot_option; + +#define PPC_NONE 0 +#define PPC_ENCAP 1 +#define PPC_DECAP 2 + u8 has_ppc_option; + +#define TSP_SECONDS 0 +#define TSP_MILLISECONDS 1 +#define TSP_MICROSECONDS 2 +#define TSP_NANOSECONDS 3 + + /* Array of function pointers to ADD and POP VxLAN-GPE iOAM option handling routines */ + u8 options_size[256]; + int (*add_options[256]) (u8 * rewrite_string, u8 * rewrite_size); + int (*pop_options[256]) (ip4_header_t * ip, vxlan_gpe_ioam_option_t * opt); + + /* Array of function pointers to iOAM option handling routines */ + int (*options[256]) (vlib_buffer_t * b, vxlan_gpe_ioam_option_t * opt, + u8 is_ipv4, u8 use_adj); + u8 *(*trace[256]) (u8 * s, vxlan_gpe_ioam_option_t * opt); + + /* API message ID base */ + u16 msg_id_base; + + /* Override to export for iOAM */ + uword decap_v4_next_override; + uword decap_v6_next_override; + + /* sequence of node graph for encap */ + uword encap_v4_next_node; + uword encap_v6_next_node; + + /* Software interfaces. */ + vxlan_gpe_ioam_sw_interface_t *sw_interfaces; + + /* hash ip4/ip6 -> list of destinations for doing transit iOAM operation */ + vxlan_gpe_ioam_dest_tunnels_t *dst_tunnels; + uword *dst_by_ip4; + uword *dst_by_ip6; + + /** per sw_if_index, to maintain bitmap */ + u8 *bool_ref_by_sw_if_index; + fib_node_type_t fib_entry_type; + + /** State convenience vlib_main_t */ + vlib_main_t *vlib_main; + /** State convenience vnet_main_t */ + vnet_main_t *vnet_main; + + +} vxlan_gpe_ioam_main_t; +extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main; + +/* + * Primary h-b-h handler trace support + */ +typedef struct +{ + u32 next_index; + u32 trace_len; + u8 option_data[256]; +} ioam_trace_t; + + +extern vlib_node_registration_t vxlan_gpe_encap_ioam_v4_node; +extern vlib_node_registration_t vxlan_gpe_decap_ioam_v4_node; +extern vlib_node_registration_t vxlan_gpe_transit_ioam_v4_node; + +clib_error_t *vxlan_gpe_ioam_enable (int has_trace_option, int has_pot_option, + int has_ppc_option); + +clib_error_t *vxlan_gpe_ioam_disable (int has_trace_option, + int has_pot_option, int has_ppc_option); + +clib_error_t *vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t, + int has_trace_option, + int has_pot_option, + int has_ppc_option, u8 ipv6_set); +clib_error_t *vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t, + int has_trace_option, int has_pot_option, + int has_ppc_option, u8 ipv6_set); + +int vxlan_gpe_ioam_add_register_option (u8 option, + u8 size, + int rewrite_options (u8 * + rewrite_string, + u8 * + rewrite_size)); + +int vxlan_gpe_add_unregister_option (u8 option); + +int vxlan_gpe_ioam_register_option (u8 option, + int options (vlib_buffer_t * b, + vxlan_gpe_ioam_option_t * + opt, u8 is_ipv4, u8 use_adj), + u8 * trace (u8 * s, + vxlan_gpe_ioam_option_t * + opt)); +int vxlan_gpe_ioam_unregister_option (u8 option); + +int vxlan_gpe_trace_profile_setup (void); + +int vxlan_gpe_trace_profile_cleanup (void); +extern void vxlan_gpe_ioam_interface_init (void); +int +vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm, + ip46_address_t dst_addr, + u32 outer_fib_index, + u8 is_ipv4, u8 is_add); +int vxlan_gpe_ioam_disable_for_dest + (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index, + u8 ipv4_set); + +typedef enum +{ + VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP, + VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP, + VXLAN_GPE_DECAP_IOAM_V4_N_NEXT +} vxlan_gpe_decap_ioam_v4_next_t; + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h new file mode 100644 index 00000000000..a7ef859ec58 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vxlan_gpe_ioam_packet_h__ +#define __included_vxlan_gpe_ioam_packet_h__ + +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <vnet/ip/ip.h> + + + +#define VXLAN_GPE_OPTION_TYPE_IOAM_TRACE 59 +#define VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60 + +/** + * @brief VXLAN GPE Extension (iOAM) Header definition + */ +typedef struct +{ + u8 type; + u8 length; + /** Reserved */ + u8 reserved; + /** see vxlan_gpe_protocol_t */ + u8 protocol; +} vxlan_gpe_ioam_hdr_t; + +/* + * @brief VxLAN GPE iOAM Option definition + */ +typedef struct +{ + /* Option Type */ + u8 type; + /* Length in octets of the option data field */ + u8 length; +} vxlan_gpe_ioam_option_t; + + +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c new file mode 100644 index 00000000000..e37b1642d96 --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <ioam/lib-trace/trace_util.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> + +/* Timestamp precision multipliers for seconds, milliseconds, microseconds + * and nanoseconds respectively. + */ +static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 }; + +typedef union +{ + u64 as_u64; + u32 as_u32[2]; +} time_u64_t; + + +/* *INDENT-OFF* */ +typedef CLIB_PACKED(struct { + vxlan_gpe_ioam_option_t hdr; + u8 ioam_trace_type; + u8 data_list_elts_left; + u32 elts[0]; /* Variable type. So keep it generic */ +}) vxlan_gpe_ioam_trace_option_t; +/* *INDENT-ON* */ + + +#define foreach_vxlan_gpe_ioam_trace_stats \ + _(SUCCESS, "Pkts updated with TRACE records") \ + _(FAILED, "Errors in TRACE due to lack of TRACE records") + +static char *vxlan_gpe_ioam_trace_stats_strings[] = { +#define _(sym,string) string, + foreach_vxlan_gpe_ioam_trace_stats +#undef _ +}; + +typedef enum +{ +#define _(sym,str) VXLAN_GPE_IOAM_TRACE_##sym, + foreach_vxlan_gpe_ioam_trace_stats +#undef _ + VXLAN_GPE_IOAM_TRACE_N_STATS, +} vxlan_gpe_ioam_trace_stats_t; + + +typedef struct +{ + /* stats */ + u64 counters[ARRAY_LEN (vxlan_gpe_ioam_trace_stats_strings)]; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} vxlan_gpe_ioam_trace_main_t; + +vxlan_gpe_ioam_trace_main_t vxlan_gpe_ioam_trace_main; + +int +vxlan_gpe_ioam_add_register_option (u8 option, + u8 size, + int rewrite_options (u8 * rewrite_string, + u8 * rewrite_size)) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->add_options)); + + /* Already registered */ + if (hm->add_options[option]) + return (-1); + + hm->add_options[option] = rewrite_options; + hm->options_size[option] = size; + + return (0); +} + +int +vxlan_gpe_add_unregister_option (u8 option) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->add_options)); + + /* Not registered */ + if (!hm->add_options[option]) + return (-1); + + hm->add_options[option] = NULL; + hm->options_size[option] = 0; + return (0); +} + + +int +vxlan_gpe_ioam_register_option (u8 option, + int options (vlib_buffer_t * b, + vxlan_gpe_ioam_option_t * opt, + u8 is_ipv4, u8 use_adj), + u8 * trace (u8 * s, + vxlan_gpe_ioam_option_t * opt)) +{ + vxlan_gpe_ioam_main_t *im = &vxlan_gpe_ioam_main; + + ASSERT (option < ARRAY_LEN (im->options)); + + /* Already registered */ + if (im->options[option]) + return (-1); + + im->options[option] = options; + im->trace[option] = trace; + + return (0); +} + +int +vxlan_gpe_ioam_unregister_option (u8 option) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->options)); + + /* Not registered */ + if (!hm->options[option]) + return (-1); + + hm->options[option] = NULL; + hm->trace[option] = NULL; + + return (0); +} + + +always_inline void +vxlan_gpe_ioam_trace_stats_increment_counter (u32 counter_index, + u64 increment) +{ + vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main; + + hm->counters[counter_index] += increment; +} + + +static u8 * +format_ioam_data_list_element (u8 * s, va_list * args) +{ + u32 *elt = va_arg (*args, u32 *); + u8 *trace_type_p = va_arg (*args, u8 *); + u8 trace_type = *trace_type_p; + + + if (trace_type & BIT_TTL_NODEID) + { + u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ttl 0x%x node id 0x%x ", + ttl_node_id_host_byte_order >> 24, + ttl_node_id_host_byte_order & 0x00FFFFFF); + + elt++; + } + + if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE) + { + u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ingress 0x%x egress 0x%x ", + ingress_host_byte_order >> 16, + ingress_host_byte_order & 0xFFFF); + elt++; + } + + if (trace_type & BIT_TIMESTAMP) + { + u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "ts 0x%x \n", ts_in_host_byte_order); + elt++; + } + + if (trace_type & BIT_APPDATA) + { + u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt); + s = format (s, "app 0x%x ", appdata_in_host_byte_order); + elt++; + } + + return s; +} + + + +int +vxlan_gpe_ioam_trace_rewrite_handler (u8 * rewrite_string, u8 * rewrite_size) +{ + vxlan_gpe_ioam_trace_option_t *trace_option = NULL; + u8 trace_data_size = 0; + u8 trace_option_elts = 0; + trace_profile *profile = NULL; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + return (-1); + } + + if (PREDICT_FALSE (!rewrite_string)) + return -1; + + trace_option_elts = profile->num_elts; + trace_data_size = fetch_trace_data_size (profile->trace_type); + trace_option = (vxlan_gpe_ioam_trace_option_t *) rewrite_string; + trace_option->hdr.type = VXLAN_GPE_OPTION_TYPE_IOAM_TRACE; + trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ + + trace_option_elts * trace_data_size; + trace_option->ioam_trace_type = profile->trace_type & TRACE_TYPE_MASK; + trace_option->data_list_elts_left = trace_option_elts; + *rewrite_size = + sizeof (vxlan_gpe_ioam_trace_option_t) + + (trace_option_elts * trace_data_size); + + return 0; +} + + +int +vxlan_gpe_ioam_trace_data_list_handler (vlib_buffer_t * b, + vxlan_gpe_ioam_option_t * opt, + u8 is_ipv4, u8 use_adj) +{ + u8 elt_index = 0; + vxlan_gpe_ioam_trace_option_t *trace = + (vxlan_gpe_ioam_trace_option_t *) opt; + time_u64_t time_u64; + u32 *elt; + int rv = 0; + trace_profile *profile = NULL; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + return (-1); + } + + + time_u64.as_u64 = 0; + + if (PREDICT_TRUE (trace->data_list_elts_left)) + { + trace->data_list_elts_left--; + /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes + * to skip to this node's location. + */ + elt_index = + trace->data_list_elts_left * + fetch_trace_data_size (trace->ioam_trace_type) / 4; + elt = &trace->elts[elt_index]; + if (is_ipv4) + { + if (trace->ioam_trace_type & BIT_TTL_NODEID) + { + ip4_header_t *ip0 = vlib_buffer_get_current (b); + /* The transit case is the only case where the TTL decrement happens + * before iOAM processing. For now, use the use_adj flag as an overload. + * We can probably use a separate flag instead of overloading the use_adj flag. + */ + *elt = clib_host_to_net_u32 (((ip0->ttl - 1 + use_adj) << 24) | + profile->node_id); + elt++; + } + + if (trace->ioam_trace_type & BIT_ING_INTERFACE) + { + u16 tx_if = 0; + u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX]; + ip4_main_t *im4 = &ip4_main; + ip_lookup_main_t *lm = &im4->lookup_main; + if (use_adj) + { + ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); + tx_if = adj->rewrite_header.sw_if_index & 0xFFFF; + } + + *elt = + (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 | + tx_if; + *elt = clib_host_to_net_u32 (*elt); + elt++; + } + } + else + { + if (trace->ioam_trace_type & BIT_TTL_NODEID) + { + ip6_header_t *ip0 = vlib_buffer_get_current (b); + *elt = clib_host_to_net_u32 ((ip0->hop_limit << 24) | + profile->node_id); + elt++; + } + if (trace->ioam_trace_type & BIT_ING_INTERFACE) + { + u16 tx_if = 0; + u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX]; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm = &im6->lookup_main; + if (use_adj) + { + ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); + tx_if = adj->rewrite_header.sw_if_index & 0xFFFF; + } + + *elt = + (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 | + tx_if; + *elt = clib_host_to_net_u32 (*elt); + elt++; + } + } + + if (trace->ioam_trace_type & BIT_TIMESTAMP) + { + /* Send least significant 32 bits */ + f64 time_f64 = + (f64) (((f64) hm->unix_time_0) + + (vlib_time_now (hm->vlib_main) - hm->vlib_time_0)); + + time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp]; + *elt = clib_host_to_net_u32 (time_u64.as_u32[0]); + elt++; + } + + if (trace->ioam_trace_type & BIT_APPDATA) + { + /* $$$ set elt0->app_data */ + *elt = clib_host_to_net_u32 (profile->app_data); + elt++; + } + vxlan_gpe_ioam_trace_stats_increment_counter + (VXLAN_GPE_IOAM_TRACE_SUCCESS, 1); + } + else + { + vxlan_gpe_ioam_trace_stats_increment_counter + (VXLAN_GPE_IOAM_TRACE_FAILED, 1); + } + return (rv); +} + +u8 * +vxlan_gpe_ioam_trace_data_list_trace_handler (u8 * s, + vxlan_gpe_ioam_option_t * opt) +{ + vxlan_gpe_ioam_trace_option_t *trace; + u8 trace_data_size_in_words = 0; + u32 *elt; + int elt_index = 0; + + trace = (vxlan_gpe_ioam_trace_option_t *) opt; + s = + format (s, " Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type, + trace->data_list_elts_left); + trace_data_size_in_words = + fetch_trace_data_size (trace->ioam_trace_type) / 4; + elt = &trace->elts[0]; + while ((u8 *) elt < ((u8 *) (&trace->elts[0]) + trace->hdr.length - 2 + /* -2 accounts for ioam_trace_type,elts_left */ )) + { + s = format (s, " [%d] %U\n", elt_index, + format_ioam_data_list_element, + elt, &trace->ioam_trace_type); + elt_index++; + elt += trace_data_size_in_words; + } + return (s); +} + + +static clib_error_t * +vxlan_gpe_show_ioam_trace_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main; + u8 *s = 0; + int i = 0; + + for (i = 0; i < VXLAN_GPE_IOAM_TRACE_N_STATS; i++) + { + s = format (s, " %s - %lu\n", vxlan_gpe_ioam_trace_stats_strings[i], + hm->counters[i]); + } + + vlib_cli_output (vm, "%v", s); + vec_free (s); + return 0; +} + + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vxlan_gpe_show_ioam_trace_cmd, static) = { + .path = "show ioam vxlan-gpe trace", + .short_help = "iOAM trace statistics", + .function = vxlan_gpe_show_ioam_trace_cmd_fn, +}; +/* *INDENT-ON* */ + + +static clib_error_t * +vxlan_gpe_ioam_trace_init (vlib_main_t * vm) +{ + vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main; + clib_error_t *error; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return (error); + + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + if ((error = vlib_call_init_function (vm, vxlan_gpe_init))) + return (error); + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main (); + memset (hm->counters, 0, sizeof (hm->counters)); + + if (vxlan_gpe_ioam_register_option + (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE, + vxlan_gpe_ioam_trace_data_list_handler, + vxlan_gpe_ioam_trace_data_list_trace_handler) < 0) + return (clib_error_create + ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE failed")); + + + if (vxlan_gpe_ioam_add_register_option + (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE, + sizeof (vxlan_gpe_ioam_trace_option_t), + vxlan_gpe_ioam_trace_rewrite_handler) < 0) + return (clib_error_create + ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE for rewrite failed")); + + + return (0); +} + +VLIB_INIT_FUNCTION (vxlan_gpe_ioam_trace_init); + +int +vxlan_gpe_trace_profile_cleanup (void) +{ + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = 0; + + return 0; + +} + +static int +vxlan_gpe_ioam_trace_get_sizeof_handler (u32 * result) +{ + u16 size = 0; + u8 trace_data_size = 0; + trace_profile *profile = NULL; + + *result = 0; + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + return (-1); + } + + trace_data_size = fetch_trace_data_size (profile->trace_type); + if (PREDICT_FALSE (trace_data_size == 0)) + return VNET_API_ERROR_INVALID_VALUE; + + if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254)) + return VNET_API_ERROR_INVALID_VALUE; + + size += + sizeof (vxlan_gpe_ioam_trace_option_t) + + profile->num_elts * trace_data_size; + *result = size; + + return 0; +} + + +int +vxlan_gpe_trace_profile_setup (void) +{ + u32 trace_size = 0; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + trace_profile *profile = NULL; + + + profile = trace_profile_find (); + + if (PREDICT_FALSE (!profile)) + { + return (-1); + } + + + if (vxlan_gpe_ioam_trace_get_sizeof_handler (&trace_size) < 0) + return (-1); + + hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = trace_size; + + return (0); +} + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h new file mode 100644 index 00000000000..c0ad8d9d03a --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_vxlan_gpe_ioam_util_h__ +#define __included_vxlan_gpe_ioam_util_h__ + +#include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/vxlan-gpe/vxlan_gpe_packet.h> +#include <vnet/ip/ip.h> + + +typedef struct +{ + u32 tunnel_index; + ioam_trace_t fmt_trace; +} vxlan_gpe_ioam_v4_trace_t; + + +static u8 * +format_vxlan_gpe_ioam_v4_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + vxlan_gpe_ioam_v4_trace_t *t1 = va_arg (*args, vxlan_gpe_ioam_v4_trace_t *); + ioam_trace_t *t = &(t1->fmt_trace); + vxlan_gpe_ioam_option_t *fmt_trace0; + vxlan_gpe_ioam_option_t *opt0, *limit0; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + u8 type0; + + fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data; + + s = format (s, "VXLAN-GPE-IOAM: next_index %d len %d traced %d", + t->next_index, fmt_trace0->length, t->trace_len); + + opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1); + limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) fmt_trace0) + t->trace_len; + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad, just stop */ + opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1; + break; + + default: + if (hm->trace[type0]) + { + s = (*hm->trace[type0]) (s, opt0); + } + else + { + s = + format (s, "\n unrecognized option %d length %d", type0, + opt0->length); + } + opt0 = + (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (vxlan_gpe_ioam_option_t)); + break; + } + } + + s = format (s, "VXLAN-GPE-IOAM: tunnel %d", t1->tunnel_index); + return s; +} + + +always_inline void +vxlan_gpe_encap_decap_ioam_v4_one_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * b0, + u32 * next0, u32 drop_node_val, + u8 use_adj) +{ + ip4_header_t *ip0; + udp_header_t *udp_hdr0; + vxlan_gpe_header_t *gpe_hdr0; + vxlan_gpe_ioam_hdr_t *gpe_ioam0; + vxlan_gpe_ioam_option_t *opt0; + vxlan_gpe_ioam_option_t *limit0; + vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main; + + /* Populate the iOAM header */ + ip0 = vlib_buffer_get_current (b0); + udp_hdr0 = (udp_header_t *) (ip0 + 1); + gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1); + gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1); + opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1); + limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length); + + /* + * Basic validity checks + */ + if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length)) + { + *next0 = drop_node_val; + return; + } + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + u8 type0; + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->options[type0]) + { + if ((*hm->options[type0]) (b0, opt0, 1 /* is_ipv4 */ , + use_adj) < 0) + { + *next0 = drop_node_val; + return; + } + } + break; + } + opt0 = + (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (vxlan_gpe_ioam_hdr_t)); + } + + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + vxlan_gpe_ioam_v4_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + u32 trace_len = gpe_ioam0->length; + t->fmt_trace.next_index = *next0; + /* Capture the ioam option verbatim */ + trace_len = + trace_len < + ARRAY_LEN (t->fmt_trace. + option_data) ? trace_len : ARRAY_LEN (t->fmt_trace. + option_data); + t->fmt_trace.trace_len = trace_len; + clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len); + } + return; +} + + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h new file mode 100644 index 00000000000..cc0a10a3fec --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vxlan_gpe_msg_enum_h +#define included_vxlan_gpe_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_vxlan_gpe_msg_enum_h */ diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c new file mode 100644 index 00000000000..47253eb67ab --- /dev/null +++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c @@ -0,0 +1,600 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + *------------------------------------------------------------------ + * vxlan_gpe_test.c - test harness for vxlan_gpe plugin + *------------------------------------------------------------------ + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> + +/* Declare message IDs */ +#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h> +#undef vl_api_version +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h> +#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h> + +typedef struct +{ + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} vxlan_gpe_test_main_t; + +vxlan_gpe_test_main_t vxlan_gpe_test_main; + +#define foreach_standard_reply_retval_handler \ +_(vxlan_gpe_ioam_enable_reply) \ +_(vxlan_gpe_ioam_disable_reply) \ +_(vxlan_gpe_ioam_vni_enable_reply) \ +_(vxlan_gpe_ioam_vni_disable_reply) \ +_(vxlan_gpe_ioam_transit_enable_reply) \ +_(vxlan_gpe_ioam_transit_disable_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = vxlan_gpe_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(VXLAN_GPE_IOAM_ENABLE_REPLY, vxlan_gpe_ioam_enable_reply) \ +_(VXLAN_GPE_IOAM_DISABLE_REPLY, vxlan_gpe_ioam_disable_reply) \ +_(VXLAN_GPE_IOAM_VNI_ENABLE_REPLY, vxlan_gpe_ioam_vni_enable_reply) \ +_(VXLAN_GPE_IOAM_VNI_DISABLE_REPLY, vxlan_gpe_ioam_vni_disable_reply) \ +_(VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY, vxlan_gpe_ioam_transit_enable_reply) \ +_(VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY, vxlan_gpe_ioam_transit_disable_reply) \ + + +/* M: construct, but don't yet send a message */ + +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + + +static int +api_vxlan_gpe_ioam_enable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + + unformat_input_t *input = vam->input; + vl_api_vxlan_gpe_ioam_enable_t *mp; + f64 timeout; + u32 id = 0; + int has_trace_option = 0; + int has_pow_option = 0; + int has_ppc_option = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace")) + has_trace_option = 1; + else if (unformat (input, "pow")) + has_pow_option = 1; + else if (unformat (input, "ppc encap")) + has_ppc_option = PPC_ENCAP; + else if (unformat (input, "ppc decap")) + has_ppc_option = PPC_DECAP; + else if (unformat (input, "ppc none")) + has_ppc_option = PPC_NONE; + else + break; + } + M (VXLAN_GPE_IOAM_ENABLE, vxlan_gpe_ioam_enable); + mp->id = htons (id); + mp->trace_ppc = has_ppc_option; + mp->pow_enable = has_pow_option; + mp->trace_enable = has_trace_option; + + + S; + W; + + return (0); +} + + +static int +api_vxlan_gpe_ioam_disable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + vl_api_vxlan_gpe_ioam_disable_t *mp; + f64 timeout; + + M (VXLAN_GPE_IOAM_DISABLE, vxlan_gpe_ioam_disable); + S; + W; + return 0; +} + +static int +api_vxlan_gpe_ioam_vni_enable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + + unformat_input_t *line_input = vam->input; + vl_api_vxlan_gpe_ioam_vni_enable_t *mp; + ip4_address_t local4, remote4; + ip6_address_t local6, remote6; + u8 ipv4_set = 0, ipv6_set = 0; + u8 local_set = 0; + u8 remote_set = 0; + u32 vni; + u8 vni_set = 0; + f64 timeout; + + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U", unformat_ip4_address, &local4)) + { + local_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "remote %U", + unformat_ip4_address, &remote4)) + { + remote_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "local %U", + unformat_ip6_address, &local6)) + { + local_set = 1; + ipv6_set = 1; + } + else if (unformat (line_input, "remote %U", + unformat_ip6_address, &remote6)) + { + remote_set = 1; + ipv6_set = 1; + } + + else if (unformat (line_input, "vni %d", &vni)) + vni_set = 1; + else + { + errmsg ("parse error '%U'\n", format_unformat_error, line_input); + return -99; + } + } + + if (local_set == 0) + { + errmsg ("tunnel local address not specified\n"); + return -99; + } + if (remote_set == 0) + { + errmsg ("tunnel remote address not specified\n"); + return -99; + } + if (ipv4_set && ipv6_set) + { + errmsg ("both IPv4 and IPv6 addresses specified"); + return -99; + } + + if (vni_set == 0) + { + errmsg ("vni not specified\n"); + return -99; + } + + M (VXLAN_GPE_IOAM_VNI_ENABLE, vxlan_gpe_ioam_vni_enable); + + + if (ipv6_set) + { + clib_memcpy (&mp->local, &local6, sizeof (local6)); + clib_memcpy (&mp->remote, &remote6, sizeof (remote6)); + } + else + { + clib_memcpy (&mp->local, &local4, sizeof (local4)); + clib_memcpy (&mp->remote, &remote4, sizeof (remote4)); + } + + mp->vni = ntohl (vni); + mp->is_ipv6 = ipv6_set; + + S; + W; + + return (0); +} + +static int +api_vxlan_gpe_ioam_vni_disable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + + unformat_input_t *line_input = vam->input; + vl_api_vxlan_gpe_ioam_vni_disable_t *mp; + ip4_address_t local4, remote4; + ip6_address_t local6, remote6; + u8 ipv4_set = 0, ipv6_set = 0; + u8 local_set = 0; + u8 remote_set = 0; + u32 vni; + u8 vni_set = 0; + f64 timeout; + + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U", unformat_ip4_address, &local4)) + { + local_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "remote %U", + unformat_ip4_address, &remote4)) + { + remote_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "local %U", + unformat_ip6_address, &local6)) + { + local_set = 1; + ipv6_set = 1; + } + else if (unformat (line_input, "remote %U", + unformat_ip6_address, &remote6)) + { + remote_set = 1; + ipv6_set = 1; + } + + else if (unformat (line_input, "vni %d", &vni)) + vni_set = 1; + else + { + errmsg ("parse error '%U'\n", format_unformat_error, line_input); + return -99; + } + } + + if (local_set == 0) + { + errmsg ("tunnel local address not specified\n"); + return -99; + } + if (remote_set == 0) + { + errmsg ("tunnel remote address not specified\n"); + return -99; + } + if (ipv4_set && ipv6_set) + { + errmsg ("both IPv4 and IPv6 addresses specified"); + return -99; + } + + if (vni_set == 0) + { + errmsg ("vni not specified\n"); + return -99; + } + + M (VXLAN_GPE_IOAM_VNI_DISABLE, vxlan_gpe_ioam_vni_disable); + + + if (ipv6_set) + { + clib_memcpy (&mp->local, &local6, sizeof (local6)); + clib_memcpy (&mp->remote, &remote6, sizeof (remote6)); + } + else + { + clib_memcpy (&mp->local, &local4, sizeof (local4)); + clib_memcpy (&mp->remote, &remote4, sizeof (remote4)); + } + + mp->vni = ntohl (vni); + mp->is_ipv6 = ipv6_set; + + S; + W; + + return 0; +} + +static int +api_vxlan_gpe_ioam_transit_enable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + + unformat_input_t *line_input = vam->input; + vl_api_vxlan_gpe_ioam_transit_enable_t *mp; + ip4_address_t local4; + ip6_address_t local6; + u8 ipv4_set = 0, ipv6_set = 0; + u8 local_set = 0; + u32 outer_fib_index = 0; + f64 timeout; + + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4)) + { + local_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "dst-ip %U", + unformat_ip6_address, &local6)) + { + local_set = 1; + ipv6_set = 1; + } + + else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index)) + ; + else + { + errmsg ("parse error '%U'\n", format_unformat_error, line_input); + return -99; + } + } + + if (local_set == 0) + { + errmsg ("destination address not specified\n"); + return -99; + } + if (ipv4_set && ipv6_set) + { + errmsg ("both IPv4 and IPv6 addresses specified"); + return -99; + } + + + M (VXLAN_GPE_IOAM_TRANSIT_ENABLE, vxlan_gpe_ioam_transit_enable); + + + if (ipv6_set) + { + errmsg ("IPv6 currently unsupported"); + return -1; + } + else + { + clib_memcpy (&mp->dst_addr, &local4, sizeof (local4)); + } + + mp->outer_fib_index = htonl (outer_fib_index); + mp->is_ipv6 = ipv6_set; + + S; + W; + + return (0); +} + +static int +api_vxlan_gpe_ioam_transit_disable (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + + unformat_input_t *line_input = vam->input; + vl_api_vxlan_gpe_ioam_transit_disable_t *mp; + ip4_address_t local4; + ip6_address_t local6; + u8 ipv4_set = 0, ipv6_set = 0; + u8 local_set = 0; + u32 outer_fib_index; + f64 timeout; + + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4)) + { + local_set = 1; + ipv4_set = 1; + } + else if (unformat (line_input, "dst-ip %U", + unformat_ip6_address, &local6)) + { + local_set = 1; + ipv6_set = 1; + } + + else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index)) + ; + else + { + errmsg ("parse error '%U'\n", format_unformat_error, line_input); + return -99; + } + } + + if (local_set == 0) + { + errmsg ("destination address not specified\n"); + return -99; + } + if (ipv4_set && ipv6_set) + { + errmsg ("both IPv4 and IPv6 addresses specified"); + return -99; + } + + + M (VXLAN_GPE_IOAM_TRANSIT_DISABLE, vxlan_gpe_ioam_transit_disable); + + + if (ipv6_set) + { + return -1; + } + else + { + clib_memcpy (&mp->dst_addr, &local4, sizeof (local4)); + } + + mp->outer_fib_index = htonl (outer_fib_index); + mp->is_ipv6 = ipv6_set; + + S; + W; + + + return (0); +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(vxlan_gpe_ioam_enable, ""\ + "[trace] [pow] [ppc <encap|ppc decap>]") \ +_(vxlan_gpe_ioam_disable, "") \ +_(vxlan_gpe_ioam_vni_enable, ""\ + "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \ +_(vxlan_gpe_ioam_vni_disable, ""\ + "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \ +_(vxlan_gpe_ioam_transit_enable, ""\ + "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \ +_(vxlan_gpe_ioam_transit_disable, ""\ + "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \ + + +void +vat_api_hookup (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * +vat_plugin_register (vat_main_t * vam) +{ + vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main; + u8 *name; + + sm->vat_main = vam; + + name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~ 0) + vat_api_hookup (vam); + + vec_free (name); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/lb.am b/src/plugins/lb.am new file mode 100644 index 00000000000..352358fa88f --- /dev/null +++ b/src/plugins/lb.am @@ -0,0 +1,42 @@ +# Copyright (c) 2016 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += lb_test_plugin.la +vppplugins_LTLIBRARIES += lb_plugin.la + +lb_plugin_la_SOURCES = \ + lb/lb.c \ + lb/node.c \ + lb/cli.c \ + lb/util.c \ + lb/refcount.c \ + lb/api.c + +BUILT_SOURCES += \ + lb/lb.api.h \ + lb/lb.api.json + +API_FILES += lb/lb.api + +noinst_HEADERS += \ + lb/lb.h \ + lb/util.h \ + lb/refcount.h \ + lb/lbhash.h \ + lb/lb.api.h + +lb_test_plugin_la_SOURCES = \ + lb/lb_test.c \ + lb/lb_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c new file mode 100644 index 00000000000..06c53fa1005 --- /dev/null +++ b/src/plugins/lb/api.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/lb.h> + +#include <vppinfra/byte_order.h> +#include <vlibapi/api.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <lb/lb.api.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + + +/* define message structures */ +#define vl_typedefs +#include <lb/lb.api.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <lb/lb.api.h> +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <lb/lb.api.h> +#undef vl_api_version + +#define vl_msg_name_crc_list +#include <lb/lb.api.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (lb_main_t * lbm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lbm->msg_id_base); + foreach_vl_msg_name_crc_lb; +#undef _ +} + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+lbm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +static void +vl_api_lb_conf_t_handler +(vl_api_lb_conf_t * mp) +{ + lb_main_t *lbm = &lb_main; + vl_api_lb_conf_reply_t * rmp; + int rv = 0; + + rv = lb_conf((ip4_address_t *)&mp->ip4_src_address, + (ip6_address_t *)mp->ip6_src_address, + mp->sticky_buckets_per_core, + mp->flow_timeout); + + REPLY_MACRO (VL_API_LB_CONF_REPLY); +} + +static void *vl_api_lb_conf_t_print +(vl_api_lb_conf_t *mp, void * handle) +{ + u8 * s; + s = format (0, "SCRIPT: lb_conf "); + s = format (s, "%U ", format_ip4_address, (ip4_address_t *)&mp->ip4_src_address); + s = format (s, "%U ", format_ip6_address, (ip6_address_t *)mp->ip6_src_address); + s = format (s, "%u ", mp->sticky_buckets_per_core); + s = format (s, "%u ", mp->flow_timeout); + FINISH; +} + + +static void +vl_api_lb_add_del_vip_t_handler +(vl_api_lb_add_del_vip_t * mp) +{ + lb_main_t *lbm = &lb_main; + vl_api_lb_conf_reply_t * rmp; + int rv = 0; + ip46_address_t prefix; + memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6)); + + if (mp->is_del) { + u32 vip_index; + if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index))) + rv = lb_vip_del(vip_index); + } else { + u32 vip_index; + lb_vip_type_t type; + if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) { + type = mp->is_gre4?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; + } else { + type = mp->is_gre4?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6; + } + + rv = lb_vip_add(&prefix, mp->prefix_length, type, + mp->new_flows_table_length, &vip_index); + } + REPLY_MACRO (VL_API_LB_CONF_REPLY); +} + +static void *vl_api_lb_add_del_vip_t_print +(vl_api_lb_add_del_vip_t *mp, void * handle) +{ + u8 * s; + s = format (0, "SCRIPT: lb_add_del_vip "); + s = format (s, "%U ", format_ip46_prefix, + (ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY); + s = format (s, "%s ", mp->is_gre4?"gre4":"gre6"); + s = format (s, "%u ", mp->new_flows_table_length); + s = format (s, "%s ", mp->is_del?"del":"add"); + FINISH; +} + +static void +vl_api_lb_add_del_as_t_handler +(vl_api_lb_add_del_as_t * mp) +{ + lb_main_t *lbm = &lb_main; + vl_api_lb_conf_reply_t * rmp; + int rv = 0; + u32 vip_index; + if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix, + mp->vip_prefix_length, &vip_index))) + goto done; + + if (mp->is_del) + rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1); + else + rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1); + +done: + REPLY_MACRO (VL_API_LB_CONF_REPLY); +} + +static void *vl_api_lb_add_del_as_t_print +(vl_api_lb_add_del_as_t *mp, void * handle) +{ + u8 * s; + s = format (0, "SCRIPT: lb_add_del_as "); + s = format (s, "%U ", format_ip46_prefix, + (ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY); + s = format (s, "%U ", format_ip46_address, + (ip46_address_t *)mp->as_address, IP46_TYPE_ANY); + s = format (s, "%s ", mp->is_del?"del":"add"); + FINISH; +} + +/* List of message types that this plugin understands */ +#define foreach_lb_plugin_api_msg \ +_(LB_CONF, lb_conf) \ +_(LB_ADD_DEL_VIP, lb_add_del_vip) \ +_(LB_ADD_DEL_AS, lb_add_del_as) + +static clib_error_t * lb_api_init (vlib_main_t * vm) +{ + lb_main_t *lbm = &lb_main; + u8 *name = format (0, "lb_%08x%c", api_version, 0); + lbm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + lbm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_lb_plugin_api_msg; +#undef _ + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (lbm, &api_main); + + return 0; +} + +VLIB_INIT_FUNCTION (lb_api_init); diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c new file mode 100644 index 00000000000..b59c6426241 --- /dev/null +++ b/src/plugins/lb/cli.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/lb.h> +#include <lb/util.h> + +static clib_error_t * +lb_vip_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip46_address_t prefix; + u8 plen; + u32 new_len = 1024; + u8 del = 0; + int ret; + u32 gre4 = 0; + lb_vip_type_t type; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen)) + return clib_error_return (0, "invalid vip prefix: '%U'", + format_unformat_error, line_input); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(line_input, "new_len %d", &new_len)) + ; + else if (unformat(line_input, "del")) + del = 1; + else if (unformat(line_input, "encap gre4")) + gre4 = 1; + else if (unformat(line_input, "encap gre6")) + gre4 = 0; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + + if (ip46_prefix_is_ip4(&prefix, plen)) { + type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6; + } else { + type = (gre4)?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6; + } + + lb_garbage_collection(); + + u32 index; + if (!del) { + if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) { + return clib_error_return (0, "lb_vip_add error %d", ret); + } else { + vlib_cli_output(vm, "lb_vip_add ok %d", index); + } + } else { + if ((ret = lb_vip_find_index(&prefix, plen, &index))) + return clib_error_return (0, "lb_vip_find_index error %d", ret); + else if ((ret = lb_vip_del(index))) + return clib_error_return (0, "lb_vip_del error %d", ret); + } + return NULL; +} + +VLIB_CLI_COMMAND (lb_vip_command, static) = +{ + .path = "lb vip", + .short_help = "lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]", + .function = lb_vip_command_fn, +}; + +static clib_error_t * +lb_as_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip46_address_t vip_prefix, as_addr; + u8 vip_plen; + ip46_address_t *as_array = 0; + u32 vip_index; + u8 del = 0; + int ret; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY)) + return clib_error_return (0, "invalid as address: '%U'", + format_unformat_error, line_input); + + if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index))) + return clib_error_return (0, "lb_vip_find_index error %d", ret); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) { + vec_add1(as_array, as_addr); + } else if (unformat(line_input, "del")) { + del = 1; + } else { + vec_free(as_array); + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + } + + if (!vec_len(as_array)) { + vec_free(as_array); + return clib_error_return (0, "No AS address provided"); + } + + lb_garbage_collection(); + clib_warning("vip index is %d", vip_index); + + if (del) { + if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) { + vec_free(as_array); + return clib_error_return (0, "lb_vip_del_ass error %d", ret); + } + } else { + if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) { + vec_free(as_array); + return clib_error_return (0, "lb_vip_add_ass error %d", ret); + } + } + + vec_free(as_array); + return 0; +} + +VLIB_CLI_COMMAND (lb_as_command, static) = +{ + .path = "lb as", + .short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]", + .function = lb_as_command_fn, +}; + +static clib_error_t * +lb_conf_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + lb_main_t *lbm = &lb_main; + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4 = lbm->ip4_src_address; + ip6_address_t ip6 = lbm->ip6_src_address; + u32 per_cpu_sticky_buckets = lbm->per_cpu_sticky_buckets; + u32 per_cpu_sticky_buckets_log2 = 0; + u32 flow_timeout = lbm->flow_timeout; + int ret; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat(line_input, "ip4-src-address %U", unformat_ip4_address, &ip4)) + ; + else if (unformat(line_input, "ip6-src-address %U", unformat_ip6_address, &ip6)) + ; + else if (unformat(line_input, "buckets %d", &per_cpu_sticky_buckets)) + ; + else if (unformat(line_input, "buckets-log2 %d", &per_cpu_sticky_buckets_log2)) { + if (per_cpu_sticky_buckets_log2 >= 32) + return clib_error_return (0, "buckets-log2 value is too high"); + per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2; + } else if (unformat(line_input, "timeout %d", &flow_timeout)) + ; + else + return clib_error_return (0, "parse error: '%U'", + format_unformat_error, line_input); + } + + unformat_free (line_input); + + lb_garbage_collection(); + + if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout))) + return clib_error_return (0, "lb_conf error %d", ret); + + return NULL; +} + +VLIB_CLI_COMMAND (lb_conf_command, static) = +{ + .path = "lb conf", + .short_help = "lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] [buckets <n>] [timeout <s>]", + .function = lb_conf_command_fn, +}; + +static clib_error_t * +lb_show_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vlib_cli_output(vm, "%U", format_lb_main); + return NULL; +} + + +VLIB_CLI_COMMAND (lb_show_command, static) = +{ + .path = "show lb", + .short_help = "show lb", + .function = lb_show_command_fn, +}; + +static clib_error_t * +lb_show_vips_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t line_input; + lb_main_t *lbm = &lb_main; + lb_vip_t *vip; + u8 verbose = 0; + + if (!unformat_user (input, unformat_line_input, &line_input)) + return 0; + + if (unformat(&line_input, "verbose")) + verbose = 1; + + pool_foreach(vip, lbm->vips, { + vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip); + }); + + unformat_free (&line_input); + return NULL; +} + +VLIB_CLI_COMMAND (lb_show_vips_command, static) = +{ + .path = "show lb vips", + .short_help = "show lb vips [verbose]", + .function = lb_show_vips_command_fn, +}; diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api new file mode 100644 index 00000000000..39ee3c8f98b --- /dev/null +++ b/src/plugins/lb/lb.api @@ -0,0 +1,71 @@ +/** \brief Configure Load-Balancer global parameters + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic. + @param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic. + @param n_sticky_buckets - Number of buckets *per worker thread* in the + established flow table (must be power of 2). + @param flow_timeout - Time in seconds after which, if no packet is received + for a given flow, the flow is removed from the established flow table. +*/ +define lb_conf +{ + u32 client_index; + u32 context; + u32 ip4_src_address; + u8 ip6_src_address[16]; + u32 sticky_buckets_per_core; + u32 flow_timeout; +}; + +define lb_conf_reply { + u32 context; + i32 retval; +}; + +/** \brief Add a virtual address (or prefix) + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param ip_prefix - IP address (IPv4 in lower order 32 bits). + @param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4). + @param is_gre4 - Encap is ip4 GRE (ip6 GRE otherwise). + @param new_flows_table_length - Size of the new connections flow table used + for this VIP (must be power of 2). + @param is_del - The VIP should be removed. +*/ +define lb_add_del_vip { + u32 client_index; + u32 context; + u8 ip_prefix[16]; + u8 prefix_length; + u8 is_gre4; + u32 new_flows_table_length; + u8 is_del; +}; + +define lb_add_del_vip_reply { + u32 context; + i32 retval; +}; + +/** \brief Add an application server for a given VIP + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits). + @param vip_ip_prefix - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4). + @param as_address - The application server address (IPv4 in lower order 32 bits). + @param is_del - The AS should be removed. +*/ +define lb_add_del_as { + u32 client_index; + u32 context; + u8 vip_ip_prefix[16]; + u8 vip_prefix_length; + u8 as_address[16]; + u8 is_del; +}; + +define lb_add_del_as_reply { + u32 context; + i32 retval; +}; diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c new file mode 100644 index 00000000000..1d9b987095b --- /dev/null +++ b/src/plugins/lb/lb.c @@ -0,0 +1,844 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/lb.h> +#include <vnet/plugin/plugin.h> +#include <vnet/api_errno.h> + +//GC runs at most once every so many seconds +#define LB_GARBAGE_RUN 60 + +//After so many seconds. It is assumed that inter-core race condition will not occur. +#define LB_CONCURRENCY_TIMEOUT 10 + +lb_main_t lb_main; + +#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1)) +#define lb_put_writer_lock() lb_main.writer_lock[0] = 0 + +static void lb_as_stack (lb_as_t *as); + + +const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL }; +const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL }; +const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] = + { + [DPO_PROTO_IP4] = lb_dpo_gre4_ip4, + [DPO_PROTO_IP6] = lb_dpo_gre4_ip6, + }; + +const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL }; +const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL }; +const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] = + { + [DPO_PROTO_IP4] = lb_dpo_gre6_ip4, + [DPO_PROTO_IP6] = lb_dpo_gre6_ip6, + }; + +u32 lb_hash_time_now(vlib_main_t * vm) +{ + return (u32) (vlib_time_now(vm) + 10000); +} + +u8 *format_lb_main (u8 * s, va_list * args) +{ + vlib_thread_main_t *tm = vlib_get_thread_main(); + lb_main_t *lbm = &lb_main; + s = format(s, "lb_main"); + s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address); + s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address); + s = format(s, " #vips: %u\n", pool_elts(lbm->vips)); + s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1); + + u32 cpu_index; + for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) { + lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht; + if (h) { + s = format(s, "core %d\n", cpu_index); + s = format(s, " timeout: %ds\n", h->timeout); + s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h)); + } + } + + return s; +} + +static char *lb_vip_type_strings[] = { + [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6", + [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4", + [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6", + [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4", +}; + +u8 *format_lb_vip_type (u8 * s, va_list * args) +{ + lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t); + u32 i; + for (i=0; i<LB_VIP_N_TYPES; i++) + if (vipt == i) + return format(s, lb_vip_type_strings[i]); + return format(s, "_WRONG_TYPE_"); +} + +uword unformat_lb_vip_type (unformat_input_t * input, va_list * args) +{ + lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *); + u32 i; + for (i=0; i<LB_VIP_N_TYPES; i++) + if (unformat(input, lb_vip_type_strings[i])) { + *vipt = i; + return 1; + } + return 0; +} + +u8 *format_lb_vip (u8 * s, va_list * args) +{ + lb_vip_t *vip = va_arg (*args, lb_vip_t *); + return format(s, "%U %U new_size:%u #as:%u%s", + format_lb_vip_type, vip->type, + format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY, + vip->new_flow_table_mask + 1, + pool_elts(vip->as_indexes), + (vip->flags & LB_VIP_FLAGS_USED)?"":" removed"); +} + +u8 *format_lb_as (u8 * s, va_list * args) +{ + lb_as_t *as = va_arg (*args, lb_as_t *); + return format(s, "%U %s", format_ip46_address, + &as->address, IP46_TYPE_ANY, + (as->flags & LB_AS_FLAGS_USED)?"used":"removed"); +} + +u8 *format_lb_vip_detailed (u8 * s, va_list * args) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip = va_arg (*args, lb_vip_t *); + uword indent = format_get_indent (s); + + s = format(s, "%U %U [%u] %U%s\n" + "%U new_size:%u\n", + format_white_space, indent, + format_lb_vip_type, vip->type, + vip - lbm->vips, format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY, + (vip->flags & LB_VIP_FLAGS_USED)?"":" removed", + format_white_space, indent, + vip->new_flow_table_mask + 1); + + //Print counters + s = format(s, "%U counters:\n", + format_white_space, indent); + u32 i; + for (i=0; i<LB_N_VIP_COUNTERS; i++) + s = format(s, "%U %s: %d\n", + format_white_space, indent, + lbm->vip_counters[i].name, + vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips)); + + + s = format(s, "%U #as:%u\n", + format_white_space, indent, + pool_elts(vip->as_indexes)); + + //Let's count the buckets for each AS + u32 *count = 0; + vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much... + lb_new_flow_entry_t *nfe; + vec_foreach(nfe, vip->new_flow_table) + count[nfe->as_index]++; + + lb_as_t *as; + u32 *as_index; + pool_foreach(as_index, vip->as_indexes, { + as = &lbm->ass[*as_index]; + s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n", + format_white_space, indent, + format_ip46_address, &as->address, IP46_TYPE_ANY, + count[as - lbm->ass], + vlib_refcount_get(&lbm->as_refcount, as - lbm->ass), + as->dpo.dpoi_index, + (as->flags & LB_AS_FLAGS_USED)?"used":" removed"); + }); + + vec_free(count); + + /* + s = format(s, "%U new flows table:\n", format_white_space, indent); + lb_new_flow_entry_t *nfe; + vec_foreach(nfe, vip->new_flow_table) { + s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index); + } + */ + return s; +} + +typedef struct { + u32 as_index; + u32 last; + u32 skip; +} lb_pseudorand_t; + +static int lb_pseudorand_compare(void *a, void *b) +{ + lb_as_t *asa, *asb; + lb_main_t *lbm = &lb_main; + asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index]; + asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index]; + return memcmp(&asa->address, &asb->address, sizeof(asb->address)); +} + +static void lb_vip_garbage_collection(lb_vip_t *vip) +{ + lb_main_t *lbm = &lb_main; + ASSERT (lbm->writer_lock[0]); + + u32 now = (u32) vlib_time_now(vlib_get_main()); + if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN)) + return; + + vip->last_garbage_collection = now; + lb_as_t *as; + u32 *as_index; + pool_foreach(as_index, vip->as_indexes, { + as = &lbm->ass[*as_index]; + if (!(as->flags & LB_AS_FLAGS_USED) && //Not used + clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used + (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0)) + { //Not referenced + fib_entry_child_remove(as->next_hop_fib_entry_index, + as->next_hop_child_index); + fib_table_entry_delete_index(as->next_hop_fib_entry_index, + FIB_SOURCE_RR); + as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID; + + pool_put(vip->as_indexes, as_index); + pool_put(lbm->ass, as); + } + }); +} + +void lb_garbage_collection() +{ + lb_main_t *lbm = &lb_main; + lb_get_writer_lock(); + lb_vip_t *vip; + u32 *to_be_removed_vips = 0, *i; + pool_foreach(vip, lbm->vips, { + lb_vip_garbage_collection(vip); + + if (!(vip->flags & LB_VIP_FLAGS_USED) && + (pool_elts(vip->as_indexes) == 0)) { + vec_add1(to_be_removed_vips, vip - lbm->vips); + } + }); + + vec_foreach(i, to_be_removed_vips) { + vip = &lbm->vips[*i]; + pool_put(lbm->vips, vip); + pool_free(vip->as_indexes); + } + + vec_free(to_be_removed_vips); + lb_put_writer_lock(); +} + +static void lb_vip_update_new_flow_table(lb_vip_t *vip) +{ + lb_main_t *lbm = &lb_main; + lb_new_flow_entry_t *old_table; + u32 i, *as_index; + lb_new_flow_entry_t *new_flow_table = 0; + lb_as_t *as; + lb_pseudorand_t *pr, *sort_arr = 0; + u32 count; + + ASSERT (lbm->writer_lock[0]); //We must have the lock + + //Check if some AS is configured or not + i = 0; + pool_foreach(as_index, vip->as_indexes, { + as = &lbm->ass[*as_index]; + if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore + i = 1; + goto out; //Not sure 'break' works in this macro-loop + } + }); + +out: + if (i == 0) { + //Only the default. i.e. no AS + vec_validate(new_flow_table, vip->new_flow_table_mask); + for (i=0; i<vec_len(new_flow_table); i++) + new_flow_table[i].as_index = 0; + + goto finished; + } + + //First, let's sort the ASs + sort_arr = 0; + vec_alloc(sort_arr, pool_elts(vip->as_indexes)); + + i = 0; + pool_foreach(as_index, vip->as_indexes, { + as = &lbm->ass[*as_index]; + if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore + continue; + + sort_arr[i].as_index = as - lbm->ass; + i++; + }); + _vec_len(sort_arr) = i; + + vec_sort_with_function(sort_arr, lb_pseudorand_compare); + + //Now let's pseudo-randomly generate permutations + vec_foreach(pr, sort_arr) { + lb_as_t *as = &lbm->ass[pr->as_index]; + + u64 seed = clib_xxhash(as->address.as_u64[0] ^ + as->address.as_u64[1]); + /* We have 2^n buckets. + * skip must be prime with 2^n. + * So skip must be odd. + * MagLev actually state that M should be prime, + * but this has a big computation cost (% operation). + * Using 2^n is more better (& operation). + */ + pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask; + pr->last = (seed >> 32) & vip->new_flow_table_mask; + } + + //Let's create a new flow table + vec_validate(new_flow_table, vip->new_flow_table_mask); + for (i=0; i<vec_len(new_flow_table); i++) + new_flow_table[i].as_index = ~0; + + u32 done = 0; + while (1) { + vec_foreach(pr, sort_arr) { + while (1) { + u32 last = pr->last; + pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask; + if (new_flow_table[last].as_index == ~0) { + new_flow_table[last].as_index = pr->as_index; + break; + } + } + done++; + if (done == vec_len(new_flow_table)) + goto finished; + } + } + + vec_free(sort_arr); + +finished: + +//Count number of changed entries + count = 0; + for (i=0; i<vec_len(new_flow_table); i++) + if (vip->new_flow_table == 0 || + new_flow_table[i].as_index != vip->new_flow_table[i].as_index) + count++; + + old_table = vip->new_flow_table; + vip->new_flow_table = new_flow_table; + vec_free(old_table); +} + +int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, + u32 per_cpu_sticky_buckets, u32 flow_timeout) +{ + lb_main_t *lbm = &lb_main; + + if (!is_pow2(per_cpu_sticky_buckets)) + return VNET_API_ERROR_INVALID_MEMORY_SIZE; + + lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self + lbm->ip4_src_address = *ip4_address; + lbm->ip6_src_address = *ip6_address; + lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets; + lbm->flow_timeout = flow_timeout; + lb_put_writer_lock(); + return 0; +} + +static +int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip; + ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned + ip46_prefix_normalize(prefix, plen); + pool_foreach(vip, lbm->vips, { + if ((vip->flags & LB_AS_FLAGS_USED) && + vip->plen == plen && + vip->prefix.as_u64[0] == prefix->as_u64[0] && + vip->prefix.as_u64[1] == prefix->as_u64[1]) { + *vip_index = vip - lbm->vips; + return 0; + } + }); + return VNET_API_ERROR_NO_SUCH_ENTRY; +} + +int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index) +{ + int ret; + lb_get_writer_lock(); + ret = lb_vip_find_index_with_lock(prefix, plen, vip_index); + lb_put_writer_lock(); + return ret; +} + +static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index) +{ + lb_main_t *lbm = &lb_main; + ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned + lb_as_t *as; + u32 *asi; + pool_foreach(asi, vip->as_indexes, { + as = &lbm->ass[*asi]; + if (as->vip_index == (vip - lbm->vips) && + as->address.as_u64[0] == address->as_u64[0] && + as->address.as_u64[1] == address->as_u64[1]) { + *as_index = as - lbm->ass; + return 0; + } + }); + return -1; +} + +int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n) +{ + lb_main_t *lbm = &lb_main; + lb_get_writer_lock(); + lb_vip_t *vip; + if (!(vip = lb_vip_get_by_index(vip_index))) { + lb_put_writer_lock(); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + ip46_type_t type = lb_vip_is_gre4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6; + u32 *to_be_added = 0; + u32 *to_be_updated = 0; + u32 i; + u32 *ip; + + //Sanity check + while (n--) { + + if (!lb_as_find_index_vip(vip, &addresses[n], &i)) { + if (lbm->ass[i].flags & LB_AS_FLAGS_USED) { + vec_free(to_be_added); + vec_free(to_be_updated); + lb_put_writer_lock(); + return VNET_API_ERROR_VALUE_EXIST; + } + vec_add1(to_be_updated, i); + goto next; + } + + if (ip46_address_type(&addresses[n]) != type) { + vec_free(to_be_added); + vec_free(to_be_updated); + lb_put_writer_lock(); + return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; + } + + if (n) { + u32 n2 = n; + while(n2--) //Check for duplicates + if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && + addresses[n2].as_u64[1] == addresses[n].as_u64[1]) + goto next; + } + + vec_add1(to_be_added, n); + +next: + continue; + } + + //Update reused ASs + vec_foreach(ip, to_be_updated) { + lbm->ass[*ip].flags = LB_AS_FLAGS_USED; + } + vec_free(to_be_updated); + + //Create those who have to be created + vec_foreach(ip, to_be_added) { + lb_as_t *as; + u32 *as_index; + pool_get(lbm->ass, as); + as->address = addresses[*ip]; + as->flags = LB_AS_FLAGS_USED; + as->vip_index = vip_index; + pool_get(vip->as_indexes, as_index); + *as_index = as - lbm->ass; + + /* + * become a child of the FIB entry + * so we are informed when its forwarding changes + */ + fib_prefix_t nh = {}; + if (lb_vip_is_gre4(vip)) { + nh.fp_addr.ip4 = as->address.ip4; + nh.fp_len = 32; + nh.fp_proto = FIB_PROTOCOL_IP4; + } else { + nh.fp_addr.ip6 = as->address.ip6; + nh.fp_len = 128; + nh.fp_proto = FIB_PROTOCOL_IP6; + } + + as->next_hop_fib_entry_index = + fib_table_entry_special_add(0, + &nh, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + as->next_hop_child_index = + fib_entry_child_add(as->next_hop_fib_entry_index, + lbm->fib_node_type, + as - lbm->ass); + + lb_as_stack(as); + } + vec_free(to_be_added); + + //Recompute flows + lb_vip_update_new_flow_table(vip); + + //Garbage collection maybe + lb_vip_garbage_collection(vip); + + lb_put_writer_lock(); + return 0; +} + +int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n) +{ + lb_main_t *lbm = &lb_main; + u32 now = (u32) vlib_time_now(vlib_get_main()); + u32 *ip = 0; + + lb_vip_t *vip; + if (!(vip = lb_vip_get_by_index(vip_index))) { + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + u32 *indexes = NULL; + while (n--) { + u32 i; + if (lb_as_find_index_vip(vip, &addresses[n], &i)) { + vec_free(indexes); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + if (n) { //Check for duplicates + u32 n2 = n - 1; + while(n2--) { + if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] && + addresses[n2].as_u64[1] == addresses[n].as_u64[1]) + goto next; + } + } + + vec_add1(indexes, i); +next: + continue; + } + + //Garbage collection maybe + lb_vip_garbage_collection(vip); + + if (indexes != NULL) { + vec_foreach(ip, indexes) { + lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED; + lbm->ass[*ip].last_used = now; + } + + //Recompute flows + lb_vip_update_new_flow_table(vip); + } + + vec_free(indexes); + return 0; +} + +int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n) +{ + lb_get_writer_lock(); + int ret = lb_vip_del_ass_withlock(vip_index, addresses, n); + lb_put_writer_lock(); + return ret; +} + +/** + * Add the VIP adjacency to the ip4 or ip6 fib + */ +static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip) +{ + dpo_proto_t proto = 0; + dpo_id_t dpo = DPO_INVALID; + fib_prefix_t pfx = {}; + if (lb_vip_is_ip4(vip)) { + pfx.fp_addr.ip4 = vip->prefix.ip4; + pfx.fp_len = vip->plen - 96; + pfx.fp_proto = FIB_PROTOCOL_IP4; + proto = DPO_PROTO_IP4; + } else { + pfx.fp_addr.ip6 = vip->prefix.ip6; + pfx.fp_len = vip->plen; + pfx.fp_proto = FIB_PROTOCOL_IP6; + proto = DPO_PROTO_IP6; + } + dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, + proto, vip - lbm->vips); + fib_table_entry_special_dpo_add(0, + &pfx, + FIB_SOURCE_PLUGIN_HI, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); +} + +/** + * Deletes the adjacency associated with the VIP + */ +static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip) +{ + fib_prefix_t pfx = {}; + if (lb_vip_is_ip4(vip)) { + pfx.fp_addr.ip4 = vip->prefix.ip4; + pfx.fp_len = vip->plen - 96; + pfx.fp_proto = FIB_PROTOCOL_IP4; + } else { + pfx.fp_addr.ip6 = vip->prefix.ip6; + pfx.fp_len = vip->plen; + pfx.fp_proto = FIB_PROTOCOL_IP6; + } + fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI); +} + +int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip; + lb_get_writer_lock(); + ip46_prefix_normalize(prefix, plen); + + if (!lb_vip_find_index_with_lock(prefix, plen, vip_index)) { + lb_put_writer_lock(); + return VNET_API_ERROR_VALUE_EXIST; + } + + if (!is_pow2(new_length)) { + lb_put_writer_lock(); + return VNET_API_ERROR_INVALID_MEMORY_SIZE; + } + + if (ip46_prefix_is_ip4(prefix, plen) && + (type != LB_VIP_TYPE_IP4_GRE4) && + (type != LB_VIP_TYPE_IP4_GRE6)) + return VNET_API_ERROR_INVALID_ADDRESS_FAMILY; + + + //Allocate + pool_get(lbm->vips, vip); + + //Init + vip->prefix = *prefix; + vip->plen = plen; + vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main()); + vip->type = type; + vip->flags = LB_VIP_FLAGS_USED; + vip->as_indexes = 0; + + //Validate counters + u32 i; + for (i = 0; i < LB_N_VIP_COUNTERS; i++) { + vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); + vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips); + } + + //Configure new flow table + vip->new_flow_table_mask = new_length - 1; + vip->new_flow_table = 0; + + //Create a new flow hash table full of the default entry + lb_vip_update_new_flow_table(vip); + + //Create adjacency to direct traffic + lb_vip_add_adjacency(lbm, vip); + + //Return result + *vip_index = vip - lbm->vips; + + lb_put_writer_lock(); + return 0; +} + +int lb_vip_del(u32 vip_index) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip; + lb_get_writer_lock(); + if (!(vip = lb_vip_get_by_index(vip_index))) { + lb_put_writer_lock(); + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + //FIXME: This operation is actually not working + //We will need to remove state before performing this. + + { + //Remove all ASs + ip46_address_t *ass = 0; + lb_as_t *as; + u32 *as_index; + pool_foreach(as_index, vip->as_indexes, { + as = &lbm->ass[*as_index]; + vec_add1(ass, as->address); + }); + if (vec_len(ass)) + lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass)); + vec_free(ass); + } + + //Delete adjacency + lb_vip_del_adjacency(lbm, vip); + + //Set the VIP as unused + vip->flags &= ~LB_VIP_FLAGS_USED; + + lb_put_writer_lock(); + return 0; +} + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, + vnet_plugin_handoff_t * h, + int from_early_init) +{ + clib_error_t *error = 0; + return error; +} + + +u8 *format_lb_dpo (u8 * s, va_list * va) +{ + index_t index = va_arg (*va, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*va, u32); + lb_main_t *lbm = &lb_main; + lb_vip_t *vip = pool_elt_at_index (lbm->vips, index); + return format (s, "%U", format_lb_vip, vip); +} + +static void lb_dpo_lock (dpo_id_t *dpo) {} +static void lb_dpo_unlock (dpo_id_t *dpo) {} + +static fib_node_t * +lb_fib_node_get_node (fib_node_index_t index) +{ + lb_main_t *lbm = &lb_main; + lb_as_t *as = pool_elt_at_index (lbm->ass, index); + return (&as->fib_node); +} + +static void +lb_fib_node_last_lock_gone (fib_node_t *node) +{ +} + +static lb_as_t * +lb_as_from_fib_node (fib_node_t *node) +{ + return ((lb_as_t*)(((char*)node) - + STRUCT_OFFSET_OF(lb_as_t, fib_node))); +} + +static void +lb_as_stack (lb_as_t *as) +{ + lb_main_t *lbm = &lb_main; + lb_vip_t *vip = &lbm->vips[as->vip_index]; + dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type, + lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6, + &as->dpo, + fib_entry_contribute_ip_forwarding( + as->next_hop_fib_entry_index)); +} + +static fib_node_back_walk_rc_t +lb_fib_node_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + lb_as_stack(lb_as_from_fib_node(node)); + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +clib_error_t * +lb_init (vlib_main_t * vm) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + lb_main_t *lbm = &lb_main; + lb_as_t *default_as; + fib_node_vft_t lb_fib_node_vft = { + .fnv_get = lb_fib_node_get_node, + .fnv_last_lock = lb_fib_node_last_lock_gone, + .fnv_back_walk = lb_fib_node_back_walk_notify, + }; + dpo_vft_t lb_vft = { + .dv_lock = lb_dpo_lock, + .dv_unlock = lb_dpo_unlock, + .dv_format = format_lb_dpo, + }; + + lbm->vips = 0; + lbm->per_cpu = 0; + vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1); + lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + lbm->writer_lock[0] = 0; + lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS; + lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT; + lbm->ip4_src_address.as_u32 = 0xffffffff; + lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL; + lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL; + lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes); + lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes); + lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft); + + //Init AS reference counters + vlib_refcount_init(&lbm->as_refcount); + + //Allocate and init default AS. + lbm->ass = 0; + pool_get(lbm->ass, default_as); + default_as->flags = 0; + default_as->dpo.dpoi_next_node = LB_NEXT_DROP; + default_as->vip_index = ~0; + default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL; + default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL; + +#define _(a,b,c) lbm->vip_counters[c].name = b; + lb_foreach_vip_counter +#undef _ + return NULL; +} + +VLIB_INIT_FUNCTION (lb_init); diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h new file mode 100644 index 00000000000..882b9b30f7e --- /dev/null +++ b/src/plugins/lb/lb.h @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * lb-plugin implements a MagLev-like load balancer. + * http://research.google.com/pubs/pub44824.html + * + * It hasn't been tested for interoperability with the original MagLev + * but intends to provide similar functionality. + * The load-balancer receives traffic destined to VIP (Virtual IP) + * addresses from one or multiple(ECMP) routers. + * The load-balancer tunnels the traffic toward many application servers + * ensuring session stickyness (i.e. that a single sessions is tunneled + * towards a single application server). + * + */ + +#ifndef LB_PLUGIN_LB_LB_H_ +#define LB_PLUGIN_LB_LB_H_ + +#include <lb/util.h> +#include <lb/refcount.h> + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/dpo.h> +#include <vnet/fib/fib_table.h> + +#include <lb/lbhash.h> + +#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10 +#define LB_DEFAULT_FLOW_TIMEOUT 40 + +typedef enum { + LB_NEXT_DROP, + LB_N_NEXT, +} lb_next_t; + +/** + * Each VIP is configured with a set of + * application server. + */ +typedef struct { + /** + * Registration to FIB event. + */ + fib_node_t fib_node; + + /** + * Destination address used to tunnel traffic towards + * that application server. + * The address is also used as ID and pseudo-random + * seed for the load-balancing process. + */ + ip46_address_t address; + + /** + * ASs are indexed by address and VIP Index. + * Which means there will be duplicated if the same server + * address is used for multiple VIPs. + */ + u32 vip_index; + + /** + * Some per-AS flags. + * For now only LB_AS_FLAGS_USED is defined. + */ + u8 flags; + +#define LB_AS_FLAGS_USED 0x1 + + /** + * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set. + * + * AS removal is based on garbage collection and reference counting. + * When an AS is removed, there is a race between configuration core + * and worker cores which may still add a reference while it should not + * be used. This timestamp is used to not remove the AS while a race condition + * may happen. + */ + u32 last_used; + + /** + * The FIB entry index for the next-hop + */ + fib_node_index_t next_hop_fib_entry_index; + + /** + * The child index on the FIB entry + */ + u32 next_hop_child_index; + + /** + * The next DPO in the graph to follow. + */ + dpo_id_t dpo; + +} lb_as_t; + +format_function_t format_lb_as; + +typedef struct { + u32 as_index; +} lb_new_flow_entry_t; + +#define lb_foreach_vip_counter \ + _(NEXT_PACKET, "packet from existing sessions", 0) \ + _(FIRST_PACKET, "first session packet", 1) \ + _(UNTRACKED_PACKET, "untracked packet", 2) \ + _(NO_SERVER, "no server configured", 3) + +typedef enum { +#define _(a,b,c) LB_VIP_COUNTER_##a = c, + lb_foreach_vip_counter +#undef _ + LB_N_VIP_COUNTERS +} lb_vip_counter_t; + +/** + * The load balancer supports IPv4 and IPv6 traffic + * and GRE4 and GRE6 encap. + */ +typedef enum { + LB_VIP_TYPE_IP6_GRE6, + LB_VIP_TYPE_IP6_GRE4, + LB_VIP_TYPE_IP4_GRE6, + LB_VIP_TYPE_IP4_GRE4, + LB_VIP_N_TYPES, +} lb_vip_type_t; + +format_function_t format_lb_vip_type; +unformat_function_t unformat_lb_vip_type; + +/** + * Load balancing service is provided per VIP. + * In this data model, a VIP can be a whole prefix. + * But load balancing only + * occurs on a per-source-address/port basis. Meaning that if a given source + * reuses the same port for multiple destinations within the same VIP, + * they will be considered as a single flow. + */ +typedef struct { + + //Runtime + + /** + * Vector mapping (flow-hash & new_connect_table_mask) to AS index. + * This is used for new flows. + */ + lb_new_flow_entry_t *new_flow_table; + + /** + * New flows table length - 1 + * (length MUST be a power of 2) + */ + u32 new_flow_table_mask; + + /** + * Last time garbage collection was run to free the ASs. + */ + u32 last_garbage_collection; + + //Not runtime + + /** + * A Virtual IP represents a given service delivered + * by a set of application servers. It can be a single + * address or a prefix. + * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address + * (i.e. ::/96 prefix). + */ + ip46_address_t prefix; + + /** + * The VIP prefix length. + * In case of IPv4, plen = 96 + ip4_plen. + */ + u8 plen; + + /** + * The type of traffic for this. + * LB_TYPE_UNDEFINED if unknown. + */ + lb_vip_type_t type; + + /** + * Flags related to this VIP. + * LB_VIP_FLAGS_USED means the VIP is active. + * When it is not set, the VIP in the process of being removed. + * We cannot immediately remove a VIP because the VIP index still may be stored + * in the adjacency index. + */ + u8 flags; +#define LB_VIP_FLAGS_USED 0x1 + + /** + * Pool of AS indexes used for this VIP. + * This also includes ASs that have been removed (but are still referenced). + */ + u32 *as_indexes; +} lb_vip_t; + +#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4) +#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 || (vip)->type == LB_VIP_TYPE_IP4_GRE4) +format_function_t format_lb_vip; +format_function_t format_lb_vip_detailed; + +typedef struct { + /** + * Each CPU has its own sticky flow hash table. + * One single table is used for all VIPs. + */ + lb_hash_t *sticky_ht; +} lb_per_cpu_t; + +typedef struct { + /** + * Pool of all Virtual IPs + */ + lb_vip_t *vips; + + /** + * Pool of ASs. + * ASs are referenced by address and vip index. + * The first element (index 0) is special and used only to fill + * new_flow_tables when no AS has been configured. + */ + lb_as_t *ass; + + /** + * Each AS has an associated reference counter. + * As ass[0] has a special meaning, its associated counter + * starts at 0 and is decremented instead. i.e. do not use it. + */ + vlib_refcount_t as_refcount; + + /** + * Some global data is per-cpu + */ + lb_per_cpu_t *per_cpu; + + /** + * Node next index for IP adjacencies, for each of the traffic types. + */ + u32 ip_lookup_next_index[LB_VIP_N_TYPES]; + + /** + * Source address used in IPv6 encapsulated traffic + */ + ip6_address_t ip6_src_address; + + /** + * Source address used for IPv4 encapsulated traffic + */ + ip4_address_t ip4_src_address; + + /** + * Number of buckets in the per-cpu sticky hash table. + */ + u32 per_cpu_sticky_buckets; + + /** + * Flow timeout in seconds. + */ + u32 flow_timeout; + + /** + * Per VIP counter + */ + vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS]; + + /** + * DPO used to send packet from IP4/6 lookup to LB node. + */ + dpo_type_t dpo_gre4_type; + dpo_type_t dpo_gre6_type; + + /** + * Node type for registering to fib changes. + */ + fib_node_type_t fib_node_type; + + /** + * API dynamically registered base ID. + */ + u16 msg_id_base; + + volatile u32 *writer_lock; +} lb_main_t; + +extern lb_main_t lb_main; +extern vlib_node_registration_t lb6_node; +extern vlib_node_registration_t lb4_node; + +/** + * Fix global load-balancer parameters. + * @param ip4_address IPv4 source address used for encapsulated traffic + * @param ip6_address IPv6 source address used for encapsulated traffic + * @return 0 on success. VNET_LB_ERR_XXX on error + */ +int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, + u32 sticky_buckets, u32 flow_timeout); + +int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, + u32 new_length, u32 *vip_index); +int lb_vip_del(u32 vip_index); + +int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index); + +#define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index)) + +int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n); +int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n); + +u32 lb_hash_time_now(vlib_main_t * vm); + +void lb_garbage_collection(); + +format_function_t format_lb_main; + +#endif /* LB_PLUGIN_LB_LB_H_ */ diff --git a/src/plugins/lb/lb_plugin_doc.md b/src/plugins/lb/lb_plugin_doc.md new file mode 100644 index 00000000000..c7885ffb837 --- /dev/null +++ b/src/plugins/lb/lb_plugin_doc.md @@ -0,0 +1,141 @@ +# Load Balancer plugin for VPP {#lb_plugin_doc} + +## Version + +The load balancer plugin is currently in *beta* version. +Both CLIs and APIs are subject to *heavy* changes. +Wich also means feedback is really welcome regarding features, apis, etc... + +## Overview + +This plugin provides load balancing for VPP in a way that is largely inspired +from Google's MagLev: http://research.google.com/pubs/pub44824.html + +The load balancer is configured with a set of Virtual IPs (VIP, which can be +prefixes), and for each VIP, with a set of Application Server addresses (ASs). + +Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards +the different ASs in a way that (tries to) ensure that a given session will +always be tunneled to the same AS. + +Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using +the same encap. type (i.e. IPv4+GRE or IPv6+GRE). Meaning that for a given VIP, +all AS addresses must be of the same family. + +## Performances + +The load balancer has been tested up to 1 millions flows and still forwards more +than 3Mpps per core in such circumstances. +Although 3Mpps seems already good, it is likely that performances will be improved +in next versions. + +## Configuration + +### Global LB parameters + +The load balancer needs to be configured with some parameters: + + lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] + [buckets <n>] [timeout <s>] + +ip4-src-address: the source address used to send encap. packets using IPv4. + +ip6-src-address: the source address used to send encap. packets using IPv6. + +buckets: the *per-thread* established-connexions-table number of buckets. + +timeout: the number of seconds a connection will remain in the + established-connexions-table while no packet for this flow + is received. + + +### Configure the VIPs + + lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del] + +new_len is the size of the new-connection-table. It should be 1 or 2 orders of +magnitude bigger than the number of ASs for the VIP in order to ensure a good +load balancing. + +Examples: + + lb vip 2002::/16 encap gre6 new_len 1024 + lb vip 2003::/16 encap gre4 new_len 2048 + lb vip 80.0.0.0/8 encap gre6 new_len 16 + lb vip 90.0.0.0/8 encap gre4 new_len 1024 + +### Configure the ASs (for each VIP) + + lb as <vip-prefix> [<address> [<address> [...]]] [del] + +You can add (or delete) as many ASs at a time (for a single VIP). +Note that the AS address family must correspond to the VIP encap. IP family. + +Examples: + + lb as 2002::/16 2001::2 2001::3 2001::4 + lb as 2003::/16 10.0.0.1 10.0.0.2 + lb as 80.0.0.0/8 2001::2 + lb as 90.0.0.0/8 10.0.0.1 + + + +## Monitoring + +The plugin provides quite a bunch of counters and information. +These are still subject to quite significant changes. + + show lb + show lb vip + show lb vip verbose + + show node counters + + +## Design notes + +### Multi-Threading + +MagLev is a distributed system which pseudo-randomly generates a +new-connections-table based on AS names such that each server configured with +the same set of ASs ends up with the same table. Connection stickyness is then +ensured with an established-connections-table. Using ECMP, it is assumed (but +not relied on) that servers will mostly receive traffic for different flows. + +This implementation pushes the parallelism a little bit further by using +one established-connections table per thread. This is equivalent to assuming +that RSS will make a job similar to ECMP, and is pretty useful as threads don't +need to get a lock in order to write in the table. + +### Hash Table + +A load balancer requires an efficient read and write hash table. The hash table +used by ip6-forward is very read-efficient, but not so much for writing. In +addition, it is not a big deal if writing into the hash table fails (again, +MagLev uses a flow table but does not heaviliy relies on it). + +The plugin therefore uses a very specific (and stupid) hash table. + - Fixed (and power of 2) number of buckets (configured at runtime) + - Fixed (and power of 2) elements per buckets (configured at compilation time) + +### Reference counting + +When an AS is removed, there is two possible ways to react. + - Keep using the AS for established connections + - Change AS for established connections (likely to cause error for TCP) + +In the first case, although an AS is removed from the configuration, its +associated state needs to stay around as long as it is used by at least one +thread. + +In order to avoid locks, a specific reference counter is used. The design is quite +similar to clib counters but: + - It is possible to decrease the value + - Summing will not zero the per-thread counters + - Only the thread can reallocate its own counters vector (to avoid concurrency issues) + +This reference counter is lock free, but reading a count of 0 does not mean +the value can be freed unless it is ensured by *other* means that no other thread +is concurrently referencing the object. In the case of this plugin, it is assumed +that no concurrent event will take place after a few seconds. + diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c new file mode 100644 index 00000000000..8c2eaa91ce9 --- /dev/null +++ b/src/plugins/lb/lb_test.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> +#include <lb/lb.h> + +//TODO: Move that to vat/plugin_api.c +////////////////////////// +uword unformat_ip46_address (unformat_input_t * input, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + ip46_type_t type = va_arg (*args, ip46_type_t); + if ((type != IP46_TYPE_IP6) && + unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) { + ip46_address_mask_ip4(ip46); + return 1; + } else if ((type != IP46_TYPE_IP4) && + unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) { + return 1; + } + return 0; +} +uword unformat_ip46_prefix (unformat_input_t * input, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + u8 *len = va_arg (*args, u8 *); + ip46_type_t type = va_arg (*args, ip46_type_t); + + u32 l; + if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) { + if (l > 32) + return 0; + *len = l + 96; + ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0; + } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) { + if (l > 128) + return 0; + *len = l; + } else { + return 0; + } + return 1; +} +///////////////////////// + +#define vl_msg_id(n,h) n, +typedef enum { +#include <lb/lb.api.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +/* define message structures */ +#define vl_typedefs +#include <lb/lb.api.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <lb/lb.api.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <lb/lb.api.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <lb/lb.api.h> +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} lb_test_main_t; + +lb_test_main_t lb_test_main; + +#define foreach_standard_reply_retval_handler \ +_(lb_conf_reply) \ +_(lb_add_del_vip_reply) \ +_(lb_add_del_as_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = lb_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ + _(LB_CONF_REPLY, lb_conf_reply) \ + _(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \ + _(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memcpy (mp, &mps, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + lbtm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int api_lb_conf (vat_main_t * vam) +{ + lb_test_main_t *lbtm = &lb_test_main; + unformat_input_t *i = vam->input; + f64 timeout; + vl_api_lb_conf_t mps, *mp; + + if (!unformat(i, "%U %U %u %u", + unformat_ip4_address, &mps.ip4_src_address, + unformat_ip6_address, mps.ip6_src_address, + &mps.sticky_buckets_per_core, + &mps.flow_timeout)) { + errmsg ("invalid arguments\n"); + return -99; + } + + M(LB_CONF, lb_conf); S; W; + + /* NOTREACHED */ + return 0; +} + +static int api_lb_add_del_vip (vat_main_t * vam) +{ + lb_test_main_t *lbtm = &lb_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_lb_add_del_vip_t mps, *mp; + mps.is_del = 0; + mps.is_gre4 = 0; + + if (!unformat(i, "%U", + unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) { + errmsg ("invalid prefix\n"); + return -99; + } + + if (unformat(i, "gre4")) { + mps.is_gre4 = 1; + } else if (unformat(i, "gre6")) { + mps.is_gre4 = 0; + } else { + errmsg ("no encap\n"); + return -99; + } + + if (!unformat(i, "%d", &mps.new_flows_table_length)) { + errmsg ("no table lentgh\n"); + return -99; + } + + if (unformat(i, "del")) { + mps.is_del = 1; + } + + M(LB_ADD_DEL_VIP, lb_add_del_vip); S; W; + /* NOTREACHED */ + return 0; +} + +static int api_lb_add_del_as (vat_main_t * vam) +{ + lb_test_main_t *lbtm = &lb_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_lb_add_del_as_t mps, *mp; + mps.is_del = 0; + + if (!unformat(i, "%U %U", + unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY, + unformat_ip46_address, mps.as_address)) { + errmsg ("invalid prefix or address\n"); + return -99; + } + + if (unformat(i, "del")) { + mps.is_del = 1; + } + + M(LB_ADD_DEL_AS, lb_add_del_as); S; W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \ +_(lb_add_del_vip, "<ip-prefix> [gre4|gre6] <new_table_len> [del]") \ +_(lb_add_del_as, "<vip-ip-prefix> <address> [del]") + +void vat_api_hookup (vat_main_t *vam) +{ + lb_test_main_t * lbtm = &lb_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + lbtm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + lb_test_main_t * lbtm = &lb_test_main; + + u8 * name; + + lbtm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "lb_%08x%c", api_version, 0); + lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (lbtm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h new file mode 100644 index 00000000000..ca3cc143dc2 --- /dev/null +++ b/src/plugins/lb/lbhash.h @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * vppinfra already includes tons of different hash tables. + * MagLev flow table is a bit different. It has to be very efficient + * for both writing and reading operations. But it does not need to + * be 100% reliable (write can fail). It also needs to recycle + * old entries in a lazy way. + * + * This hash table is the most dummy hash table you can do. + * Fixed total size, fixed bucket size. + * Advantage is that it could be very efficient (maybe). + * + */ + +#ifndef LB_PLUGIN_LB_LBHASH_H_ +#define LB_PLUGIN_LB_LBHASH_H_ + +#include <vnet/vnet.h> + +#if defined (__SSE4_2__) +#include <immintrin.h> +#endif + +/* + * @brief Number of entries per bucket. + */ +#define LBHASH_ENTRY_PER_BUCKET 4 + +#define LB_HASH_DO_NOT_USE_SSE_BUCKETS 0 + +/* + * @brief One bucket contains 4 entries. + * Each bucket takes one 64B cache line in memory. + */ +typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 hash[LBHASH_ENTRY_PER_BUCKET]; + u32 timeout[LBHASH_ENTRY_PER_BUCKET]; + u32 vip[LBHASH_ENTRY_PER_BUCKET]; + u32 value[LBHASH_ENTRY_PER_BUCKET]; +} lb_hash_bucket_t; + +typedef struct { + u32 buckets_mask; + u32 timeout; + lb_hash_bucket_t buckets[]; +} lb_hash_t; + +#define lb_hash_nbuckets(h) (((h)->buckets_mask) + 1) +#define lb_hash_size(h) ((h)->buckets_mask + LBHASH_ENTRY_PER_BUCKET) + +#define lb_hash_foreach_bucket(h, bucket) \ + for (bucket = (h)->buckets; \ + bucket < (h)->buckets + lb_hash_nbuckets(h); \ + bucket++) + +#define lb_hash_foreach_entry(h, bucket, i) \ + lb_hash_foreach_bucket(h, bucket) \ + for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) + +#define lb_hash_foreach_valid_entry(h, bucket, i, now) \ + lb_hash_foreach_entry(h, bucket, i) \ + if (!clib_u32_loop_gt((now), bucket->timeout[i])) + +static_always_inline +lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout) +{ + if (!is_pow2(buckets)) + return NULL; + + // Allocate 1 more bucket for prefetch + u32 size = ((u64)&((lb_hash_t *)(0))->buckets[0]) + + sizeof(lb_hash_bucket_t) * (buckets + 1); + u8 *mem = 0; + lb_hash_t *h; + vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES); + h = (lb_hash_t *)mem; + h->buckets_mask = (buckets - 1); + h->timeout = timeout; + return h; +} + +static_always_inline +void lb_hash_free(lb_hash_t *h) +{ + u8 *mem = (u8 *)h; + vec_free(mem); +} + +#if __SSE4_2__ +static_always_inline +u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) +{ + u64 val = 0; + val = _mm_crc32_u64(val, k0); + val = _mm_crc32_u64(val, k1); + val = _mm_crc32_u64(val, k2); + val = _mm_crc32_u64(val, k3); + val = _mm_crc32_u64(val, k4); + return (u32) val; +} +#else +static_always_inline +u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4) +{ + u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4; + return (u32)clib_xxhash (tmp); +} +#endif + +static_always_inline +void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash) +{ + lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask]; + CLIB_PREFETCH(bucket, sizeof(*bucket), READ); +} + +static_always_inline +void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now, + u32 *available_index, u32 *found_value) +{ + lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask]; + *found_value = ~0; + *available_index = ~0; +#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0 + u32 bitmask, found_index; + __m128i mask; + + // mask[*] = timeout[*] > now + mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout), + _mm_set1_epi32 (time_now)); + // bitmask[*] = now <= timeout[*/4] + bitmask = (~_mm_movemask_epi8(mask)) & 0xffff; + // Get first index with now <= timeout[*], if any. + *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index; + + // mask[*] = (timeout[*] > now) && (hash[*] == hash) + mask = _mm_and_si128(mask, + _mm_cmpeq_epi32( + _mm_loadu_si128 ((__m128i *) bucket->hash), + _mm_set1_epi32 (hash))); + + // Load the array of vip values + // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip) + mask = _mm_and_si128(mask, + _mm_cmpeq_epi32( + _mm_loadu_si128 ((__m128i *) bucket->vip), + _mm_set1_epi32 (vip))); + + // mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip) + bitmask = _mm_movemask_epi8(mask); + // Get first index, if any + found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0; + ASSERT(found_index < 4); + *found_value = (bitmask)?bucket->value[found_index]:*found_value; + bucket->timeout[found_index] = + (bitmask)?time_now + ht->timeout:bucket->timeout[found_index]; +#else + u32 i; + for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) { + u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip); + u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]); + *found_value = (cmp || timeouted)?*found_value:bucket->value[i]; + bucket->timeout[i] = (cmp || timeouted)?time_now + ht->timeout:bucket->timeout[i]; + *available_index = (timeouted && (*available_index == ~0))?i:*available_index; + + if (!cmp) + return; + } +#endif +} + +static_always_inline +u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index) +{ + return h->buckets[hash & h->buckets_mask].value[available_index]; +} + +static_always_inline +void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip, + u32 available_index, u32 time_now) +{ + lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask]; + bucket->hash[available_index] = hash; + bucket->value[available_index] = value; + bucket->timeout[available_index] = time_now + h->timeout; + bucket->vip[available_index] = vip; +} + +static_always_inline +u32 lb_hash_elts(lb_hash_t *h, u32 time_now) +{ + u32 tot = 0; + lb_hash_bucket_t *bucket; + u32 i; + lb_hash_foreach_valid_entry(h, bucket, i, time_now) { + tot++; + } + return tot; +} + +#endif /* LB_PLUGIN_LB_LBHASH_H_ */ diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c new file mode 100644 index 00000000000..8b763c537d5 --- /dev/null +++ b/src/plugins/lb/node.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/lb.h> + +#include <vnet/gre/packet.h> +#include <lb/lbhash.h> + +#define foreach_lb_error \ + _(NONE, "no error") \ + _(PROTO_NOT_SUPPORTED, "protocol not supported") + +typedef enum { +#define _(sym,str) LB_ERROR_##sym, + foreach_lb_error +#undef _ + LB_N_ERROR, +} lb_error_t; + +static char *lb_error_strings[] = { +#define _(sym,string) string, + foreach_lb_error +#undef _ +}; + +typedef struct { + u32 vip_index; + u32 as_index; +} lb_trace_t; + +u8 * +format_lb_trace (u8 * s, va_list * args) +{ + lb_main_t *lbm = &lb_main; + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lb_trace_t *t = va_arg (*args, lb_trace_t *); + if (pool_is_free_index(lbm->vips, t->vip_index)) { + s = format(s, "lb vip[%d]: This VIP was freed since capture\n"); + } else { + s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]); + } + if (pool_is_free_index(lbm->ass, t->as_index)) { + s = format(s, "lb as[%d]: This AS was freed since capture\n"); + } else { + s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]); + } + return s; +} + +lb_hash_t *lb_get_sticky_table(u32 cpu_index) +{ + lb_main_t *lbm = &lb_main; + lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + //Check if size changed + if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) + { + //Dereference everything in there + lb_hash_bucket_t *b; + u32 i; + lb_hash_foreach_entry(sticky_ht, b, i) { + vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1); + vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1); + } + + lb_hash_free(sticky_ht); + sticky_ht = NULL; + } + + //Create if necessary + if (PREDICT_FALSE(sticky_ht == NULL)) { + lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout); + sticky_ht = lbm->per_cpu[cpu_index].sticky_ht; + clib_warning("Regenerated sticky table %p", sticky_ht); + } + + ASSERT(sticky_ht); + + //Update timeout + sticky_ht->timeout = lbm->flow_timeout; + return sticky_ht; +} + +u64 +lb_node_get_other_ports4(ip4_header_t *ip40) +{ + return 0; +} + +u64 +lb_node_get_other_ports6(ip6_header_t *ip60) +{ + return 0; +} + +static_always_inline u32 +lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4) +{ + u32 hash; + if (is_input_v4) + { + ip4_header_t *ip40; + u64 ports; + ip40 = vlib_buffer_get_current (p); + if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP || + ip40->protocol == IP_PROTOCOL_UDP)) + ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) | + ((u64)((udp_header_t *)(ip40 + 1))->dst_port); + else + ports = lb_node_get_other_ports4(ip40); + + hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports, + 0, 0, 0); + } + else + { + ip6_header_t *ip60; + ip60 = vlib_buffer_get_current (p); + u64 ports; + if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP || + ip60->protocol == IP_PROTOCOL_UDP)) + ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) | + ((u64)((udp_header_t *)(ip60 + 1))->dst_port); + else + ports = lb_node_get_other_ports6(ip60); + + hash = lb_hash_hash(ip60->src_address.as_u64[0], + ip60->src_address.as_u64[1], + ip60->dst_address.as_u64[0], + ip60->dst_address.as_u64[1], + ports); + } + return hash; +} + +static_always_inline uword +lb_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame, + u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6) + u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6) +{ + lb_main_t *lbm = &lb_main; + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + u32 cpu_index = os_get_cpu_number(); + u32 lb_time = lb_hash_time_now(vm); + + lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + u32 nexthash0 = 0; + if (PREDICT_TRUE(n_left_from > 0)) + nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0; + vlib_buffer_t *p0; + lb_vip_t *vip0; + u32 asindex0; + u16 len0; + u32 available_index0; + u8 counter = 0; + u32 hash0 = nexthash0; + + if (PREDICT_TRUE(n_left_from > 1)) + { + vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]); + //Compute next hash and prefetch bucket + nexthash0 = lb_node_get_hash(p1, is_input_v4); + lb_hash_prefetch_bucket(sticky_ht, nexthash0); + //Prefetch for encap, next + CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE); + } + + if (PREDICT_TRUE(n_left_from > 2)) + { + vlib_buffer_t *p2; + p2 = vlib_get_buffer(vm, from[2]); + /* prefetch packet header and data */ + vlib_prefetch_buffer_header(p2, STORE); + CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE); + } + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + vip0 = pool_elt_at_index (lbm->vips, + vnet_buffer (p0)->ip.adj_index[VLIB_TX]); + + if (is_input_v4) + { + ip4_header_t *ip40; + ip40 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip40->length); + } + else + { + ip6_header_t *ip60; + ip60 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t); + } + + lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX], + lb_time, &available_index0, &asindex0); + + if (PREDICT_TRUE(asindex0 != ~0)) + { + //Found an existing entry + counter = LB_VIP_COUNTER_NEXT_PACKET; + } + else if (PREDICT_TRUE(available_index0 != ~0)) + { + //There is an available slot for a new flow + asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index; + counter = LB_VIP_COUNTER_FIRST_PACKET; + counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter; + + //TODO: There are race conditions with as0 and vip0 manipulation. + //Configuration may be changed, vectors resized, etc... + + //Dereference previously used + vlib_refcount_add(&lbm->as_refcount, cpu_index, + lb_hash_available_value(sticky_ht, hash0, available_index0), -1); + vlib_refcount_add(&lbm->as_refcount, cpu_index, + asindex0, 1); + + //Add sticky entry + //Note that when there is no AS configured, an entry is configured anyway. + //But no configured AS is not something that should happen + lb_hash_put(sticky_ht, hash0, asindex0, + vnet_buffer (p0)->ip.adj_index[VLIB_TX], + available_index0, lb_time); + } + else + { + //Could not store new entry in the table + asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index; + counter = LB_VIP_COUNTER_UNTRACKED_PACKET; + } + + vlib_increment_simple_counter(&lbm->vip_counters[counter], + cpu_index, + vnet_buffer (p0)->ip.adj_index[VLIB_TX], + 1); + + //Now let's encap + { + gre_header_t *gre0; + if (is_encap_v4) + { + ip4_header_t *ip40; + vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t)); + ip40 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip40 + 1); + ip40->src_address = lbm->ip4_src_address; + ip40->dst_address = lbm->ass[asindex0].address.ip4; + ip40->ip_version_and_header_length = 0x45; + ip40->ttl = 128; + ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t)); + ip40->protocol = IP_PROTOCOL_GRE; + ip40->checksum = ip4_header_checksum (ip40); + } + else + { + ip6_header_t *ip60; + vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t)); + ip60 = vlib_buffer_get_current(p0); + gre0 = (gre_header_t *)(ip60 + 1); + ip60->dst_address = lbm->ass[asindex0].address.ip6; + ip60->src_address = lbm->ip6_src_address; + ip60->hop_limit = 128; + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28); + ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t)); + ip60->protocol = IP_PROTOCOL_GRE; + } + + gre0->flags_and_version = 0; + gre0->protocol = (is_input_v4)? + clib_host_to_net_u16(0x0800): + clib_host_to_net_u16(0x86DD); + } + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->as_index = asindex0; + tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + } + + //Enqueue to next + //Note that this is going to error if asindex0 == 0 + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, + lbm->ass[asindex0].dpo.dpoi_next_node); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +lb6_gre6_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return lb_node_fn(vm, node, frame, 0, 0); +} + +static uword +lb6_gre4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return lb_node_fn(vm, node, frame, 0, 1); +} + +static uword +lb4_gre6_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return lb_node_fn(vm, node, frame, 1, 0); +} + +static uword +lb4_gre4_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return lb_node_fn(vm, node, frame, 1, 1); +} + +VLIB_REGISTER_NODE (lb6_gre6_node) = +{ + .function = lb6_gre6_node_fn, + .name = "lb6-gre6", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + + .n_next_nodes = LB_N_NEXT, + .next_nodes = + { + [LB_NEXT_DROP] = "error-drop" + }, +}; + +VLIB_REGISTER_NODE (lb6_gre4_node) = +{ + .function = lb6_gre4_node_fn, + .name = "lb6-gre4", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + + .n_next_nodes = LB_N_NEXT, + .next_nodes = + { + [LB_NEXT_DROP] = "error-drop" + }, +}; + +VLIB_REGISTER_NODE (lb4_gre6_node) = +{ + .function = lb4_gre6_node_fn, + .name = "lb4-gre6", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + + .n_next_nodes = LB_N_NEXT, + .next_nodes = + { + [LB_NEXT_DROP] = "error-drop" + }, +}; + +VLIB_REGISTER_NODE (lb4_gre4_node) = +{ + .function = lb4_gre4_node_fn, + .name = "lb4-gre4", + .vector_size = sizeof (u32), + .format_trace = format_lb_trace, + + .n_errors = LB_N_ERROR, + .error_strings = lb_error_strings, + + .n_next_nodes = LB_N_NEXT, + .next_nodes = + { + [LB_NEXT_DROP] = "error-drop" + }, +}; + diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c new file mode 100644 index 00000000000..22415c8889e --- /dev/null +++ b/src/plugins/lb/refcount.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/refcount.h> + +void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size) +{ + u32 *new_counter = 0, *old_counter; + vec_validate(new_counter, size); + memcpy(new_counter, per_cpu->counters, per_cpu->length); + old_counter = per_cpu->counters; + per_cpu->counters = new_counter; + CLIB_MEMORY_BARRIER(); + per_cpu->length = vec_len(new_counter); + vec_free(old_counter); +} + +u64 vlib_refcount_get(vlib_refcount_t *r, u32 index) +{ + u64 count = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 cpu_index; + for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) { + if (r->per_cpu[cpu_index].length > index) + count += r->per_cpu[cpu_index].counters[index]; + } + return count; +} + diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h new file mode 100644 index 00000000000..8c26e7be76f --- /dev/null +++ b/src/plugins/lb/refcount.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * vlib provides lock-free counters but those + * - Have 16bits per-CPU counter, which may overflow. + * - Would only increment. + * + * This is very similar to vlib counters, but may be used to count reference. + * Such a counter includes an arbitrary number of counters. Each counter + * is identified by its index. This is used to aggregate per-cpu memory. + * + * Warning: + * This reference counter is lock-free but is not race-condition free. + * The counting result is approximate and another mechanism needs to be used + * in order to ensure that an object may be freed. + * + */ + +#include <vnet/vnet.h> + +typedef struct { + u32 *counters; + u32 length; + u32 *reader_lengths; + CLIB_CACHE_LINE_ALIGN_MARK(o); +} vlib_refcount_per_cpu_t; + +typedef struct { + vlib_refcount_per_cpu_t *per_cpu; +} vlib_refcount_t; + +void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size); + +static_always_inline +void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v) +{ + vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index]; + if (PREDICT_FALSE(counter_index >= per_cpu->length)) + __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2)); + + per_cpu->counters[counter_index] += v; +} + +u64 vlib_refcount_get(vlib_refcount_t *r, u32 index); + +static_always_inline +void vlib_refcount_init(vlib_refcount_t *r) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + r->per_cpu = 0; + vec_validate (r->per_cpu, tm->n_vlib_mains - 1); +} + + diff --git a/src/plugins/lb/util.c b/src/plugins/lb/util.c new file mode 100644 index 00000000000..d969d168dce --- /dev/null +++ b/src/plugins/lb/util.c @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <lb/util.h> + +void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen) +{ + if (plen == 0) { + prefix->as_u64[0] = 0; + prefix->as_u64[1] = 0; + } else if (plen <= 64) { + prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen)); + prefix->as_u64[1] = 0; + } else { + prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen)); + } + +} + +uword unformat_ip46_prefix (unformat_input_t * input, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + u8 *len = va_arg (*args, u8 *); + ip46_type_t type = va_arg (*args, ip46_type_t); + + u32 l; + if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) { + if (l > 32) + return 0; + *len = l + 96; + ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0; + } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) { + if (l > 128) + return 0; + *len = l; + } else { + return 0; + } + return 1; +} + +u8 *format_ip46_prefix (u8 * s, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + u32 len = va_arg (*args, u32); //va_arg cannot use u8 or u16 + ip46_type_t type = va_arg (*args, ip46_type_t); + + int is_ip4 = 0; + if (type == IP46_TYPE_IP4) + is_ip4 = 1; + else if (type == IP46_TYPE_IP6) + is_ip4 = 0; + else + is_ip4 = (len >= 96) && ip46_address_is_ip4(ip46); + + return is_ip4 ? + format(s, "%U/%d", format_ip4_address, &ip46->ip4, len - 96): + format(s, "%U/%d", format_ip6_address, &ip46->ip6, len); +} + diff --git a/src/plugins/lb/util.h b/src/plugins/lb/util.h new file mode 100644 index 00000000000..3f082310b69 --- /dev/null +++ b/src/plugins/lb/util.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Non-LB specific stuff comes here + */ + +#ifndef LB_PLUGIN_LB_UTIL_H_ +#define LB_PLUGIN_LB_UTIL_H_ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6) +#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46)) +#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6) + +void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen); +uword unformat_ip46_prefix (unformat_input_t * input, va_list * args); +u8 *format_ip46_prefix (u8 * s, va_list * args); + +/** + * 32 bits integer comparison for running values. + * 1 > 0 is true. But 1 > 0xffffffff also is. + */ +#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff) + +#endif /* LB_PLUGIN_LB_UTIL_H_ */ diff --git a/src/plugins/snat.am b/src/plugins/snat.am new file mode 100644 index 00000000000..7ff2386ec83 --- /dev/null +++ b/src/plugins/snat.am @@ -0,0 +1,33 @@ + +# Copyright (c) <current-year> <your-organization> +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += snat_test_plugin.la +vppplugins_LTLIBRARIES += snat_plugin.la + +snat_plugin_la_SOURCES = snat/snat.c \ + snat/in2out.c \ + snat/out2in.c \ + snat/snat_plugin.api.h + +API_FILES += snat/snat.api + +nobase_apiinclude_HEADERS += \ + snat/snat_all_api_h.h \ + snat/snat_msg_enum.h \ + snat/snat.api.h + +snat_test_plugin_la_SOURCES = \ + snat/snat_test.c snat/snat_plugin.api.h + +# vi:syntax=automake diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c new file mode 100644 index 00000000000..c78fdd76631 --- /dev/null +++ b/src/plugins/snat/in2out.c @@ -0,0 +1,1597 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/handoff.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <snat/snat.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 session_index; + u32 is_slow_path; +} snat_in2out_trace_t; + +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_in2out_worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_snat_in2out_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *); + char * tag; + + tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH"; + + s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, + t->sw_if_index, t->next_index, t->session_index); + + return s; +} + +static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *); + + s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + + return s; +} + +static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_in2out_worker_handoff_trace_t * t = + va_arg (*args, snat_in2out_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +vlib_node_registration_t snat_in2out_node; +vlib_node_registration_t snat_in2out_slowpath_node; +vlib_node_registration_t snat_in2out_fast_node; +vlib_node_registration_t snat_in2out_worker_handoff_node; + +#define foreach_snat_in2out_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(IN2OUT_PACKETS, "Good in2out packets processed") \ +_(OUT_OF_PORTS, "Out of ports") \ +_(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \ +_(BAD_ICMP_TYPE, "icmp type not echo-request") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) SNAT_IN2OUT_ERROR_##sym, + foreach_snat_in2out_error +#undef _ + SNAT_IN2OUT_N_ERROR, +} snat_in2out_error_t; + +static char * snat_in2out_error_strings[] = { +#define _(sym,string) string, + foreach_snat_in2out_error +#undef _ +}; + +typedef enum { + SNAT_IN2OUT_NEXT_LOOKUP, + SNAT_IN2OUT_NEXT_DROP, + SNAT_IN2OUT_NEXT_SLOW_PATH, + SNAT_IN2OUT_N_NEXT, +} snat_in2out_next_t; + +static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0, + ip4_header_t * ip0, + u32 rx_fib_index0, + snat_session_key_t * key0, + snat_session_t ** sessionp, + vlib_node_runtime_t * node, + u32 next0, + u32 cpu_index) +{ + snat_user_t *u; + snat_user_key_t user_key; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0, value0; + u32 oldest_per_user_translation_list_index; + dlist_elt_t * oldest_per_user_translation_list_elt; + dlist_elt_t * per_user_translation_list_elt; + dlist_elt_t * per_user_list_head_elt; + u32 session_index; + snat_session_key_t key1; + u32 address_index = ~0; + u32 outside_fib_index; + uword * p; + snat_static_mapping_key_t worker_by_out_key; + + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); + if (! p) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB]; + return SNAT_IN2OUT_NEXT_DROP; + } + outside_fib_index = p[0]; + + user_key.addr = ip0->src_address; + user_key.fib_index = rx_fib_index0; + kv0.key = user_key.as_u64; + + /* Ever heard of the "user" = src ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + { + /* no, make a new one */ + pool_get (sm->per_thread_data[cpu_index].users, u); + memset (u, 0, sizeof (*u)); + u->addr = ip0->src_address; + + pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt); + + u->sessions_per_user_list_head_index = per_user_list_head_elt - + sm->per_thread_data[cpu_index].list_pool; + + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + kv0.value = u - sm->per_thread_data[cpu_index].users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + } + else + { + u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + value0.value); + } + + /* Over quota? Recycle the least recently used dynamic translation */ + if (u->nsessions >= sm->max_translations_per_user) + { + /* Remove the oldest dynamic translation */ + do { + oldest_per_user_translation_list_index = + clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + ASSERT (oldest_per_user_translation_list_index != ~0); + + /* add it back to the end of the LRU list */ + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index, + oldest_per_user_translation_list_index); + /* Get the list element */ + oldest_per_user_translation_list_elt = + pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool, + oldest_per_user_translation_list_index); + + /* Get the session index from the list element */ + session_index = oldest_per_user_translation_list_elt->value; + + /* Get the session */ + s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + session_index); + } while (snat_is_session_static (s)); + + /* Remove in2out, out2in keys */ + kv0.key = s->in2out.as_u64; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */)) + clib_warning ("in2out key delete failed"); + kv0.key = s->out2in.as_u64; + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */)) + clib_warning ("out2in key delete failed"); + + snat_free_outside_address_and_port + (sm, &s->out2in, s->outside_address_index); + s->outside_address_index = ~0; + + if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + { + ASSERT(0); + + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + return SNAT_IN2OUT_NEXT_DROP; + } + s->outside_address_index = address_index; + } + else + { + u8 static_mapping = 1; + + /* First try to match static mapping by local address and port */ + if (snat_static_mapping_match (sm, *key0, &key1, 0)) + { + static_mapping = 0; + /* Try to create dynamic translation */ + if (snat_alloc_outside_address_and_port (sm, &key1, &address_index)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS]; + return SNAT_IN2OUT_NEXT_DROP; + } + } + + /* Create a new session */ + pool_get (sm->per_thread_data[cpu_index].sessions, s); + memset (s, 0, sizeof (*s)); + + s->outside_address_index = address_index; + + if (static_mapping) + { + u->nstaticsessions++; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + } + else + { + u->nsessions++; + } + + /* Create list elts */ + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[cpu_index].sessions; + s->per_user_index = per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + } + + s->in2out = *key0; + s->out2in = key1; + s->out2in.protocol = key0->protocol; + s->out2in.fib_index = outside_fib_index; + *sessionp = s; + + /* Add to translation hashes */ + kv0.key = s->in2out.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + clib_warning ("in2out key add failed"); + + kv0.key = s->out2in.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + clib_warning ("out2in key add failed"); + + /* Add to translated packets worker lookup */ + worker_by_out_key.addr = s->out2in.addr; + worker_by_out_key.port = s->out2in.port; + worker_by_out_key.fib_index = s->out2in.fib_index; + kv0.key = worker_by_out_key.as_u64; + kv0.value = cpu_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + return next0; +} + +static inline u32 icmp_in2out_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, + f64 now, + u32 cpu_index) +{ + snat_session_key_t key0; + icmp_echo_header_t *echo0; + clib_bihash_kv_8_8_t kv0, value0; + snat_session_t * s0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE]; + return SNAT_IN2OUT_NEXT_DROP; + } + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->src_address; + key0.port = echo0->identifier; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + return next0; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + old_id0 = echo0->identifier; + new_id0 = s0->out2in.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translations */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + return next0; +} + +/** + * @brief Hairpinning + * + * Hairpinning allows two endpoints on the internal side of the NAT to + * communicate even if they only use each other's external IP addresses + * and ports. + * + * @param sm SNAT main. + * @param b0 Vlib buffer. + * @param ip0 IP header. + * @param udp0 UDP header. + * @param tcp0 TCP header. + * @param proto0 SNAT protocol. + */ +static inline void +snat_hairpinning (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + udp_header_t * udp0, + tcp_header_t * tcp0, + u32 proto0) +{ + snat_session_key_t key0, sm0; + snat_static_mapping_key_t k0; + snat_session_t * s0; + clib_bihash_kv_8_8_t kv0, value0; + ip_csum_t sum0; + u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si; + u16 new_dst_port0, old_dst_port0; + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = sm->outside_fib_index; + kv0.key = key0.as_u64; + + /* Check if destination is in active sessions */ + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* or static mappings */ + if (!snat_static_mapping_match(sm, key0, &sm0, 1)) + { + new_dst_addr0 = sm0.addr.as_u32; + new_dst_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + } + } + else + { + si = value0.value; + if (sm->num_workers > 1) + { + k0.addr = ip0->dst_address; + k0.port = udp0->dst_port; + k0.fib_index = sm->outside_fib_index; + kv0.key = k0.as_u64; + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + ASSERT(0); + else + ti = value0.value; + } + else + ti = sm->num_workers; + + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + new_dst_port0 = s0->in2out.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + } + + /* Destination is behind the same NAT, use internal address and port */ + if (new_dst_addr0) + { + old_dst_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_dst_addr0; + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + ip0->checksum = ip_csum_fold (sum0); + + old_dst_port0 = tcp0->ports.dst; + if (PREDICT_TRUE(new_dst_port0 != old_dst_port0)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + tcp0->ports.dst = new_dst_port0; + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, + ip4_header_t, dst_address); + sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0, + ip4_header_t /* cheat */, length); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + udp0->dst_port = new_dst_port0; + udp0->checksum = 0; + } + } + } +} + +static inline uword +snat_in2out_node_fn_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_slow_path) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + f64 now = vlib_time_now (vm); + u32 stats_node_index; + u32 cpu_index = os_get_cpu_number (); + + stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index : + snat_in2out_node.index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0, next1; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, * ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0, new_addr1, old_addr1; + u16 old_port0, new_port0, old_port1, new_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + icmp46_header_t * icmp0, * icmp1; + snat_session_key_t key0, key1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t * s0 = 0, * s1 = 0; + clib_bihash_kv_8_8_t kv0, value0, kv1, value1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP; + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, + node, next0, now, cpu_index); + goto trace00; + } + } + else + { + if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace00; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace00; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace00; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = s0->out2in.port; + new_port0 = tcp0->ports.src; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + proto1 = ~0; + proto1 = (ip1->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto1; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto1 == ~0)) + goto trace01; + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = icmp_in2out_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, cpu_index); + goto trace01; + } + } + else + { + if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + + key1.addr = ip1->src_address; + key1.port = udp1->src_port; + key1.protocol = proto1; + key1.fib_index = rx_fib_index1; + + kv1.key = key1.as_u64; + + if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index1, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index1; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip1->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace01; + + next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1, + &s1, node, next1, cpu_index); + if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP)) + goto trace01; + } + else + { + next1 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace01; + } + } + else + s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value1.value); + + old_addr1 = ip1->src_address.as_u32; + ip1->src_address = s1->out2in.addr; + new_addr1 = ip1->src_address.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + src_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + old_port1 = tcp1->ports.src; + tcp1->ports.src = s1->out2in.port; + new_port1 = tcp1->ports.src; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->src_port; + udp1->src_port = s1->out2in.port; + udp1->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1); + + /* Accounting */ + s1->last_heard = now; + s1->total_pkts++; + s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s1)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_list_head_index, + s1->per_user_index); + } + trace01: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t * s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + /* Next configured feature, probably ip4-lookup */ + if (is_slow_path) + { + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_in2out_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace0; + } + } + else + { + if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0)) + { + if (is_slow_path) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace0; + + next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0, + &s0, node, next0, cpu_index); + if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP)) + goto trace0; + } + else + { + next0 = SNAT_IN2OUT_NEXT_SLOW_PATH; + goto trace0; + } + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->src_address.as_u32; + ip0->src_address = s0->out2in.addr; + new_addr0 = ip0->src_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = s0->out2in.port; + new_port0 = tcp0->ports.src; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = s0->out2in.port; + udp0->checksum = 0; + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->is_slow_path = is_slow_path; + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, stats_node_index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +static uword +snat_in2out_fast_path_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */); +} + +VLIB_REGISTER_NODE (snat_in2out_node) = { + .function = snat_in2out_fast_path_fn, + .name = "snat-in2out", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn); + +static uword +snat_in2out_slow_path_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */); +} + +VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = { + .function = snat_in2out_slow_path_fn, + .name = "snat-in2out-slowpath", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn); + +static uword +snat_in2out_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number (); + + ASSERT (vec_len (sm->workers)); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + snat_user_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + + key0.addr = ip0->src_address; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? */ + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + /* add non-traslated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + next_worker_index = value0.value; + + if (PREDICT_FALSE (next_worker_index != cpu_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, snat_in2out_node.index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, snat_in2out_node.index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = { + .function = snat_in2out_worker_handoff_fn, + .name = "snat-in2out-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn); + +static inline u32 icmp_in2out_static_map (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 rx_fib_index0) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->src_address; + key0.port = echo0->identifier; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + return SNAT_IN2OUT_NEXT_DROP; + } + + new_addr0 = sm0.addr.as_u32; + new_id0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + + return next0; +} + +static uword +snat_in2out_fast_static_map_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_in2out_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + u32 stats_node_index; + + stats_node_index = snat_in2out_fast_node.index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 old_port0, new_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 proto0; + u32 rx_fib_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + next0 = SNAT_IN2OUT_NEXT_LOOKUP; + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + goto trace0; + + next0 = icmp_in2out_static_map + (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + goto trace0; + } + + key0.addr = ip0->src_address; + key0.port = udp0->src_port; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 0)) + { + b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION]; + next0= SNAT_IN2OUT_NEXT_DROP; + goto trace0; + } + + new_addr0 = sm0.addr.as_u32; + new_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->src_address.as_u32; + ip0->src_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + src_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_port0 != udp0->dst_port)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.src; + tcp0->ports.src = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->src_port; + udp0->src_port = new_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + } + + /* Hairpinning */ + snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0); + + trace0: + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_in2out_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, stats_node_index, + SNAT_IN2OUT_ERROR_IN2OUT_PACKETS, + pkts_processed); + return frame->n_vectors; +} + + +VLIB_REGISTER_NODE (snat_in2out_fast_node) = { + .function = snat_in2out_fast_static_map_fn, + .name = "snat-in2out-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_in2out_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_in2out_error_strings), + .error_strings = snat_in2out_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_IN2OUT_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_IN2OUT_NEXT_DROP] = "error-drop", + [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn); diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c new file mode 100644 index 00000000000..f1f4159cdce --- /dev/null +++ b/src/plugins/snat/out2in.c @@ -0,0 +1,1261 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/handoff.h> + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/ip4_fib.h> +#include <snat/snat.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +typedef struct { + u32 sw_if_index; + u32 next_index; + u32 session_index; +} snat_out2in_trace_t; + +typedef struct { + u32 next_worker_index; + u8 do_handoff; +} snat_out2in_worker_handoff_trace_t; + +/* packet trace format function */ +static u8 * format_snat_out2in_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "SNAT_OUT2IN: sw_if_index %d, next index %d, session index %d", + t->sw_if_index, t->next_index, t->session_index); + return s; +} + +static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *); + + s = format (s, "SNAT_OUT2IN_FAST: sw_if_index %d, next index %d", + t->sw_if_index, t->next_index); + return s; +} + +static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + snat_out2in_worker_handoff_trace_t * t = + va_arg (*args, snat_out2in_worker_handoff_trace_t *); + char * m; + + m = t->do_handoff ? "next worker" : "same worker"; + s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index); + + return s; +} + +vlib_node_registration_t snat_out2in_node; +vlib_node_registration_t snat_out2in_fast_node; +vlib_node_registration_t snat_out2in_worker_handoff_node; + +#define foreach_snat_out2in_error \ +_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \ +_(OUT2IN_PACKETS, "Good out2in packets processed") \ +_(BAD_ICMP_TYPE, "icmp type not echo-reply") \ +_(NO_TRANSLATION, "No translation") + +typedef enum { +#define _(sym,str) SNAT_OUT2IN_ERROR_##sym, + foreach_snat_out2in_error +#undef _ + SNAT_OUT2IN_N_ERROR, +} snat_out2in_error_t; + +static char * snat_out2in_error_strings[] = { +#define _(sym,string) string, + foreach_snat_out2in_error +#undef _ +}; + +typedef enum { + SNAT_OUT2IN_NEXT_DROP, + SNAT_OUT2IN_NEXT_LOOKUP, + SNAT_OUT2IN_N_NEXT, +} snat_out2in_next_t; + +/** + * @brief Create session for static mapping. + * + * Create NAT session initiated by host from external network with static + * mapping. + * + * @param sm SNAT main. + * @param b0 Vlib buffer. + * @param in2out In2out SNAT session key. + * @param out2in Out2in SNAT session key. + * @param node Vlib node. + * + * @returns SNAT session if successfully created otherwise 0. + */ +static inline snat_session_t * +create_session_for_static_mapping (snat_main_t *sm, + vlib_buffer_t *b0, + snat_session_key_t in2out, + snat_session_key_t out2in, + vlib_node_runtime_t * node, + u32 cpu_index) +{ + snat_user_t *u; + snat_user_key_t user_key; + snat_session_t *s; + clib_bihash_kv_8_8_t kv0, value0; + dlist_elt_t * per_user_translation_list_elt; + dlist_elt_t * per_user_list_head_elt; + + user_key.addr = in2out.addr; + user_key.fib_index = in2out.fib_index; + kv0.key = user_key.as_u64; + + /* Ever heard of the "user" = inside ip4 address before? */ + if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0)) + { + /* no, make a new one */ + pool_get (sm->per_thread_data[cpu_index].users, u); + memset (u, 0, sizeof (*u)); + u->addr = in2out.addr; + + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_list_head_elt); + + u->sessions_per_user_list_head_index = per_user_list_head_elt - + sm->per_thread_data[cpu_index].list_pool; + + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + u->sessions_per_user_list_head_index); + + kv0.value = u - sm->per_thread_data[cpu_index].users; + + /* add user */ + clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */); + + /* add non-traslated packets worker lookup */ + kv0.value = cpu_index; + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1); + } + else + { + u = pool_elt_at_index (sm->per_thread_data[cpu_index].users, + value0.value); + } + + pool_get (sm->per_thread_data[cpu_index].sessions, s); + memset (s, 0, sizeof (*s)); + + s->outside_address_index = ~0; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + u->nstaticsessions++; + + /* Create list elts */ + pool_get (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt); + clib_dlist_init (sm->per_thread_data[cpu_index].list_pool, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + per_user_translation_list_elt->value = + s - sm->per_thread_data[cpu_index].sessions; + s->per_user_index = + per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool; + s->per_user_list_head_index = u->sessions_per_user_list_head_index; + + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s->per_user_list_head_index, + per_user_translation_list_elt - + sm->per_thread_data[cpu_index].list_pool); + + s->in2out = in2out; + s->out2in = out2in; + s->in2out.protocol = out2in.protocol; + + /* Add to translation hashes */ + kv0.key = s->in2out.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */)) + clib_warning ("in2out key add failed"); + + kv0.key = s->out2in.as_u64; + kv0.value = s - sm->per_thread_data[cpu_index].sessions; + + if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */)) + clib_warning ("out2in key add failed"); + + return s; +} + +static inline u32 icmp_out2in_slow_path (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + u32 rx_fib_index0, + vlib_node_runtime_t * node, + u32 next0, f64 now, + u32 cpu_index) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + clib_bihash_kv_8_8_t kv0, value0; + snat_session_t * s0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->dst_address; + key0.port = echo0->identifier; + key0.protocol = SNAT_PROTOCOL_ICMP; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return SNAT_OUT2IN_NEXT_DROP; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, + node, cpu_index); + if (!s0) + return SNAT_OUT2IN_NEXT_DROP; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + old_id0 = echo0->identifier; + new_id0 = s0->in2out.port; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + + return next0; +} + +static uword +snat_out2in_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + f64 now = vlib_time_now (vm); + u32 cpu_index = os_get_cpu_number (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0, sw_if_index1; + ip4_header_t * ip0, *ip1; + ip_csum_t sum0, sum1; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + u32 new_addr1, old_addr1; + u16 new_port1, old_port1; + udp_header_t * udp0, * udp1; + tcp_header_t * tcp0, * tcp1; + icmp46_header_t * icmp0, * icmp1; + snat_session_key_t key0, key1, sm0, sm1; + u32 rx_fib_index0, rx_fib_index1; + u32 proto0, proto1; + snat_session_t * s0 = 0, * s1 = 0; + clib_bihash_kv_8_8_t kv0, kv1, value0, value1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace0; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace0; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace0; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); + if (!s0) + goto trace0; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = s0->in2out.port; + new_port0 = tcp0->ports.dst; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace0: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + + ip1 = vlib_buffer_get_current (b1); + udp1 = ip4_next_header (ip1); + tcp1 = (tcp_header_t *) udp1; + icmp1 = (icmp46_header_t *) udp1; + + sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; + rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index1); + + proto1 = ~0; + proto1 = (ip1->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto1; + proto1 = (ip1->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto1; + + if (PREDICT_FALSE (proto1 == ~0)) + goto trace1; + + if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP)) + { + next1 = icmp_out2in_slow_path + (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, + next1, now, cpu_index); + goto trace1; + } + + key1.addr = ip1->dst_address; + key1.port = udp1->dst_port; + key1.protocol = proto1; + key1.fib_index = rx_fib_index1; + + kv1.key = key1.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key1, &sm1, 1)) + { + b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace1; + } + + /* Create session initiated by host from external network */ + s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node, + cpu_index); + if (!s1) + goto trace1; + } + else + s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value1.value); + + old_addr1 = ip1->dst_address.as_u32; + ip1->dst_address = s1->in2out.addr; + new_addr1 = ip1->dst_address.as_u32; + vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index; + + sum1 = ip1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + ip1->checksum = ip_csum_fold (sum1); + + if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP)) + { + old_port1 = tcp1->ports.dst; + tcp1->ports.dst = s1->in2out.port; + new_port1 = tcp1->ports.dst; + + sum1 = tcp1->checksum; + sum1 = ip_csum_update (sum1, old_addr1, new_addr1, + ip4_header_t, + dst_address /* changed member */); + + sum1 = ip_csum_update (sum1, old_port1, new_port1, + ip4_header_t /* cheat */, + length /* changed member */); + tcp1->checksum = ip_csum_fold(sum1); + } + else + { + old_port1 = udp1->dst_port; + udp1->dst_port = s1->in2out.port; + udp1->checksum = 0; + } + + /* Accounting */ + s1->last_heard = now; + s1->total_pkts++; + s1->total_bytes += vlib_buffer_length_in_chain (vm, b1); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s1)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s1->per_user_list_head_index, + s1->per_user_index); + } + trace1: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->sw_if_index = sw_if_index1; + t->next_index = next1; + t->session_index = ~0; + if (s1) + t->session_index = s1 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 rx_fib_index0; + u32 proto0; + snat_session_t * s0 = 0; + clib_bihash_kv_8_8_t kv0, value0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index, + sw_if_index0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_slow_path + (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, + next0, now, cpu_index); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.protocol = proto0; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0)) + { + /* Try to match static mapping by external address and port, + destination address and port in packet */ + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + /* Create session initiated by host from external network */ + s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node, + cpu_index); + if (!s0) + goto trace00; + } + else + s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions, + value0.value); + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address = s0->in2out.addr; + new_addr0 = ip0->dst_address.as_u32; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = s0->in2out.port; + new_port0 = tcp0->ports.dst; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = s0->in2out.port; + udp0->checksum = 0; + } + + /* Accounting */ + s0->last_heard = now; + s0->total_pkts++; + s0->total_bytes += vlib_buffer_length_in_chain (vm, b0); + /* Per-user LRU list maintenance for dynamic translation */ + if (!snat_is_session_static (s0)) + { + clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_index); + clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool, + s0->per_user_list_head_index, + s0->per_user_index); + } + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + t->session_index = ~0; + if (s0) + t->session_index = s0 - sm->per_thread_data[cpu_index].sessions; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_node) = { + .function = snat_out2in_node_fn, + .name = "snat-out2in", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn); + +static uword +snat_out2in_worker_handoff_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + snat_main_t *sm = &snat_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + u32 n_left_from, *from, *to_next = 0; + static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index; + static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index + = 0; + vlib_frame_queue_elt_t *hf = 0; + vlib_frame_t *f = 0; + int i; + u32 n_left_to_next_worker = 0, *to_next_worker = 0; + u32 next_worker_index = 0; + u32 current_worker_index = ~0; + u32 cpu_index = os_get_cpu_number (); + + ASSERT (vec_len (sm->workers)); + + if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0)) + { + vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1); + + vec_validate_init_empty (congested_handoff_queue_by_worker_index, + sm->first_worker_index + sm->num_workers - 1, + (vlib_frame_queue_t *) (~0)); + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 sw_if_index0; + u32 rx_fib_index0; + ip4_header_t * ip0; + udp_header_t * udp0; + snat_static_mapping_key_t key0; + clib_bihash_kv_8_8_t kv0, value0; + u8 do_handoff; + + bi0 = from[0]; + from += 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + kv0.key = key0.as_u64; + + /* Ever heard of of the "user" before? */ + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + key0.port = 0; + kv0.key = key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0)) + { + /* No, assign next available worker (RR) */ + next_worker_index = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + } + else + { + /* Static mapping without port */ + next_worker_index = value0.value; + } + + /* Add to translated packets worker lookup */ + kv0.value = next_worker_index; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1); + } + else + next_worker_index = value0.value; + + if (PREDICT_FALSE (next_worker_index != cpu_index)) + { + do_handoff = 1; + + if (next_worker_index != current_worker_index) + { + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index, + next_worker_index, + handoff_queue_elt_by_worker_index); + + n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors; + to_next_worker = &hf->buffer_index[hf->n_vectors]; + current_worker_index = next_worker_index; + } + + /* enqueue to correct worker thread */ + to_next_worker[0] = bi0; + to_next_worker++; + n_left_to_next_worker--; + + if (n_left_to_next_worker == 0) + { + hf->n_vectors = VLIB_FRAME_SIZE; + vlib_put_frame_queue_elt (hf); + current_worker_index = ~0; + handoff_queue_elt_by_worker_index[next_worker_index] = 0; + hf = 0; + } + } + else + { + do_handoff = 0; + /* if this is 1st frame */ + if (!f) + { + f = vlib_get_frame_to_node (vm, snat_out2in_node.index); + to_next = vlib_frame_vector_args (f); + } + + to_next[0] = bi0; + to_next += 1; + f->n_vectors++; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_worker_handoff_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_worker_index = next_worker_index; + t->do_handoff = do_handoff; + } + } + + if (f) + vlib_put_frame_to_node (vm, snat_out2in_node.index, f); + + if (hf) + hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker; + + /* Ship frames to the worker nodes */ + for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++) + { + if (handoff_queue_elt_by_worker_index[i]) + { + hf = handoff_queue_elt_by_worker_index[i]; + /* + * It works better to let the handoff node + * rate-adapt, always ship the handoff queue element. + */ + if (1 || hf->n_vectors == hf->last_n_vectors) + { + vlib_put_frame_queue_elt (hf); + handoff_queue_elt_by_worker_index[i] = 0; + } + else + hf->last_n_vectors = hf->n_vectors; + } + congested_handoff_queue_by_worker_index[i] = + (vlib_frame_queue_t *) (~0); + } + hf = 0; + current_worker_index = ~0; + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = { + .function = snat_out2in_worker_handoff_fn, + .name = "snat-out2in-worker-handoff", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_worker_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_next_nodes = 1, + + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn); + +static inline u32 icmp_out2in_fast (snat_main_t *sm, + vlib_buffer_t * b0, + ip4_header_t * ip0, + icmp46_header_t * icmp0, + u32 sw_if_index0, + vlib_node_runtime_t * node, + u32 next0, + u32 rx_fib_index0) +{ + snat_session_key_t key0, sm0; + icmp_echo_header_t *echo0; + u32 new_addr0, old_addr0; + u16 old_id0, new_id0; + ip_csum_t sum0; + snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data; + + echo0 = (icmp_echo_header_t *)(icmp0+1); + + key0.addr = ip0->dst_address; + key0.port = echo0->identifier; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + ip4_address_t * first_int_addr; + + if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0)) + { + first_int_addr = + ip4_interface_first_address (sm->ip4_main, sw_if_index0, + 0 /* just want the address */); + rt->cached_sw_if_index = sw_if_index0; + rt->cached_ip4_address = first_int_addr->as_u32; + } + + /* Don't NAT packet aimed at the intfc address */ + if (PREDICT_FALSE(ip0->dst_address.as_u32 == + rt->cached_ip4_address)) + return next0; + + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + return SNAT_OUT2IN_NEXT_DROP; + } + + new_addr0 = sm0.addr.as_u32; + new_id0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_id0 != echo0->identifier)) + { + old_id0 = echo0->identifier; + echo0->identifier = new_id0; + + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t, + identifier); + icmp0->checksum = ip_csum_fold (sum0); + } + + return next0; +} + +static uword +snat_out2in_fast_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 n_left_from, * from, * to_next; + snat_out2in_next_t next_index; + u32 pkts_processed = 0; + snat_main_t * sm = &snat_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u32 next0 = SNAT_OUT2IN_NEXT_DROP; + u32 sw_if_index0; + ip4_header_t * ip0; + ip_csum_t sum0; + u32 new_addr0, old_addr0; + u16 new_port0, old_port0; + udp_header_t * udp0; + tcp_header_t * tcp0; + icmp46_header_t * icmp0; + snat_session_key_t key0, sm0; + u32 proto0; + u32 rx_fib_index0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + udp0 = ip4_next_header (ip0); + tcp0 = (tcp_header_t *) udp0; + icmp0 = (icmp46_header_t *) udp0; + + sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; + rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0); + + vnet_feature_next (sw_if_index0, &next0, b0); + + proto0 = ~0; + proto0 = (ip0->protocol == IP_PROTOCOL_UDP) + ? SNAT_PROTOCOL_UDP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_TCP) + ? SNAT_PROTOCOL_TCP : proto0; + proto0 = (ip0->protocol == IP_PROTOCOL_ICMP) + ? SNAT_PROTOCOL_ICMP : proto0; + + if (PREDICT_FALSE (proto0 == ~0)) + goto trace00; + + if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP)) + { + next0 = icmp_out2in_fast + (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0); + goto trace00; + } + + key0.addr = ip0->dst_address; + key0.port = udp0->dst_port; + key0.fib_index = rx_fib_index0; + + if (snat_static_mapping_match(sm, key0, &sm0, 1)) + { + b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION]; + goto trace00; + } + + new_addr0 = sm0.addr.as_u32; + new_port0 = sm0.port; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index; + old_addr0 = ip0->dst_address.as_u32; + ip0->dst_address.as_u32 = new_addr0; + + sum0 = ip0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + + if (PREDICT_FALSE(new_port0 != udp0->dst_port)) + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + old_port0 = tcp0->ports.dst; + tcp0->ports.dst = new_port0; + + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + sum0 = ip_csum_update (sum0, old_port0, new_port0, + ip4_header_t /* cheat */, + length /* changed member */); + tcp0->checksum = ip_csum_fold(sum0); + } + else + { + old_port0 = udp0->dst_port; + udp0->dst_port = new_port0; + udp0->checksum = 0; + } + } + else + { + if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP)) + { + sum0 = tcp0->checksum; + sum0 = ip_csum_update (sum0, old_addr0, new_addr0, + ip4_header_t, + dst_address /* changed member */); + + tcp0->checksum = ip_csum_fold(sum0); + } + } + + trace00: + + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + snat_out2in_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->sw_if_index = sw_if_index0; + t->next_index = next0; + } + + pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, snat_out2in_fast_node.index, + SNAT_OUT2IN_ERROR_OUT2IN_PACKETS, + pkts_processed); + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (snat_out2in_fast_node) = { + .function = snat_out2in_fast_node_fn, + .name = "snat-out2in-fast", + .vector_size = sizeof (u32), + .format_trace = format_snat_out2in_fast_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(snat_out2in_error_strings), + .error_strings = snat_out2in_error_strings, + + .runtime_data_bytes = sizeof (snat_runtime_t), + + .n_next_nodes = SNAT_OUT2IN_N_NEXT, + + /* edit / add dispositions here */ + .next_nodes = { + [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup", + [SNAT_OUT2IN_NEXT_DROP] = "error-drop", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn); diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api new file mode 100644 index 00000000000..a191eed5944 --- /dev/null +++ b/src/plugins/snat/snat.api @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file snat.api + * @brief VPP control-plane API messages. + * + * This file defines VPP control-plane API messages which are generally + * called through a shared memory interface. + */ + +/** \brief Add/del S-NAT address range + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @first_ip_address - first IP address + @last_ip_address - last IP address + @is_add - 1 if add, 0 if delete +*/ +define snat_add_address_range { + u32 client_index; + u32 context; + u8 is_ip4; + u8 first_ip_address[16]; + u8 last_ip_address[16]; + u8 is_add; +}; + +/** \brief Add S-NAT address range reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_add_address_range_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT addresses + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_address_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT address details response + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @param ip_address - IP address +*/ +define snat_address_details { + u32 context; + u8 is_ip4; + u8 ip_address[16]; +}; + +/** \brief Enable/disable S-NAT feature on the interface + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 1 if add, 0 if delete + @param is_inside - 1 if inside, 0 if outside + @param sw_if_index - software index of the interface +*/ +define snat_interface_add_del_feature { + u32 client_index; + u32 context; + u8 is_add; + u8 is_inside; + u32 sw_if_index; +}; + +/** \brief Enable/disable S-NAT feature on the interface reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_interface_add_del_feature_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump interfaces with S-NAT feature + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_interface_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT interface details response + @param context - sender context, to match reply w/ request + @param is_inside - 1 if inside, 0 if outside + @param sw_if_index - software index of the interface +*/ +define snat_interface_details { + u32 context; + u8 is_inside; + u32 sw_if_index; +}; + +/** \brief Add/delete S-NAT static mapping + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_add - 1 if add, 0 if delete + @param is_ip4 - 1 if address type is IPv4 + @param addr_only - 1 if address only mapping + @param local_ip_address - local IP address + @param external_ip_address - external IP address + @param local_port - local port number + @param external_port - external port number + @param vfr_id - VRF ID +*/ +define snat_add_static_mapping { + u32 client_index; + u32 context; + u8 is_add; + u8 is_ip4; + u8 addr_only; + u8 local_ip_address[16]; + u8 external_ip_address[16]; + u16 local_port; + u16 external_port; + u32 vrf_id; +}; + +/** \brief Add/delete S-NAT static mapping reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_add_static_mapping_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT static mappings + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_static_mapping_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT static mapping details response + @param context - sender context, to match reply w/ request + @param is_ip4 - 1 if address type is IPv4 + @param addr_only - 1 if address only mapping + @param local_ip_address - local IP address + @param external_ip_address - external IP address + @param local_port - local port number + @param external_port - external port number + @param vfr_id - VRF ID +*/ +define snat_static_mapping_details { + u32 context; + u8 is_ip4; + u8 addr_only; + u8 local_ip_address[16]; + u8 external_ip_address[16]; + u16 local_port; + u16 external_port; + u32 vrf_id; +}; + +/** \brief Control ping from client to api server request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_control_ping +{ + u32 client_index; + u32 context; +}; + +/** \brief Control ping from the client to the server response + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param vpe_pid - the pid of the vpe, returned by the server +*/ +define snat_control_ping_reply +{ + u32 context; + i32 retval; + u32 client_index; + u32 vpe_pid; +}; + +/** \brief Show S-NAT plugin startup config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_show_config +{ + u32 client_index; + u32 context; +}; + +/** \brief Show S-NAT plugin startup config reply + @param context - sender context, to match reply w/ request + @param retval - return code for the request + @param static_mapping_only - if 1 dynamic translations disabled + @param static_mapping_connection_tracking - if 1 create session data + @param translation_buckets - number of translation hash buckets + @param translation_memory_size - translation hash memory size + @param user_buckets - number of user hash buckets + @param user_memory_size - user hash memory size + @param max_translations_per_user - maximum number of translations per user + @param outside_vrf_id - outside VRF id + @param inside_vrf_id - default inside VRF id +*/ +define snat_show_config_reply +{ + u32 context; + i32 retval; + u8 static_mapping_only; + u8 static_mapping_connection_tracking; + u32 translation_buckets; + u32 translation_memory_size; + u32 user_buckets; + u32 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 inside_vrf_id; +}; + +/** \brief Set S-NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param worker_mask - S-NAT workers mask +*/ +define snat_set_workers { + u32 client_index; + u32 context; + u64 worker_mask; +}; + +/** \brief Set S-NAT workers reply + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param retval - return code +*/ +define snat_set_workers_reply { + u32 context; + i32 retval; +}; + +/** \brief Dump S-NAT workers + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define snat_worker_dump { + u32 client_index; + u32 context; +}; + +/** \brief S-NAT workers details response + @param context - sender context, to match reply w/ request + @param worker_index - worker index + @param lcore_id - lcore ID + @param name - worker name +*/ +define snat_worker_details { + u32 context; + u32 worker_index; + u32 lcore_id; + u8 name[64]; +}; diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c new file mode 100644 index 00000000000..bc9956841d2 --- /dev/null +++ b/src/plugins/snat/snat.c @@ -0,0 +1,1957 @@ +/* + * snat.c - simple nat plugin + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vlibapi/api.h> +#include <snat/snat.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> + +snat_main_t snat_main; + +/* define message IDs */ +#include <snat/snat_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <snat/snat_all_api_h.h> +#undef vl_typedefs + +/* define generated endian-swappers */ +#define vl_endianfun +#include <snat/snat_all_api_h.h> +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <snat/snat_all_api_h.h> +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +/* + * A handy macro to set up a message reply. + * Assumes that the following variables are available: + * mp - pointer to request message + * rmp - pointer to reply message type + * rv - return value + */ + +#define REPLY_MACRO(t) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + +#define REPLY_MACRO2(t, body) \ +do { \ + unix_shared_memory_queue_t * q = \ + vl_api_client_index_to_input_queue (mp->client_index); \ + if (!q) \ + return; \ + \ + rmp = vl_msg_api_alloc (sizeof (*rmp)); \ + rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \ + rmp->context = mp->context; \ + rmp->retval = ntohl(rv); \ + do {body;} while (0); \ + vl_msg_api_send_shmem (q, (u8 *)&rmp); \ +} while(0); + + +/* Hook up input features */ +VNET_FEATURE_INIT (ip4_snat_in2out, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out", + .runs_before = VNET_FEATURES ("snat-out2in"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out-worker-handoff", + .runs_before = VNET_FEATURES ("snat-out2in-worker-handoff"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in-worker-handoff", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; +VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-in2out-fast", + .runs_before = VNET_FEATURES ("snat-out2in-fast"), +}; +VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = { + .arc_name = "ip4-unicast", + .node_name = "snat-out2in-fast", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + + +/* + * This routine exists to convince the vlib plugin framework that + * we haven't accidentally copied a random .dll into the plugin directory. + * + * Also collects global variable pointers passed from the vpp engine + */ + +clib_error_t * +vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h, + int from_early_init) +{ + snat_main_t * sm = &snat_main; + clib_error_t * error = 0; + + sm->vlib_main = vm; + sm->vnet_main = h->vnet_main; + sm->ethernet_main = h->ethernet_main; + + return error; +} + +/*$$$$$ move to an installed header file */ +#if (1 || CLIB_DEBUG > 0) /* "trust, but verify" */ + +#define VALIDATE_SW_IF_INDEX(mp) \ + do { u32 __sw_if_index = ntohl(mp->sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_sw_if_index; \ + } \ +} while(0); + +#define BAD_SW_IF_INDEX_LABEL \ +do { \ +bad_sw_if_index: \ + ; \ +} while (0); + +#define VALIDATE_RX_SW_IF_INDEX(mp) \ + do { u32 __rx_sw_if_index = ntohl(mp->rx_sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __rx_sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_rx_sw_if_index; \ + } \ +} while(0); + +#define BAD_RX_SW_IF_INDEX_LABEL \ +do { \ +bad_rx_sw_if_index: \ + ; \ +} while (0); + +#define VALIDATE_TX_SW_IF_INDEX(mp) \ + do { u32 __tx_sw_if_index = ntohl(mp->tx_sw_if_index); \ + vnet_main_t *__vnm = vnet_get_main(); \ + if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \ + __tx_sw_if_index)) { \ + rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \ + goto bad_tx_sw_if_index; \ + } \ +} while(0); + +#define BAD_TX_SW_IF_INDEX_LABEL \ +do { \ +bad_tx_sw_if_index: \ + ; \ +} while (0); + +#else + +#define VALIDATE_SW_IF_INDEX(mp) +#define BAD_SW_IF_INDEX_LABEL +#define VALIDATE_RX_SW_IF_INDEX(mp) +#define BAD_RX_SW_IF_INDEX_LABEL +#define VALIDATE_TX_SW_IF_INDEX(mp) +#define BAD_TX_SW_IF_INDEX_LABEL + +#endif /* CLIB_DEBUG > 0 */ + +void snat_add_address (snat_main_t *sm, ip4_address_t *addr) +{ + snat_address_t * ap; + + /* Check if address already exists */ + vec_foreach (ap, sm->addresses) + { + if (ap->addr.as_u32 == addr->as_u32) + return; + } + + vec_add2 (sm->addresses, ap, 1); + ap->addr = *addr; + clib_bitmap_alloc (ap->busy_port_bitmap, 65535); +} + +static int is_snat_address_used_in_static_mapping (snat_main_t *sm, + ip4_address_t addr) +{ + snat_static_mapping_t *m; + pool_foreach (m, sm->static_mappings, + ({ + if (m->external_addr.as_u32 == addr.as_u32) + return 1; + })); + + return 0; +} + +int snat_del_address (snat_main_t *sm, ip4_address_t addr) +{ + snat_address_t *a = 0; + snat_session_t *ses; + u32 *ses_to_be_removed = 0, *ses_index; + clib_bihash_kv_8_8_t kv, value; + snat_user_key_t user_key; + snat_user_t *u; + snat_main_per_thread_data_t *tsm; + + int i; + + /* Find SNAT address */ + for (i=0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == addr.as_u32) + { + a = sm->addresses + i; + break; + } + } + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Check if address is used in some static mapping */ + if (is_snat_address_used_in_static_mapping(sm, addr)) + { + clib_warning ("address used in static mapping"); + return VNET_API_ERROR_UNSPECIFIED; + } + + /* Delete sessions using address */ + if (a->busy_ports) + { + vec_foreach (tsm, sm->per_thread_data) + { + pool_foreach (ses, tsm->sessions, ({ + if (ses->out2in.addr.as_u32 == addr.as_u32) + { + vec_add1 (ses_to_be_removed, ses - tsm->sessions); + kv.key = ses->in2out.as_u64; + clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0); + kv.key = ses->out2in.as_u64; + clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0); + clib_dlist_remove (tsm->list_pool, ses->per_user_index); + user_key.addr = ses->in2out.addr; + user_key.fib_index = ses->in2out.fib_index; + kv.key = user_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + u = pool_elt_at_index (tsm->users, value.value); + u->nsessions--; + } + } + })); + + vec_foreach (ses_index, ses_to_be_removed) + pool_put_index (tsm->sessions, ses_index[0]); + + vec_free (ses_to_be_removed); + } + } + + vec_del1 (sm->addresses, i); + + return 0; +} + +static void increment_v4_address (ip4_address_t * a) +{ + u32 v; + + v = clib_net_to_host_u32(a->as_u32) + 1; + a->as_u32 = clib_host_to_net_u32(v); +} + +/** + * @brief Add static mapping. + * + * Create static mapping between local addr+port and external addr+port. + * + * @param l_addr Local IPv4 address. + * @param e_addr External IPv4 address. + * @param l_port Local port number. + * @param e_port External port number. + * @param vrf_id VRF ID. + * @param addr_only If 0 address port and pair mapping, otherwise address only. + * @param is_add If 0 delete static mapping, otherwise add. + * + * @returns + */ +int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr, + u16 l_port, u16 e_port, u32 vrf_id, int addr_only, + int is_add) +{ + snat_main_t * sm = &snat_main; + snat_static_mapping_t *m; + snat_static_mapping_key_t m_key; + clib_bihash_kv_8_8_t kv, value; + snat_address_t *a = 0; + u32 fib_index = ~0; + uword * p; + int i; + + /* If outside FIB index is not resolved yet */ + if (sm->outside_fib_index == ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + sm->outside_fib_index = p[0]; + } + + m_key.addr = e_addr; + m_key.port = addr_only ? 0 : e_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + m = 0; + else + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (is_add) + { + if (m) + return VNET_API_ERROR_VALUE_EXIST; + + /* Convert VRF id to FIB index */ + if (vrf_id != ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + fib_index = p[0]; + } + /* If not specified use inside VRF id from SNAT plugin startup config */ + else + { + if (sm->inside_fib_index == ~0) + { + p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->inside_vrf_id); + if (!p) + return VNET_API_ERROR_NO_SUCH_FIB; + fib_index = p[0]; + sm->inside_fib_index = fib_index; + } + else + fib_index = sm->inside_fib_index; + + vrf_id = sm->inside_vrf_id; + } + + /* Find external address in allocated addresses and reserve port for + address and port pair mapping when dynamic translations enabled */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + /* External port must be unused */ + if (clib_bitmap_get_no_check (a->busy_port_bitmap, e_port)) + return VNET_API_ERROR_INVALID_VALUE; + clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 1); + if (e_port > 1024) + a->busy_ports++; + + break; + } + } + /* External address must be allocated */ + if (!a) + return VNET_API_ERROR_NO_SUCH_ENTRY; + } + + pool_get (sm->static_mappings, m); + memset (m, 0, sizeof (*m)); + m->local_addr = l_addr; + m->external_addr = e_addr; + m->addr_only = addr_only; + m->vrf_id = vrf_id; + m->fib_index = fib_index; + if (!addr_only) + { + m->local_port = l_port; + m->external_port = e_port; + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + kv.value = m - sm->static_mappings; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1); + + /* Assign worker */ + if (sm->workers) + { + snat_user_key_t w_key0; + snat_static_mapping_key_t w_key1; + + w_key0.addr = m->local_addr; + w_key0.fib_index = m->fib_index; + kv.key = w_key0.as_u64; + + if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + { + kv.value = sm->first_worker_index + + sm->workers[sm->next_worker++ % vec_len (sm->workers)]; + + clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1); + } + else + { + kv.value = value.value; + } + + w_key1.addr = m->external_addr; + w_key1.port = clib_host_to_net_u16 (m->external_port); + w_key1.fib_index = sm->outside_fib_index; + kv.key = w_key1.as_u64; + clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1); + } + } + else + { + if (!m) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Free external address port */ + if (!addr_only && !(sm->static_mapping_only)) + { + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].addr.as_u32 == e_addr.as_u32) + { + a = sm->addresses + i; + clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 0); + a->busy_ports--; + + break; + } + } + } + + m_key.addr = m->local_addr; + m_key.port = m->local_port; + m_key.fib_index = m->fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0); + + m_key.addr = m->external_addr; + m_key.port = m->external_port; + m_key.fib_index = sm->outside_fib_index; + kv.key = m_key.as_u64; + clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0); + + /* Delete session(s) for static mapping if exist */ + if (!(sm->static_mapping_only) || + (sm->static_mapping_only && sm->static_mapping_connection_tracking)) + { + snat_user_key_t u_key; + snat_user_t *u; + dlist_elt_t * head, * elt; + u32 elt_index, head_index, del_elt_index; + u32 ses_index; + u64 user_index; + snat_session_t * s; + snat_main_per_thread_data_t *tsm; + + u_key.addr = m->local_addr; + u_key.fib_index = m->fib_index; + kv.key = u_key.as_u64; + if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value)) + { + user_index = value.value; + if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value)) + tsm = vec_elt_at_index (sm->per_thread_data, value.value); + else + tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); + u = pool_elt_at_index (tsm->users, user_index); + if (u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (tsm->list_pool, head_index); + elt_index = head->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + while (ses_index != ~0) + { + s = pool_elt_at_index (tsm->sessions, ses_index); + del_elt_index = elt_index; + elt_index = elt->next; + elt = pool_elt_at_index (tsm->list_pool, elt_index); + ses_index = elt->value; + + if (!addr_only) + { + if ((s->out2in.addr.as_u32 != e_addr.as_u32) && + (clib_net_to_host_u16 (s->out2in.port) != e_port)) + continue; + } + + value.key = s->in2out.as_u64; + clib_bihash_add_del_8_8 (&sm->in2out, &value, 0); + value.key = s->out2in.as_u64; + clib_bihash_add_del_8_8 (&sm->out2in, &value, 0); + pool_put (tsm->sessions, s); + + clib_dlist_remove (tsm->list_pool, del_elt_index); + pool_put_index (tsm->list_pool, del_elt_index); + u->nstaticsessions--; + + if (!addr_only) + break; + } + if (addr_only) + { + pool_put (tsm->users, u); + clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0); + } + } + } + } + + /* Delete static mapping from pool */ + pool_put (sm->static_mappings, m); + } + + return 0; +} + +static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del) +{ + snat_main_t *sm = &snat_main; + snat_interface_t *i; + const char * feature_name; + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + feature_name = is_inside ? "snat-in2out-fast" : "snat-out2in-fast"; + else + { + if (sm->num_workers > 1) + feature_name = is_inside ? "snat-in2out-worker-handoff" : "snat-out2in-worker-handoff"; + else + feature_name = is_inside ? "snat-in2out" : "snat-out2in"; + } + + vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, + !is_del, 0, 0); + + if (sm->fq_in2out_index == ~0) + sm->fq_in2out_index = vlib_frame_queue_main_init (snat_in2out_node.index, 0); + + if (sm->fq_out2in_index == ~0) + sm->fq_out2in_index = vlib_frame_queue_main_init (snat_out2in_node.index, 0); + + pool_foreach (i, sm->interfaces, + ({ + if (i->sw_if_index == sw_if_index) + { + if (is_del) + pool_put (sm->interfaces, i); + else + return VNET_API_ERROR_VALUE_EXIST; + + return 0; + } + })); + + if (is_del) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get (sm->interfaces, i); + i->sw_if_index = sw_if_index; + i->is_inside = is_inside; + + return 0; +} + +static int snat_set_workers (uword * bitmap) +{ + snat_main_t *sm = &snat_main; + int i; + + if (sm->num_workers < 2) + return VNET_API_ERROR_FEATURE_DISABLED; + + if (clib_bitmap_last_set (bitmap) >= sm->num_workers) + return VNET_API_ERROR_INVALID_WORKER; + + vec_free (sm->workers); + clib_bitmap_foreach (i, bitmap, + ({ + vec_add1(sm->workers, i); + })); + + return 0; +} + +static void +vl_api_snat_add_address_range_t_handler +(vl_api_snat_add_address_range_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_add_address_range_reply_t * rmp; + ip4_address_t this_addr; + u32 start_host_order, end_host_order; + int i, count; + int rv = 0; + u32 * tmp; + + if (mp->is_ip4 != 1) + { + rv = VNET_API_ERROR_UNIMPLEMENTED; + goto send_reply; + } + + if (sm->static_mapping_only) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + tmp = (u32 *) mp->first_ip_address; + start_host_order = clib_host_to_net_u32 (tmp[0]); + tmp = (u32 *) mp->last_ip_address; + end_host_order = clib_host_to_net_u32 (tmp[0]); + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + clib_warning ("%U - %U, %d addresses...", + format_ip4_address, mp->first_ip_address, + format_ip4_address, mp->last_ip_address, + count); + + memcpy (&this_addr.as_u8, mp->first_ip_address, 4); + + for (i = 0; i < count; i++) + { + if (mp->is_add) + snat_add_address (sm, &this_addr); + else + rv = snat_del_address (sm, this_addr); + + if (rv) + goto send_reply; + + increment_v4_address (&this_addr); + } + + send_reply: + REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY); +} + +static void *vl_api_snat_add_address_range_t_print +(vl_api_snat_add_address_range_t *mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_add_address_range "); + s = format (s, "%U ", format_ip4_address, mp->first_ip_address); + if (memcmp (mp->first_ip_address, mp->last_ip_address, 4)) + { + s = format (s, " - %U ", format_ip4_address, mp->last_ip_address); + } + FINISH; +} + +static void +send_snat_address_details +(snat_address_t * a, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_address_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_ADDRESS_DETAILS+sm->msg_id_base); + rmp->is_ip4 = 1; + clib_memcpy (rmp->ip_address, &(a->addr), 4); + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_address_dump_t_handler +(vl_api_snat_address_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_address_t * a; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_foreach (a, sm->addresses) + send_snat_address_details (a, q, mp->context); +} + +static void *vl_api_snat_address_dump_t_print +(vl_api_snat_address_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_address_dump "); + + FINISH; +} + +static void +vl_api_snat_interface_add_del_feature_t_handler +(vl_api_snat_interface_add_del_feature_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_interface_add_del_feature_reply_t * rmp; + u8 is_del = mp->is_add == 0; + u32 sw_if_index = ntohl(mp->sw_if_index); + int rv = 0; + + VALIDATE_SW_IF_INDEX(mp); + + rv = snat_interface_add_del (sw_if_index, mp->is_inside, is_del); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO(VL_API_SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY); +} + +static void *vl_api_snat_interface_add_del_feature_t_print +(vl_api_snat_interface_add_del_feature_t * mp, void *handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_interface_add_del_feature "); + s = format (s, "sw_if_index %d %s %s", + clib_host_to_net_u32(mp->sw_if_index), + mp->is_inside ? "in":"out", + mp->is_add ? "" : "del"); + + FINISH; +} + +static void +send_snat_interface_details +(snat_interface_t * i, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_interface_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_INTERFACE_DETAILS+sm->msg_id_base); + rmp->sw_if_index = ntohl (i->sw_if_index); + rmp->is_inside = i->is_inside; + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_interface_dump_t_handler +(vl_api_snat_interface_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_interface_t * i; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + pool_foreach (i, sm->interfaces, + ({ + send_snat_interface_details(i, q, mp->context); + })); +} + +static void *vl_api_snat_interface_dump_t_print +(vl_api_snat_interface_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_interface_dump "); + + FINISH; +}static void + +vl_api_snat_add_static_mapping_t_handler +(vl_api_snat_add_static_mapping_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_add_static_mapping_reply_t * rmp; + ip4_address_t local_addr, external_addr; + u16 local_port = 0, external_port = 0; + u32 vrf_id; + int rv = 0; + + if (mp->is_ip4 != 1) + { + rv = VNET_API_ERROR_UNIMPLEMENTED; + goto send_reply; + } + + memcpy (&local_addr.as_u8, mp->local_ip_address, 4); + memcpy (&external_addr.as_u8, mp->external_ip_address, 4); + if (mp->addr_only == 0) + { + local_port = clib_net_to_host_u16 (mp->local_port); + external_port = clib_net_to_host_u16 (mp->external_port); + } + vrf_id = clib_net_to_host_u32 (mp->vrf_id); + + rv = snat_add_static_mapping(local_addr, external_addr, local_port, + external_port, vrf_id, mp->addr_only, + mp->is_add); + + send_reply: + REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY); +} + +static void *vl_api_snat_add_static_mapping_t_print +(vl_api_snat_add_static_mapping_t *mp, void * handle) +{ + u8 * s; + + s = format (0, "SCRIPT: snat_add_static_mapping "); + s = format (s, "local_addr %U external_addr %U ", + format_ip4_address, mp->local_ip_address, + format_ip4_address, mp->external_ip_address); + + if (mp->addr_only == 0) + s = format (s, "local_port %d external_port %d ", + clib_net_to_host_u16 (mp->local_port), + clib_net_to_host_u16 (mp->external_port)); + + if (mp->vrf_id != ~0) + s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id)); + + FINISH; +} + +static void +send_snat_static_mapping_details +(snat_static_mapping_t * m, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_static_mapping_details_t *rmp; + snat_main_t * sm = &snat_main; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_STATIC_MAPPING_DETAILS+sm->msg_id_base); + rmp->is_ip4 = 1; + rmp->addr_only = m->addr_only; + clib_memcpy (rmp->local_ip_address, &(m->local_addr), 4); + clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4); + rmp->local_port = htons (m->local_port); + rmp->external_port = htons (m->external_port); + rmp->vrf_id = htonl (m->vrf_id); + rmp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_static_mapping_dump_t_handler +(vl_api_snat_static_mapping_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + snat_static_mapping_t * m; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + pool_foreach (m, sm->static_mappings, + ({ + send_snat_static_mapping_details (m, q, mp->context); + })); +} + +static void *vl_api_snat_static_mapping_dump_t_print +(vl_api_snat_static_mapping_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_static_mapping_dump "); + + FINISH; +} + +static void +vl_api_snat_control_ping_t_handler +(vl_api_snat_control_ping_t * mp) +{ + vl_api_snat_control_ping_reply_t *rmp; + snat_main_t * sm = &snat_main; + int rv = 0; + + REPLY_MACRO2(VL_API_SNAT_CONTROL_PING_REPLY, + ({ + rmp->vpe_pid = ntohl (getpid()); + })); +} + +static void *vl_api_snat_control_ping_t_print +(vl_api_snat_control_ping_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_control_ping "); + + FINISH; +} + +static void +vl_api_snat_show_config_t_handler +(vl_api_snat_show_config_t * mp) +{ + vl_api_snat_show_config_reply_t *rmp; + snat_main_t * sm = &snat_main; + int rv = 0; + + REPLY_MACRO2(VL_API_SNAT_SHOW_CONFIG_REPLY, + ({ + rmp->translation_buckets = htonl (sm->translation_buckets); + rmp->translation_memory_size = htonl (sm->translation_memory_size); + rmp->user_buckets = htonl (sm->user_buckets); + rmp->user_memory_size = htonl (sm->user_memory_size); + rmp->max_translations_per_user = htonl (sm->max_translations_per_user); + rmp->outside_vrf_id = htonl (sm->outside_vrf_id); + rmp->inside_vrf_id = htonl (sm->inside_vrf_id); + rmp->static_mapping_only = sm->static_mapping_only; + rmp->static_mapping_connection_tracking = + sm->static_mapping_connection_tracking; + })); +} + +static void *vl_api_snat_show_config_t_print +(vl_api_snat_show_config_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_show_config "); + + FINISH; +} + +static void +vl_api_snat_set_workers_t_handler +(vl_api_snat_set_workers_t * mp) +{ + snat_main_t * sm = &snat_main; + vl_api_snat_set_workers_reply_t * rmp; + int rv = 0; + uword *bitmap = 0; + u64 mask = clib_net_to_host_u64 (mp->worker_mask); + + if (sm->num_workers < 2) + { + rv = VNET_API_ERROR_FEATURE_DISABLED; + goto send_reply; + } + + bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask)); + rv = snat_set_workers(bitmap); + clib_bitmap_free (bitmap); + + send_reply: + REPLY_MACRO (VL_API_SNAT_SET_WORKERS_REPLY); +} + +static void *vl_api_snat_set_workers_t_print +(vl_api_snat_set_workers_t *mp, void * handle) +{ + u8 * s; + uword *bitmap = 0; + u8 first = 1; + int i; + u64 mask = clib_net_to_host_u64 (mp->worker_mask); + + s = format (0, "SCRIPT: snat_set_workers "); + bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask)); + clib_bitmap_foreach (i, bitmap, + ({ + if (first) + s = format (s, "%d", i); + else + s = format (s, ",%d", i); + first = 0; + })); + clib_bitmap_free (bitmap); + FINISH; +} + +static void +send_snat_worker_details +(u32 worker_index, unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_snat_worker_details_t *rmp; + snat_main_t * sm = &snat_main; + vlib_worker_thread_t *w = + vlib_worker_threads + worker_index + sm->first_worker_index; + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + memset (rmp, 0, sizeof (*rmp)); + rmp->_vl_msg_id = ntohs (VL_API_SNAT_WORKER_DETAILS+sm->msg_id_base); + rmp->context = context; + rmp->worker_index = htonl (worker_index); + rmp->lcore_id = htonl (w->lcore_id); + strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1); + + vl_msg_api_send_shmem (q, (u8 *) & rmp); +} + +static void +vl_api_snat_worker_dump_t_handler +(vl_api_snat_worker_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + snat_main_t * sm = &snat_main; + u32 * worker_index; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + vec_foreach (worker_index, sm->workers) + { + send_snat_worker_details(*worker_index, q, mp->context); + } +} + +static void *vl_api_snat_worker_dump_t_print +(vl_api_snat_worker_dump_t *mp, void * handle) +{ + u8 *s; + + s = format (0, "SCRIPT: snat_worker_dump "); + + FINISH; +} + +/* List of message types that this plugin understands */ +#define foreach_snat_plugin_api_msg \ +_(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \ +_(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature) \ +_(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping) \ +_(SNAT_CONTROL_PING, snat_control_ping) \ +_(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump) \ +_(SNAT_SHOW_CONFIG, snat_show_config) \ +_(SNAT_ADDRESS_DUMP, snat_address_dump) \ +_(SNAT_INTERFACE_DUMP, snat_interface_dump) \ +_(SNAT_SET_WORKERS, snat_set_workers) \ +_(SNAT_WORKER_DUMP, snat_worker_dump) + +/* Set up the API message handling tables */ +static clib_error_t * +snat_plugin_api_hookup (vlib_main_t *vm) +{ + snat_main_t * sm __attribute__ ((unused)) = &snat_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_snat_plugin_api_msg; +#undef _ + + return 0; +} + +#define vl_msg_name_crc_list +#include <snat/snat_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (snat_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_snat; +#undef _ +} + +static void plugin_custom_dump_configure (snat_main_t * sm) +{ +#define _(n,f) sm->api_main->msg_print_handlers \ + [VL_API_##n + sm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_snat_plugin_api_msg; +#undef _ +} + +static clib_error_t * snat_init (vlib_main_t * vm) +{ + snat_main_t * sm = &snat_main; + clib_error_t * error = 0; + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u8 * name; + uword *p; + vlib_thread_registration_t *tr; + vlib_thread_main_t *tm = vlib_get_thread_main (); + uword *bitmap = 0; + u32 i; + + name = format (0, "snat_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main(); + sm->ip4_main = im; + sm->ip4_lookup_main = lm; + sm->api_main = &api_main; + sm->first_worker_index = 0; + sm->next_worker = 0; + sm->num_workers = 0; + sm->workers = 0; + sm->fq_in2out_index = ~0; + sm->fq_out2in_index = ~0; + + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + if (p) + { + tr = (vlib_thread_registration_t *) p[0]; + if (tr) + { + sm->num_workers = tr->count; + sm->first_worker_index = tr->first_index; + } + } + + /* Use all available workers by default */ + if (sm->num_workers > 1) + { + for (i=0; i < sm->num_workers; i++) + bitmap = clib_bitmap_set (bitmap, i, 1); + snat_set_workers(bitmap); + clib_bitmap_free (bitmap); + } + + error = snat_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (sm, &api_main); + + plugin_custom_dump_configure (sm); + vec_free(name); + + return error; +} + +VLIB_INIT_FUNCTION (snat_init); + +void snat_free_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 address_index) +{ + snat_address_t *a; + u16 port_host_byte_order = clib_net_to_host_u16 (k->port); + + ASSERT (address_index < vec_len (sm->addresses)); + + a = sm->addresses + address_index; + + ASSERT (clib_bitmap_get_no_check (a->busy_port_bitmap, + port_host_byte_order) == 1); + + clib_bitmap_set_no_check (a->busy_port_bitmap, port_host_byte_order, 0); + a->busy_ports--; +} + +/** + * @brief Match SNAT static mapping. + * + * @param sm SNAT main. + * @param match Address and port to match. + * @param mapping External or local address and port of the matched mapping. + * @param by_external If 0 match by local address otherwise match by external + * address. + * + * @returns 0 if match found otherwise 1. + */ +int snat_static_mapping_match (snat_main_t * sm, + snat_session_key_t match, + snat_session_key_t * mapping, + u8 by_external) +{ + clib_bihash_kv_8_8_t kv, value; + snat_static_mapping_t *m; + snat_static_mapping_key_t m_key; + clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local; + + if (by_external) + mapping_hash = &sm->static_mapping_by_external; + + m_key.addr = match.addr; + m_key.port = clib_net_to_host_u16 (match.port); + m_key.fib_index = match.fib_index; + + kv.key = m_key.as_u64; + + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + { + /* Try address only mapping */ + m_key.port = 0; + kv.key = m_key.as_u64; + if (clib_bihash_search_8_8 (mapping_hash, &kv, &value)) + return 1; + } + + m = pool_elt_at_index (sm->static_mappings, value.value); + + if (by_external) + { + mapping->addr = m->local_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->local_port); + mapping->fib_index = m->fib_index; + } + else + { + mapping->addr = m->external_addr; + /* Address only mapping doesn't change port */ + mapping->port = m->addr_only ? match.port + : clib_host_to_net_u16 (m->external_port); + mapping->fib_index = sm->outside_fib_index; + } + + return 0; +} + +int snat_alloc_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 * address_indexp) +{ + int i; + snat_address_t *a; + u32 portnum; + + for (i = 0; i < vec_len (sm->addresses); i++) + { + if (sm->addresses[i].busy_ports < (65535-1024)) + { + a = sm->addresses + i; + + while (1) + { + portnum = random_u32 (&sm->random_seed); + portnum &= 0xFFFF; + if (portnum < 1024) + continue; + if (clib_bitmap_get_no_check (a->busy_port_bitmap, portnum)) + continue; + clib_bitmap_set_no_check (a->busy_port_bitmap, portnum, 1); + a->busy_ports++; + /* Caller sets protocol and fib index */ + k->addr = a->addr; + k->port = clib_host_to_net_u16(portnum); + *address_indexp = i; + return 0; + } + } + } + /* Totally out of translations to use... */ + return 1; +} + + +static clib_error_t * +add_address_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + snat_main_t * sm = &snat_main; + ip4_address_t start_addr, end_addr, this_addr; + u32 start_host_order, end_host_order; + int i, count; + int is_add = 1; + int rv = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (line_input, "del")) + is_add = 0; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (sm->static_mapping_only) + return clib_error_return (0, "static mapping only mode"); + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + return clib_error_return (0, "end address less than start address"); + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + clib_warning ("%U - %U, %d addresses...", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, + count); + + this_addr = start_addr; + + for (i = 0; i < count; i++) + { + if (is_add) + snat_add_address (sm, &this_addr); + else + rv = snat_del_address (sm, this_addr); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "S-NAT address not exist."); + break; + case VNET_API_ERROR_UNSPECIFIED: + return clib_error_return (0, "S-NAT address used in static mapping."); + break; + default: + break; + } + + increment_v4_address (&this_addr); + } + + return 0; +} + +VLIB_CLI_COMMAND (add_address_command, static) = { + .path = "snat add address", + .short_help = "snat add addresses <ip4-range-start> [- <ip4-range-end>] [del]", + .function = add_address_command_fn, +}; + +static clib_error_t * +snat_feature_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index; + u32 * inside_sw_if_indices = 0; + u32 * outside_sw_if_indices = 0; + int is_del = 0; + int i; + + sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "in %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (inside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "out %U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + vec_add1 (outside_sw_if_indices, sw_if_index); + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (vec_len (inside_sw_if_indices)) + { + for (i = 0; i < vec_len(inside_sw_if_indices); i++) + { + sw_if_index = inside_sw_if_indices[i]; + snat_interface_add_del (sw_if_index, 1, is_del); + } + } + + if (vec_len (outside_sw_if_indices)) + { + for (i = 0; i < vec_len(outside_sw_if_indices); i++) + { + sw_if_index = outside_sw_if_indices[i]; + snat_interface_add_del (sw_if_index, 0, is_del); + } + } + + vec_free (inside_sw_if_indices); + vec_free (outside_sw_if_indices); + + return error; +} + +VLIB_CLI_COMMAND (set_interface_snat_command, static) = { + .path = "set interface snat", + .function = snat_feature_command_fn, + .short_help = "set interface snat in <intfc> out <intfc> [del]", +}; + +static clib_error_t * +add_static_mapping_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t * error = 0; + ip4_address_t l_addr, e_addr; + u32 l_port = 0, e_port = 0, vrf_id = ~0; + int is_add = 1; + int addr_only = 1; + int rv; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr, + &l_port)) + addr_only = 0; + else if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr)) + ; + else if (unformat (line_input, "external %U %u", unformat_ip4_address, + &e_addr, &e_port)) + addr_only = 0; + else if (unformat (line_input, "external %U", unformat_ip4_address, + &e_addr)) + ; + else if (unformat (line_input, "vrf %u", &vrf_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else + return clib_error_return (0, "unknown input: '%U'", + format_unformat_error, line_input); + } + unformat_free (line_input); + + rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port, + vrf_id, addr_only, is_add); + + switch (rv) + { + case VNET_API_ERROR_INVALID_VALUE: + return clib_error_return (0, "External port already in use."); + break; + case VNET_API_ERROR_NO_SUCH_ENTRY: + if (is_add) + return clib_error_return (0, "External addres must be allocated."); + else + return clib_error_return (0, "Mapping not exist."); + break; + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "No such VRF id."); + case VNET_API_ERROR_VALUE_EXIST: + return clib_error_return (0, "Mapping already exist."); + default: + break; + } + + return error; +} + +/*? + * @cliexpar + * @cliexstart{snat add static mapping} + * Static mapping allows hosts on the external network to initiate connection + * to to the local network host. + * To create static mapping between local host address 10.0.0.3 port 6303 and + * external address 4.4.4.4 port 3606 use: + * vpp# snat add static mapping local 10.0.0.3 6303 external 4.4.4.4 3606 + * If not runnig "static mapping only" S-NAT plugin mode use before: + * vpp# snat add address 4.4.4.4 + * To create static mapping between local and external address use: + * vpp# snat add static mapping local 10.0.0.3 external 4.4.4.4 + * @cliexend +?*/ +VLIB_CLI_COMMAND (add_static_mapping_command, static) = { + .path = "snat add static mapping", + .function = add_static_mapping_command_fn, + .short_help = + "snat add static mapping local <addr> [<port>] external <addr> [<port>] [vrf <table-id>] [del]", +}; + +static clib_error_t * +set_workers_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + uword *bitmap = 0; + int rv = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap)) + ; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + unformat_free (line_input); + + if (bitmap == 0) + return clib_error_return (0, "List of workers must be specified."); + + rv = snat_set_workers(bitmap); + + clib_bitmap_free (bitmap); + + switch (rv) + { + case VNET_API_ERROR_INVALID_WORKER: + return clib_error_return (0, "Invalid worker(s)."); + break; + case VNET_API_ERROR_FEATURE_DISABLED: + return clib_error_return (0, + "Supported only if 2 or more workes available."); + break; + default: + break; + } + + return 0; +} + +/*? + * @cliexpar + * @cliexstart{set snat workers} + * Set SNAT workers if 2 or more workers available, use: + * vpp# set snat workers 0-2,5 + * @cliexend +?*/ +VLIB_CLI_COMMAND (set_workers_command, static) = { + .path = "set snat workers", + .function = set_workers_command_fn, + .short_help = + "set snat workers <workers-list>", +}; + +static clib_error_t * +snat_config (vlib_main_t * vm, unformat_input_t * input) +{ + snat_main_t * sm = &snat_main; + u32 translation_buckets = 1024; + u32 translation_memory_size = 128<<20; + u32 user_buckets = 128; + u32 user_memory_size = 64<<20; + u32 max_translations_per_user = 100; + u32 outside_vrf_id = 0; + u32 inside_vrf_id = 0; + u32 static_mapping_buckets = 1024; + u32 static_mapping_memory_size = 64<<20; + u8 static_mapping_only = 0; + u8 static_mapping_connection_tracking = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "translation hash buckets %d", &translation_buckets)) + ; + else if (unformat (input, "translation hash memory %d", + &translation_memory_size)); + else if (unformat (input, "user hash buckets %d", &user_buckets)) + ; + else if (unformat (input, "user hash memory %d", + &user_memory_size)) + ; + else if (unformat (input, "max translations per user %d", + &max_translations_per_user)) + ; + else if (unformat (input, "outside VRF id %d", + &outside_vrf_id)) + ; + else if (unformat (input, "inside VRF id %d", + &inside_vrf_id)) + ; + else if (unformat (input, "static mapping only")) + { + static_mapping_only = 1; + if (unformat (input, "connection tracking")) + static_mapping_connection_tracking = 1; + } + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + /* for show commands, etc. */ + sm->translation_buckets = translation_buckets; + sm->translation_memory_size = translation_memory_size; + sm->user_buckets = user_buckets; + sm->user_memory_size = user_memory_size; + sm->max_translations_per_user = max_translations_per_user; + sm->outside_vrf_id = outside_vrf_id; + sm->outside_fib_index = ~0; + sm->inside_vrf_id = inside_vrf_id; + sm->inside_fib_index = ~0; + sm->static_mapping_only = static_mapping_only; + sm->static_mapping_connection_tracking = static_mapping_connection_tracking; + + if (!static_mapping_only || + (static_mapping_only && static_mapping_connection_tracking)) + { + clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets, + user_memory_size); + + clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets, + user_memory_size); + + vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1); + + clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets, + translation_memory_size); + + clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets, + user_memory_size); + } + clib_bihash_init_8_8 (&sm->static_mapping_by_local, + "static_mapping_by_local", static_mapping_buckets, + static_mapping_memory_size); + + clib_bihash_init_8_8 (&sm->static_mapping_by_external, + "static_mapping_by_external", static_mapping_buckets, + static_mapping_memory_size); + return 0; +} + +VLIB_CONFIG_FUNCTION (snat_config, "snat"); + +u8 * format_snat_key (u8 * s, va_list * args) +{ + snat_session_key_t * key = va_arg (*args, snat_session_key_t *); + char * protocol_string = "unknown"; + static char *protocol_strings[] = { + "UDP", + "TCP", + "ICMP", + }; + + if (key->protocol < ARRAY_LEN(protocol_strings)) + protocol_string = protocol_strings[key->protocol]; + + s = format (s, "%U proto %s port %d fib %d", + format_ip4_address, &key->addr, protocol_string, + clib_net_to_host_u16 (key->port), key->fib_index); + return s; +} + +u8 * format_snat_session (u8 * s, va_list * args) +{ + snat_main_t * sm __attribute__((unused)) = va_arg (*args, snat_main_t *); + snat_session_t * sess = va_arg (*args, snat_session_t *); + + s = format (s, " i2o %U\n", format_snat_key, &sess->in2out); + s = format (s, " o2i %U\n", format_snat_key, &sess->out2in); + s = format (s, " last heard %.2f\n", sess->last_heard); + s = format (s, " total pkts %d, total bytes %lld\n", + sess->total_pkts, sess->total_bytes); + if (snat_is_session_static (sess)) + s = format (s, " static translation\n"); + else + s = format (s, " dynamic translation\n"); + + return s; +} + +u8 * format_snat_user (u8 * s, va_list * args) +{ + snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *); + snat_user_t * u = va_arg (*args, snat_user_t *); + int verbose = va_arg (*args, int); + dlist_elt_t * head, * elt; + u32 elt_index, head_index; + u32 session_index; + snat_session_t * sess; + + s = format (s, "%U: %d dynamic translations, %d static translations\n", + format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions); + + if (verbose == 0) + return s; + + if (u->nsessions || u->nstaticsessions) + { + head_index = u->sessions_per_user_list_head_index; + head = pool_elt_at_index (sm->list_pool, head_index); + + elt_index = head->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + + while (session_index != ~0) + { + sess = pool_elt_at_index (sm->sessions, session_index); + + s = format (s, " %U\n", format_snat_session, sm, sess); + + elt_index = elt->next; + elt = pool_elt_at_index (sm->list_pool, elt_index); + session_index = elt->value; + } + } + + return s; +} + +u8 * format_snat_static_mapping (u8 * s, va_list * args) +{ + snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *); + + if (m->addr_only) + s = format (s, "local %U external %U vrf %d", + format_ip4_address, &m->local_addr, + format_ip4_address, &m->external_addr, + m->vrf_id); + else + s = format (s, "local %U:%d external %U:%d vrf %d", + format_ip4_address, &m->local_addr, m->local_port, + format_ip4_address, &m->external_addr, m->external_port, + m->vrf_id); + + return s; +} + +static clib_error_t * +show_snat_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int verbose = 0; + snat_main_t * sm = &snat_main; + snat_user_t * u; + snat_static_mapping_t *m; + snat_interface_t *i; + snat_address_t * ap; + vnet_main_t *vnm = vnet_get_main(); + snat_main_per_thread_data_t *tsm; + u32 users_num = 0, sessions_num = 0, *worker; + uword j = 0; + + if (unformat (input, "detail")) + verbose = 1; + else if (unformat (input, "verbose")) + verbose = 2; + + if (sm->static_mapping_only) + { + if (sm->static_mapping_connection_tracking) + vlib_cli_output (vm, "SNAT mode: static mapping only connection " + "tracking"); + else + vlib_cli_output (vm, "SNAT mode: static mapping only"); + } + else + { + vlib_cli_output (vm, "SNAT mode: dynamic translations enabled"); + } + + if (verbose > 0) + { + pool_foreach (i, sm->interfaces, + ({ + vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, i->sw_if_index), + i->is_inside ? "in" : "out"); + })); + + vec_foreach (ap, sm->addresses) + { + u8 * s = format (0, ""); + vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr); + clib_bitmap_foreach (j, ap->busy_port_bitmap, + ({ + s = format (s, " %d", j); + })); + vlib_cli_output (vm, " %d busy ports:%v", ap->busy_ports, s); + } + } + + if (sm->num_workers > 1) + { + vlib_cli_output (vm, "%d workers", vec_len (sm->workers)); + if (verbose > 0) + { + vec_foreach (worker, sm->workers) + { + vlib_worker_thread_t *w = + vlib_worker_threads + *worker + sm->first_worker_index; + vlib_cli_output (vm, " %v", w->name); + } + } + } + + if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking)) + { + vlib_cli_output (vm, "%d static mappings", + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + } + } + else + { + vec_foreach (tsm, sm->per_thread_data) + { + users_num += pool_elts (tsm->users); + sessions_num += pool_elts (tsm->sessions); + } + + vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions," + " %d static mappings", + users_num, + vec_len (sm->addresses), + sessions_num, + pool_elts (sm->static_mappings)); + + if (verbose > 0) + { + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in, + verbose - 1); + vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out, + verbose - 1); + vec_foreach_index (j, sm->per_thread_data) + { + tsm = vec_elt_at_index (sm->per_thread_data, j); + + if (pool_elts (tsm->users) == 0) + continue; + + vlib_worker_thread_t *w = vlib_worker_threads + j; + vlib_cli_output (vm, "Thread %d (%v at lcore %u):", j, w->name, + w->lcore_id); + vlib_cli_output (vm, " %d list pool elements", + pool_elts (tsm->list_pool)); + + pool_foreach (u, tsm->users, + ({ + vlib_cli_output (vm, " %U", format_snat_user, tsm, u, + verbose - 1); + })); + } + + if (pool_elts (sm->static_mappings)) + { + vlib_cli_output (vm, "static mappings:"); + pool_foreach (m, sm->static_mappings, + ({ + vlib_cli_output (vm, "%U", format_snat_static_mapping, m); + })); + } + } + } + + return 0; +} + +VLIB_CLI_COMMAND (show_snat_command, static) = { + .path = "show snat", + .short_help = "show snat", + .function = show_snat_command_fn, +}; diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h new file mode 100644 index 00000000000..cb31dc51423 --- /dev/null +++ b/src/plugins/snat/snat.h @@ -0,0 +1,259 @@ + +/* + * snat.h - simple nat definitions + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_snat_h__ +#define __included_snat_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/icmp46_packet.h> +#include <vnet/api_errno.h> +#include <vppinfra/bihash_8_8.h> +#include <vppinfra/dlist.h> +#include <vppinfra/error.h> +#include <vlibapi/api.h> + +/* Key */ +typedef struct { + union + { + struct + { + ip4_address_t addr; + u16 port; + u16 protocol:3, + fib_index:13; + }; + u64 as_u64; + }; +} snat_session_key_t; + +typedef struct { + union + { + struct + { + ip4_address_t addr; + u32 fib_index; + }; + u64 as_u64; + }; +} snat_user_key_t; + +typedef struct { + union + { + struct + { + ip4_address_t addr; + u16 port; + u16 fib_index; + }; + u64 as_u64; + }; +} snat_static_mapping_key_t; + + +typedef enum { + SNAT_PROTOCOL_UDP = 0, + SNAT_PROTOCOL_TCP, + SNAT_PROTOCOL_ICMP, +} snat_protocol_t; + + +#define SNAT_SESSION_FLAG_STATIC_MAPPING 1 + +typedef CLIB_PACKED(struct { + snat_session_key_t out2in; /* 0-15 */ + + snat_session_key_t in2out; /* 16-31 */ + + u32 flags; /* 32-35 */ + + /* per-user translations */ + u32 per_user_index; /* 36-39 */ + + u32 per_user_list_head_index; /* 40-43 */ + + /* Last heard timer */ + f64 last_heard; /* 44-51 */ + + u64 total_bytes; /* 52-59 */ + + u32 total_pkts; /* 60-63 */ + + /* Outside address */ + u32 outside_address_index; /* 64-67 */ + +}) snat_session_t; + + +typedef struct { + ip4_address_t addr; + u32 sessions_per_user_list_head_index; + u32 nsessions; + u32 nstaticsessions; +} snat_user_t; + +typedef struct { + ip4_address_t addr; + u32 busy_ports; + uword * busy_port_bitmap; +} snat_address_t; + +typedef struct { + ip4_address_t local_addr; + ip4_address_t external_addr; + u16 local_port; + u16 external_port; + u8 addr_only; + u32 vrf_id; + u32 fib_index; +} snat_static_mapping_t; + +typedef struct { + u32 sw_if_index; + u8 is_inside; +} snat_interface_t; + +typedef struct { + /* User pool */ + snat_user_t * users; + + /* Session pool */ + snat_session_t * sessions; + + /* Pool of doubly-linked list elements */ + dlist_elt_t * list_pool; +} snat_main_per_thread_data_t; + +typedef struct { + /* Main lookup tables */ + clib_bihash_8_8_t out2in; + clib_bihash_8_8_t in2out; + + /* Find-a-user => src address lookup */ + clib_bihash_8_8_t user_hash; + + /* Non-translated packets worker lookup => src address + VRF */ + clib_bihash_8_8_t worker_by_in; + + /* Translated packets worker lookup => IP address + port number */ + clib_bihash_8_8_t worker_by_out; + + u32 num_workers; + u32 first_worker_index; + u32 next_worker; + u32 * workers; + + /* Per thread data */ + snat_main_per_thread_data_t * per_thread_data; + + /* Find a static mapping by local */ + clib_bihash_8_8_t static_mapping_by_local; + + /* Find a static mapping by external */ + clib_bihash_8_8_t static_mapping_by_external; + + /* Static mapping pool */ + snat_static_mapping_t * static_mappings; + + /* Interface pool */ + snat_interface_t * interfaces; + + /* Vector of outside addresses */ + snat_address_t * addresses; + + /* Randomize port allocation order */ + u32 random_seed; + + /* Worker handoff index */ + u32 fq_in2out_index; + u32 fq_out2in_index; + + /* Config parameters */ + u8 static_mapping_only; + u8 static_mapping_connection_tracking; + u32 translation_buckets; + u32 translation_memory_size; + u32 user_buckets; + u32 user_memory_size; + u32 max_translations_per_user; + u32 outside_vrf_id; + u32 outside_fib_index; + u32 inside_vrf_id; + u32 inside_fib_index; + + /* API message ID base */ + u16 msg_id_base; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; + ip4_main_t * ip4_main; + ip_lookup_main_t * ip4_lookup_main; + ethernet_main_t * ethernet_main; + api_main_t * api_main; +} snat_main_t; + +extern snat_main_t snat_main; +extern vlib_node_registration_t snat_in2out_node; +extern vlib_node_registration_t snat_out2in_node; +extern vlib_node_registration_t snat_in2out_fast_node; +extern vlib_node_registration_t snat_out2in_fast_node; +extern vlib_node_registration_t snat_in2out_worker_handoff_node; +extern vlib_node_registration_t snat_out2in_worker_handoff_node; + +void snat_free_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 address_index); + +int snat_alloc_outside_address_and_port (snat_main_t * sm, + snat_session_key_t * k, + u32 * address_indexp); + +int snat_static_mapping_match (snat_main_t * sm, + snat_session_key_t match, + snat_session_key_t * mapping, + u8 by_external); + +format_function_t format_snat_user; + +typedef struct { + u32 cached_sw_if_index; + u32 cached_ip4_address; +} snat_runtime_t; + +/** \brief Check if SNAT session is created from static mapping. + @param s SNAT session + @return 1 if SNAT session is created from static mapping otherwise 0 +*/ +#define snat_is_session_static(s) s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING + +/* + * Why is this here? Because we don't need to touch this layer to + * simply reply to an icmp. We need to change id to a unique + * value to NAT an echo request/reply. + */ + +typedef struct { + u16 identifier; + u16 sequence; +} icmp_echo_header_t; + +#endif /* __included_snat_h__ */ diff --git a/src/plugins/snat/snat_all_api_h.h b/src/plugins/snat/snat_all_api_h.h new file mode 100644 index 00000000000..490177008e0 --- /dev/null +++ b/src/plugins/snat/snat_all_api_h.h @@ -0,0 +1,19 @@ + +/* + * snat_all_api_h.h - skeleton vpp engine plug-in api #include file + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <snat/snat.api.h> diff --git a/src/plugins/snat/snat_msg_enum.h b/src/plugins/snat/snat_msg_enum.h new file mode 100644 index 00000000000..2c76fd51158 --- /dev/null +++ b/src/plugins/snat/snat_msg_enum.h @@ -0,0 +1,31 @@ + +/* + * snat_msg_enum.h - skeleton vpp engine plug-in message enumeration + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_snat_msg_enum_h +#define included_snat_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <snat/snat_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_snat_msg_enum_h */ diff --git a/src/plugins/snat/snat_test.c b/src/plugins/snat/snat_test.c new file mode 100644 index 00000000000..2a003ba60c6 --- /dev/null +++ b/src/plugins/snat/snat_test.c @@ -0,0 +1,602 @@ + +/* + * snat.c - skeleton vpp-api-test plug-in + * + * Copyright (c) <current-year> <your-organization> + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vat/vat.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vlibsocket/api.h> +#include <vppinfra/error.h> +#include <vnet/ip/ip.h> + +uword unformat_sw_if_index (unformat_input_t * input, va_list * args); + +/* Declare message IDs */ +#include <snat/snat_msg_enum.h> + +/* define message structures */ +#define vl_typedefs +#include <snat/snat_all_api_h.h> +#undef vl_typedefs + +/* declare message handlers for each api */ + +#define vl_endianfun /* define message structures */ +#include <snat/snat_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) +#define vl_printfun +#include <snat/snat_all_api_h.h> +#undef vl_printfun + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include <snat/snat_all_api_h.h> +#undef vl_api_version + +typedef struct { + /* API message ID base */ + u16 msg_id_base; + vat_main_t *vat_main; +} snat_test_main_t; + +snat_test_main_t snat_test_main; + +#define foreach_standard_reply_retval_handler \ +_(snat_add_address_range_reply) \ +_(snat_interface_add_del_feature_reply) \ +_(snat_add_static_mapping_reply) \ +_(snat_set_workers_reply) + +#define _(n) \ + static void vl_api_##n##_t_handler \ + (vl_api_##n##_t * mp) \ + { \ + vat_main_t * vam = snat_test_main.vat_main; \ + i32 retval = ntohl(mp->retval); \ + if (vam->async_mode) { \ + vam->async_errors += (retval < 0); \ + } else { \ + vam->retval = retval; \ + vam->result_ready = 1; \ + } \ + } +foreach_standard_reply_retval_handler; +#undef _ + +/* + * Table of message reply handlers, must include boilerplate handlers + * we just generated + */ +#define foreach_vpe_api_reply_msg \ +_(SNAT_ADD_ADDRESS_RANGE_REPLY, snat_add_address_range_reply) \ +_(SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY, \ + snat_interface_add_del_feature_reply) \ +_(SNAT_ADD_STATIC_MAPPING_REPLY, snat_add_static_mapping_reply) \ +_(SNAT_CONTROL_PING_REPLY, snat_control_ping_reply) \ +_(SNAT_STATIC_MAPPING_DETAILS, snat_static_mapping_details) \ +_(SNAT_SHOW_CONFIG_REPLY, snat_show_config_reply) \ +_(SNAT_ADDRESS_DETAILS, snat_address_details) \ +_(SNAT_INTERFACE_DETAILS, snat_interface_details) \ +_(SNAT_SET_WORKERS_REPLY, snat_set_workers_reply) \ +_(SNAT_WORKER_DETAILS, snat_worker_details) + +/* M: construct, but don't yet send a message */ +#define M(T,t) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +#define M2(T,t,n) \ +do { \ + vam->result_ready = 0; \ + mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \ + memset (mp, 0, sizeof (*mp)); \ + mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \ + mp->client_index = vam->my_client_index; \ +} while(0); + +/* S: send a message */ +#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp)) + +/* W: wait for results, with timeout */ +#define W \ +do { \ + timeout = vat_time_now (vam) + 1.0; \ + \ + while (vat_time_now (vam) < timeout) { \ + if (vam->result_ready == 1) { \ + return (vam->retval); \ + } \ + } \ + return -99; \ +} while(0); + +static int api_snat_add_address_range (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + ip4_address_t start_addr, end_addr; + u32 start_host_order, end_host_order; + vl_api_snat_add_address_range_t * mp; + u8 is_add = 1; + int count; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U - %U", + unformat_ip4_address, &start_addr, + unformat_ip4_address, &end_addr)) + ; + else if (unformat (i, "%U", unformat_ip4_address, &start_addr)) + end_addr = start_addr; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + start_host_order = clib_host_to_net_u32 (start_addr.as_u32); + end_host_order = clib_host_to_net_u32 (end_addr.as_u32); + + if (end_host_order < start_host_order) + { + errmsg ("end address less than start address\n"); + return -99; + } + + count = (end_host_order - start_host_order) + 1; + + if (count > 1024) + { + errmsg ("%U - %U, %d addresses...\n", + format_ip4_address, &start_addr, + format_ip4_address, &end_addr, + count); + } + + M(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range); + + memcpy (mp->first_ip_address, &start_addr, 4); + memcpy (mp->last_ip_address, &end_addr, 4); + mp->is_ip4 = 1; + mp->is_add = is_add; + + S; W; + + /* NOTREACHED */ + return 0; +} + +static int api_snat_interface_add_del_feature (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_interface_add_del_feature_t * mp; + u32 sw_if_index; + u8 sw_if_index_set = 0; + u8 is_inside = 1; + u8 is_add = 1; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "sw_if_index %d", &sw_if_index)) + sw_if_index_set = 1; + else if (unformat (i, "out")) + is_inside = 0; + else if (unformat (i, "in")) + is_inside = 1; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (sw_if_index_set == 0) + { + errmsg ("interface / sw_if_index required\n"); + return -99; + } + + M(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature); + mp->sw_if_index = ntohl(sw_if_index); + mp->is_add = is_add; + mp->is_inside = is_inside; + + S; W; + /* NOTREACHED */ + return 0; +} + +static int api_snat_add_static_mapping(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_add_static_mapping_t * mp; + u8 addr_set_n = 0; + u8 is_add = 1; + u8 addr_only = 1; + ip4_address_t local_addr, external_addr; + u32 local_port = 0, external_port = 0, vrf_id = ~0; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "local_addr %U", unformat_ip4_address, &local_addr)) + addr_set_n++; + else if (unformat (i, "external_addr %U", unformat_ip4_address, + &external_addr)) + addr_set_n++; + else if (unformat (i, "local_port %u", &local_port)) + addr_only = 0; + else if (unformat (i, "external_port %u", &external_port)) + addr_only = 0; + else if (unformat (i, "vrf %u", &vrf_id)) + ; + else if (unformat (i, "del")) + is_add = 0; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + if (addr_set_n != 2) + { + errmsg ("local_addr and remote_addr required\n"); + return -99; + } + + M(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping); + mp->is_add = is_add; + mp->is_ip4 = 1; + mp->addr_only = addr_only; + mp->local_port = ntohs ((u16) local_port); + mp->external_port = ntohs ((u16) external_port); + mp->vrf_id = ntohl (vrf_id); + memcpy (mp->local_ip_address, &local_addr, 4); + memcpy (mp->external_ip_address, &external_addr, 4); + + S; W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_control_ping_reply_t_handler + (vl_api_snat_control_ping_reply_t * mp) +{ + vat_main_t *vam = &vat_main; + i32 retval = ntohl (mp->retval); + if (vam->async_mode) + { + vam->async_errors += (retval < 0); + } + else + { + vam->retval = retval; + vam->result_ready = 1; + } +} + +static void vl_api_snat_static_mapping_details_t_handler + (vl_api_snat_static_mapping_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + if (mp->addr_only) + fformat (vam->ofp, "%15U%6s%15U%6s%11d\n", + format_ip4_address, &mp->local_ip_address, "", + format_ip4_address, &mp->external_ip_address, "", + ntohl (mp->vrf_id)); + else + fformat (vam->ofp, "%15U%6d%15U%6d%11d\n", + format_ip4_address, &mp->local_ip_address, + ntohs (mp->local_port), + format_ip4_address, &mp->external_ip_address, + ntohs (mp->external_port), + ntohl (mp->vrf_id)); + +} + +static int api_snat_static_mapping_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_static_mapping_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_static_mapping_dump"); + return -99; + } + + fformat (vam->ofp, "%21s%21s\n", "local", "external"); + fformat (vam->ofp, "%15s%6s%15s%6s%11s\n", "address", "port", "address", + "port", "vrf"); + + M(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_show_config_reply_t_handler + (vl_api_snat_show_config_reply_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + i32 retval = ntohl (mp->retval); + + if (retval >= 0) + { + fformat (vam->ofp, "translation hash buckets %d\n", + ntohl (mp->translation_buckets)); + fformat (vam->ofp, "translation hash memory %d\n", + ntohl (mp->translation_memory_size)); + fformat (vam->ofp, "user hash buckets %d\n", ntohl (mp->user_buckets)); + fformat (vam->ofp, "user hash memory %d\n", ntohl (mp->user_memory_size)); + fformat (vam->ofp, "max translations per user %d\n", + ntohl (mp->max_translations_per_user)); + fformat (vam->ofp, "outside VRF id %d\n", ntohl (mp->outside_vrf_id)); + fformat (vam->ofp, "inside VRF id %d\n", ntohl (mp->inside_vrf_id)); + if (mp->static_mapping_only) + { + fformat (vam->ofp, "static mapping only"); + if (mp->static_mapping_connection_tracking) + fformat (vam->ofp, " connection tracking"); + fformat (vam->ofp, "\n"); + } + } + vam->retval = retval; + vam->result_ready = 1; +} + +static int api_snat_show_config(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_show_config_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_show_config"); + return -99; + } + + M(SNAT_SHOW_CONFIG, snat_show_config); + S; W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_address_details_t_handler + (vl_api_snat_address_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "%U\n", format_ip4_address, &mp->ip_address); +} + +static int api_snat_address_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_address_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_ADDRESS_DUMP, snat_address_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_interface_details_t_handler + (vl_api_snat_interface_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "sw_if_index %d %s\n", ntohl (mp->sw_if_index), + mp->is_inside ? "in" : "out"); +} + +static int api_snat_interface_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_interface_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_INTERFACE_DUMP, snat_interface_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +static int api_snat_set_workers (vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + unformat_input_t * i = vam->input; + f64 timeout; + vl_api_snat_set_workers_t * mp; + uword *bitmap; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "%U", unformat_bitmap_list, &bitmap)) + ; + else + { + clib_warning("unknown input '%U'", format_unformat_error, i); + return -99; + } + } + + M(SNAT_SET_WORKERS, snat_set_workers); + mp->worker_mask = clib_host_to_net_u64 (bitmap[0]); + + S; W; + + /* NOTREACHED */ + return 0; +} + +static void vl_api_snat_worker_details_t_handler + (vl_api_snat_worker_details_t *mp) +{ + snat_test_main_t * sm = &snat_test_main; + vat_main_t *vam = sm->vat_main; + + fformat (vam->ofp, "worker_index %d (%s at lcore %u)\n", + ntohl (mp->worker_index), mp->name, ntohl (mp->lcore_id)); +} + +static int api_snat_worker_dump(vat_main_t * vam) +{ + snat_test_main_t * sm = &snat_test_main; + f64 timeout; + vl_api_snat_worker_dump_t * mp; + + if (vam->json_output) + { + clib_warning ("JSON output not supported for snat_address_dump"); + return -99; + } + + M(SNAT_WORKER_DUMP, snat_worker_dump); + S; + /* Use a control ping for synchronization */ + { + vl_api_snat_control_ping_t *mp; + M (SNAT_CONTROL_PING, snat_control_ping); + S; + } + W; + /* NOTREACHED */ + return 0; +} + +/* + * List of messages that the api test plugin sends, + * and that the data plane plugin processes + */ +#define foreach_vpe_api_msg \ +_(snat_add_address_range, "<start-addr> [- <end-addr] [del]") \ +_(snat_interface_add_del_feature, \ + "<intfc> | sw_if_index <id> [in] [out] [del]") \ +_(snat_add_static_mapping, "local_addr <ip> external_addr <ip> " \ + "[local_port <n>] [external_port <n>] [vrf <table-id>] [del]") \ +_(snat_set_workers, "<wokrers_bitmap>") \ +_(snat_static_mapping_dump, "") \ +_(snat_show_config, "") \ +_(snat_address_dump, "") \ +_(snat_interface_dump, "") \ +_(snat_worker_dump, "") + +void vat_api_hookup (vat_main_t *vam) +{ + snat_test_main_t * sm __attribute__((unused)) = &snat_test_main; + /* Hook up handlers for replies from the data plane plug-in */ +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_vpe_api_reply_msg; +#undef _ + + /* API messages we can send */ +#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n); + foreach_vpe_api_msg; +#undef _ + + /* Help strings */ +#define _(n,h) hash_set_mem (vam->help_by_name, #n, h); + foreach_vpe_api_msg; +#undef _ +} + +clib_error_t * vat_plugin_register (vat_main_t *vam) +{ + snat_test_main_t * sm = &snat_test_main; + u8 * name; + + sm->vat_main = vam; + + /* Ask the vpp engine for the first assigned message-id */ + name = format (0, "snat_%08x%c", api_version, 0); + sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name); + + if (sm->msg_id_base != (u16) ~0) + vat_api_hookup (vam); + + vec_free(name); + + return 0; +} |