aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-12-28 18:38:59 +0100
committerDamjan Marion <damarion@cisco.com>2017-01-01 18:11:43 +0100
commitcb034b9b374927c7552e36dcbc306d8456b2a0cb (patch)
tree9ff64f9792560630c8cf8faa2f74fc20671c30f1 /src/plugins
parentfdc62abdc113ea63dc867375bd49ef3043dcd290 (diff)
Move java,lua api and remaining plugins to src/
Change-Id: I1c3b87e886603678368428ae56a6bd3327cbc90d Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/Makefile.am18
-rw-r--r--src/plugins/acl.am35
-rw-r--r--src/plugins/acl/acl.api444
-rw-r--r--src/plugins/acl/acl.c1901
-rw-r--r--src/plugins/acl/acl.h148
-rw-r--r--src/plugins/acl/acl_all_api_h.h321
-rw-r--r--src/plugins/acl/acl_msg_enum.h28
-rw-r--r--src/plugins/acl/acl_test.c1024
-rw-r--r--src/plugins/acl/l2sess.c243
-rw-r--r--src/plugins/acl/l2sess.h150
-rw-r--r--src/plugins/acl/l2sess_node.c816
-rw-r--r--src/plugins/acl/node_in.c168
-rw-r--r--src/plugins/acl/node_in.h12
-rw-r--r--src/plugins/acl/node_out.c175
-rw-r--r--src/plugins/acl/node_out.h12
-rwxr-xr-xsrc/plugins/acl/test/run-python28
-rwxr-xr-xsrc/plugins/acl/test/run-scapy26
-rw-r--r--src/plugins/acl/test/test_acl_plugin.py118
-rw-r--r--src/plugins/ioam.am150
-rw-r--r--src/plugins/ioam/dir.dox18
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.c232
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.h47
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_pot.c276
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_seqno.c109
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_seqno.h70
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c141
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_trace.c438
-rw-r--r--src/plugins/ioam/export-common/ioam_export.h616
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api42
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c271
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h16
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h28
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c215
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c49
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c162
-rw-r--r--src/plugins/ioam/export/ioam_export.api42
-rw-r--r--src/plugins/ioam/export/ioam_export.c282
-rw-r--r--src/plugins/ioam/export/ioam_export_all_api_h.h16
-rw-r--r--src/plugins/ioam/export/ioam_export_msg_enum.h28
-rw-r--r--src/plugins/ioam/export/ioam_export_test.c206
-rw-r--r--src/plugins/ioam/export/ioam_export_thread.c38
-rw-r--r--src/plugins/ioam/export/node.c151
-rw-r--r--src/plugins/ioam/ioam_plugin_doc.md464
-rw-r--r--src/plugins/ioam/lib-pot/math64.h159
-rw-r--r--src/plugins/ioam/lib-pot/pot.api133
-rw-r--r--src/plugins/ioam/lib-pot/pot_all_api_h.h16
-rw-r--r--src/plugins/ioam/lib-pot/pot_api.c292
-rw-r--r--src/plugins/ioam/lib-pot/pot_msg_enum.h28
-rw-r--r--src/plugins/ioam/lib-pot/pot_test.c365
-rw-r--r--src/plugins/ioam/lib-pot/pot_util.c445
-rw-r--r--src/plugins/ioam/lib-pot/pot_util.h195
-rw-r--r--src/plugins/ioam/lib-trace/trace.api92
-rw-r--r--src/plugins/ioam/lib-trace/trace_all_api_h.h16
-rw-r--r--src/plugins/ioam/lib-trace/trace_api.c252
-rw-r--r--src/plugins/ioam/lib-trace/trace_msg_enum.h28
-rw-r--r--src/plugins/ioam/lib-trace/trace_test.c292
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.c206
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.h247
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c223
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c194
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c353
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c188
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api181
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h16
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c378
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c773
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h183
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h61
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c552
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h172
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h28
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c600
-rw-r--r--src/plugins/lb.am42
-rw-r--r--src/plugins/lb/api.c228
-rw-r--r--src/plugins/lb/cli.c250
-rw-r--r--src/plugins/lb/lb.api71
-rw-r--r--src/plugins/lb/lb.c844
-rw-r--r--src/plugins/lb/lb.h333
-rw-r--r--src/plugins/lb/lb_plugin_doc.md141
-rw-r--r--src/plugins/lb/lb_test.c293
-rw-r--r--src/plugins/lb/lbhash.h216
-rw-r--r--src/plugins/lb/node.c419
-rw-r--r--src/plugins/lb/refcount.c41
-rw-r--r--src/plugins/lb/refcount.h67
-rw-r--r--src/plugins/lb/util.c72
-rw-r--r--src/plugins/lb/util.h40
-rw-r--r--src/plugins/snat.am33
-rw-r--r--src/plugins/snat/in2out.c1597
-rw-r--r--src/plugins/snat/out2in.c1261
-rw-r--r--src/plugins/snat/snat.api283
-rw-r--r--src/plugins/snat/snat.c1957
-rw-r--r--src/plugins/snat/snat.h259
-rw-r--r--src/plugins/snat/snat_all_api_h.h19
-rw-r--r--src/plugins/snat/snat_msg_enum.h31
-rw-r--r--src/plugins/snat/snat_test.c602
95 files changed, 25541 insertions, 0 deletions
diff --git a/src/plugins/Makefile.am b/src/plugins/Makefile.am
index ffc4b3abddc..987310b7b40 100644
--- a/src/plugins/Makefile.am
+++ b/src/plugins/Makefile.am
@@ -22,10 +22,15 @@ BUILT_SOURCES =
vppplugins_LTLIBRARIES =
vppapitestplugins_LTLIBRARIES =
noinst_HEADERS =
+nobase_apiinclude_HEADERS =
vppapitestpluginsdir = ${libdir}/vpp_api_test_plugins
vpppluginsdir = ${libdir}/vpp_plugins
+if ENABLE_ACL_PLUGIN
+include acl.am
+endif
+
if ENABLE_FLOWPERPKT_PLUGIN
include flowperpkt.am
endif
@@ -34,10 +39,22 @@ if ENABLE_ILA_PLUGIN
include ila.am
endif
+if ENABLE_IOAM_PLUGIN
+include ioam.am
+endif
+
+if ENABLE_LB_PLUGIN
+include lb.am
+endif
+
if ENABLE_SIXRD_PLUGIN
include sixrd.am
endif
+if ENABLE_SNAT_PLUGIN
+include snat.am
+endif
+
include ../suffix-rules.mk
# Remove *.la files
@@ -50,6 +67,7 @@ install-data-hook:
###############################################################################
apidir = $(prefix)/share/vpp/api/plugins
+apiincludedir = ${includedir}/vpp_plugins
api_DATA = \
$(patsubst %.api,%.api.json,$(API_FILES))
diff --git a/src/plugins/acl.am b/src/plugins/acl.am
new file mode 100644
index 00000000000..efed31c2912
--- /dev/null
+++ b/src/plugins/acl.am
@@ -0,0 +1,35 @@
+# Copyright (c) 2016 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += acl_test_plugin.la
+vppplugins_LTLIBRARIES += acl_plugin.la
+
+acl_plugin_la_SOURCES = \
+ acl/acl.c \
+ acl/node_in.c \
+ acl/node_out.c \
+ acl/l2sess.c \
+ acl/l2sess_node.c \
+ acl/l2sess.h \
+ acl/acl_plugin.api.h
+
+API_FILES += acl/acl.api
+
+nobase_apiinclude_HEADERS += \
+ acl/acl_all_api_h.h \
+ acl/acl_msg_enum.h \
+ acl/acl.api.h
+
+acl_test_plugin_la_SOURCES = acl/acl_test.c acl/acl_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
new file mode 100644
index 00000000000..58a5a17180e
--- /dev/null
+++ b/src/plugins/acl/acl.api
@@ -0,0 +1,444 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+ This file defines the vpp control-plane API messages
+ used to control the ACL plugin
+*/
+
+
+/** \brief Get the plugin version
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+
+define acl_plugin_get_version
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to get the plugin version
+ @param context - returned sender context, to match reply w/ request
+ @param major - Incremented every time a known breaking behavior change is introduced
+ @param minor - Incremented with small changes, may be used to avoid buggy versions
+*/
+
+define acl_plugin_get_version_reply
+{
+ u32 context;
+ u32 major;
+ u32 minor;
+};
+
+/** \brief Access List Rule entry
+ @param is_permit - deny (0), permit (1), or permit+reflect(2) action on this rule.
+ @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+ @param src_ip_addr - Source prefix value
+ @param src_ip_prefix_len - Source prefix length
+ @param dst_ip_addr - Destination prefix value
+ @param dst_ip_prefix_len - Destination prefix length
+ @param proto - L4 protocol (http://www.iana.org/assignments/protocol-numbers/protocol-numbers.xhtml)
+ @param srcport_or_icmptype_first - beginning of source port or ICMP4/6 type range
+ @param srcport_or_icmptype_last - end of source port or ICMP4/6 type range
+ @param dstport_or_icmpcode_first - beginning of destination port or ICMP4/6 code range
+ @param dstport_or_icmpcode_last - end of destination port or ICMP4/6 code range
+ @param tcp_flags_mask - if proto==6, match masked TCP flags with this value
+ @param tcp_flags_value - if proto==6, mask to AND the TCP flags in the packet with
+*/
+
+typeonly manual_print manual_endian define acl_rule
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ u8 src_ip_addr[16];
+ u8 src_ip_prefix_len;
+ u8 dst_ip_addr[16];
+ u8 dst_ip_prefix_len;
+/*
+ * L4 protocol. IANA number. 1 = ICMP, 58 = ICMPv6, 6 = TCP, 17 = UDP.
+ * 0 => ignore L4 and ignore the ports/tcpflags when matching.
+ */
+ u8 proto;
+/*
+ * If the L4 protocol is TCP or UDP, the below
+ * hold ranges of ports, else if the L4 is ICMP/ICMPv6
+ * they hold ranges of ICMP(v6) types/codes.
+ *
+ * Ranges are inclusive, i.e. to match "any" TCP/UDP port,
+ * use first=0,last=65535. For ICMP(v6),
+ * use first=0,last=255.
+ */
+ u16 srcport_or_icmptype_first;
+ u16 srcport_or_icmptype_last;
+ u16 dstport_or_icmpcode_first;
+ u16 dstport_or_icmpcode_last;
+/*
+ * for proto = 6, this matches if the
+ * TCP flags in the packet, ANDed with tcp_flags_mask,
+ * is equal to tcp_flags_value.
+ */
+ u8 tcp_flags_mask;
+ u8 tcp_flags_value;
+};
+
+/** \brief MACIP Access List Rule entry
+ @param is_permit - deny (0), permit (1) action on this rule.
+ @param is_ipv6 - IP addresses in this rule are IPv6 (1) or IPv4 (0)
+ @param src_mac - match masked source MAC address against this value
+ @param src_mac_mask - AND source MAC address with this value before matching
+ @param src_ip_addr - Source prefix value
+ @param src_ip_prefix_len - Source prefix length
+*/
+
+typeonly manual_print manual_endian define macip_acl_rule
+{
+ u8 is_permit;
+ u8 is_ipv6;
+/*
+ * The source mac of the packet ANDed with src_mac_mask.
+ * The source ip[46] address in the packet is matched
+ * against src_ip_addr, with src_ip_prefix_len set to 0.
+ *
+ * For better performance, minimize the number of
+ * (src_mac_mask, src_ip_prefix_len) combinations
+ * in a MACIP ACL.
+ */
+ u8 src_mac[6];
+ u8 src_mac_mask[6];
+ u8 src_ip_addr[16];
+ u8 src_ip_prefix_len;
+};
+
+/** \brief Replace an existing ACL in-place or create a new ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - an existing ACL entry (0..0xfffffffe) to replace, or 0xffffffff to make new ACL
+ @param tag - a string value stored along with the ACL, for descriptive purposes
+ @param count - number of ACL rules
+ @r - Rules for this access-list
+*/
+
+manual_print manual_endian define acl_add_replace
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 to add, existing ACL# to replace */
+ u8 tag[64]; /* What gets in here gets out in the corresponding tag field when dumping the ACLs. */
+ u32 count;
+ vl_api_acl_rule_t r[count];
+};
+
+/** \brief Reply to add/replace ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of the updated or newly created ACL
+ @param retval 0 - no error
+*/
+
+define acl_add_replace_reply
+{
+ u32 context;
+ u32 acl_index;
+ i32 retval;
+};
+
+/** \brief Delete an ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - ACL index to delete
+*/
+
+define acl_del
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index;
+};
+
+/** \brief Reply to delete the ACL
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+define acl_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/* acl_interface_add_del(_reply) to be deprecated in lieu of acl_interface_set_acl_list */
+/** \brief Use acl_interface_set_acl_list instead
+ Append/remove an ACL index to/from the list of ACLs checked for an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add or delete the ACL index from the list
+ @param is_input - check the ACL on input (1) or output (0)
+ @param sw_if_index - the interface to alter the list of ACLs on
+ @param acl_index - index of ACL for the operation
+*/
+
+define acl_interface_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+/*
+ * is_input = 0 => ACL applied on interface egress
+ * is_input = 1 => ACL applied on interface ingress
+ */
+ u8 is_input;
+ u32 sw_if_index;
+ u32 acl_index;
+};
+
+/** \brief Reply to alter the ACL list
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+define acl_interface_add_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Set the vector of input/output ACLs checked for an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface to alter the list of ACLs on
+ @param count - total number of ACL indices in the vector
+ @param n_input - this many first elements correspond to input ACLs, the rest - output
+ @param acls - vector of ACL indices
+*/
+
+manual_endian define acl_interface_set_acl_list
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 count;
+ u8 n_input; /* First n_input ACLs are set as a list of input ACLs, the rest are applied as output */
+ u32 acls[count];
+};
+
+/** \brief Reply to set the ACL list on an interface
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+define acl_interface_set_acl_list_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump the specific ACL contents or all of the ACLs' contents
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - ACL index to dump, ~0 to dump all ACLs
+*/
+
+define acl_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about a single ACL contents
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - ACL index whose contents are being sent in this message
+ @param tag - Descriptive tag value which was supplied at ACL creation
+ @param count - Number of rules in this ACL
+ @param r - Array of rules within this ACL
+*/
+
+manual_print manual_endian define acl_details
+{
+ u32 context;
+ u32 acl_index;
+ u8 tag[64]; /* Same blob that was supplied to us when creating the ACL, one hopes. */
+ u32 count;
+ vl_api_acl_rule_t r[count];
+};
+
+/** \brief Dump the list(s) of ACL applied to specific or all interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to dump the ACL list for
+*/
+
+define acl_interface_list_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index; /* ~0 for all interfaces */
+};
+
+/** \brief Details about a single ACL contents
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface for which the list of ACLs is applied
+ @param count - total length of acl indices vector
+ @param n_input - this many of indices in the beginning are input ACLs, the rest - output
+ @param acls - the vector of ACL indices
+*/
+
+manual_endian define acl_interface_list_details
+{
+ u32 context;
+ u32 sw_if_index;
+ u8 count;
+ u8 n_input;
+ u32 acls[count];
+};
+
+/** \brief Add a MACIP ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param tag - descriptive value for this MACIP ACL
+ @param count - number of rules in this ACL
+ @param r - vector of MACIP ACL rules
+*/
+
+manual_print manual_endian define macip_acl_add
+{
+ u32 client_index;
+ u32 context;
+ u8 tag[64];
+ u32 count;
+ vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Reply to add MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of the newly created ACL
+ @param retval 0 - no error
+*/
+
+define macip_acl_add_reply
+{
+ u32 context;
+ u32 acl_index;
+ i32 retval;
+};
+
+/** \brief Delete a MACIP ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - MACIP ACL index to delete
+*/
+
+define macip_acl_del
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index;
+};
+
+/** \brief Reply to delete the MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+define macip_acl_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Add or delete a MACIP ACL to/from interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add (1) or delete (0) ACL from being used on an interface
+ @param sw_if_index - interface to apply the action to
+ @param acl_index - MACIP ACL index
+*/
+
+define macip_acl_interface_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ /* macip ACLs are always input */
+ u32 sw_if_index;
+ u32 acl_index;
+};
+
+/** \brief Reply to apply/unapply the MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param retval 0 - no error
+*/
+
+define macip_acl_interface_add_del_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump one or all defined MACIP ACLs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param acl_index - MACIP ACL index or ~0 to dump all ACLs
+*/
+
+define macip_acl_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 acl_index; /* ~0 for all ACLs */
+};
+
+/** \brief Details about one MACIP ACL
+ @param context - returned sender context, to match reply w/ request
+ @param acl_index - index of this MACIP ACL
+ @param tag - descriptive tag which was supplied during the creation
+ @param count - length of the vector of MACIP ACL rules
+ @param r - rules comprising this ACL
+*/
+
+manual_print manual_endian define macip_acl_details
+{
+ u32 context;
+ u32 acl_index;
+ u8 tag[64];
+ u32 count;
+ vl_api_macip_acl_rule_t r[count];
+};
+
+/** \brief Get the vector of MACIP ACL IDs applied to the interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+
+define macip_acl_interface_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply with the vector of MACIP ACLs by sw_if_index
+ @param context - returned sender context, to match reply w/ request
+ @param count - total number of elements in the vector
+ @param acls - the vector of active MACACL indices per sw_if_index
+*/
+
+define macip_acl_interface_get_reply
+{
+ u32 context;
+ u32 count;
+ u32 acls[count];
+};
+
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
new file mode 100644
index 00000000000..8ff5a6b721c
--- /dev/null
+++ b/src/plugins/acl/acl.c
@@ -0,0 +1,1901 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <acl/acl.h>
+#include <acl/l2sess.h>
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/classify/input_acl.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <acl/acl_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <acl/acl_all_api_h.h>
+#undef vl_api_version
+
+#include "node_in.h"
+#include "node_out.h"
+
+acl_main_t acl_main;
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+am->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO3(t, n, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = ntohs((t)+am->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+
+/* List of message types that this plugin understands */
+
+#define foreach_acl_plugin_api_msg \
+_(ACL_PLUGIN_GET_VERSION, acl_plugin_get_version) \
+_(ACL_ADD_REPLACE, acl_add_replace) \
+_(ACL_DEL, acl_del) \
+_(ACL_INTERFACE_ADD_DEL, acl_interface_add_del) \
+_(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list) \
+_(ACL_DUMP, acl_dump) \
+_(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump) \
+_(MACIP_ACL_ADD, macip_acl_add) \
+_(MACIP_ACL_DEL, macip_acl_del) \
+_(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del) \
+_(MACIP_ACL_DUMP, macip_acl_dump) \
+_(MACIP_ACL_INTERFACE_GET, macip_acl_interface_get)
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ acl_main_t *am = &acl_main;
+ clib_error_t *error = 0;
+
+ am->vlib_main = vm;
+ am->vnet_main = h->vnet_main;
+ am->ethernet_main = h->ethernet_main;
+
+ l2sess_vlib_plugin_register(vm, h, from_early_init);
+
+ return error;
+}
+
+
+static void
+vl_api_acl_plugin_get_version_t_handler (vl_api_acl_plugin_get_version_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_get_version_reply_t *rmp;
+ int msg_size = sizeof (*rmp);
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (msg_size);
+ memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_GET_VERSION_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->major = htonl (ACL_PLUGIN_VERSION_MAJOR);
+ rmp->minor = htonl (ACL_PLUGIN_VERSION_MINOR);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+
+static int
+acl_add_list (u32 count, vl_api_acl_rule_t rules[],
+ u32 * acl_list_index, u8 * tag)
+{
+ acl_main_t *am = &acl_main;
+ acl_list_t *a;
+ acl_rule_t *r;
+ acl_rule_t *acl_new_rules;
+ int i;
+
+ if (*acl_list_index != ~0)
+ {
+ /* They supplied some number, let's see if this ACL exists */
+ if (pool_is_free_index (am->acls, *acl_list_index))
+ {
+ /* tried to replace a non-existent ACL, no point doing anything */
+ return -1;
+ }
+ }
+
+ /* Create and populate the rules */
+ acl_new_rules = clib_mem_alloc_aligned (sizeof (acl_rule_t) * count,
+ CLIB_CACHE_LINE_BYTES);
+ if (!acl_new_rules)
+ {
+ /* Could not allocate rules. New or existing ACL - bail out regardless */
+ return -1;
+ }
+
+ for (i = 0; i < count; i++)
+ {
+ r = &acl_new_rules[i];
+ r->is_permit = rules[i].is_permit;
+ r->is_ipv6 = rules[i].is_ipv6;
+ if (r->is_ipv6)
+ {
+ memcpy (&r->src, rules[i].src_ip_addr, sizeof (r->src));
+ memcpy (&r->dst, rules[i].dst_ip_addr, sizeof (r->dst));
+ }
+ else
+ {
+ memcpy (&r->src.ip4, rules[i].src_ip_addr, sizeof (r->src.ip4));
+ memcpy (&r->dst.ip4, rules[i].dst_ip_addr, sizeof (r->dst.ip4));
+ }
+ r->src_prefixlen = rules[i].src_ip_prefix_len;
+ r->dst_prefixlen = rules[i].dst_ip_prefix_len;
+ r->proto = rules[i].proto;
+ r->src_port_or_type_first = rules[i].srcport_or_icmptype_first;
+ r->src_port_or_type_last = rules[i].srcport_or_icmptype_last;
+ r->dst_port_or_code_first = rules[i].dstport_or_icmpcode_first;
+ r->dst_port_or_code_last = rules[i].dstport_or_icmpcode_last;
+ r->tcp_flags_value = rules[i].tcp_flags_value;
+ r->tcp_flags_mask = rules[i].tcp_flags_mask;
+ }
+
+ if (~0 == *acl_list_index)
+ {
+ /* Get ACL index */
+ pool_get_aligned (am->acls, a, CLIB_CACHE_LINE_BYTES);
+ memset (a, 0, sizeof (*a));
+ /* Will return the newly allocated ACL index */
+ *acl_list_index = a - am->acls;
+ }
+ else
+ {
+ a = am->acls + *acl_list_index;
+ /* Get rid of the old rules */
+ clib_mem_free (a->rules);
+ }
+ a->rules = acl_new_rules;
+ a->count = count;
+ memcpy (a->tag, tag, sizeof (a->tag));
+
+ return 0;
+}
+
+static int
+acl_del_list (u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ acl_list_t *a;
+ int i, ii;
+ if (pool_is_free_index (am->acls, acl_list_index))
+ {
+ return -1;
+ }
+
+ /* delete any references to the ACL */
+ for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index); i++)
+ {
+ for (ii = 0; ii < vec_len (am->output_acl_vec_by_sw_if_index[i]);
+ /* see body */ )
+ {
+ if (acl_list_index == am->output_acl_vec_by_sw_if_index[i][ii])
+ {
+ vec_del1 (am->output_acl_vec_by_sw_if_index[i], ii);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+ }
+ for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index); i++)
+ {
+ for (ii = 0; ii < vec_len (am->input_acl_vec_by_sw_if_index[i]);
+ /* see body */ )
+ {
+ if (acl_list_index == am->input_acl_vec_by_sw_if_index[i][ii])
+ {
+ vec_del1 (am->input_acl_vec_by_sw_if_index[i], ii);
+ }
+ else
+ {
+ ii++;
+ }
+ }
+ }
+
+ /* now we can delete the ACL itself */
+ a = &am->acls[acl_list_index];
+ if (a->rules)
+ {
+ clib_mem_free (a->rules);
+ }
+ pool_put (am->acls, a);
+ return 0;
+}
+
+/* Some aids in ASCII graphing the content */
+#define XX "\377"
+#define __ "\000"
+#define _(x)
+#define v
+
+u8 ip4_5tuple_mask[] =
+_(" dmac smac etype ")
+_(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+ _(" v ihl totlen ")
+ _(0x0000)
+ __ __ __ __
+ _(" ident fl+fo ")
+ _(0x0004)
+ __ __ __ __
+ _(" ttl pr checksum ")
+ _(0x0008)
+ __ XX __ __
+ _(" src address ")
+ _(0x000C)
+ XX XX XX XX
+ _(" dst address ")
+ _(0x0010)
+ XX XX XX XX
+ _("L4 T/U sport dport ")
+ _(tcpudp)
+ XX XX XX XX
+ _(padpad)
+ __ __ __ __
+ _(padpad)
+ __ __ __ __
+ _(padeth)
+ __ __;
+
+ u8 ip6_5tuple_mask[] =
+ _(" dmac smac etype ")
+ _(ether) __ __ __ __ __ __ v __ __ __ __ __ __ v __ __ v
+ _(" v tc + flow ")
+ _(0x0000) __ __ __ __
+ _(" plen nh hl ")
+ _(0x0004) __ __ XX __
+ _(" src address ")
+ _(0x0008) XX XX XX XX
+ _(0x000C) XX XX XX XX
+ _(0x0010) XX XX XX XX
+ _(0x0014) XX XX XX XX
+ _(" dst address ")
+ _(0x0018) XX XX XX XX
+ _(0x001C) XX XX XX XX
+ _(0x0020) XX XX XX XX
+ _(0x0024) XX XX XX XX
+ _("L4T/U sport dport ")
+ _(tcpudp) XX XX XX XX _(padpad) __ __ __ __ _(padeth) __ __;
+
+#undef XX
+#undef __
+#undef _
+#undef v
+
+ static int count_skip (u8 * p, u32 size)
+{
+ u64 *p64 = (u64 *) p;
+ /* Be tolerant to null pointer */
+ if (0 == p)
+ return 0;
+
+ while ((0ULL == *p64) && ((u8 *) p64 - p) < size)
+ {
+ p64++;
+ }
+ return (p64 - (u64 *) p) / 2;
+}
+
+static int
+acl_classify_add_del_table_big (vnet_classify_main_t * cm, u8 * mask,
+ u32 mask_len, u32 next_table_index,
+ u32 miss_next_index, u32 * table_index,
+ int is_add)
+{
+ u32 nbuckets = 65536;
+ u32 memory_size = 2 << 30;
+ u32 skip = count_skip (mask, mask_len);
+ u32 match = (mask_len / 16) - skip;
+ u8 *skip_mask_ptr = mask + 16 * skip;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+
+ if (0 == match)
+ match = 1;
+
+ return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+ memory_size, skip, match,
+ next_table_index, miss_next_index,
+ table_index, current_data_flag,
+ current_data_offset, is_add,
+ 1 /* delete_chain */);
+}
+
+static int
+acl_classify_add_del_table_small (vnet_classify_main_t * cm, u8 * mask,
+ u32 mask_len, u32 next_table_index,
+ u32 miss_next_index, u32 * table_index,
+ int is_add)
+{
+ u32 nbuckets = 32;
+ u32 memory_size = 2 << 20;
+ u32 skip = count_skip (mask, mask_len);
+ u32 match = (mask_len / 16) - skip;
+ u8 *skip_mask_ptr = mask + 16 * skip;
+ u32 current_data_flag = 0;
+ int current_data_offset = 0;
+
+ if (0 == match)
+ match = 1;
+
+ return vnet_classify_add_del_table (cm, skip_mask_ptr, nbuckets,
+ memory_size, skip, match,
+ next_table_index, miss_next_index,
+ table_index, current_data_flag,
+ current_data_offset, is_add,
+ 1 /* delete_chain */);
+}
+
+
+static int
+acl_unhook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+
+ vec_validate_init_empty (am->acl_ip4_input_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (am->acl_ip6_input_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+
+ vnet_l2_input_classify_enable_disable (sw_if_index, 0);
+
+ if (am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip4_table_index =
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip4_table_index, 0);
+ }
+ if (am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip6_table_index =
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip6_table_index, 0);
+ }
+
+ return 0;
+}
+
+static int
+acl_unhook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+
+ vec_validate_init_empty (am->acl_ip4_output_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (am->acl_ip6_output_classify_table_by_sw_if_index,
+ sw_if_index, ~0);
+
+ vnet_l2_output_classify_enable_disable (sw_if_index, 0);
+
+ if (am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip4_table_index =
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip4_table_index, 0);
+ }
+ if (am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] != ~0)
+ {
+ ip6_table_index =
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index];
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] = ~0;
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip6_table_index, 0);
+ }
+
+ return 0;
+}
+
+static int
+acl_hook_l2_input_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ int rv;
+
+ /* in case there were previous tables attached */
+ acl_unhook_l2_input_classify (am, sw_if_index);
+ rv =
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip4_table_index, 1);
+ if (rv)
+ return rv;
+ rv =
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip6_table_index, 1);
+ if (rv)
+ {
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip4_table_index, 0);
+ return rv;
+ }
+ rv =
+ vnet_l2_input_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, ~0);
+ clib_warning
+ ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n",
+ sw_if_index, ip4_table_index, ip6_table_index);
+ if (rv)
+ {
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip6_table_index, 0);
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_input_classify_next_acl,
+ &ip4_table_index, 0);
+ return rv;
+ }
+
+ am->acl_ip4_input_classify_table_by_sw_if_index[sw_if_index] =
+ ip4_table_index;
+ am->acl_ip6_input_classify_table_by_sw_if_index[sw_if_index] =
+ ip6_table_index;
+
+ vnet_l2_input_classify_enable_disable (sw_if_index, 1);
+ return rv;
+}
+
+static int
+acl_hook_l2_output_classify (acl_main_t * am, u32 sw_if_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 ip4_table_index = ~0;
+ u32 ip6_table_index = ~0;
+ int rv;
+
+ /* in case there were previous tables attached */
+ acl_unhook_l2_output_classify (am, sw_if_index);
+ rv =
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip4_table_index, 1);
+ if (rv)
+ return rv;
+ rv =
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip6_table_index, 1);
+ if (rv)
+ {
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip4_table_index, 0);
+ return rv;
+ }
+ rv =
+ vnet_l2_output_classify_set_tables (sw_if_index, ip4_table_index,
+ ip6_table_index, ~0);
+ clib_warning
+ ("ACL enabling on interface sw_if_index %d, setting tables to the following: ip4: %d ip6: %d\n",
+ sw_if_index, ip4_table_index, ip6_table_index);
+ if (rv)
+ {
+ acl_classify_add_del_table_big (cm, ip6_5tuple_mask,
+ sizeof (ip6_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip6_table_index, 0);
+ acl_classify_add_del_table_big (cm, ip4_5tuple_mask,
+ sizeof (ip4_5tuple_mask) - 1, ~0,
+ am->l2_output_classify_next_acl,
+ &ip4_table_index, 0);
+ return rv;
+ }
+
+ am->acl_ip4_output_classify_table_by_sw_if_index[sw_if_index] =
+ ip4_table_index;
+ am->acl_ip6_output_classify_table_by_sw_if_index[sw_if_index] =
+ ip6_table_index;
+
+ vnet_l2_output_classify_enable_disable (sw_if_index, 1);
+ return rv;
+}
+
+
+int
+acl_interface_in_enable_disable (acl_main_t * am, u32 sw_if_index,
+ int enable_disable)
+{
+ int rv;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (enable_disable)
+ {
+ rv = acl_hook_l2_input_classify (am, sw_if_index);
+ }
+ else
+ {
+ rv = acl_unhook_l2_input_classify (am, sw_if_index);
+ }
+
+ return rv;
+}
+
+int
+acl_interface_out_enable_disable (acl_main_t * am, u32 sw_if_index,
+ int enable_disable)
+{
+ int rv;
+
+ /* Utterly wrong? */
+ if (pool_is_free_index (am->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (enable_disable)
+ {
+ rv = acl_hook_l2_output_classify (am, sw_if_index);
+ }
+ else
+ {
+ rv = acl_unhook_l2_output_classify (am, sw_if_index);
+ }
+
+ return rv;
+}
+
+
+static int
+acl_interface_add_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ vec_add (am->input_acl_vec_by_sw_if_index[sw_if_index], &acl_list_index,
+ 1);
+ acl_interface_in_enable_disable (am, sw_if_index, 1);
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ vec_add (am->output_acl_vec_by_sw_if_index[sw_if_index],
+ &acl_list_index, 1);
+ acl_interface_out_enable_disable (am, sw_if_index, 1);
+ }
+ return 0;
+}
+
+static int
+acl_interface_del_inout_acl (u32 sw_if_index, u8 is_input, u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ int i;
+ int rv = -1;
+ if (is_input)
+ {
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ i++)
+ {
+ if (acl_list_index ==
+ am->input_acl_vec_by_sw_if_index[sw_if_index][i])
+ {
+ vec_del1 (am->input_acl_vec_by_sw_if_index[sw_if_index], i);
+ rv = 0;
+ break;
+ }
+ }
+ if (0 == vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]))
+ {
+ acl_interface_in_enable_disable (am, sw_if_index, 0);
+ }
+ }
+ else
+ {
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0;
+ i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]); i++)
+ {
+ if (acl_list_index ==
+ am->output_acl_vec_by_sw_if_index[sw_if_index][i])
+ {
+ vec_del1 (am->output_acl_vec_by_sw_if_index[sw_if_index], i);
+ rv = 0;
+ break;
+ }
+ }
+ if (0 == vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]))
+ {
+ acl_interface_out_enable_disable (am, sw_if_index, 0);
+ }
+ }
+ return rv;
+}
+
+static void
+acl_interface_reset_inout_acls (u32 sw_if_index, u8 is_input)
+{
+ acl_main_t *am = &acl_main;
+ if (is_input)
+ {
+ acl_interface_in_enable_disable (am, sw_if_index, 0);
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ vec_reset_length (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ }
+ else
+ {
+ acl_interface_out_enable_disable (am, sw_if_index, 0);
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ vec_reset_length (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+ }
+}
+
+static int
+acl_interface_add_del_inout_acl (u32 sw_if_index, u8 is_add, u8 is_input,
+ u32 acl_list_index)
+{
+ int rv = -1;
+ if (is_add)
+ {
+ rv =
+ acl_interface_add_inout_acl (sw_if_index, is_input, acl_list_index);
+ }
+ else
+ {
+ rv =
+ acl_interface_del_inout_acl (sw_if_index, is_input, acl_list_index);
+ }
+ return rv;
+}
+
+
+static void *
+get_ptr_to_offset (vlib_buffer_t * b0, int offset)
+{
+ u8 *p = vlib_buffer_get_current (b0) + offset;
+ return p;
+}
+
+static u8
+acl_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6)
+{
+ u8 proto;
+ int proto_offset;
+ if (node_is_ip6)
+ {
+ proto_offset = 20;
+ }
+ else
+ {
+ proto_offset = 23;
+ }
+ proto = *((u8 *) vlib_buffer_get_current (b0) + proto_offset);
+ return proto;
+}
+
+static int
+acl_match_addr (ip46_address_t * addr1, ip46_address_t * addr2, int prefixlen,
+ int is_ip6)
+{
+ if (prefixlen == 0)
+ {
+ /* match any always succeeds */
+ return 1;
+ }
+ if (is_ip6)
+ {
+ if (memcmp (addr1, addr2, prefixlen / 8))
+ {
+ /* If the starting full bytes do not match, no point in bittwidling the thumbs further */
+ return 0;
+ }
+ if (prefixlen % 8)
+ {
+ u8 b1 = *((u8 *) addr1 + 1 + prefixlen / 8);
+ u8 b2 = *((u8 *) addr2 + 1 + prefixlen / 8);
+ u8 mask0 = (0xff - ((1 << (8 - (prefixlen % 8))) - 1));
+ return (b1 & mask0) == b2;
+ }
+ else
+ {
+ /* The prefix fits into integer number of bytes, so nothing left to do */
+ return 1;
+ }
+ }
+ else
+ {
+ uint32_t a1 = ntohl (addr1->ip4.as_u32);
+ uint32_t a2 = ntohl (addr2->ip4.as_u32);
+ uint32_t mask0 = 0xffffffff - ((1 << (32 - prefixlen)) - 1);
+ return (a1 & mask0) == a2;
+ }
+}
+
+static int
+acl_match_port (u16 port, u16 port_first, u16 port_last, int is_ip6)
+{
+ return ((port >= port_first) && (port <= port_last));
+}
+
+static int
+acl_packet_match (acl_main_t * am, u32 acl_index, vlib_buffer_t * b0,
+ u8 * r_action, int *r_is_ip6, u32 * r_acl_match_p,
+ u32 * r_rule_match_p, u32 * trace_bitmap)
+{
+ ethernet_header_t *h0;
+ u16 type0;
+
+ ip46_address_t src, dst;
+ int is_ip6;
+ int is_ip4;
+ u8 proto;
+ u16 src_port;
+ u16 dst_port;
+ u8 tcp_flags = 0;
+ int i;
+ acl_list_t *a;
+ acl_rule_t *r;
+
+ h0 = vlib_buffer_get_current (b0);
+ type0 = clib_net_to_host_u16 (h0->type);
+ is_ip4 = (type0 == ETHERNET_TYPE_IP4);
+ is_ip6 = (type0 == ETHERNET_TYPE_IP6);
+
+ if (!(is_ip4 || is_ip6))
+ {
+ return 0;
+ }
+ /* The bunch of hardcoded offsets here is intentional to get rid of them
+ ASAP, when getting to a faster matching code */
+ if (is_ip4)
+ {
+ clib_memcpy (&src.ip4, get_ptr_to_offset (b0, 26), 4);
+ clib_memcpy (&dst.ip4, get_ptr_to_offset (b0, 30), 4);
+ proto = acl_get_l4_proto (b0, 0);
+ if (1 == proto)
+ {
+ *trace_bitmap |= 0x00000001;
+ /* type */
+ src_port = *(u8 *) get_ptr_to_offset (b0, 34);
+ /* code */
+ dst_port = *(u8 *) get_ptr_to_offset (b0, 35);
+ }
+ else
+ {
+ /* assume TCP/UDP */
+ src_port = (*(u16 *) get_ptr_to_offset (b0, 34));
+ dst_port = (*(u16 *) get_ptr_to_offset (b0, 36));
+ /* UDP gets ability to check on an oddball data byte as a bonus */
+ tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 20 + 13);
+ }
+ }
+ else /* is_ipv6 implicitly */
+ {
+ clib_memcpy (&src, get_ptr_to_offset (b0, 22), 16);
+ clib_memcpy (&dst, get_ptr_to_offset (b0, 38), 16);
+ proto = acl_get_l4_proto (b0, 1);
+ if (58 == proto)
+ {
+ *trace_bitmap |= 0x00000002;
+ /* type */
+ src_port = *(u8 *) get_ptr_to_offset (b0, 54);
+ /* code */
+ dst_port = *(u8 *) get_ptr_to_offset (b0, 55);
+ }
+ else
+ {
+ /* assume TCP/UDP */
+ src_port = (*(u16 *) get_ptr_to_offset (b0, 54));
+ dst_port = (*(u16 *) get_ptr_to_offset (b0, 56));
+ tcp_flags = *(u8 *) get_ptr_to_offset (b0, 14 + 40 + 13);
+ }
+ }
+ if (pool_is_free_index (am->acls, acl_index))
+ {
+ if (r_acl_match_p)
+ *r_acl_match_p = acl_index;
+ if (r_rule_match_p)
+ *r_rule_match_p = -1;
+ /* the ACL does not exist but is used for policy. Block traffic. */
+ return 0;
+ }
+ a = am->acls + acl_index;
+ for (i = 0; i < a->count; i++)
+ {
+ r = a->rules + i;
+ if (is_ip6 != r->is_ipv6)
+ {
+ continue;
+ }
+ if (!acl_match_addr (&dst, &r->dst, r->dst_prefixlen, is_ip6))
+ continue;
+ if (!acl_match_addr (&src, &r->src, r->src_prefixlen, is_ip6))
+ continue;
+ if (r->proto)
+ {
+ if (proto != r->proto)
+ continue;
+ if (!acl_match_port
+ (src_port, r->src_port_or_type_first, r->src_port_or_type_last,
+ is_ip6))
+ continue;
+ if (!acl_match_port
+ (dst_port, r->dst_port_or_code_first, r->dst_port_or_code_last,
+ is_ip6))
+ continue;
+ /* No need for check of proto == TCP, since in other rules both fields should be zero, so this match will succeed */
+ if ((tcp_flags & r->tcp_flags_mask) != r->tcp_flags_value)
+ continue;
+ }
+ /* everything matches! */
+ *r_action = r->is_permit;
+ *r_is_ip6 = is_ip6;
+ if (r_acl_match_p)
+ *r_acl_match_p = acl_index;
+ if (r_rule_match_p)
+ *r_rule_match_p = i;
+ return 1;
+ }
+ return 0;
+}
+
+void
+input_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp,
+ u32 * acl_match_p, u32 * rule_match_p,
+ u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ uint8_t action = 0;
+ int is_ip6 = 0;
+ int i;
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0; i < vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ i++)
+ {
+ if (acl_packet_match
+ (am, am->input_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action,
+ &is_ip6, acl_match_p, rule_match_p, trace_bitmap))
+ {
+ if (is_ip6)
+ {
+ *nextp = am->acl_in_ip6_match_next[action];
+ }
+ else
+ {
+ *nextp = am->acl_in_ip4_match_next[action];
+ }
+ return;
+ }
+ }
+ if (vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]) > 0)
+ {
+ /* If there are ACLs and none matched, deny by default */
+ *nextp = 0;
+ }
+
+}
+
+void
+output_acl_packet_match (u32 sw_if_index, vlib_buffer_t * b0, u32 * nextp,
+ u32 * acl_match_p, u32 * rule_match_p,
+ u32 * trace_bitmap)
+{
+ acl_main_t *am = &acl_main;
+ uint8_t action = 0;
+ int is_ip6 = 0;
+ int i;
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+ for (i = 0; i < vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+ i++)
+ {
+ if (acl_packet_match
+ (am, am->output_acl_vec_by_sw_if_index[sw_if_index][i], b0, &action,
+ &is_ip6, acl_match_p, rule_match_p, trace_bitmap))
+ {
+ if (is_ip6)
+ {
+ *nextp = am->acl_out_ip6_match_next[action];
+ }
+ else
+ {
+ *nextp = am->acl_out_ip4_match_next[action];
+ }
+ return;
+ }
+ }
+ if (vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]) > 0)
+ {
+ /* If there are ACLs and none matched, deny by default */
+ *nextp = 0;
+ }
+}
+
+typedef struct
+{
+ u8 is_ipv6;
+ u8 mac_mask[6];
+ u8 prefix_len;
+ u32 count;
+ u32 table_index;
+} macip_match_type_t;
+
+static u32
+macip_find_match_type (macip_match_type_t * mv, u8 * mac_mask, u8 prefix_len,
+ u8 is_ipv6)
+{
+ u32 i;
+ if (mv)
+ {
+ for (i = 0; i < vec_len (mv); i++)
+ {
+ if ((mv[i].prefix_len == prefix_len) && (mv[i].is_ipv6 == is_ipv6)
+ && (0 == memcmp (mv[i].mac_mask, mac_mask, 6)))
+ {
+ return i;
+ }
+ }
+ }
+ return ~0;
+}
+
+
+/* Get metric used to sort match types.
+ The more specific and the more often seen - the bigger the metric */
+static int
+match_type_metric (macip_match_type_t * m)
+{
+ /* FIXME: count the ones in the MAC mask as well, check how well this heuristic works in real life */
+ return m->prefix_len + m->is_ipv6 + 10 * m->count;
+}
+
+static int
+match_type_compare (macip_match_type_t * m1, macip_match_type_t * m2)
+{
+ /* Ascending sort based on the metric values */
+ return match_type_metric (m1) - match_type_metric (m2);
+}
+
+/* Get the offset of L3 source within ethernet packet */
+static int
+get_l3_src_offset(int is6)
+{
+ if(is6)
+ return (sizeof(ethernet_header_t) + offsetof(ip6_header_t, src_address));
+ else
+ return (sizeof(ethernet_header_t) + offsetof(ip4_header_t, src_address));
+}
+
+static int
+macip_create_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+ macip_match_type_t *mvec = NULL;
+ macip_match_type_t *mt;
+ macip_acl_list_t *a = &am->macip_acls[macip_acl_index];
+ int i;
+ u32 match_type_index;
+ u32 last_table;
+ u8 mask[5 * 16];
+ vnet_classify_main_t *cm = &vnet_classify_main;
+
+ /* Count the number of different types of rules */
+ for (i = 0; i < a->count; i++)
+ {
+ if (~0 ==
+ (match_type_index =
+ macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+ a->rules[i].src_prefixlen,
+ a->rules[i].is_ipv6)))
+ {
+ match_type_index = vec_len (mvec);
+ vec_validate (mvec, match_type_index);
+ memcpy (mvec[match_type_index].mac_mask,
+ a->rules[match_type_index].src_mac_mask, 6);
+ mvec[match_type_index].prefix_len = a->rules[i].src_prefixlen;
+ mvec[match_type_index].is_ipv6 = a->rules[i].is_ipv6;
+ mvec[match_type_index].table_index = ~0;
+ }
+ mvec[match_type_index].count++;
+ }
+ /* Put the most frequently used tables last in the list so we can create classifier tables in reverse order */
+ vec_sort_with_function (mvec, match_type_compare);
+ /* Create the classifier tables */
+ last_table = ~0;
+ vec_foreach (mt, mvec)
+ {
+ int mask_len;
+ int is6 = mt->is_ipv6;
+ int l3_src_offs = get_l3_src_offset(is6);
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], mt->mac_mask, 6);
+ for (i = 0; i < (mt->prefix_len / 8); i++)
+ {
+ mask[l3_src_offs + i] = 0xff;
+ }
+ if (mt->prefix_len % 8)
+ {
+ mask[l3_src_offs + (mt->prefix_len / 8)] =
+ 0xff - ((1 << (8 - mt->prefix_len % 8)) - 1);
+ }
+ /*
+ * Round-up the number of bytes needed to store the prefix,
+ * and round up the number of vectors too
+ */
+ mask_len = ((l3_src_offs + ((mt->prefix_len+7) / 8) +
+ (sizeof (u32x4)-1))/sizeof(u32x4)) * sizeof (u32x4);
+ acl_classify_add_del_table_small (cm, mask, mask_len, last_table,
+ (~0 == last_table) ? 0 : ~0, &mt->table_index,
+ 1);
+ last_table = mt->table_index;
+ }
+ a->ip4_table_index = ~0;
+ a->ip6_table_index = ~0;
+ a->l2_table_index = last_table;
+
+ /* Populate the classifier tables with rules from the MACIP ACL */
+ for (i = 0; i < a->count; i++)
+ {
+ u32 action = 0;
+ u32 metadata = 0;
+ int is6 = a->rules[i].is_ipv6;
+ int l3_src_offs = get_l3_src_offset(is6);
+ memset (mask, 0, sizeof (mask));
+ memcpy (&mask[6], a->rules[i].src_mac, 6);
+ if (is6)
+ {
+ memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip6, 16);
+ }
+ else
+ {
+ memcpy (&mask[l3_src_offs], &a->rules[i].src_ip_addr.ip4, 4);
+ }
+ match_type_index =
+ macip_find_match_type (mvec, a->rules[i].src_mac_mask,
+ a->rules[i].src_prefixlen,
+ a->rules[i].is_ipv6);
+ /* add session to table mvec[match_type_index].table_index; */
+ vnet_classify_add_del_session (cm, mvec[match_type_index].table_index,
+ mask, a->rules[i].is_permit ? ~0 : 0, i,
+ 0, action, metadata, 1);
+ }
+ return 0;
+}
+
+static void
+macip_destroy_classify_tables (acl_main_t * am, u32 macip_acl_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ macip_acl_list_t *a = &am->macip_acls[macip_acl_index];
+
+ if (a->ip4_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip4_table_index, 0);
+ a->ip4_table_index = ~0;
+ }
+ if (a->ip6_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->ip6_table_index, 0);
+ a->ip6_table_index = ~0;
+ }
+ if (a->l2_table_index != ~0)
+ {
+ acl_classify_add_del_table_small (cm, 0, ~0, ~0, ~0, &a->l2_table_index, 0);
+ a->l2_table_index = ~0;
+ }
+}
+
+static int
+macip_acl_add_list (u32 count, vl_api_macip_acl_rule_t rules[],
+ u32 * acl_list_index, u8 * tag)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *a;
+ macip_acl_rule_t *r;
+ macip_acl_rule_t *acl_new_rules;
+ int i;
+
+ /* Create and populate the rules */
+ acl_new_rules = clib_mem_alloc_aligned (sizeof (macip_acl_rule_t) * count,
+ CLIB_CACHE_LINE_BYTES);
+ if (!acl_new_rules)
+ {
+ /* Could not allocate rules. New or existing ACL - bail out regardless */
+ return -1;
+ }
+
+ for (i = 0; i < count; i++)
+ {
+ r = &acl_new_rules[i];
+ r->is_permit = rules[i].is_permit;
+ r->is_ipv6 = rules[i].is_ipv6;
+ memcpy (&r->src_mac, rules[i].src_mac, 6);
+ memcpy (&r->src_mac_mask, rules[i].src_mac_mask, 6);
+ if(rules[i].is_ipv6)
+ memcpy (&r->src_ip_addr.ip6, rules[i].src_ip_addr, 16);
+ else
+ memcpy (&r->src_ip_addr.ip4, rules[i].src_ip_addr, 4);
+ r->src_prefixlen = rules[i].src_ip_prefix_len;
+ }
+
+ /* Get ACL index */
+ pool_get_aligned (am->macip_acls, a, CLIB_CACHE_LINE_BYTES);
+ memset (a, 0, sizeof (*a));
+ /* Will return the newly allocated ACL index */
+ *acl_list_index = a - am->macip_acls;
+
+ a->rules = acl_new_rules;
+ a->count = count;
+ memcpy (a->tag, tag, sizeof (a->tag));
+
+ /* Create and populate the classifer tables */
+ macip_create_classify_tables (am, *acl_list_index);
+
+ return 0;
+}
+
+
+/* No check for validity of sw_if_index - the callers were supposed to validate */
+
+static int
+macip_acl_interface_del_acl (acl_main_t * am, u32 sw_if_index)
+{
+ int rv;
+ u32 macip_acl_index;
+ macip_acl_list_t *a;
+ vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+ macip_acl_index = am->macip_acl_by_sw_if_index[sw_if_index];
+ /* No point in deleting MACIP ACL which is not applied */
+ if (~0 == macip_acl_index)
+ return -1;
+ a = &am->macip_acls[macip_acl_index];
+ /* remove the classifier tables off the interface L2 ACL */
+ rv =
+ vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+ a->ip6_table_index, a->l2_table_index, 0);
+ /* Unset the MACIP ACL index */
+ am->macip_acl_by_sw_if_index[sw_if_index] = ~0;
+ return rv;
+}
+
+/* No check for validity of sw_if_index - the callers were supposed to validate */
+
+static int
+macip_acl_interface_add_acl (acl_main_t * am, u32 sw_if_index,
+ u32 macip_acl_index)
+{
+ macip_acl_list_t *a;
+ int rv;
+ if (pool_is_free_index (am->macip_acls, macip_acl_index))
+ {
+ return -1;
+ }
+ a = &am->macip_acls[macip_acl_index];
+ vec_validate_init_empty (am->macip_acl_by_sw_if_index, sw_if_index, ~0);
+ /* If there already a MACIP ACL applied, unapply it */
+ if (~0 != am->macip_acl_by_sw_if_index[sw_if_index])
+ macip_acl_interface_del_acl(am, sw_if_index);
+ am->macip_acl_by_sw_if_index[sw_if_index] = macip_acl_index;
+ /* Apply the classifier tables for L2 ACLs */
+ rv =
+ vnet_set_input_acl_intfc (am->vlib_main, sw_if_index, a->ip4_table_index,
+ a->ip6_table_index, a->l2_table_index, 1);
+ return rv;
+}
+
+static int
+macip_acl_del_list (u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *a;
+ int i;
+ if (pool_is_free_index (am->macip_acls, acl_list_index))
+ {
+ return -1;
+ }
+
+ /* delete any references to the ACL */
+ for (i = 0; i < vec_len (am->macip_acl_by_sw_if_index); i++)
+ {
+ if (am->macip_acl_by_sw_if_index[i] == acl_list_index)
+ {
+ macip_acl_interface_del_acl (am, i);
+ }
+ }
+
+ /* Now that classifier tables are detached, clean them up */
+ macip_destroy_classify_tables (am, acl_list_index);
+
+ /* now we can delete the ACL itself */
+ a = &am->macip_acls[acl_list_index];
+ if (a->rules)
+ {
+ clib_mem_free (a->rules);
+ }
+ pool_put (am->macip_acls, a);
+ return 0;
+}
+
+
+static int
+macip_acl_interface_add_del_acl (u32 sw_if_index, u8 is_add,
+ u32 acl_list_index)
+{
+ acl_main_t *am = &acl_main;
+ int rv = -1;
+ if (is_add)
+ {
+ rv = macip_acl_interface_add_acl (am, sw_if_index, acl_list_index);
+ }
+ else
+ {
+ rv = macip_acl_interface_del_acl (am, sw_if_index);
+ }
+ return rv;
+}
+
+/* API message handler */
+static void
+vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp)
+{
+ vl_api_acl_add_replace_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv;
+ u32 acl_list_index = ntohl (mp->acl_index);
+
+ rv = acl_add_list (ntohl (mp->count), mp->r, &acl_list_index, mp->tag);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY,
+ ({
+ rmp->acl_index = htonl(acl_list_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_acl_del_t_handler (vl_api_acl_del_t * mp)
+{
+ acl_main_t *sm = &acl_main;
+ vl_api_acl_del_reply_t *rmp;
+ int rv;
+
+ rv = acl_del_list (ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_ACL_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_add_del_t_handler (vl_api_acl_interface_add_del_t * mp)
+{
+ acl_main_t *sm = &acl_main;
+ vnet_interface_main_t *im = &sm->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vl_api_acl_interface_add_del_reply_t *rmp;
+ int rv = -1;
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ rv =
+ acl_interface_add_del_inout_acl (sw_if_index, mp->is_add,
+ mp->is_input, ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_ACL_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_acl_interface_set_acl_list_t_handler
+ (vl_api_acl_interface_set_acl_list_t * mp)
+{
+ acl_main_t *sm = &acl_main;
+ vl_api_acl_interface_set_acl_list_reply_t *rmp;
+ int rv = 0;
+ int i;
+ vnet_interface_main_t *im = &sm->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ {
+ acl_interface_reset_inout_acls (sw_if_index, 0);
+ acl_interface_reset_inout_acls (sw_if_index, 1);
+
+ for (i = 0; i < mp->count; i++)
+ {
+ acl_interface_add_del_inout_acl (sw_if_index, 1, (i < mp->n_input),
+ ntohl (mp->acls[i]));
+ }
+ }
+
+ REPLY_MACRO (VL_API_ACL_INTERFACE_SET_ACL_LIST_REPLY);
+}
+
+static void
+copy_acl_rule_to_api_rule (vl_api_acl_rule_t * api_rule, acl_rule_t * r)
+{
+ api_rule->is_permit = r->is_permit;
+ api_rule->is_ipv6 = r->is_ipv6;
+ if(r->is_ipv6)
+ {
+ memcpy (api_rule->src_ip_addr, &r->src, sizeof (r->src));
+ memcpy (api_rule->dst_ip_addr, &r->dst, sizeof (r->dst));
+ }
+ else
+ {
+ memcpy (api_rule->src_ip_addr, &r->src.ip4, sizeof (r->src.ip4));
+ memcpy (api_rule->dst_ip_addr, &r->dst.ip4, sizeof (r->dst.ip4));
+ }
+ api_rule->src_ip_prefix_len = r->src_prefixlen;
+ api_rule->dst_ip_prefix_len = r->dst_prefixlen;
+ api_rule->proto = r->proto;
+ api_rule->srcport_or_icmptype_first = r->src_port_or_type_first;
+ api_rule->srcport_or_icmptype_last = r->src_port_or_type_last;
+ api_rule->dstport_or_icmpcode_first = r->dst_port_or_code_first;
+ api_rule->dstport_or_icmpcode_last = r->dst_port_or_code_last;
+ api_rule->tcp_flags_mask = r->tcp_flags_mask;
+ api_rule->tcp_flags_value = r->tcp_flags_value;
+}
+
+static void
+send_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q,
+ acl_list_t * acl, u32 context)
+{
+ vl_api_acl_details_t *mp;
+ vl_api_acl_rule_t *rules;
+ int i;
+ int msg_size = sizeof (*mp) + sizeof (mp->r[0]) * acl->count;
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ mp->count = htonl (acl->count);
+ mp->acl_index = htonl (acl - am->acls);
+ memcpy (mp->tag, acl->tag, sizeof (mp->tag));
+ // clib_memcpy (mp->r, acl->rules, acl->count * sizeof(acl->rules[0]));
+ rules = mp->r;
+ for (i = 0; i < acl->count; i++)
+ {
+ copy_acl_rule_to_api_rule (&rules[i], &acl->rules[i]);
+ }
+
+ clib_warning("Sending acl details for ACL index %d", ntohl(mp->acl_index));
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ u32 acl_index;
+ acl_list_t *acl;
+
+ int rv = -1;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->acl_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ /* Just dump all ACLs */
+ pool_foreach (acl, am->acls,
+ ({
+ send_acl_details(am, q, acl, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ acl_index = ntohl (mp->acl_index);
+ if (!pool_is_free_index (am->acls, acl_index))
+ {
+ acl = &am->acls[acl_index];
+ send_acl_details (am, q, acl, mp->context);
+ }
+ }
+
+ if (rv == -1)
+ {
+ /* FIXME API: should we signal an error here at all ? */
+ return;
+ }
+}
+
+static void
+send_acl_interface_list_details (acl_main_t * am,
+ unix_shared_memory_queue_t * q,
+ u32 sw_if_index, u32 context)
+{
+ vl_api_acl_interface_list_details_t *mp;
+ int msg_size;
+ int n_input;
+ int n_output;
+ int count;
+ int i = 0;
+
+ vec_validate (am->input_acl_vec_by_sw_if_index, sw_if_index);
+ vec_validate (am->output_acl_vec_by_sw_if_index, sw_if_index);
+
+ n_input = vec_len (am->input_acl_vec_by_sw_if_index[sw_if_index]);
+ n_output = vec_len (am->output_acl_vec_by_sw_if_index[sw_if_index]);
+ count = n_input + n_output;
+
+ msg_size = sizeof (*mp);
+ msg_size += sizeof (mp->acls[0]) * count;
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_INTERFACE_LIST_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->count = count;
+ mp->n_input = n_input;
+ for (i = 0; i < n_input; i++)
+ {
+ mp->acls[i] = htonl (am->input_acl_vec_by_sw_if_index[sw_if_index][i]);
+ }
+ for (i = 0; i < n_output; i++)
+ {
+ mp->acls[n_input + i] =
+ htonl (am->output_acl_vec_by_sw_if_index[sw_if_index][i]);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+static void
+vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t *
+ mp)
+{
+ acl_main_t *am = &acl_main;
+ vnet_sw_interface_t *swif;
+ vnet_interface_main_t *im = &am->vnet_main->interface_main;
+
+ u32 sw_if_index;
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->sw_if_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (swif, im->sw_interfaces,
+ ({
+ send_acl_interface_list_details(am, q, swif->sw_if_index, mp->context);
+ }));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ sw_if_index = ntohl (mp->sw_if_index);
+ if (!pool_is_free_index(im->sw_interfaces, sw_if_index))
+ send_acl_interface_list_details (am, q, sw_if_index, mp->context);
+ }
+}
+
+/* MACIP ACL API handlers */
+
+static void
+vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp)
+{
+ vl_api_macip_acl_add_reply_t *rmp;
+ acl_main_t *am = &acl_main;
+ int rv;
+ u32 acl_list_index = ~0;
+
+ rv =
+ macip_acl_add_list (ntohl (mp->count), mp->r, &acl_list_index, mp->tag);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY,
+ ({
+ rmp->acl_index = htonl(acl_list_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_macip_acl_del_t_handler (vl_api_macip_acl_del_t * mp)
+{
+ acl_main_t *sm = &acl_main;
+ vl_api_macip_acl_del_reply_t *rmp;
+ int rv;
+
+ rv = macip_acl_del_list (ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_MACIP_ACL_DEL_REPLY);
+}
+
+static void
+vl_api_macip_acl_interface_add_del_t_handler
+ (vl_api_macip_acl_interface_add_del_t * mp)
+{
+ acl_main_t *sm = &acl_main;
+ vl_api_macip_acl_interface_add_del_reply_t *rmp;
+ int rv = -1;
+ vnet_interface_main_t *im = &sm->vnet_main->interface_main;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (pool_is_free_index(im->sw_interfaces, sw_if_index))
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ else
+ rv =
+ macip_acl_interface_add_del_acl (ntohl (mp->sw_if_index), mp->is_add,
+ ntohl (mp->acl_index));
+
+ REPLY_MACRO (VL_API_MACIP_ACL_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+send_macip_acl_details (acl_main_t * am, unix_shared_memory_queue_t * q,
+ macip_acl_list_t * acl, u32 context)
+{
+ vl_api_macip_acl_details_t *mp;
+ vl_api_macip_acl_rule_t *rules;
+ macip_acl_rule_t *r;
+ int i;
+ int msg_size = sizeof (*mp) + (acl ? sizeof (mp->r[0]) * acl->count : 0);
+
+ mp = vl_msg_api_alloc (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_DETAILS + am->msg_id_base);
+
+ /* fill in the message */
+ mp->context = context;
+ if (acl)
+ {
+ memcpy (mp->tag, acl->tag, sizeof (mp->tag));
+ mp->count = htonl (acl->count);
+ mp->acl_index = htonl (acl - am->macip_acls);
+ rules = mp->r;
+ for (i = 0; i < acl->count; i++)
+ {
+ r = &acl->rules[i];
+ rules[i].is_permit = r->is_permit;
+ rules[i].is_ipv6 = r->is_ipv6;
+ memcpy (rules[i].src_mac, &r->src_mac, sizeof (r->src_mac));
+ memcpy (rules[i].src_mac_mask, &r->src_mac_mask,
+ sizeof (r->src_mac_mask));
+ if (r->is_ipv6)
+ memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip6,
+ sizeof (r->src_ip_addr.ip6));
+ else
+ memcpy (rules[i].src_ip_addr, &r->src_ip_addr.ip4,
+ sizeof (r->src_ip_addr.ip4));
+ rules[i].src_ip_prefix_len = r->src_prefixlen;
+ }
+ }
+ else
+ {
+ /* No martini, no party - no ACL applied to this interface. */
+ mp->acl_index = ~0;
+ mp->count = 0;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+
+static void
+vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp)
+{
+ acl_main_t *am = &acl_main;
+ macip_acl_list_t *acl;
+
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ if (mp->acl_index == ~0)
+ {
+ /* Just dump all ACLs for now, with sw_if_index = ~0 */
+ pool_foreach (acl, am->macip_acls, (
+ {
+ send_macip_acl_details (am, q, acl,
+ mp->
+ context);}
+ ));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ u32 acl_index = ntohl (mp->acl_index);
+ if (!pool_is_free_index (am->macip_acls, acl_index))
+ {
+ acl = &am->macip_acls[acl_index];
+ send_macip_acl_details (am, q, acl, mp->context);
+ }
+ }
+}
+
+static void
+vl_api_macip_acl_interface_get_t_handler (vl_api_macip_acl_interface_get_t *
+ mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_macip_acl_interface_get_reply_t *rmp;
+ u32 count = vec_len (am->macip_acl_by_sw_if_index);
+ int msg_size = sizeof (*rmp) + sizeof (rmp->acls[0]) * count;
+ unix_shared_memory_queue_t *q;
+ int i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ rmp = vl_msg_api_alloc (msg_size);
+ memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_MACIP_ACL_INTERFACE_GET_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->count = htonl (count);
+ for (i = 0; i < count; i++)
+ {
+ rmp->acls[i] = htonl (am->macip_acl_by_sw_if_index[i]);
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+
+
+/* Set up the API message handling tables */
+static clib_error_t *
+acl_plugin_api_hookup (vlib_main_t * vm)
+{
+ acl_main_t *sm = &acl_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_acl_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <acl/acl_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (acl_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_acl;
+#undef _
+}
+
+u32
+register_match_action_nexts (u32 next_in_ip4, u32 next_in_ip6,
+ u32 next_out_ip4, u32 next_out_ip6)
+{
+ acl_main_t *am = &acl_main;
+ u32 act = am->n_match_actions;
+ if (am->n_match_actions == 255)
+ {
+ return ~0;
+ }
+ am->n_match_actions++;
+ am->acl_in_ip4_match_next[act] = next_in_ip4;
+ am->acl_in_ip6_match_next[act] = next_in_ip6;
+ am->acl_out_ip4_match_next[act] = next_out_ip4;
+ am->acl_out_ip6_match_next[act] = next_out_ip6;
+ return act;
+}
+
+void
+acl_setup_nodes (void)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ acl_main_t *am = &acl_main;
+ vlib_node_t *n;
+
+ n = vlib_get_node_by_name (vm, (u8 *) "l2-input-classify");
+ am->l2_input_classify_next_acl =
+ vlib_node_add_next_with_slot (vm, n->index, acl_in_node.index, ~0);
+ n = vlib_get_node_by_name (vm, (u8 *) "l2-output-classify");
+ am->l2_output_classify_next_acl =
+ vlib_node_add_next_with_slot (vm, n->index, acl_out_node.index, ~0);
+
+ feat_bitmap_init_next_nodes (vm, acl_in_node.index, L2INPUT_N_FEAT,
+ l2input_get_feat_names (),
+ am->acl_in_node_input_next_node_index);
+
+ memset (&am->acl_in_ip4_match_next[0], 0,
+ sizeof (am->acl_in_ip4_match_next));
+ memset (&am->acl_in_ip6_match_next[0], 0,
+ sizeof (am->acl_in_ip6_match_next));
+ memset (&am->acl_out_ip4_match_next[0], 0,
+ sizeof (am->acl_out_ip4_match_next));
+ memset (&am->acl_out_ip6_match_next[0], 0,
+ sizeof (am->acl_out_ip6_match_next));
+ am->n_match_actions = 0;
+
+ register_match_action_nexts (0, 0, 0, 0); /* drop */
+ register_match_action_nexts (~0, ~0, ~0, ~0); /* permit */
+ register_match_action_nexts (ACL_IN_L2S_INPUT_IP4_ADD, ACL_IN_L2S_INPUT_IP6_ADD, ACL_OUT_L2S_OUTPUT_IP4_ADD, ACL_OUT_L2S_OUTPUT_IP6_ADD); /* permit + create session */
+}
+
+
+
+static clib_error_t *
+acl_init (vlib_main_t * vm)
+{
+ acl_main_t *am = &acl_main;
+ clib_error_t *error = 0;
+ memset (am, 0, sizeof (*am));
+ am->vlib_main = vm;
+ am->vnet_main = vnet_get_main ();
+
+ u8 *name = format (0, "acl_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ am->msg_id_base = vl_msg_api_get_msg_ids ((char *) name,
+ VL_MSG_FIRST_AVAILABLE);
+
+ error = acl_plugin_api_hookup (vm);
+ acl_setup_nodes ();
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (am, &api_main);
+
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (acl_init);
diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h
new file mode 100644
index 00000000000..afc9b289cee
--- /dev/null
+++ b/src/plugins/acl/acl.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_h
+#define included_acl_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/l2/l2_output.h>
+
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#define ACL_PLUGIN_VERSION_MAJOR 1
+#define ACL_PLUGIN_VERSION_MINOR 1
+
+extern vlib_node_registration_t acl_in_node;
+extern vlib_node_registration_t acl_out_node;
+
+void input_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+void output_acl_packet_match(u32 sw_if_index, vlib_buffer_t * b0, u32 *nextp, u32 *acl_match_p, u32 *rule_match_p, u32 *trace_bitmap);
+
+enum address_e { IP4, IP6 };
+typedef struct
+{
+ enum address_e type;
+ union {
+ ip6_address_t ip6;
+ ip4_address_t ip4;
+ } addr;
+} address_t;
+
+/*
+ * ACL rules
+ */
+typedef struct
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ ip46_address_t src;
+ u8 src_prefixlen;
+ ip46_address_t dst;
+ u8 dst_prefixlen;
+ u8 proto;
+ u16 src_port_or_type_first;
+ u16 src_port_or_type_last;
+ u16 dst_port_or_code_first;
+ u16 dst_port_or_code_last;
+ u8 tcp_flags_value;
+ u8 tcp_flags_mask;
+} acl_rule_t;
+
+typedef struct
+{
+ u8 is_permit;
+ u8 is_ipv6;
+ u8 src_mac[6];
+ u8 src_mac_mask[6];
+ ip46_address_t src_ip_addr;
+ u8 src_prefixlen;
+} macip_acl_rule_t;
+
+/*
+ * ACL
+ */
+typedef struct
+{
+ u8 tag[64];
+ u32 count;
+ acl_rule_t *rules;
+} acl_list_t;
+
+typedef struct
+{
+ u8 tag[64];
+ u32 count;
+ macip_acl_rule_t *rules;
+ /* References to the classifier tables that will enforce the rules */
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+ u32 l2_table_index;
+} macip_acl_list_t;
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+
+ acl_list_t *acls; /* Pool of ACLs */
+ macip_acl_list_t *macip_acls; /* Pool of MAC-IP ACLs */
+
+ /* ACLs associated with interfaces */
+ u32 **input_acl_vec_by_sw_if_index;
+ u32 **output_acl_vec_by_sw_if_index;
+
+ /*
+ * Classify tables used to grab the packets for the ACL check,
+ * and serving as the 5-tuple session tables at the same time
+ */
+ u32 *acl_ip4_input_classify_table_by_sw_if_index;
+ u32 *acl_ip6_input_classify_table_by_sw_if_index;
+ u32 *acl_ip4_output_classify_table_by_sw_if_index;
+ u32 *acl_ip6_output_classify_table_by_sw_if_index;
+
+ /* MACIP (input) ACLs associated with the interfaces */
+ u32 *macip_acl_by_sw_if_index;
+
+ /* next indices for our nodes in the l2-classify tables */
+ u32 l2_input_classify_next_acl;
+ u32 l2_output_classify_next_acl;
+
+ /* next node indices for feature bitmap */
+ u32 acl_in_node_input_next_node_index[32];
+ /* the respective thing for the output feature */
+ l2_output_next_nodes_st acl_out_output_next_nodes;
+
+ /* ACL match actions (must be coherent across in/out ACLs to next indices (can differ) */
+
+ u32 acl_in_ip4_match_next[256];
+ u32 acl_in_ip6_match_next[256];
+ u32 acl_out_ip4_match_next[256];
+ u32 acl_out_ip6_match_next[256];
+ u32 n_match_actions;
+
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ ethernet_main_t * ethernet_main;
+} acl_main_t;
+
+extern acl_main_t acl_main;
+
+
+#endif
diff --git a/src/plugins/acl/acl_all_api_h.h b/src/plugins/acl/acl_all_api_h.h
new file mode 100644
index 00000000000..96eca56d31c
--- /dev/null
+++ b/src/plugins/acl/acl_all_api_h.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <acl/acl.api.h>
+
+#ifdef vl_printfun
+
+#ifdef LP64
+#define _uword_fmt "%lld"
+#define _uword_cast (long long)
+#else
+#define _uword_fmt "%ld"
+#define _uword_cast long
+#endif
+
+static inline void *
+vl_api_acl_rule_t_print (vl_api_acl_rule_t * a, void *handle)
+{
+ vl_print (handle, "vl_api_acl_rule_t:\n");
+ vl_print (handle, "is_permit: %u\n", (unsigned) a->is_permit);
+ vl_print (handle, "is_ipv6: %u\n", (unsigned) a->is_ipv6);
+ {
+ int _i;
+ for (_i = 0; _i < 16; _i++)
+ {
+ vl_print (handle, "src_ip_addr[%d]: %u\n", _i, a->src_ip_addr[_i]);
+ }
+ }
+ vl_print (handle, "src_ip_prefix_len: %u\n",
+ (unsigned) a->src_ip_prefix_len);
+ {
+ int _i;
+ for (_i = 0; _i < 16; _i++)
+ {
+ vl_print (handle, "dst_ip_addr[%d]: %u\n", _i, a->dst_ip_addr[_i]);
+ }
+ }
+ vl_print (handle, "dst_ip_prefix_len: %u\n",
+ (unsigned) a->dst_ip_prefix_len);
+ vl_print (handle, "proto: %u\n", (unsigned) a->proto);
+ vl_print (handle, "srcport_or_icmptype_first: %u\n",
+ (unsigned) a->srcport_or_icmptype_first);
+ vl_print (handle, "srcport_or_icmptype_last: %u\n",
+ (unsigned) a->srcport_or_icmptype_last);
+ vl_print (handle, "dstport_or_icmpcode_first: %u\n",
+ (unsigned) a->dstport_or_icmpcode_first);
+ vl_print (handle, "dstport_or_icmpcode_last: %u\n",
+ (unsigned) a->dstport_or_icmpcode_last);
+ vl_print (handle, "tcp_flags_mask: %u\n", (unsigned) a->tcp_flags_mask);
+ vl_print (handle, "tcp_flags_value: %u\n", (unsigned) a->tcp_flags_value);
+ return handle;
+}
+
+static inline void *
+vl_api_acl_add_replace_t_print (vl_api_acl_add_replace_t * a, void *handle)
+{
+ int i;
+ vl_print (handle, "vl_api_acl_add_replace_t:\n");
+ vl_print (handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id);
+ vl_print (handle, "client_index: %u\n", (unsigned) a->client_index);
+ vl_print (handle, "context: %u\n", (unsigned) a->context);
+ vl_print (handle, "acl_index: %u\n", (unsigned) a->acl_index);
+ vl_print (handle, "count: %u\n", (unsigned) a->count);
+ vl_print (handle, "r ----- \n");
+ for (i = 0; i < a->count; i++)
+ {
+ vl_print (handle, " r[%d]:\n", i);
+ vl_api_acl_rule_t_print (&a->r[i], handle);
+ }
+ vl_print (handle, "r ----- END \n");
+ return handle;
+}
+
+
+static inline void *vl_api_acl_details_t_print (vl_api_acl_details_t *a,void *handle)
+{
+ vl_print(handle, "vl_api_acl_details_t:\n");
+ vl_print(handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id);
+ vl_print(handle, "context: %u\n", (unsigned) a->context);
+ vl_print(handle, "acl_index: %u\n", (unsigned) a->acl_index);
+ {
+ int _i;
+ for (_i = 0; _i < 64; _i++) {
+ vl_print(handle, "tag[%d]: %u\n", _i, a->tag[_i]);
+ }
+ }
+ vl_print(handle, "count: %u\n", (unsigned) a->count);
+ vl_print(handle, "r ----- \n");
+ // FIXME vl_api_acl_rule_t_print(&a->r, handle);
+ vl_print(handle, "r ----- END \n");
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_rule_t_print (vl_api_macip_acl_rule_t * a, void *handle)
+{
+ vl_print (handle, "vl_api_macip_acl_rule_t:\n");
+ vl_print (handle, "is_permit: %u\n", (unsigned) a->is_permit);
+ vl_print (handle, "is_ipv6: %u\n", (unsigned) a->is_ipv6);
+ {
+ int _i;
+ for (_i = 0; _i < 6; _i++)
+ {
+ vl_print (handle, "src_mac[%d]: %u\n", _i, a->src_mac[_i]);
+ }
+ }
+ {
+ int _i;
+ for (_i = 0; _i < 6; _i++)
+ {
+ vl_print (handle, "src_mac_mask[%d]: %u\n", _i, a->src_mac_mask[_i]);
+ }
+ }
+ {
+ int _i;
+ for (_i = 0; _i < 16; _i++)
+ {
+ vl_print (handle, "src_ip_addr[%d]: %u\n", _i, a->src_ip_addr[_i]);
+ }
+ }
+ vl_print (handle, "src_ip_prefix_len: %u\n",
+ (unsigned) a->src_ip_prefix_len);
+ return handle;
+}
+
+static inline void *
+vl_api_macip_acl_add_t_print (vl_api_macip_acl_add_t * a, void *handle)
+{
+ int i;
+ vl_print (handle, "vl_api_macip_acl_add_t:\n");
+ vl_print (handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id);
+ vl_print (handle, "client_index: %u\n", (unsigned) a->client_index);
+ vl_print (handle, "context: %u\n", (unsigned) a->context);
+ vl_print (handle, "count: %u\n", (unsigned) a->count);
+ vl_print (handle, "r ----- \n");
+ for (i = 0; i < a->count; i++)
+ {
+ vl_print (handle, " r[%d]:\n", i);
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+ }
+ vl_print (handle, "r ----- END \n");
+ return handle;
+}
+
+static inline void *vl_api_macip_acl_details_t_print (vl_api_macip_acl_details_t *a,void *handle)
+{
+ int i;
+ vl_print(handle, "vl_api_macip_acl_details_t:\n");
+ vl_print(handle, "_vl_msg_id: %u\n", (unsigned) a->_vl_msg_id);
+ vl_print(handle, "context: %u\n", (unsigned) a->context);
+ vl_print(handle, "acl_index: %u\n", (unsigned) a->acl_index);
+ {
+ int _i;
+ for (_i = 0; _i < 64; _i++) {
+ vl_print(handle, "tag[%d]: %u\n", _i, a->tag[_i]);
+ }
+ }
+ vl_print(handle, "count: %u\n", (unsigned) a->count);
+ vl_print(handle, "r ----- \n");
+ for (i = 0; i < a->count; i++)
+ {
+ vl_print (handle, " r[%d]:\n", i);
+ vl_api_macip_acl_rule_t_print (&a->r[i], handle);
+ }
+ vl_print(handle, "r ----- END \n");
+ return handle;
+}
+
+#endif /* vl_printfun */
+
+
+#ifdef vl_endianfun
+
+#undef clib_net_to_host_uword
+#ifdef LP64
+#define clib_net_to_host_uword clib_net_to_host_u64
+#else
+#define clib_net_to_host_uword clib_net_to_host_u32
+#endif
+
+/*
+ * Manual endian/print functions created by copypasting the automatically
+ * generated ones with small required adjustments. Appears the codegen
+ * can't make code to print the contents of custom-type array.
+ */
+
+static inline void
+vl_api_acl_rule_t_endian (vl_api_acl_rule_t * a)
+{
+ /* a->is_permit = a->is_permit (no-op) */
+ /* a->is_ipv6 = a->is_ipv6 (no-op) */
+ /* a->src_ip_addr[0..15] = a->src_ip_addr[0..15] (no-op) */
+ /* a->src_ip_prefix_len = a->src_ip_prefix_len (no-op) */
+ /* a->dst_ip_addr[0..15] = a->dst_ip_addr[0..15] (no-op) */
+ /* a->dst_ip_prefix_len = a->dst_ip_prefix_len (no-op) */
+ /* a->proto = a->proto (no-op) */
+ a->srcport_or_icmptype_first =
+ clib_net_to_host_u16 (a->srcport_or_icmptype_first);
+ a->srcport_or_icmptype_last =
+ clib_net_to_host_u16 (a->srcport_or_icmptype_last);
+ a->dstport_or_icmpcode_first =
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_first);
+ a->dstport_or_icmpcode_last =
+ clib_net_to_host_u16 (a->dstport_or_icmpcode_last);
+ /* a->tcp_flags_mask = a->tcp_flags_mask (no-op) */
+ /* a->tcp_flags_value = a->tcp_flags_value (no-op) */
+}
+
+static inline void
+vl_api_acl_add_replace_t_endian (vl_api_acl_add_replace_t * a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32 (a->client_index);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->acl_index = clib_net_to_host_u32 (a->acl_index);
+ a->count = clib_net_to_host_u32 (a->count);
+ for (i = 0; i < a->count; i++)
+ {
+ vl_api_acl_rule_t_endian (&a->r[i]);
+ }
+}
+
+static inline void vl_api_acl_details_t_endian (vl_api_acl_details_t *a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id);
+ a->context = clib_net_to_host_u32(a->context);
+ a->acl_index = clib_net_to_host_u32(a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32(a->count);
+ for (i = 0; i < a->count; i++)
+ {
+ vl_api_acl_rule_t_endian (&a->r[i]);
+ }
+}
+
+static inline void vl_api_acl_interface_list_details_t_endian (vl_api_acl_interface_list_details_t *a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id);
+ a->context = clib_net_to_host_u32(a->context);
+ a->sw_if_index = clib_net_to_host_u32(a->sw_if_index);
+ /* a->count = a->count (no-op) */
+ /* a->n_input = a->n_input (no-op) */
+ for(i=0; i<a->count; i++) {
+ a->acls[i] = clib_net_to_host_u32(a->acls[i]);
+ }
+}
+
+static inline void vl_api_acl_interface_set_acl_list_t_endian (vl_api_acl_interface_set_acl_list_t *a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32(a->client_index);
+ a->context = clib_net_to_host_u32(a->context);
+ a->sw_if_index = clib_net_to_host_u32(a->sw_if_index);
+ /* a->count = a->count (no-op) */
+ /* a->n_input = a->n_input (no-op) */
+ for(i=0; i<a->count; i++) {
+ a->acls[i] = clib_net_to_host_u32(a->acls[i]);
+ }
+}
+
+static inline void
+vl_api_macip_acl_rule_t_endian (vl_api_macip_acl_rule_t * a)
+{
+ /* a->is_permit = a->is_permit (no-op) */
+ /* a->is_ipv6 = a->is_ipv6 (no-op) */
+ /* a->src_mac[0..5] = a->src_mac[0..5] (no-op) */
+ /* a->src_mac_mask[0..5] = a->src_mac_mask[0..5] (no-op) */
+ /* a->src_ip_addr[0..15] = a->src_ip_addr[0..15] (no-op) */
+ /* a->src_ip_prefix_len = a->src_ip_prefix_len (no-op) */
+}
+
+static inline void
+vl_api_macip_acl_add_t_endian (vl_api_macip_acl_add_t * a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16 (a->_vl_msg_id);
+ a->client_index = clib_net_to_host_u32 (a->client_index);
+ a->context = clib_net_to_host_u32 (a->context);
+ a->count = clib_net_to_host_u32 (a->count);
+ for (i = 0; i < a->count; i++)
+ {
+ vl_api_macip_acl_rule_t_endian (&a->r[i]);
+ }
+}
+
+static inline void vl_api_macip_acl_details_t_endian (vl_api_macip_acl_details_t *a)
+{
+ int i;
+ a->_vl_msg_id = clib_net_to_host_u16(a->_vl_msg_id);
+ a->context = clib_net_to_host_u32(a->context);
+ a->acl_index = clib_net_to_host_u32(a->acl_index);
+ /* a->tag[0..63] = a->tag[0..63] (no-op) */
+ a->count = clib_net_to_host_u32(a->count);
+ for (i = 0; i < a->count; i++)
+ {
+ vl_api_macip_acl_rule_t_endian (&a->r[i]);
+ }
+}
+
+
+
+
+#endif /* vl_printfun */
+
+
diff --git a/src/plugins/acl/acl_msg_enum.h b/src/plugins/acl/acl_msg_enum.h
new file mode 100644
index 00000000000..14d8b48c207
--- /dev/null
+++ b/src/plugins/acl/acl_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_acl_msg_enum_h
+#define included_acl_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <acl/acl_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
new file mode 100644
index 00000000000..a0e413e16da
--- /dev/null
+++ b/src/plugins/acl/acl_test.c
@@ -0,0 +1,1024 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * acl_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <arpa/inet.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <acl/acl_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <acl/acl_all_api_h.h>
+#undef vl_typedefs
+
+/* define message structures */
+#define vl_endianfun
+#include <acl/acl_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <acl/acl_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <acl/acl_all_api_h.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} acl_test_main_t;
+
+acl_test_main_t acl_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(acl_del_reply) \
+_(acl_interface_add_del_reply) \
+_(macip_acl_interface_add_del_reply) \
+_(acl_interface_set_acl_list_reply) \
+_(macip_acl_del_reply)
+
+#define foreach_reply_retval_aclindex_handler \
+_(acl_add_replace_reply) \
+_(macip_acl_add_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = acl_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = acl_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ clib_warning("ACL index: %d", ntohl(mp->acl_index)); \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_reply_retval_aclindex_handler;
+#undef _
+
+/* These two ought to be in a library somewhere but they aren't */
+static uword
+my_unformat_mac_address (unformat_input_t * input, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return unformat (input, "%x:%x:%x:%x:%x:%x", &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5]);
+}
+
+static u8 *
+my_format_mac_address (u8 * s, va_list * args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
+ a[0], a[1], a[2], a[3], a[4], a[5]);
+}
+
+
+
+static void vl_api_acl_plugin_get_version_reply_t_handler
+ (vl_api_acl_plugin_get_version_reply_t * mp)
+ {
+ vat_main_t * vam = acl_test_main.vat_main;
+ clib_warning("ACL plugin version: %d.%d", ntohl(mp->major), ntohl(mp->minor));
+ vam->result_ready = 1;
+ }
+
+static void vl_api_acl_interface_list_details_t_handler
+ (vl_api_acl_interface_list_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ u8 *out = 0;
+ vl_api_acl_interface_list_details_t_endian(mp);
+ out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
+ out = format(out, " input ");
+ for(i=0; i<mp->count; i++) {
+ out = format(out, "%d ", mp->acls[i]);
+ if (i == mp->n_input-1)
+ out = format(out, "\n output ");
+ }
+ out = format(out, "\n");
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+
+static inline u8 *
+vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a)
+{
+ int af = a->is_ipv6 ? AF_INET6 : AF_INET;
+ u8 src[INET6_ADDRSTRLEN];
+ u8 dst[INET6_ADDRSTRLEN];
+ inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src));
+ inet_ntop(af, a->dst_ip_addr, (void *)dst, sizeof(dst));
+
+ out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d %d",
+ a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit,
+ src, a->src_ip_prefix_len,
+ dst, a->dst_ip_prefix_len,
+ a->proto,
+ a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
+ a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
+ a->tcp_flags_mask, a->tcp_flags_value);
+ return(out);
+}
+
+
+
+static void vl_api_acl_details_t_handler
+ (vl_api_acl_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ vl_api_acl_details_t_endian(mp);
+ u8 *out = 0;
+ out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ for(i=0; i<mp->count; i++) {
+ out = format(out, " ");
+ out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]);
+ out = format(out, "%s\n", i<mp->count-1 ? "," : "");
+ }
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+static inline u8 *
+vl_api_macip_acl_rule_t_pretty_format (u8 *out, vl_api_macip_acl_rule_t * a)
+{
+ int af = a->is_ipv6 ? AF_INET6 : AF_INET;
+ u8 src[INET6_ADDRSTRLEN];
+ inet_ntop(af, a->src_ip_addr, (void *)src, sizeof(src));
+
+ out = format(out, "%s action %d ip %s/%d mac %U mask %U",
+ a->is_ipv6 ? "ipv6" : "ipv4", a->is_permit,
+ src, a->src_ip_prefix_len,
+ my_format_mac_address, a->src_mac,
+ my_format_mac_address, a->src_mac_mask);
+ return(out);
+}
+
+
+static void vl_api_macip_acl_details_t_handler
+ (vl_api_macip_acl_details_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ vl_api_macip_acl_details_t_endian(mp);
+ u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ for(i=0; i<mp->count; i++) {
+ out = format(out, " ");
+ out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]);
+ out = format(out, "%s\n", i<mp->count-1 ? "," : "");
+ }
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+static void vl_api_macip_acl_interface_get_reply_t_handler
+ (vl_api_macip_acl_interface_get_reply_t * mp)
+ {
+ int i;
+ vat_main_t * vam = acl_test_main.vat_main;
+ u8 *out = format(0, "sw_if_index with MACIP ACL count: %d\n", ntohl(mp->count));
+ for(i=0; i<ntohl(mp->count); i++) {
+ out = format(out, " macip_acl_interface_add_del sw_if_index %d add acl %d\n", i, ntohl(mp->acls[i]));
+ }
+ out = format(out, "\n");
+ clib_warning("%s", out);
+ vec_free(out);
+ vam->result_ready = 1;
+ }
+
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(ACL_ADD_REPLACE_REPLY, acl_add_replace_reply) \
+_(ACL_DEL_REPLY, acl_del_reply) \
+_(ACL_INTERFACE_ADD_DEL_REPLY, acl_interface_add_del_reply) \
+_(ACL_INTERFACE_SET_ACL_LIST_REPLY, acl_interface_set_acl_list_reply) \
+_(ACL_INTERFACE_LIST_DETAILS, acl_interface_list_details) \
+_(ACL_DETAILS, acl_details) \
+_(MACIP_ACL_ADD_REPLY, macip_acl_add_reply) \
+_(MACIP_ACL_DEL_REPLY, macip_acl_del_reply) \
+_(MACIP_ACL_DETAILS, macip_acl_details) \
+_(MACIP_ACL_INTERFACE_ADD_DEL_REPLY, macip_acl_interface_add_del_reply) \
+_(MACIP_ACL_INTERFACE_GET_REPLY, macip_acl_interface_get_reply) \
+_(ACL_PLUGIN_GET_VERSION_REPLY, acl_plugin_get_version_reply)
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int api_acl_plugin_get_version (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ vl_api_acl_plugin_get_version_t * mp;
+ u32 msg_size = sizeof(*mp);
+ f64 timeout;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_PLUGIN_GET_VERSION + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+
+ return 0;
+}
+
+static int api_macip_acl_interface_get (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ vl_api_acl_plugin_get_version_t * mp;
+ u32 msg_size = sizeof(*mp);
+ f64 timeout;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_INTERFACE_GET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+
+ return 0;
+}
+
+#define vec_validate_acl_rules(v, idx) \
+ do { \
+ if (vec_len(v) < idx+1) { \
+ vec_validate(v, idx); \
+ v[idx].is_permit = 0x1; \
+ v[idx].srcport_or_icmptype_last = 0xffff; \
+ v[idx].dstport_or_icmpcode_last = 0xffff; \
+ } \
+ } while (0)
+
+
+static int api_acl_add_replace (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_acl_add_replace_t * mp;
+ u32 acl_index = ~0;
+ u32 msg_size = sizeof (*mp); /* without the rules */
+
+ vl_api_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int n_rules = 0;
+ u32 proto = 0;
+ u32 port1 = 0;
+ u32 port2 = 0;
+ u32 action = 0;
+ u32 tcpflags, tcpmask;
+ u32 src_prefix_length = 0, dst_prefix_length = 0;
+ ip4_address_t src_v4address, dst_v4address;
+ ip6_address_t src_v6address, dst_v6address;
+ u8 *tag = 0;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ /* Just assume -1 */
+ }
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "ipv4"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "permit+reflect"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 2;
+ }
+ else if (unformat (i, "permit"))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (i, "action %d", &action))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (i, "src %U/%d",
+ unformat_ip4_address, &src_v4address, &src_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "src %U/%d",
+ unformat_ip6_address, &src_v6address, &src_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "dst %U/%d",
+ unformat_ip4_address, &dst_v4address, &dst_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].dst_ip_addr, &dst_v4address, 4);
+ rules[rule_idx].dst_ip_prefix_len = dst_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "dst %U/%d",
+ unformat_ip6_address, &dst_v6address, &dst_prefix_length))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].dst_ip_addr, &dst_v6address, 16);
+ rules[rule_idx].dst_ip_prefix_len = dst_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "sport %d-%d", &port1, &port2))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].srcport_or_icmptype_first = htons(port1);
+ rules[rule_idx].srcport_or_icmptype_last = htons(port2);
+ }
+ else if (unformat (i, "sport %d", &port1))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].srcport_or_icmptype_first = htons(port1);
+ rules[rule_idx].srcport_or_icmptype_last = htons(port1);
+ }
+ else if (unformat (i, "dport %d-%d", &port1, &port2))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].dstport_or_icmpcode_first = htons(port1);
+ rules[rule_idx].dstport_or_icmpcode_last = htons(port2);
+ }
+ else if (unformat (i, "dport %d", &port1))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].dstport_or_icmpcode_first = htons(port1);
+ rules[rule_idx].dstport_or_icmpcode_last = htons(port1);
+ }
+ else if (unformat (i, "tcpflags %d %d", &tcpflags, &tcpmask))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].tcp_flags_value = tcpflags;
+ rules[rule_idx].tcp_flags_mask = tcpmask;
+ }
+ else if (unformat (i, "proto %d", &proto))
+ {
+ vec_validate_acl_rules(rules, rule_idx);
+ rules[rule_idx].proto = proto;
+ }
+ else if (unformat (i, "tag %s", &tag))
+ {
+ }
+ else if (unformat (i, ","))
+ {
+ rule_idx++;
+ vec_validate_acl_rules(rules, rule_idx);
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ vam->result_ready = 0;
+
+ if(rules)
+ n_rules = vec_len(rules);
+ else
+ n_rules = 0;
+
+ msg_size += n_rules*sizeof(rules[0]);
+
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_ACL_ADD_REPLACE + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ if (n_rules > 0)
+ clib_memcpy(mp->r, rules, n_rules*sizeof (vl_api_acl_rule_t));
+ if (tag)
+ {
+ if (vec_len(tag) >= sizeof(mp->tag))
+ {
+ tag[sizeof(mp->tag)-1] = 0;
+ _vec_len(tag) = sizeof(mp->tag);
+ }
+ clib_memcpy(mp->tag, tag, vec_len(tag));
+ vec_free(tag);
+ }
+ mp->acl_index = ntohl(acl_index);
+ mp->count = htonl(n_rules);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_acl_del (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_acl_del_t * mp;
+ u32 acl_index = ~0;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ errmsg ("missing acl index\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M(ACL_DEL, acl_del);
+ mp->acl_index = ntohl(acl_index);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_macip_acl_del (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_acl_del_t * mp;
+ u32 acl_index = ~0;
+
+ if (!unformat (i, "%d", &acl_index)) {
+ errmsg ("missing acl index\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M(MACIP_ACL_DEL, acl_del);
+ mp->acl_index = ntohl(acl_index);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_acl_interface_add_del (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_acl_interface_add_del_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u8 is_input = 0;
+ u8 is_add = 0;
+
+// acl_interface_add_del <intfc> | sw_if_index <if-idx> acl_index <acl-idx> [out] [del]
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else if (unformat (i, "input"))
+ is_input = 1;
+ else if (unformat (i, "output"))
+ is_input = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ if (acl_index == ~0) {
+ errmsg ("missing ACL index\n");
+ return -99;
+ }
+
+
+
+ /* Construct the API message */
+ M(ACL_INTERFACE_ADD_DEL, acl_interface_add_del);
+ mp->acl_index = ntohl(acl_index);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+ mp->is_input = is_input;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_macip_acl_interface_add_del (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_macip_acl_interface_add_del_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u8 is_add = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ if (acl_index == ~0) {
+ errmsg ("missing ACL index\n");
+ return -99;
+ }
+
+
+
+ /* Construct the API message */
+ M(MACIP_ACL_INTERFACE_ADD_DEL, macip_acl_interface_add_del);
+ mp->acl_index = ntohl(acl_index);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_acl_interface_set_acl_list (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_acl_interface_set_acl_list_t * mp;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u32 *inacls = 0;
+ u32 *outacls = 0;
+ u8 is_input = 0;
+
+// acl_interface_set_acl_list <intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%d", &acl_index))
+ {
+ if(is_input)
+ vec_add1(inacls, htonl(acl_index));
+ else
+ vec_add1(outacls, htonl(acl_index));
+ }
+ else if (unformat (i, "acl %d", &acl_index))
+ ;
+ else if (unformat (i, "input"))
+ is_input = 1;
+ else if (unformat (i, "output"))
+ is_input = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0) {
+ errmsg ("missing interface name / explicit sw_if_index number \n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2(ACL_INTERFACE_SET_ACL_LIST, acl_interface_set_acl_list, sizeof(u32) * (vec_len(inacls) + vec_len(outacls)));
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->n_input = vec_len(inacls);
+ mp->count = vec_len(inacls) + vec_len(outacls);
+ vec_append(inacls, outacls);
+ if (vec_len(inacls) > 0)
+ clib_memcpy(mp->acls, inacls, vec_len(inacls)*sizeof(u32));
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+
+static int api_acl_interface_list_dump (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ u32 sw_if_index = ~0;
+ vl_api_acl_interface_list_dump_t * mp;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(ACL_INTERFACE_LIST_DUMP, acl_interface_list_dump);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_acl_dump (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ u32 acl_index = ~0;
+ vl_api_acl_dump_t * mp;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(ACL_DUMP, acl_dump);
+ mp->acl_index = ntohl (acl_index);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+static int api_macip_acl_dump (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ u32 acl_index = ~0;
+ vl_api_acl_dump_t * mp;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (i, "%d", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M(MACIP_ACL_DUMP, macip_acl_dump);
+ mp->acl_index = ntohl (acl_index);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+#define vec_validate_macip_acl_rules(v, idx) \
+ do { \
+ if (vec_len(v) < idx+1) { \
+ vec_validate(v, idx); \
+ v[idx].is_permit = 0x1; \
+ } \
+ } while (0)
+
+
+static int api_macip_acl_add (vat_main_t * vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_macip_acl_add_t * mp;
+ u32 msg_size = sizeof (*mp); /* without the rules */
+
+ vl_api_macip_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int n_rules = 0;
+ u32 src_prefix_length = 0;
+ u32 action = 0;
+ ip4_address_t src_v4address;
+ ip6_address_t src_v6address;
+ u8 src_mac[6];
+ u8 *tag = 0;
+ u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "ipv4"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "permit"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (i, "deny"))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = 0;
+ }
+ else if (unformat (i, "action %d", &action))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip4_address, &src_v4address, &src_prefix_length))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v4address, 4);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 0;
+ }
+ else if (unformat (i, "ip %U/%d",
+ unformat_ip6_address, &src_v6address, &src_prefix_length))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_ip_addr, &src_v6address, 16);
+ rules[rule_idx].src_ip_prefix_len = src_prefix_length;
+ rules[rule_idx].is_ipv6 = 1;
+ }
+ else if (unformat (i, "mac %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac, &src_mac, 6);
+ memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1, 6);
+ }
+ else if (unformat (i, "mask %U",
+ my_unformat_mac_address, &src_mac))
+ {
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ memcpy (rules[rule_idx].src_mac_mask, &src_mac, 6);
+ }
+ else if (unformat (i, "tag %s", &tag))
+ {
+ }
+ else if (unformat (i, ","))
+ {
+ rule_idx++;
+ vec_validate_macip_acl_rules(rules, rule_idx);
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ vam->result_ready = 0;
+
+ if(rules)
+ n_rules = vec_len(rules);
+ else
+ n_rules = 0;
+
+ msg_size += n_rules*sizeof(rules[0]);
+
+ mp = vl_msg_api_alloc_as_if_client(msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id = ntohs (VL_API_MACIP_ACL_ADD + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ if (n_rules > 0)
+ clib_memcpy(mp->r, rules, n_rules*sizeof (mp->r[0]));
+ if (tag)
+ {
+ if (vec_len(tag) >= sizeof(mp->tag))
+ {
+ tag[sizeof(mp->tag)-1] = 0;
+ _vec_len(tag) = sizeof(mp->tag);
+ }
+ clib_memcpy(mp->tag, tag, vec_len(tag));
+ vec_free(tag);
+ }
+
+ mp->count = htonl(n_rules);
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(acl_plugin_get_version, "") \
+_(acl_add_replace, "<acl-idx> [<ipv4|ipv6> <permit|permit+reflect|deny|action N> [src IP/plen] [dst IP/plen] [sport X-Y] [dport X-Y] [proto P] [tcpflags FL MASK], ... , ...") \
+_(acl_del, "<acl-idx>") \
+_(acl_dump, "[<acl-idx>]") \
+_(acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] [input|output] acl <acl-idx>") \
+_(acl_interface_set_acl_list, "<intfc> | sw_if_index <if-idx> input [acl-idx list] output [acl-idx list]") \
+_(acl_interface_list_dump, "[<intfc> | sw_if_index <if-idx>]") \
+_(macip_acl_add, "...") \
+_(macip_acl_del, "<acl-idx>")\
+_(macip_acl_dump, "[<acl-idx>]") \
+_(macip_acl_interface_add_del, "<intfc> | sw_if_index <if-idx> [add|del] acl <acl-idx>") \
+_(macip_acl_interface_get, "")
+
+
+
+void vat_api_hookup (vat_main_t *vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ acl_test_main_t * sm = &acl_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "acl_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/acl/l2sess.c b/src/plugins/acl/l2sess.c
new file mode 100644
index 00000000000..cc9bde4417d
--- /dev/null
+++ b/src/plugins/acl/l2sess.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * l2sess.c - simple MAC-swap API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <acl/l2sess.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/timing_wheel.h>
+
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_input.h>
+
+void
+l2sess_vlib_plugin_register (vlib_main_t * vm, void* hh,
+ int from_early_init)
+{
+ l2sess_main_t *sm = &l2sess_main;
+ vnet_plugin_handoff_t * h = hh;
+ memset (sm, 0, sizeof (*sm));
+
+ sm->vlib_main = vm;
+ sm->vnet_main = h->vnet_main;
+ sm->ethernet_main = h->ethernet_main;
+}
+
+void
+l2sess_init_next_features_input (vlib_main_t * vm, l2sess_main_t * sm)
+{
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+ if (!is_out) feat_bitmap_init_next_nodes(vm, node_var.index, L2INPUT_N_FEAT, l2input_get_feat_names (), sm->node_var ## _input_next_node_index);
+ foreach_l2sess_node
+#undef _
+}
+
+void
+l2sess_add_our_next_nodes (vlib_main_t * vm, l2sess_main_t * sm,
+ u8 * prev_node_name, int add_output_nodes)
+{
+ vlib_node_t *n;
+ n = vlib_get_node_by_name (vm, prev_node_name);
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+ if (is_out == add_output_nodes) { \
+ u32 idx = vlib_node_add_next_with_slot(vm, n->index, node_var.index, ~0); \
+ if (is_track) { \
+ sm->next_slot_track_node_by_is_ip6_is_out[is_ip6][is_out] = idx; \
+ } \
+ }
+ foreach_l2sess_node
+#undef _
+}
+
+void
+l2sess_setup_nodes (void)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ l2sess_main_t *sm = &l2sess_main;
+
+ l2sess_init_next_features_input (vm, sm);
+
+ l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-input-classify", 0);
+ l2sess_add_our_next_nodes (vm, sm, (u8 *) "l2-output-classify", 1);
+
+}
+
+static char *
+get_l4_proto_str (int is_ip6, uint8_t l4_proto)
+{
+ switch (l4_proto)
+ {
+ case 6:
+ return "tcp";
+ case 17:
+ return "udp";
+ case 1:
+ return "icmp";
+ case 58:
+ return "icmp6";
+ default:
+ return "<?l4-unknown?>";
+ }
+}
+
+static clib_error_t *
+l2sess_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ l2sess_main_t *sm = &l2sess_main;
+ clib_time_t *ct = &vm->clib_time;
+ l2s_session_t *s;
+ u64 now = clib_cpu_time_now ();
+
+ vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel,
+ &sm->timing_wheel, 255);
+
+ pool_foreach (s, sm->sessions, (
+ {
+ f64 ctime =
+ (now -
+ s->create_time) * ct->seconds_per_clock;
+ f64 atime0 =
+ (now -
+ s->side[0].active_time) *
+ ct->seconds_per_clock;
+ f64 atime1 =
+ (now -
+ s->side[1].active_time) *
+ ct->seconds_per_clock;
+/*
+ f64 ctime = (s->create_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+ f64 atime0 = (s->side[0].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+ f64 atime1 = (s->side[1].active_time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+*/
+ u8 * out0 =
+ format (0,
+ "%5d: create time: %U pkts/bytes/active time: [ %ld %ld %U : %ld %ld %U ]\n",
+ (s - sm->sessions),
+ format_time_interval, "h:m:s:u",
+ ctime, s->side[0].n_packets,
+ s->side[0].n_bytes,
+ format_time_interval, "h:m:s:u",
+ atime0, s->side[1].n_packets,
+ s->side[1].n_bytes,
+ format_time_interval, "h:m:s:u",
+ atime1); u8 * out1 = 0;
+ if (s->is_ip6)
+ {
+ out1 =
+ format (0, "%s %U :%u <-> %U :%u",
+ get_l4_proto_str (s->is_ip6,
+ s->l4_proto),
+ format_ip6_address,
+ &s->side[0].addr.ip6,
+ s->side[0].port,
+ format_ip6_address,
+ &s->side[1].addr.ip6,
+ s->side[1].port);}
+ else
+ {
+ out1 =
+ format (0, "%s %U :%u <-> %U :%u",
+ get_l4_proto_str (s->is_ip6,
+ s->l4_proto),
+ format_ip4_address,
+ &s->side[0].addr.ip4,
+ s->side[0].port,
+ format_ip4_address,
+ &s->side[1].addr.ip4,
+ s->side[1].port);}
+ vlib_cli_output (vm, "%s %s", out0,
+ out1); vec_free (out0);
+ vec_free (out1);}
+ ));
+ return 0;
+}
+
+static clib_error_t *
+l2sess_show_count_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ l2sess_main_t *sm = &l2sess_main;
+
+ vlib_cli_output (vm, "Timing wheel info: \n%U", format_timing_wheel,
+ &sm->timing_wheel, 255);
+ vlib_cli_output (vm, "session pool len: %d, pool elts: %d",
+ pool_len (sm->sessions), pool_elts (sm->sessions));
+ vlib_cli_output (vm,
+ "attempted to delete sessions which were already free: %d",
+ sm->counter_attempted_delete_free_session);
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (l2sess_show_command, static) = {
+ .path = "show l2sess",
+ .short_help = "show l2sess",
+ .function = l2sess_show_command_fn,
+};
+
+VLIB_CLI_COMMAND (l2sess_show_count_command, static) = {
+ .path = "show l2sess count",
+ .short_help = "show l2sess count",
+ .function = l2sess_show_count_command_fn,
+};
+/* *INDENT-OFF* */
+
+static inline u64
+time_sec_to_clock( clib_time_t *ct, f64 sec)
+{
+ return (u64)(((f64)sec)/ct->seconds_per_clock);
+}
+
+static clib_error_t * l2sess_init (vlib_main_t * vm)
+{
+ l2sess_main_t * sm = &l2sess_main;
+ clib_error_t * error = 0;
+ u64 cpu_time_now = clib_cpu_time_now();
+
+
+ clib_time_t *ct = &vm->clib_time;
+ sm->udp_session_idle_timeout = time_sec_to_clock(ct, UDP_SESSION_IDLE_TIMEOUT_SEC);
+ sm->tcp_session_idle_timeout = time_sec_to_clock(ct, TCP_SESSION_IDLE_TIMEOUT_SEC);
+ sm->tcp_session_transient_timeout = time_sec_to_clock(ct, TCP_SESSION_TRANSIENT_TIMEOUT_SEC);
+
+ /* The min sched time of 10e-1 causes erroneous behavior... */
+ sm->timing_wheel.min_sched_time = 10e-2;
+ sm->timing_wheel.max_sched_time = 3600.0*48.0;
+ timing_wheel_init (&sm->timing_wheel, cpu_time_now, vm->clib_time.clocks_per_second);
+ sm->timer_wheel_next_expiring_time = 0;
+ sm->timer_wheel_tick = time_sec_to_clock(ct, sm->timing_wheel.min_sched_time);
+ /* Pre-allocate expired nodes. */
+ vec_alloc (sm->data_from_advancing_timing_wheel, 32);
+
+ l2sess_setup_nodes();
+ l2output_init_output_node_vec (&sm->output_next_nodes.output_node_index_vec);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (l2sess_init);
+
+
diff --git a/src/plugins/acl/l2sess.h b/src/plugins/acl/l2sess.h
new file mode 100644
index 00000000000..db899917113
--- /dev/null
+++ b/src/plugins/acl/l2sess.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_l2sess_h__
+#define __included_l2sess_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/timing_wheel.h>
+
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_input.h>
+
+#define _(node_name, node_var, is_out, is_ip6, is_track)
+#undef _
+#define foreach_l2sess_node \
+ _("aclp-l2s-input-ip4-add", l2sess_in_ip4_add, 0, 0, 0) \
+ _("aclp-l2s-input-ip6-add", l2sess_in_ip6_add, 0, 1, 0) \
+ _("aclp-l2s-output-ip4-add", l2sess_out_ip4_add, 1, 0, 0) \
+ _("aclp-l2s-output-ip6-add", l2sess_out_ip6_add, 1, 1, 0) \
+ _("aclp-l2s-input-ip4-track", l2sess_in_ip4_track, 0, 0, 1) \
+ _("aclp-l2s-input-ip6-track", l2sess_in_ip6_track, 0, 1, 1) \
+ _("aclp-l2s-output-ip4-track",l2sess_out_ip4_track, 1, 0, 1) \
+ _("aclp-l2s-output-ip6-track", l2sess_out_ip6_track, 1, 1, 1)
+
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+ extern vlib_node_registration_t node_var;
+foreach_l2sess_node
+#undef _
+
+#define TCP_FLAG_FIN 0x01
+#define TCP_FLAG_SYN 0x02
+#define TCP_FLAG_RST 0x04
+#define TCP_FLAG_PUSH 0x08
+#define TCP_FLAG_ACK 0x10
+#define TCP_FLAG_URG 0x20
+#define TCP_FLAG_ECE 0x40
+#define TCP_FLAG_CWR 0x80
+#define TCP_FLAGS_RSTFINACKSYN (TCP_FLAG_RST + TCP_FLAG_FIN + TCP_FLAG_SYN + TCP_FLAG_ACK)
+#define TCP_FLAGS_ACKSYN (TCP_FLAG_SYN + TCP_FLAG_ACK)
+
+typedef struct {
+ ip46_address_t addr;
+ u64 active_time;
+ u64 n_packets;
+ u64 n_bytes;
+ u16 port;
+} l2s_session_side_t;
+
+enum {
+ L2S_SESSION_SIDE_IN = 0,
+ L2S_SESSION_SIDE_OUT,
+ L2S_N_SESSION_SIDES
+};
+
+typedef struct {
+ u64 create_time;
+ l2s_session_side_t side[L2S_N_SESSION_SIDES];
+ u8 l4_proto;
+ u8 is_ip6;
+ u16 tcp_flags_seen; /* u16 because of two sides */
+} l2s_session_t;
+
+#define PROD
+#ifdef PROD
+#define UDP_SESSION_IDLE_TIMEOUT_SEC 600
+#define TCP_SESSION_IDLE_TIMEOUT_SEC (3600*24)
+#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 120
+#else
+#define UDP_SESSION_IDLE_TIMEOUT_SEC 15
+#define TCP_SESSION_IDLE_TIMEOUT_SEC 15
+#define TCP_SESSION_TRANSIENT_TIMEOUT_SEC 5
+#endif
+
+typedef struct {
+ /*
+ * the next two fields are present for all nodes, but
+ * only one of them is used per node - depending
+ * on whether the node is an input or output one.
+ */
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+ u32 node_var ## _input_next_node_index[32]; \
+ l2_output_next_nodes_st node_var ## _next_nodes;
+foreach_l2sess_node
+#undef _
+ l2_output_next_nodes_st output_next_nodes;
+
+ /* Next indices of the tracker nodes */
+ u32 next_slot_track_node_by_is_ip6_is_out[2][2];
+
+ /*
+ * Pairing of "forward" and "reverse" tables by table index.
+ * Each relationship has two entries - for one and the other table,
+ * so it is bidirectional.
+ */
+
+ u32 *fwd_to_rev_by_table_index;
+
+ /*
+ * The vector of per-interface session pools
+ */
+
+ l2s_session_t *sessions;
+
+ /* The session timeouts */
+ u64 tcp_session_transient_timeout;
+ u64 tcp_session_idle_timeout;
+ u64 udp_session_idle_timeout;
+
+ /* Timing wheel to time out the idle sessions */
+ timing_wheel_t timing_wheel;
+ u32 *data_from_advancing_timing_wheel;
+ u64 timer_wheel_next_expiring_time;
+ u64 timer_wheel_tick;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ ethernet_main_t * ethernet_main;
+
+ /* Counter(s) */
+ u64 counter_attempted_delete_free_session;
+} l2sess_main_t;
+
+l2sess_main_t l2sess_main;
+
+/* Just exposed for acl.c */
+
+void
+l2sess_vlib_plugin_register (vlib_main_t * vm, void * hh,
+ int from_early_init);
+
+
+#endif /* __included_l2sess_h__ */
diff --git a/src/plugins/acl/l2sess_node.c b/src/plugins/acl/l2sess_node.c
new file mode 100644
index 00000000000..520e5929b4b
--- /dev/null
+++ b/src/plugins/acl/l2sess_node.c
@@ -0,0 +1,816 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <netinet/in.h>
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <acl/l2sess.h>
+#include <vnet/l2/l2_classify.h>
+
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 trace_flags;
+ u32 session_tables[2];
+ u32 session_nexts[2];
+ u8 l4_proto;
+} l2sess_trace_t;
+
+/* packet trace format function */
+
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+static u8 * format_## node_var ##_trace (u8 * s, va_list * args) \
+{ \
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); \
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); \
+ l2sess_trace_t * t = va_arg (*args, l2sess_trace_t *); \
+ \
+ s = format (s, node_name ": sw_if_index %d, next index %d trace_flags %08x L4 proto %d\n" \
+ " tables [ %d, %d ] nexts [ %d, %d ]", \
+ t->sw_if_index, t->next_index, t->trace_flags, t->l4_proto, \
+ t->session_tables[0], t->session_tables[1], \
+ t->session_nexts[0], t->session_nexts[1]); \
+ return s; \
+}
+foreach_l2sess_node
+#undef _
+#define foreach_l2sess_error \
+_(SWAPPED, "Mac swap packets processed")
+ typedef enum
+{
+#define _(sym,str) L2SESS_ERROR_##sym,
+ foreach_l2sess_error
+#undef _
+ L2SESS_N_ERROR,
+} l2sess_error_t;
+
+static char *l2sess_error_strings[] = {
+#define _(sym,string) string,
+ foreach_l2sess_error
+#undef _
+};
+
+typedef enum
+{
+ L2SESS_NEXT_DROP,
+ L2SESS_N_NEXT,
+} l2sess_next_t;
+
+u8
+l2sess_get_l4_proto (vlib_buffer_t * b0, int node_is_ip6)
+{
+ u8 proto;
+ int proto_offset;
+ if (node_is_ip6)
+ {
+ proto_offset = 20;
+ }
+ else
+ {
+ proto_offset = 23;
+ }
+ proto = *((u8 *) vlib_buffer_get_current (b0) + proto_offset);
+ return proto;
+}
+
+
+u8
+l2sess_get_tcp_flags (vlib_buffer_t * b0, int node_is_ip6)
+{
+ u8 flags;
+ int flags_offset;
+ if (node_is_ip6)
+ {
+ flags_offset = 14 + 40 + 13; /* FIXME: no extension headers assumed */
+ }
+ else
+ {
+ flags_offset = 14 + 20 + 13;
+ }
+ flags = *((u8 *) vlib_buffer_get_current (b0) + flags_offset);
+ return flags;
+}
+
+static inline int
+l4_tcp_or_udp (u8 proto)
+{
+ return ((proto == 6) || (proto == 17));
+}
+
+void
+l2sess_get_session_tables (l2sess_main_t * sm, u32 sw_if_index,
+ int node_is_out, int node_is_ip6, u8 l4_proto,
+ u32 * session_tables)
+{
+/*
+ * Based on the direction, l3 and l4 protocol, fill a u32[2] array:
+ * [0] is index for the "direct match" path, [1] is for "mirrored match".
+ * Store the indices of the tables to add the session to in session_tables[]
+ */
+ l2_output_classify_main_t *l2om = &l2_output_classify_main;
+ l2_input_classify_main_t *l2im = &l2_input_classify_main;
+
+ u32 output_table_index;
+ u32 input_table_index;
+
+ if (!l4_tcp_or_udp (l4_proto))
+ {
+ return;
+ }
+
+ if (node_is_ip6)
+ {
+ vec_validate_init_empty (l2im->
+ classify_table_index_by_sw_if_index
+ [L2_INPUT_CLASSIFY_TABLE_IP6], sw_if_index,
+ ~0);
+ input_table_index =
+ l2im->
+ classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index];
+ vec_validate_init_empty (l2om->
+ classify_table_index_by_sw_if_index
+ [L2_OUTPUT_CLASSIFY_TABLE_IP6], sw_if_index,
+ ~0);
+ output_table_index =
+ l2om->
+ classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP6]
+ [sw_if_index];
+ }
+ else
+ {
+ vec_validate_init_empty (l2im->
+ classify_table_index_by_sw_if_index
+ [L2_INPUT_CLASSIFY_TABLE_IP4], sw_if_index,
+ ~0);
+ input_table_index =
+ l2im->
+ classify_table_index_by_sw_if_index[L2_INPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index];
+ vec_validate_init_empty (l2om->
+ classify_table_index_by_sw_if_index
+ [L2_OUTPUT_CLASSIFY_TABLE_IP4], sw_if_index,
+ ~0);
+ output_table_index =
+ l2om->
+ classify_table_index_by_sw_if_index[L2_OUTPUT_CLASSIFY_TABLE_IP4]
+ [sw_if_index];
+ }
+
+ if (node_is_out)
+ {
+ session_tables[0] = output_table_index;
+ session_tables[1] = input_table_index;
+ }
+ else
+ {
+ session_tables[0] = input_table_index;
+ session_tables[1] = output_table_index;
+ }
+}
+
+void
+l2sess_get_session_nexts (l2sess_main_t * sm, u32 sw_if_index,
+ int node_is_out, int node_is_ip6, u8 l4_proto,
+ u32 * session_nexts)
+{
+/*
+ * Based on the direction, l3 and l4 protocol, fill a u32[2] array:
+ * [0] is the index for the "direct match" path, [1] is for "mirrored match".
+ * Store the match_next_index in session_nexts[] for a new session entry which is being added to session tables.
+ */
+ u32 input_node_index;
+ u32 output_node_index;
+
+ if (!l4_tcp_or_udp (l4_proto))
+ {
+ return;
+ }
+
+ input_node_index =
+ sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][0];
+ output_node_index =
+ sm->next_slot_track_node_by_is_ip6_is_out[node_is_ip6][1];
+
+ if (node_is_out)
+ {
+ session_nexts[0] = output_node_index;
+ session_nexts[1] = input_node_index;
+ }
+ else
+ {
+ session_nexts[0] = input_node_index;
+ session_nexts[1] = output_node_index;
+ }
+}
+
+
+static inline void
+swap_bytes (vlib_buffer_t * b0, int off_a, int off_b, int nbytes)
+{
+ u8 tmp;
+ u8 *pa = vlib_buffer_get_current (b0) + off_a;
+ u8 *pb = vlib_buffer_get_current (b0) + off_b;
+ while (nbytes--)
+ {
+ tmp = *pa;
+ *pa++ = *pb;
+ *pb++ = tmp;
+ }
+}
+
+/*
+ * This quite pro[bv]ably is a terrible idea performance wise. Moreso doing it twice.
+ * Would having a long (ish) chunk of memory work better for this ?
+ * We will see when we get to the performance of this.
+ */
+void
+l2sess_flip_l3l4_fields (vlib_buffer_t * b0, int node_is_ip6, u8 l4_proto)
+{
+ if (!l4_tcp_or_udp (l4_proto))
+ {
+ return;
+ }
+ if (node_is_ip6)
+ {
+ swap_bytes (b0, 22, 38, 16); /* L3 */
+ swap_bytes (b0, 54, 56, 2); /* L4 (when no EH!) */
+ }
+ else
+ {
+ swap_bytes (b0, 26, 30, 4); /* L3 */
+ swap_bytes (b0, 34, 36, 2); /* L4 */
+ }
+}
+
+void
+l2sess_add_session (vlib_buffer_t * b0, int node_is_out, int node_is_ip6,
+ u32 session_table, u32 session_match_next,
+ u32 opaque_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u32 action = 0;
+ u32 metadata = 0;
+
+#ifdef DEBUG_SESSIONS
+ printf ("Adding session to table %d with next %d\n", session_table,
+ session_match_next);
+#endif
+ vnet_classify_add_del_session (cm, session_table,
+ vlib_buffer_get_current (b0),
+ session_match_next, opaque_index, 0, action,
+ metadata, 1);
+}
+
+
+
+static void *
+get_ptr_to_offset (vlib_buffer_t * b0, int offset)
+{
+ u8 *p = vlib_buffer_get_current (b0) + offset;
+ return p;
+}
+
+
+/*
+ * FIXME: Hardcoded offsets are ugly, although if casting to structs one
+ * would need to take care about alignment.. So let's for now be naive and simple.
+ */
+
+void
+session_store_ip4_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess,
+ int node_is_out)
+{
+ clib_memcpy (&sess->side[1 - node_is_out].addr.ip4,
+ get_ptr_to_offset (b0, 26), 4);
+ clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 30),
+ 4);
+ sess->side[1 - node_is_out].port =
+ ntohs (*(u16 *) get_ptr_to_offset (b0, 34));
+ sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 36));
+}
+
+void
+session_store_ip6_l3l4_info (vlib_buffer_t * b0, l2s_session_t * sess,
+ int node_is_out)
+{
+ clib_memcpy (&sess->side[1 - node_is_out].addr.ip6,
+ get_ptr_to_offset (b0, 22), 16);
+ clib_memcpy (&sess->side[node_is_out].addr.ip4, get_ptr_to_offset (b0, 38),
+ 16);
+ sess->side[1 - node_is_out].port =
+ ntohs (*(u16 *) get_ptr_to_offset (b0, 54));
+ sess->side[node_is_out].port = ntohs (*(u16 *) get_ptr_to_offset (b0, 56));
+}
+
+static void
+build_match_from_session (l2sess_main_t * sm, u8 * match,
+ l2s_session_t * sess, int is_out)
+{
+ if (sess->is_ip6)
+ {
+ match[20] = sess->l4_proto;
+ clib_memcpy (&match[22], &sess->side[1 - is_out].addr.ip6, 16);
+ clib_memcpy (&match[38], &sess->side[is_out].addr.ip4, 16);
+ *(u16 *) & match[54] = htons (sess->side[1 - is_out].port);
+ *(u16 *) & match[56] = htons (sess->side[is_out].port);
+ }
+ else
+ {
+ match[23] = sess->l4_proto;
+ clib_memcpy (&match[26], &sess->side[1 - is_out].addr.ip6, 4);
+ clib_memcpy (&match[30], &sess->side[is_out].addr.ip4, 4);
+ *(u16 *) & match[34] = htons (sess->side[1 - is_out].port);
+ *(u16 *) & match[36] = htons (sess->side[is_out].port);
+ }
+}
+
+static void
+delete_session (l2sess_main_t * sm, u32 sw_if_index, u32 session_index)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ u8 match[5 * 16]; /* For building the mock of the packet to delete the classifier session */
+ u32 session_tables[2] = { ~0, ~0 };
+ l2s_session_t *sess = sm->sessions + session_index;
+ if (pool_is_free (sm->sessions, sess))
+ {
+ sm->counter_attempted_delete_free_session++;
+ return;
+ }
+ l2sess_get_session_tables (sm, sw_if_index, 0, sess->is_ip6, sess->l4_proto,
+ session_tables);
+ if (session_tables[1] != ~0)
+ {
+ build_match_from_session (sm, match, sess, 1);
+ vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0,
+ 0, 0);
+ }
+ if (session_tables[1] != ~0)
+ {
+ build_match_from_session (sm, match, sess, 1);
+ vnet_classify_add_del_session (cm, session_tables[1], match, 0, 0, 0, 0,
+ 0, 0);
+ }
+ pool_put (sm->sessions, sess);
+}
+
+static void
+udp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s,
+ int which_side, u64 now)
+{
+ l2s_session_side_t *ss = &s->side[which_side];
+ ss->active_time = now;
+ ss->n_packets++;
+ ss->n_bytes += b0->current_data + b0->current_length;
+}
+
+static inline u64
+udp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now)
+{
+ return (sm->udp_session_idle_timeout);
+}
+
+static void
+tcp_session_account_buffer (vlib_buffer_t * b0, l2s_session_t * s,
+ int which_side, u64 now)
+{
+ l2s_session_side_t *ss = &s->side[which_side];
+ ss->active_time = now;
+ ss->n_packets++;
+ ss->n_bytes += b0->current_data + b0->current_length;
+ /* Very very lightweight TCP state tracking: just record which flags were seen */
+ s->tcp_flags_seen |=
+ l2sess_get_tcp_flags (b0, s->is_ip6) << (8 * which_side);
+}
+
+/*
+ * Since we are tracking for the purposes of timing the sessions out,
+ * we mostly care about two states: established (maximize the idle timeouts)
+ * and transient (halfopen/halfclosed/reset) - we need to have a reasonably short timeout to
+ * quickly get rid of sessions but not short enough to violate the TCP specs.
+ */
+
+static inline u64
+tcp_session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now)
+{
+ /* seen both SYNs and ACKs but not FINs means we are in establshed state */
+ u16 masked_flags =
+ sess->tcp_flags_seen & ((TCP_FLAGS_RSTFINACKSYN << 8) +
+ TCP_FLAGS_RSTFINACKSYN);
+ if (((TCP_FLAGS_ACKSYN << 8) + TCP_FLAGS_ACKSYN) == masked_flags)
+ {
+ return (sm->tcp_session_idle_timeout);
+ }
+ else
+ {
+ return (sm->tcp_session_transient_timeout);
+ }
+}
+
+static inline u64
+session_get_timeout (l2sess_main_t * sm, l2s_session_t * sess, u64 now)
+{
+ u64 timeout;
+
+ switch (sess->l4_proto)
+ {
+ case 6:
+ timeout = tcp_session_get_timeout (sm, sess, now);
+ break;
+ case 17:
+ timeout = udp_session_get_timeout (sm, sess, now);
+ break;
+ default:
+ timeout = 0;
+ }
+
+ return timeout;
+}
+
+static inline u64
+get_session_last_active_time(l2s_session_t * sess)
+{
+ u64 last_active =
+ sess->side[0].active_time >
+ sess->side[1].active_time ? sess->side[0].active_time : sess->side[1].
+ active_time;
+ return last_active;
+}
+
+static int
+session_is_alive (l2sess_main_t * sm, l2s_session_t * sess, u64 now, u64 *last_active_cache)
+{
+ u64 last_active = get_session_last_active_time(sess);
+ u64 timeout = session_get_timeout (sm, sess, now);
+ int is_alive = ((now - last_active) < timeout);
+ if (last_active_cache)
+ *last_active_cache = last_active;
+ return is_alive;
+}
+
+static void
+check_idle_sessions (l2sess_main_t * sm, u32 sw_if_index, u64 now)
+{
+ sm->timer_wheel_next_expiring_time = 0;
+ sm->data_from_advancing_timing_wheel
+ =
+ timing_wheel_advance (&sm->timing_wheel, now,
+ sm->data_from_advancing_timing_wheel,
+ &sm->timer_wheel_next_expiring_time);
+#ifdef DEBUG_SESSIONS_VERBOSE
+ {
+ clib_time_t *ct = &sm->vlib_main->clib_time;
+ f64 ctime;
+ ctime = now * ct->seconds_per_clock;
+ clib_warning ("Now : %U", format_time_interval, "h:m:s:u", ctime);
+ ctime = sm->timer_wheel_next_expiring_time * ct->seconds_per_clock;
+ clib_warning ("Next expire: %U", format_time_interval, "h:m:s:u", ctime);
+ clib_warning ("Expired items: %d",
+ (int) vec_len (sm->data_from_advancing_timing_wheel));
+ }
+#endif
+
+ sm->timer_wheel_next_expiring_time = now + sm->timer_wheel_tick;
+ if (PREDICT_FALSE ( 0 == sm->data_from_advancing_timing_wheel )) {
+ return;
+ }
+
+ if (PREDICT_FALSE (_vec_len (sm->data_from_advancing_timing_wheel) > 0))
+ {
+ uword i;
+ for (i = 0; i < _vec_len (sm->data_from_advancing_timing_wheel); i++)
+ {
+ u32 session_index = sm->data_from_advancing_timing_wheel[i];
+ if (!pool_is_free_index (sm->sessions, session_index))
+ {
+ l2s_session_t *sess = sm->sessions + session_index;
+ u64 last_active;
+ if (session_is_alive (sm, sess, now, &last_active))
+ {
+#ifdef DEBUG_SESSIONS
+ clib_warning ("Restarting timer for session %d", (int) session_index);
+#endif
+ /* Pretend we did this in the past, at last_active moment */
+ timing_wheel_insert (&sm->timing_wheel,
+ last_active + session_get_timeout (sm, sess,
+ last_active),
+ session_index);
+ }
+ else
+ {
+#ifdef DEBUG_SESSIONS
+ clib_warning ("Deleting session %d", (int) session_index);
+#endif
+ delete_session (sm, sw_if_index, session_index);
+ }
+ }
+ }
+ _vec_len (sm->data_from_advancing_timing_wheel) = 0;
+ }
+}
+
+static uword
+l2sess_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ l2sess_next_t next_index;
+ u32 pkts_swapped = 0;
+ u32 cached_sw_if_index = (u32) ~ 0;
+ u32 cached_next_index = (u32) ~ 0;
+ u32 feature_bitmap0;
+ u32 trace_flags0;
+
+ l2sess_main_t *sm = &l2sess_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Only a single loop for now for simplicity */
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = L2SESS_NEXT_DROP;
+ u32 sw_if_index0;
+ //ethernet_header_t *en0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ //en0 = vlib_buffer_get_current (b0);
+
+/*
+ * The non-boilerplate is in the block below.
+ * Note first a magic macro block that sets up the behavior qualifiers:
+ * node_is_out : 1 = is output, 0 = is input
+ * node_is_ip6 : 1 = is ip6, 0 = is ip4
+ * node_is_track : 1 = is a state tracking node, 0 - is a session addition node
+ *
+ * Subsequently the code adjusts its behavior depending on these variables.
+ * It's most probably not great performance wise but much easier to work with.
+ *
+ */
+ {
+ int node_is_out = -1;
+ CLIB_UNUSED (int node_is_ip6) = -1;
+ CLIB_UNUSED (int node_is_track) = -1;
+ u32 node_index = 0;
+ u32 session_tables[2] = { ~0, ~0 };
+ u32 session_nexts[2] = { ~0, ~0 };
+ l2_output_next_nodes_st *next_nodes = 0;
+ u32 *input_feat_next_node_index;
+ u8 l4_proto;
+ u64 now = clib_cpu_time_now ();
+
+/*
+ * Set the variables according to which of the 8 nodes we are.
+ * Hopefully the compiler is smart enough to eliminate the extraneous.
+ */
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+if(node_var.index == node->node_index) \
+ { \
+ node_is_out = is_out; \
+ node_is_ip6 = is_ip6; \
+ node_is_track = is_track; \
+ node_index = node_var.index; \
+ next_nodes = &sm->node_var ## _next_nodes; \
+ input_feat_next_node_index = sm->node_var ## _input_next_node_index; \
+ }
+ foreach_l2sess_node
+#undef _
+ trace_flags0 = 0;
+ if (node_is_out)
+ {
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ }
+ else
+ {
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ }
+ /* potentially also remove the nodes here */
+ feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
+
+ if (node_is_track)
+ {
+ u32 sess_index = vnet_buffer (b0)->l2_classify.opaque_index;
+ l2s_session_t *sess = sm->sessions + sess_index;
+ l4_proto = sess->l4_proto;
+
+ if (session_is_alive (sm, sess, now, 0))
+ {
+ if (6 == l4_proto)
+ {
+ tcp_session_account_buffer (b0, sess, node_is_out,
+ now);
+ }
+ else
+ {
+ udp_session_account_buffer (b0, sess, node_is_out,
+ now);
+ }
+ }
+ else
+ {
+ timing_wheel_delete (&sm->timing_wheel, sess_index);
+ delete_session (sm, sw_if_index0, sess_index);
+ /* FIXME: drop the packet that hit the obsolete node, for now. We really ought to recycle it. */
+ next0 = 0;
+ }
+ }
+ else
+ {
+ /*
+ * "-add" node: take l2opaque which arrived to us, and deduce
+ * the tables out of that. ~0 means the topmost classifier table
+ * applied for this AF on the RX(for input)/TX(for output)) sw_if_index.
+ * Also add the mirrored session to the paired table.
+ */
+ l2s_session_t *sess;
+ u32 sess_index;
+
+ l4_proto = l2sess_get_l4_proto (b0, node_is_ip6);
+
+ pool_get (sm->sessions, sess);
+ sess_index = sess - sm->sessions;
+ sess->create_time = now;
+ sess->side[node_is_out].active_time = now;
+ sess->side[1 - node_is_out].active_time = now;
+ sess->l4_proto = l4_proto;
+ sess->is_ip6 = node_is_ip6;
+ if (node_is_ip6)
+ {
+ session_store_ip6_l3l4_info (b0, sess, node_is_out);
+ }
+ else
+ {
+ session_store_ip4_l3l4_info (b0, sess, node_is_out);
+ }
+
+ l2sess_get_session_tables (sm, sw_if_index0, node_is_out,
+ node_is_ip6, l4_proto,
+ session_tables);
+ l2sess_get_session_nexts (sm, sw_if_index0, node_is_out,
+ node_is_ip6, l4_proto,
+ session_nexts);
+ l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto);
+ if (session_tables[1] != ~0)
+ {
+ l2sess_add_session (b0, node_is_out, node_is_ip6,
+ session_tables[1], session_nexts[1],
+ sess_index);
+ }
+ l2sess_flip_l3l4_fields (b0, node_is_ip6, l4_proto);
+ if (session_tables[0] != ~0)
+ {
+ l2sess_add_session (b0, node_is_out, node_is_ip6,
+ session_tables[0], session_nexts[0],
+ sess_index);
+ }
+ if (6 == sess->l4_proto)
+ {
+ tcp_session_account_buffer (b0, sess, node_is_out, now);
+ }
+ else
+ {
+ udp_session_account_buffer (b0, sess, node_is_out, now);
+ }
+ timing_wheel_insert (&sm->timing_wheel,
+ now + session_get_timeout (sm, sess,
+ now),
+ sess_index);
+ }
+
+ if (now >= sm->timer_wheel_next_expiring_time)
+ {
+ check_idle_sessions (sm, sw_if_index0, now);
+ }
+
+ if (node_is_out)
+ {
+ if (feature_bitmap0)
+ {
+ trace_flags0 |= 0x10;
+ }
+ if (sw_if_index0 == cached_sw_if_index)
+ {
+ trace_flags0 |= 0x20;
+ }
+ l2_output_dispatch (sm->vlib_main,
+ sm->vnet_main,
+ node,
+ node_index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ next_nodes,
+ b0, sw_if_index0, feature_bitmap0,
+ &next0);
+ trace_flags0 |= 2;
+
+ }
+ else
+ {
+ next0 =
+ feat_bitmap_get_next_node_index (input_feat_next_node_index,
+ feature_bitmap0);
+ trace_flags0 |= 4;
+
+ }
+
+
+
+ if (next0 >= node->n_next_nodes)
+ {
+ trace_flags0 |= 1;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ l2sess_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->trace_flags = trace_flags0;
+ t->l4_proto = l4_proto;
+ t->session_tables[0] = session_tables[0];
+ t->session_tables[1] = session_tables[1];
+ t->session_nexts[0] = session_nexts[0];
+ t->session_nexts[1] = session_nexts[1];
+ }
+
+ }
+ pkts_swapped += 1;
+ if (next0 >= node->n_next_nodes)
+ {
+ next0 = 0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, node->node_index,
+ L2SESS_ERROR_SWAPPED, pkts_swapped);
+ return frame->n_vectors;
+}
+
+
+#define _(node_name, node_var, is_out, is_ip6, is_track) \
+static uword \
+node_var ## node_fn (vlib_main_t * vm, \
+ vlib_node_runtime_t * node, \
+ vlib_frame_t * frame) \
+{ \
+ return l2sess_node_fn(vm, node, frame); \
+} \
+VLIB_REGISTER_NODE (node_var) = { \
+ .function = node_var ## node_fn, \
+ .name = node_name, \
+ .vector_size = sizeof (u32), \
+ .format_trace = format_ ## node_var ## _trace, \
+ .type = VLIB_NODE_TYPE_INTERNAL, \
+ \
+ .n_errors = ARRAY_LEN(l2sess_error_strings), \
+ .error_strings = l2sess_error_strings, \
+ \
+ .n_next_nodes = L2SESS_N_NEXT, \
+ .next_nodes = { \
+ [L2SESS_NEXT_DROP] = "error-drop", \
+ }, \
+};
+foreach_l2sess_node
+#undef _
diff --git a/src/plugins/acl/node_in.c b/src/plugins/acl/node_in.c
new file mode 100644
index 00000000000..2a5199a9ab8
--- /dev/null
+++ b/src/plugins/acl/node_in.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <acl/acl.h>
+#include "node_in.h"
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 match_acl_index;
+ u32 match_rule_index;
+ u32 trace_bitmap;
+} acl_in_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_acl_in_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ acl_in_trace_t *t = va_arg (*args, acl_in_trace_t *);
+
+ s =
+ format (s,
+ "ACL_IN: sw_if_index %d, next index %d, match: inacl %d rule %d trace_bits %08x",
+ t->sw_if_index, t->next_index, t->match_acl_index,
+ t->match_rule_index, t->trace_bitmap);
+ return s;
+}
+
+vlib_node_registration_t acl_in_node;
+
+#define foreach_acl_in_error \
+_(ACL_CHECK, "InACL check packets processed")
+
+typedef enum
+{
+#define _(sym,str) ACL_IN_ERROR_##sym,
+ foreach_acl_in_error
+#undef _
+ ACL_IN_N_ERROR,
+} acl_in_error_t;
+
+static char *acl_in_error_strings[] = {
+#define _(sym,string) string,
+ foreach_acl_in_error
+#undef _
+};
+
+static uword
+acl_in_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_in_next_t next_index;
+ u32 pkts_acl_checked = 0;
+ u32 feature_bitmap0;
+ u32 trace_bitmap = 0;
+ u32 *input_feat_next_node_index =
+ acl_main.acl_in_node_input_next_node_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0;
+ u32 sw_if_index0;
+ u32 next = ~0;
+ u32 match_acl_index = ~0;
+ u32 match_rule_index = ~0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
+
+ input_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index,
+ &match_rule_index, &trace_bitmap);
+ if (next != ~0)
+ {
+ next0 = next;
+ }
+ if (next0 == ~0)
+ {
+ next0 =
+ feat_bitmap_get_next_node_index (input_feat_next_node_index,
+ feature_bitmap0);
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ acl_in_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->match_acl_index = match_acl_index;
+ t->match_rule_index = match_rule_index;
+ t->trace_bitmap = trace_bitmap;
+ }
+
+ next0 = next0 < node->n_next_nodes ? next0 : 0;
+
+ pkts_acl_checked += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, acl_in_node.index,
+ ACL_IN_ERROR_ACL_CHECK, pkts_acl_checked);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (acl_in_node) =
+{
+ .function = acl_in_node_fn,.name = "acl-plugin-in",.vector_size =
+ sizeof (u32),.format_trace = format_acl_in_trace,.type =
+ VLIB_NODE_TYPE_INTERNAL,.n_errors =
+ ARRAY_LEN (acl_in_error_strings),.error_strings =
+ acl_in_error_strings,.n_next_nodes = ACL_IN_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [ACL_IN_ERROR_DROP] = "error-drop",
+ [ACL_IN_ETHERNET_INPUT] = "ethernet-input",
+ [ACL_IN_L2S_INPUT_IP4_ADD] = "aclp-l2s-input-ip4-add",
+ [ACL_IN_L2S_INPUT_IP6_ADD] = "aclp-l2s-input-ip6-add",}
+,};
diff --git a/src/plugins/acl/node_in.h b/src/plugins/acl/node_in.h
new file mode 100644
index 00000000000..502bbf8dd1d
--- /dev/null
+++ b/src/plugins/acl/node_in.h
@@ -0,0 +1,12 @@
+#ifndef _NODE_IN_H_
+#define _NODE_IN_H_
+
+typedef enum {
+ ACL_IN_ERROR_DROP,
+ ACL_IN_ETHERNET_INPUT,
+ ACL_IN_L2S_INPUT_IP4_ADD,
+ ACL_IN_L2S_INPUT_IP6_ADD,
+ ACL_IN_N_NEXT,
+} acl_in_next_t;
+
+#endif
diff --git a/src/plugins/acl/node_out.c b/src/plugins/acl/node_out.c
new file mode 100644
index 00000000000..50af3679b6e
--- /dev/null
+++ b/src/plugins/acl/node_out.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <acl/acl.h>
+
+#include "node_out.h"
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 match_acl_index;
+ u32 match_rule_index;
+ u32 trace_bitmap;
+} acl_out_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_acl_out_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ acl_out_trace_t *t = va_arg (*args, acl_out_trace_t *);
+ s =
+ format (s,
+ "ACL_OUT: sw_if_index %d, next index %d, match: outacl %d rule %d trace_bits %08x",
+ t->sw_if_index, t->next_index, t->match_acl_index,
+ t->match_rule_index, t->trace_bitmap);
+ return s;
+}
+
+vlib_node_registration_t acl_out_node;
+
+#define foreach_acl_out_error \
+_(ACL_CHECK, "OutACL check packets processed")
+
+typedef enum
+{
+#define _(sym,str) ACL_OUT_ERROR_##sym,
+ foreach_acl_out_error
+#undef _
+ ACL_OUT_N_ERROR,
+} acl_out_error_t;
+
+static char *acl_out_error_strings[] = {
+#define _(sym,string) string,
+ foreach_acl_out_error
+#undef _
+};
+
+static uword
+acl_out_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ acl_main_t *am = &acl_main;
+ l2_output_next_nodes_st *next_nodes = &am->acl_out_output_next_nodes;
+ u32 n_left_from, *from, *to_next;
+ acl_out_next_t next_index;
+ u32 pkts_acl_checked = 0;
+ u32 feature_bitmap0;
+ u32 cached_sw_if_index = (u32) ~ 0;
+ u32 cached_next_index = (u32) ~ 0;
+ u32 match_acl_index = ~0;
+ u32 match_rule_index = ~0;
+ u32 trace_bitmap = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ~0;
+ u32 next = 0;
+ u32 sw_if_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ feature_bitmap0 = vnet_buffer (b0)->l2.feature_bitmap;
+
+ output_acl_packet_match (sw_if_index0, b0, &next, &match_acl_index,
+ &match_rule_index, &trace_bitmap);
+ if (next != ~0)
+ {
+ next0 = next;
+ }
+ if (next0 == ~0)
+ {
+ l2_output_dispatch (vm,
+ am->vnet_main,
+ node,
+ acl_out_node.index,
+ &cached_sw_if_index,
+ &cached_next_index,
+ next_nodes,
+ b0, sw_if_index0, feature_bitmap0, &next0);
+ }
+
+
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ acl_out_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->match_acl_index = match_acl_index;
+ t->match_rule_index = match_rule_index;
+ t->trace_bitmap = trace_bitmap;
+ }
+
+ pkts_acl_checked += 1;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, acl_out_node.index,
+ ACL_OUT_ERROR_ACL_CHECK, pkts_acl_checked);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (acl_out_node) =
+{
+ .function = acl_out_node_fn,.name = "acl-plugin-out",.vector_size =
+ sizeof (u32),.format_trace = format_acl_out_trace,.type =
+ VLIB_NODE_TYPE_INTERNAL,.n_errors =
+ ARRAY_LEN (acl_out_error_strings),.error_strings =
+ acl_out_error_strings,.n_next_nodes = ACL_OUT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [ACL_OUT_ERROR_DROP] = "error-drop",
+ [ACL_OUT_INTERFACE_OUTPUT] = "interface-output",
+ [ACL_OUT_L2S_OUTPUT_IP4_ADD] = "aclp-l2s-output-ip4-add",
+ [ACL_OUT_L2S_OUTPUT_IP6_ADD] = "aclp-l2s-output-ip6-add",}
+,};
diff --git a/src/plugins/acl/node_out.h b/src/plugins/acl/node_out.h
new file mode 100644
index 00000000000..c919f3b701c
--- /dev/null
+++ b/src/plugins/acl/node_out.h
@@ -0,0 +1,12 @@
+#ifndef _NODE_OUT_H_
+#define _NODE_OUT_H_
+
+typedef enum {
+ ACL_OUT_ERROR_DROP,
+ ACL_OUT_INTERFACE_OUTPUT,
+ ACL_OUT_L2S_OUTPUT_IP4_ADD,
+ ACL_OUT_L2S_OUTPUT_IP6_ADD,
+ ACL_OUT_N_NEXT,
+} acl_out_next_t;
+
+#endif
diff --git a/src/plugins/acl/test/run-python b/src/plugins/acl/test/run-python
new file mode 100755
index 00000000000..215eb17aa8d
--- /dev/null
+++ b/src/plugins/acl/test/run-python
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Do all the legwork to run a scapy shell with APIs available for load
+#
+CURR_DIR=`pwd`
+ROOT_DIR=`git rev-parse --show-toplevel`
+cd $ROOT_DIR
+sudo apt-get install -y python-virtualenv
+# uncomment the line below to enable build of plugins and api each time
+# make plugins && make build-vpp-api || exit
+virtualenv virtualenv
+virtualenv/bin/pip install ipaddress
+virtualenv/bin/pip install scapy
+# install the python API into the virtualenv
+cd $ROOT_DIR/vpp-api/python/
+$ROOT_DIR/virtualenv/bin/python setup.py install
+# install the python ACL plugin API into the virtualenv
+ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin`
+cd $ACL_PLUGIN_SETUP_DIR;
+$ROOT_DIR/virtualenv/bin/python setup.py install
+cd $ROOT_DIR
+# figure out the shared library path and start scapy
+export LD_LIBRARY_PATH=`pwd`/`find . -name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1`
+cd $CURR_DIR
+sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH $ROOT_DIR/virtualenv/bin/python $1 $2 $3 $4 $5 $6 $7 $8 $9
+
+
+
diff --git a/src/plugins/acl/test/run-scapy b/src/plugins/acl/test/run-scapy
new file mode 100755
index 00000000000..266f07d1b1a
--- /dev/null
+++ b/src/plugins/acl/test/run-scapy
@@ -0,0 +1,26 @@
+#!/bin/sh
+#
+# Do all the legwork to run a scapy shell with APIs available for load
+#
+ROOT_DIR=`git rev-parse --show-toplevel`
+cd $ROOT_DIR
+sudo apt-get install -y python-virtualenv
+# uncomment the line below to enable the build of plugins and API each time..
+# make plugins && make build-vpp-api || exit
+virtualenv virtualenv
+virtualenv/bin/pip install ipaddress
+virtualenv/bin/pip install scapy
+# install the python API into the virtualenv
+cd $ROOT_DIR/vpp-api/python/
+$ROOT_DIR/virtualenv/bin/python setup.py install
+# install the python ACL plugin API into the virtualenv
+ACL_PLUGIN_SETUP_DIR=`find $ROOT_DIR/build-root -name acl-plugin`
+cd $ACL_PLUGIN_SETUP_DIR;
+$ROOT_DIR/virtualenv/bin/python setup.py install
+cd $ROOT_DIR
+# figure out the shared library path and start scapy
+export LD_LIBRARY_PATH=`pwd`/`find . -name "libpneum.so" -exec dirname {} \; | grep lib64 | head -n 1`
+sudo LD_LIBRARY_PATH=$LD_LIBRARY_PATH virtualenv/bin/scapy
+
+
+
diff --git a/src/plugins/acl/test/test_acl_plugin.py b/src/plugins/acl/test/test_acl_plugin.py
new file mode 100644
index 00000000000..7fc72d670a5
--- /dev/null
+++ b/src/plugins/acl/test/test_acl_plugin.py
@@ -0,0 +1,118 @@
+from __future__ import print_function
+import unittest, sys, time, threading, struct, logging, os
+import vpp_papi
+# import vpp_papi_plugins.acl
+from ipaddress import *
+papi_event = threading.Event()
+print(vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS)
+def papi_event_handler(result):
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_SW_INTERFACE_SET_FLAGS:
+ return
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_INTERFACE_COUNTERS:
+ print('Interface counters', result)
+ return
+ if result.vl_msg_id == vpp_papi.vpe.VL_API_VNET_IP6_FIB_COUNTERS:
+ print('IPv6 FIB counters', result)
+ papi_event.set()
+ return
+
+ print('Unknown message id:', result.vl_msg_id)
+
+import glob, subprocess
+class TestAclPlugin(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ print("Setup")
+ @classmethod
+ def tearDownClass(cls):
+ print("Teardown")
+
+ def setUp(self):
+ print("Connecting API")
+ r = vpp_papi.connect("test_papi")
+ self.assertEqual(r, 0)
+
+ def tearDown(self):
+ r = vpp_papi.disconnect()
+ self.assertEqual(r, 0)
+
+ #
+ # The tests themselves
+ #
+
+ #
+ # Basic request / reply
+ #
+ def test_show_version(self):
+ t = vpp_papi.show_version()
+ print('T', t);
+ program = t.program.decode().rstrip('\x00')
+ self.assertEqual('vpe', program)
+
+ def x_test_acl_add(self):
+ print("Test ACL add")
+ self.assertEqual(1, 1)
+
+ #
+ # Details / Dump
+ #
+ def x_test_details_dump(self):
+ t = vpp_papi.sw_interface_dump(0, b'')
+ print('Dump/details T', t)
+
+ #
+ # Arrays
+ #
+ def x_test_arrays(self):
+ t = vpp_papi.vnet_get_summary_stats()
+ print('Summary stats', t)
+ print('Packets:', t.total_pkts[0])
+ print('Packets:', t.total_pkts[1])
+ #
+ # Variable sized arrays and counters
+ #
+ #@unittest.skip("stats")
+ def x_test_want_stats(self):
+ pid = 123
+ vpp_papi.register_event_callback(papi_event_handler)
+ papi_event.clear()
+
+ # Need to configure IPv6 to get som IPv6 FIB stats
+ t = vpp_papi.create_loopback('')
+ print(t)
+ self.assertEqual(t.retval, 0)
+
+ ifindex = t.sw_if_index
+ addr = str(IPv6Address(u'1::1').packed)
+ t = vpp_papi.sw_interface_add_del_address(ifindex, 1, 1, 0, 16, addr)
+ print(t)
+ self.assertEqual(t.retval, 0)
+
+ # Check if interface is up
+ # XXX: Add new API to query interface state based on ifindex, instead of dump all.
+ t = vpp_papi.sw_interface_set_flags(ifindex, 1, 1, 0)
+ self.assertEqual(t.retval, 0)
+
+ t = vpp_papi.want_stats(True, pid)
+
+ print (t)
+
+ #
+ # Wait for some stats
+ #
+ self.assertEqual(papi_event.wait(15), True)
+ t = vpp_papi.want_stats(False, pid)
+ print (t)
+
+
+ #
+ # Plugins?
+ #
+
+if __name__ == '__main__' or __name__ == '__builtin__':
+ print("This is main")
+ suite = unittest.TestLoader().loadTestsFromTestCase(TestAclPlugin)
+ unittest.TextTestRunner(verbosity=2).run(suite)
+ #logging.basicConfig(level=logging.DEBUG)
+ # unittest.main()
+
diff --git a/src/plugins/ioam.am b/src/plugins/ioam.am
new file mode 100644
index 00000000000..a4984b18299
--- /dev/null
+++ b/src/plugins/ioam.am
@@ -0,0 +1,150 @@
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+########################################
+# iOAM Proof of Transit
+########################################
+
+ioam_pot_plugin_la_SOURCES = \
+ ioam/lib-pot/pot_util.c \
+ ioam/encap/ip6_ioam_pot.c \
+ ioam/lib-pot/pot_util.h \
+ ioam/lib-pot/math64.h \
+ ioam/lib-pot/pot_api.c
+
+noinst_HEADERS += \
+ ioam/lib-pot/pot_all_api_h.h \
+ ioam/lib-pot/pot_msg_enum.h \
+ ioam/lib-pot/pot.api.h \
+ ioam/lib-pot/pot_util.h \
+ ioam/lib-pot/math64.h
+
+API_FILES += ioam/lib-pot/pot.api
+
+ioam_pot_test_plugin_la_SOURCES = \
+ ioam/lib-pot/pot_test.c \
+ ioam/lib-pot/pot_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_pot_test_plugin.la
+vppplugins_LTLIBRARIES += ioam_pot_plugin.la
+
+########################################
+# iOAM trace export for IPv6
+########################################
+
+ioam_export_plugin_la_SOURCES = \
+ioam/export/ioam_export.c \
+ioam/export/node.c \
+ioam/export/ioam_export.api.h \
+ioam/export/ioam_export_thread.c
+
+noinst_HEADERS += \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/export/ioam_export_msg_enum.h \
+ ioam/export/ioam_export.api.h
+
+API_FILES += ioam/export/ioam_export.api
+
+ioam_export_test_plugin_la_SOURCES = \
+ ioam/export/ioam_export_test.c \
+ ioam/export/ioam_export_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_export_test_plugin.la
+vppplugins_LTLIBRARIES += ioam_export_plugin.la
+
+########################################
+# iOAM Trace
+########################################
+libioam_trace_plugin_la_SOURCES = \
+ ioam/lib-trace/trace_util.c \
+ ioam/encap/ip6_ioam_trace.c \
+ ioam/lib-trace/trace_util.h \
+ ioam/lib-trace/trace_api.c
+
+noinst_HEADERS += \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/lib-trace/trace_all_api_h.h \
+ ioam/lib-trace/trace_msg_enum.h \
+ ioam/lib-trace/trace.api.h \
+ ioam/lib-trace/trace_util.h
+
+API_FILES += ioam/lib-trace/trace.api
+
+ioam_trace_test_plugin_la_SOURCES = \
+ ioam/lib-trace/trace_test.c \
+ ioam/lib-trace/trace_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_trace_test_plugin.la
+vppplugins_LTLIBRARIES += libioam_trace_plugin.la
+
+########################################
+# VxLAN-GPE
+########################################
+libioam_vxlan_gpe_plugin_la_SOURCES = \
+ ioam/lib-vxlan-gpe/ioam_encap.c \
+ ioam/lib-vxlan-gpe/ioam_decap.c \
+ ioam/lib-vxlan-gpe/ioam_transit.c \
+ ioam/lib-vxlan-gpe/ioam_pop.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_api.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_node.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h\
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
+
+noinst_HEADERS += \
+ ioam/export/ioam_export_all_api_h.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h \
+ ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h \
+ ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h
+
+API_FILES += ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
+API_FILES += ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
+
+ioam_vxlan_gpe_test_plugin_la_SOURCES = \
+ ioam/lib-vxlan-gpe/vxlan_gpe_test.c \
+ ioam/lib-vxlan-gpe/vxlan_gpe_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_vxlan_gpe_test_plugin.la
+vppplugins_LTLIBRARIES += libioam_vxlan_gpe_plugin.la
+
+vxlan_gpe_ioam_export_test_plugin_la_SOURCES = \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c \
+ ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_plugin.api.h
+
+vppapitestplugins_LTLIBRARIES += vxlan_gpe_ioam_export_test_plugin.la
+
+########################################
+# iOAM E2E plugin
+########################################
+
+ioam_e2e_plugin_la_SOURCES = \
+ ioam/encap/ip6_ioam_e2e.c \
+ ioam/encap/ip6_ioam_seqno.c \
+ ioam/encap/ip6_ioam_seqno_analyse.c
+
+noinst_HEADERS += \
+ ioam/encap/ip6_ioam_e2e.h \
+ ioam/encap/ip6_ioam_seqno.h
+
+vppplugins_LTLIBRARIES += ioam_e2e_plugin.la
+
+# vi:syntax=automake
diff --git a/src/plugins/ioam/dir.dox b/src/plugins/ioam/dir.dox
new file mode 100644
index 00000000000..f3389b52c3e
--- /dev/null
+++ b/src/plugins/ioam/dir.dox
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Inband OAM (iOAM) implementation
+*/
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.c b/src/plugins/ioam/encap/ip6_ioam_e2e.c
new file mode 100644
index 00000000000..0839cdceca7
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/plugin/plugin.h>
+
+#include "ip6_ioam_e2e.h"
+
+ioam_e2e_main_t ioam_e2e_main;
+
+static u8 * ioam_e2e_trace_handler (u8 * s,
+ ip6_hop_by_hop_option_t *opt)
+{
+ ioam_e2e_option_t * e2e = (ioam_e2e_option_t *)opt;
+ u32 seqno = 0;
+
+ if (e2e)
+ {
+ seqno = clib_net_to_host_u32 (e2e->e2e_data);
+ }
+
+ s = format (s, "SeqNo = 0x%Lx", seqno);
+ return s;
+}
+
+int
+ioam_e2e_config_handler (void *data, u8 disable)
+{
+ int *analyse = data;
+
+ /* Register hanlders if enabled */
+ if (!disable)
+ {
+ /* If encap node register for encap handler */
+ if (0 == *analyse)
+ {
+ if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_seqno_encap_handler,
+ ioam_e2e_trace_handler) < 0)
+ {
+ return (-1);
+ }
+ }
+ /* If analyze node then register for decap handler */
+ else
+ {
+ if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_seqno_decap_handler) < 0)
+ {
+ return (-1);
+ }
+ }
+ return 0;
+ }
+
+ /* UnRegister handlers */
+ (void) ip6_hbh_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ (void) ip6_hbh_pop_unregister_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ return 0;
+}
+
+int
+ioam_e2e_rewrite_handler (u8 *rewrite_string,
+ u8 *rewrite_size)
+{
+ ioam_e2e_option_t *e2e_option;
+
+ if (rewrite_string && *rewrite_size == sizeof(ioam_e2e_option_t))
+ {
+ e2e_option = (ioam_e2e_option_t *)rewrite_string;
+ e2e_option->hdr.type = HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE
+ | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+ e2e_option->hdr.length = sizeof (ioam_e2e_option_t) -
+ sizeof (ip6_hop_by_hop_option_t);
+ return(0);
+ }
+ return(-1);
+}
+
+u32
+ioam_e2e_flow_handler (u32 ctx, u8 add)
+{
+ ioam_e2e_data_t *data;
+ u16 i;
+
+ if (add)
+ {
+ pool_get(ioam_e2e_main.e2e_data, data);
+ data->flow_ctx = ctx;
+ ioam_seqno_init_bitmap(&data->seqno_data);
+ return ((u32) (data - ioam_e2e_main.e2e_data));
+ }
+
+ /* Delete case */
+ for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++)
+ {
+ if (pool_is_free_index(ioam_e2e_main.e2e_data, i))
+ continue;
+
+ data = pool_elt_at_index(ioam_e2e_main.e2e_data, i);
+ if (data && (data->flow_ctx == ctx))
+ {
+ pool_put_index(ioam_e2e_main.e2e_data, i);
+ return (0);
+ }
+ }
+ return 0;
+}
+
+static clib_error_t *
+ioam_show_e2e_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_e2e_data_t *e2e_data;
+ u8 *s = 0;
+ int i;
+
+ vec_reset_length(s);
+
+ s = format(0, "IOAM E2E information: \n");
+ for (i = 0; i < vec_len(ioam_e2e_main.e2e_data); i++)
+ {
+ if (pool_is_free_index(ioam_e2e_main.e2e_data, i))
+ continue;
+
+ e2e_data = pool_elt_at_index(ioam_e2e_main.e2e_data, i);
+ s = format(s, "Flow name: %s\n", get_flow_name_from_flow_ctx(e2e_data->flow_ctx));
+
+ s = show_ioam_seqno_cmd_fn(s,
+ &e2e_data->seqno_data,
+ !IOAM_DEAP_ENABLED(e2e_data->flow_ctx));
+ }
+
+ vlib_cli_output(vm, "%v", s);
+ return 0;
+}
+
+
+VLIB_CLI_COMMAND (ioam_show_e2e_cmd, static) = {
+ .path = "show ioam e2e ",
+ .short_help = "show ioam e2e information",
+ .function = ioam_show_e2e_cmd_fn,
+};
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ clib_error_t * error = 0;
+
+ ioam_e2e_main.vlib_main = vm;
+ ioam_e2e_main.vnet_main = h->vnet_main;
+ return error;
+}
+
+/*
+ * Init handler E2E headet handling.
+ * Init hanlder registers encap, decap, trace and Rewrite handlers.
+ */
+static clib_error_t *
+ioam_e2e_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ {
+ return(error);
+ }
+
+ /*
+ * As of now we have only PPC under E2E header.
+ */
+ if (ip6_hbh_config_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_e2e_config_handler) < 0)
+ {
+ return (clib_error_create("Registration of "
+ "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE for rewrite failed"));
+ }
+
+ if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ sizeof(ioam_e2e_option_t),
+ ioam_e2e_rewrite_handler) < 0)
+ {
+ return (clib_error_create("Registration of "
+ "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE for rewrite failed"));
+ }
+
+ if (ip6_hbh_flow_handler_register(HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE,
+ ioam_e2e_flow_handler) < 0)
+ {
+ return (clib_error_create("Registration of "
+ "HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE Flow handler failed"));
+ }
+
+ return (0);
+}
+
+/*
+ * Init function for the E2E lib.
+ * ip6_hop_by_hop_ioam_e2e_init gets called during init.
+ */
+VLIB_INIT_FUNCTION (ioam_e2e_init);
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.h b/src/plugins/ioam/encap/ip6_ioam_e2e.h
new file mode 100644
index 00000000000..18f35f80c60
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_ip6_ioam_e2e_h__
+#define __included_ip6_ioam_e2e_h__
+
+#include "ip6_ioam_seqno.h"
+
+typedef struct ioam_e2e_data_t_ {
+ u32 flow_ctx;
+ u32 pad;
+ ioam_seqno_data seqno_data;
+} ioam_e2e_data_t;
+
+typedef struct {
+ ioam_e2e_data_t *e2e_data;
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ioam_e2e_main_t;
+
+extern ioam_e2e_main_t ioam_e2e_main;
+
+static inline ioam_seqno_data *
+ioam_e2ec_get_seqno_data_from_flow_ctx (u32 flow_ctx)
+{
+ ioam_e2e_data_t *data = NULL;
+ u32 index;
+
+ index = get_flow_data_from_flow_ctx(flow_ctx,
+ HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
+ data = &ioam_e2e_main.e2e_data[index];
+ return &(data->seqno_data);
+}
+
+#endif /* __included_ioam_e2e_h__ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.c b/src/plugins/ioam/encap/ip6_ioam_pot.c
new file mode 100644
index 00000000000..05f42c91d0f
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <ioam/lib-pot/pot_util.h>
+
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 pot_type;
+#define PROFILE_ID_MASK 0xF
+ u8 reserved_profile_id; /* 4 bits reserved, 4 bits to carry profile id */
+ u64 random;
+ u64 cumulative;
+}) ioam_pot_option_t;
+
+#define foreach_ip6_hop_by_hop_ioam_pot_stats \
+ _(PROCESSED, "Pkts with ip6 hop-by-hop pot options") \
+ _(PROFILE_MISS, "Pkts with ip6 hop-by-hop pot options but no profile set") \
+ _(PASSED, "Pkts with POT in Policy") \
+ _(FAILED, "Pkts with POT out of Policy")
+
+static char * ip6_hop_by_hop_ioam_pot_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_ioam_pot_stats
+#undef _
+};
+
+typedef enum {
+#define _(sym,str) IP6_IOAM_POT_##sym,
+ foreach_ip6_hop_by_hop_ioam_pot_stats
+#undef _
+ IP6_IOAM_POT_N_STATS,
+} ip6_ioam_pot_stats_t;
+
+typedef struct {
+ /* stats */
+ u64 counters[ARRAY_LEN(ip6_hop_by_hop_ioam_pot_stats_strings)];
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} ip6_hop_by_hop_ioam_pot_main_t;
+
+ip6_hop_by_hop_ioam_pot_main_t ip6_hop_by_hop_ioam_pot_main;
+
+always_inline void
+ip6_ioam_stats_increment_counter (u32 counter_index, u64 increment)
+{
+ ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 * format_ioam_pot (u8 * s, va_list * args)
+{
+ ioam_pot_option_t * pot0 = va_arg (*args, ioam_pot_option_t *);
+ u64 random, cumulative;
+ random = cumulative = 0;
+ if (pot0)
+ {
+ random = clib_net_to_host_u64 (pot0->random);
+ cumulative = clib_net_to_host_u64 (pot0->cumulative);
+ }
+
+ s = format (s, "random = 0x%Lx, Cumulative = 0x%Lx, Index = 0x%x",
+ random, cumulative, pot0 ? pot0->reserved_profile_id : ~0);
+ return s;
+}
+
+u8 *
+ip6_hbh_ioam_proof_of_transit_trace_handler (u8 *s, ip6_hop_by_hop_option_t *opt)
+{
+ ioam_pot_option_t *pot;
+
+ s = format (s, " POT opt present\n");
+ pot = (ioam_pot_option_t *) opt;
+ s = format (s, " %U\n", format_ioam_pot, pot);
+ return (s);
+}
+
+int
+ip6_hbh_ioam_proof_of_transit_handler (vlib_buffer_t *b,
+ ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt0)
+{
+ ioam_pot_option_t * pot0;
+ u64 random = 0, cumulative = 0;
+ int rv = 0;
+ u8 pot_profile_index;
+ pot_profile *pot_profile = 0, *new_profile = 0;
+ u8 pot_encap = 0;
+
+ pot0 = (ioam_pot_option_t *) opt0;
+ pot_encap = (pot0->random == 0);
+ pot_profile_index = pot_profile_get_active_id();
+ pot_profile = pot_profile_get_active();
+ if (pot_encap && PREDICT_FALSE(!pot_profile))
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1);
+ return(-1);
+ }
+ if (pot_encap)
+ {
+ pot0->reserved_profile_id =
+ pot_profile_index & PROFILE_ID_MASK;
+ pot_profile_incr_usage_stats(pot_profile);
+ }
+ else
+ { /* Non encap node */
+ if (PREDICT_FALSE(pot0->reserved_profile_id !=
+ pot_profile_index || pot_profile == 0))
+ {
+ /* New profile announced by encap node. */
+ new_profile =
+ pot_profile_find(pot0->reserved_profile_id);
+ if (PREDICT_FALSE(new_profile == 0 ||
+ new_profile->valid == 0))
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROFILE_MISS, 1);
+ return(-1);
+ }
+ else
+ {
+ pot_profile_index = pot0->reserved_profile_id;
+ pot_profile = new_profile;
+ pot_profile_set_active(pot_profile_index);
+ pot_profile_reset_usage_stats(pot_profile);
+ }
+ }
+ pot_profile_incr_usage_stats(pot_profile);
+ }
+
+ if (pot0->random == 0)
+ {
+ pot0->random = clib_host_to_net_u64(pot_generate_random(pot_profile));
+ pot0->cumulative = 0;
+ }
+ random = clib_net_to_host_u64(pot0->random);
+ cumulative = clib_net_to_host_u64(pot0->cumulative);
+ pot0->cumulative = clib_host_to_net_u64(
+ pot_update_cumulative(pot_profile,
+ cumulative,
+ random));
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PROCESSED, 1);
+
+ return (rv);
+}
+
+int
+ip6_hbh_ioam_proof_of_transit_pop_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt0)
+{
+ ioam_pot_option_t * pot0;
+ u64 random = 0;
+ u64 cumulative = 0;
+ int rv = 0;
+ pot_profile *pot_profile = 0;
+ u8 result = 0;
+
+ pot0 = (ioam_pot_option_t *) opt0;
+ random = clib_net_to_host_u64(pot0->random);
+ cumulative = clib_net_to_host_u64(pot0->cumulative);
+ pot_profile = pot_profile_get_active();
+ result = pot_validate (pot_profile,
+ cumulative, random);
+
+ if (result == 1)
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_PASSED, 1);
+ }
+ else
+ {
+ ip6_ioam_stats_increment_counter (IP6_IOAM_POT_FAILED, 1);
+ }
+ return (rv);
+}
+
+int ip6_hop_by_hop_ioam_pot_rewrite_handler (u8 *rewrite_string, u8 *rewrite_size)
+{
+ ioam_pot_option_t * pot_option;
+ if (rewrite_string && *rewrite_size == sizeof(ioam_pot_option_t))
+ {
+ pot_option = (ioam_pot_option_t *)rewrite_string;
+ pot_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT
+ | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+ pot_option->hdr.length = sizeof (ioam_pot_option_t) -
+ sizeof (ip6_hop_by_hop_option_t);
+ return(0);
+ }
+ return(-1);
+}
+
+static clib_error_t *
+ip6_show_ioam_pot_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_pot_main_t *hm = &ip6_hop_by_hop_ioam_pot_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for ( i = 0; i < IP6_IOAM_POT_N_STATS; i++)
+ {
+ s = format(s, " %s - %lu\n", ip6_hop_by_hop_ioam_pot_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output(vm, "%v", s);
+ vec_free(s);
+ return 0;
+}
+
+
+VLIB_CLI_COMMAND (ip6_show_ioam_pot_cmd, static) = {
+ .path = "show ioam pot",
+ .short_help = "iOAM pot statistics",
+ .function = ip6_show_ioam_pot_cmd_fn,
+};
+
+
+static clib_error_t *
+ip6_hop_by_hop_ioam_pot_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_ioam_pot_main_t * hm = &ip6_hop_by_hop_ioam_pot_main;
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ return(error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main();
+ memset(hm->counters, 0, sizeof(hm->counters));
+
+ if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT, ip6_hbh_ioam_proof_of_transit_handler,
+ ip6_hbh_ioam_proof_of_transit_trace_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT failed"));
+
+ if (ip6_hbh_add_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT,
+ sizeof(ioam_pot_option_t),
+ ip6_hop_by_hop_ioam_pot_rewrite_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT for rewrite failed"));
+
+ if (ip6_hbh_pop_register_option(HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT,
+ ip6_hbh_ioam_proof_of_transit_pop_handler) < 0)
+ return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT POP failed"));
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_pot_init);
+
+
diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.c b/src/plugins/ioam/encap/ip6_ioam_seqno.c
new file mode 100644
index 00000000000..0b4d4192975
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_seqno.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include "ip6_ioam_seqno.h"
+#include "ip6_ioam_e2e.h"
+
+ioam_seqno_data_main_t ioam_seqno_main;
+
+void ioam_seqno_init_bitmap (ioam_seqno_data *data)
+{
+ seqno_bitmap *bitmap = &data->seqno_rx.bitmap;
+ bitmap->window_size = SEQNO_WINDOW_SIZE;
+ bitmap->array_size = SEQNO_WINDOW_ARRAY_SIZE;
+ bitmap->mask = 32 * SEQNO_WINDOW_ARRAY_SIZE - 1;
+ bitmap->array[0] = 0x00000000;/* pretend we haven seen sequence numbers 0*/
+ bitmap->highest = 0;
+
+ data->seq_num = 0;
+ return ;
+}
+
+/*
+ * This Routine gets called from IPv6 hop-by-hop option handling.
+ * Only if we are encap node, then add PPC data.
+ * On a Transit(MID) node we dont do anything with E2E headers.
+ * On decap node decap is handled by seperate function.
+ */
+int
+ioam_seqno_encap_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt)
+{
+ u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index;
+ ioam_e2e_option_t * e2e;
+ int rv = 0;
+ ioam_seqno_data *data;
+
+ data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index);
+ e2e = (ioam_e2e_option_t *) opt;
+ e2e->e2e_data = clib_host_to_net_u32(++data->seq_num);
+
+ return (rv);
+}
+
+/*
+ * This Routine gets called on POP/Decap node.
+ */
+int
+ioam_seqno_decap_handler (vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt)
+{
+ u32 opaque_index = vnet_buffer(b)->l2_classify.opaque_index;
+ ioam_e2e_option_t * e2e;
+ int rv = 0;
+ ioam_seqno_data *data;
+
+ data = ioam_e2ec_get_seqno_data_from_flow_ctx(opaque_index);
+ e2e = (ioam_e2e_option_t *) opt;
+ ioam_analyze_seqno(&data->seqno_rx, (u64) clib_net_to_host_u32(e2e->e2e_data));
+
+ return (rv);
+}
+
+u8 *
+show_ioam_seqno_cmd_fn (u8 *s, ioam_seqno_data *seqno_data, u8 enc)
+{
+ seqno_rx_info *rx;
+
+ s = format(s, "SeqNo Data:\n");
+ if (enc)
+ {
+ s = format(s, " Current Seq. Number : %llu\n", seqno_data->seq_num);
+ }
+ else
+ {
+ rx = &seqno_data->seqno_rx;
+ s = format(s, " Highest Seq. Number : %llu\n", rx->bitmap.highest);
+ s = format(s, " Packets received : %llu\n", rx->rx_packets);
+ s = format(s, " Lost packets : %llu\n", rx->lost_packets);
+ s = format(s, " Reordered packets : %llu\n", rx->reordered_packets);
+ s = format(s, " Duplicate packets : %llu\n", rx->dup_packets);
+ }
+
+ format(s, "\n");
+ return s;
+}
diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno.h b/src/plugins/ioam/encap/ip6_ioam_seqno.h
new file mode 100644
index 00000000000..13a84db0d71
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_seqno.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_ip6_ioam_seqno_h__
+#define __included_ip6_ioam_seqno_h__
+
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+#define SEQ_CHECK_VALUE 0x80000000 /* for seq number wraparound detection */
+
+#define SEQNO_WINDOW_SIZE 2048
+#define SEQNO_WINDOW_ARRAY_SIZE 64
+
+typedef struct seqno_bitmap_ {
+ u32 window_size;
+ u32 array_size;
+ u32 mask;
+ u32 pad;
+ u64 highest;
+ u64 array[SEQNO_WINDOW_ARRAY_SIZE]; /* Will be alloc to array_size */
+} seqno_bitmap;
+
+typedef struct seqno_rx_info_ {
+ u64 rx_packets;
+ u64 lost_packets;
+ u64 reordered_packets;
+ u64 dup_packets;
+ seqno_bitmap bitmap;
+} seqno_rx_info;
+
+/* This structure is 64-byte aligned */
+typedef struct ioam_seqno_data_ {
+ union {
+ u32 seq_num; /* Useful only for encap node */
+ seqno_rx_info seqno_rx;
+ };
+} ioam_seqno_data;
+
+typedef struct ioam_seqno_data_main_t_ {
+ ioam_seqno_data *seqno_data;
+} ioam_seqno_data_main_t;
+
+void ioam_seqno_init_bitmap(ioam_seqno_data *data);
+
+int ioam_seqno_encap_handler(vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt);
+
+int
+ioam_seqno_decap_handler(vlib_buffer_t *b, ip6_header_t *ip,
+ ip6_hop_by_hop_option_t *opt);
+
+void ioam_analyze_seqno(seqno_rx_info *ppc_rx, u64 seqno);
+
+u8 *
+show_ioam_seqno_cmd_fn(u8 *s, ioam_seqno_data *seqno_data, u8 enc);
+
+#endif
diff --git a/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c b/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c
new file mode 100644
index 00000000000..4638871c224
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_seqno_analyse.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include "ip6_ioam_seqno.h"
+
+static inline void BIT_SET (u64 *p, u32 n)
+{
+ p[ n>>5 ] |= (1 << (n&31));
+}
+
+static inline int BIT_TEST (u64 *p, u32 n)
+{
+ return p[ n>>5 ] & (1 << (n&31));
+}
+
+static void BIT_CLEAR (u64 *p, u64 start, int num_bits, u32 mask)
+{
+ int n, t;
+ int start_index = (start >> 5);
+ int mask_index = (mask >> 5);
+
+ start_index &= mask_index;
+ if (start & 0x1f)
+ {
+ int start_bit = (start & 0x1f);
+
+ n = (1 << start_bit)-1;
+ t = start_bit + num_bits;
+ if (t < 32)
+ {
+ n |= ~((1 << t)-1);
+ p[ start_index ] &= n;
+ return;
+ }
+ p[ start_index ] &= n;
+ start_index = (start_index + 1) & mask_index;
+ num_bits -= (32 - start_bit);
+ }
+ while (num_bits >= 32)
+ {
+ p[ start_index ] = 0;
+ start_index = (start_index + 1) & mask_index;
+ num_bits -= 32;
+ }
+ n = ~((1 << num_bits) - 1);
+ p[ start_index ] &= n;
+}
+
+static inline u8 seqno_check_wraparound(u32 a, u32 b)
+{
+ if ((a != b) && (a > b) && ((a - b) > SEQ_CHECK_VALUE))
+ {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Function to analyze the PPC value recevied.
+ * - Updates the bitmap with received sequence number
+ * - counts the received/lost/duplicate/reordered packets
+ */
+void ioam_analyze_seqno (seqno_rx_info *seqno_rx, u64 seqno)
+{
+ int diff;
+ static int peer_dead_count;
+ seqno_bitmap *bitmap = &seqno_rx->bitmap;
+
+ seqno_rx->rx_packets++;
+
+ if (seqno > bitmap->highest)
+ { /* new larger sequence number */
+ peer_dead_count = 0;
+ diff = seqno - bitmap->highest;
+ if (diff < bitmap->window_size)
+ {
+ if (diff > 1)
+ { /* diff==1 is *such* a common case it's a win to optimize it */
+ BIT_CLEAR(bitmap->array, bitmap->highest+1, diff-1, bitmap->mask);
+ seqno_rx->lost_packets += diff -1;
+ }
+ }
+ else
+ {
+ seqno_rx->lost_packets += diff -1;
+ memset( bitmap->array, 0, bitmap->array_size * sizeof(u64) );
+ }
+ BIT_SET(bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ return;
+ }
+
+ /* we've seen a bigger seq number before */
+ diff = bitmap->highest - seqno;
+ if (diff >= bitmap->window_size)
+ {
+ if (seqno_check_wraparound(bitmap->highest, seqno))
+ {
+ memset( bitmap->array, 0, bitmap->array_size * sizeof(u64));
+ BIT_SET(bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ return;
+ }
+ else
+ {
+ peer_dead_count++;
+ if (peer_dead_count > 25)
+ {
+ peer_dead_count = 0;
+ memset( bitmap->array, 0, bitmap->array_size * sizeof(u64) );
+ BIT_SET(bitmap->array, seqno & bitmap->mask);
+ bitmap->highest = seqno;
+ }
+ //ppc_rx->reordered_packets++;
+ }
+ return;
+ }
+
+ if (BIT_TEST(bitmap->array, seqno & bitmap->mask))
+ {
+ seqno_rx->dup_packets++;
+ return; /* Already seen */
+ }
+ seqno_rx->reordered_packets++;
+ seqno_rx->lost_packets--;
+ BIT_SET(bitmap->array, seqno & bitmap->mask);
+ return;
+}
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c
new file mode 100644
index 00000000000..e63db6e4ec5
--- /dev/null
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.c
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <ioam/lib-trace/trace_util.h>
+
+/* Timestamp precision multipliers for seconds, milliseconds, microseconds
+ * and nanoseconds respectively.
+ */
+static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 };
+
+typedef union
+{
+ u64 as_u64;
+ u32 as_u32[2];
+} time_u64_t;
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ ip6_hop_by_hop_option_t hdr;
+ u8 ioam_trace_type;
+ u8 data_list_elts_left;
+ u32 elts[0]; /* Variable type. So keep it generic */
+}) ioam_trace_option_t;
+/* *INDENT-ON* */
+
+
+extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+extern ip6_main_t ip6_main;
+
+#define foreach_ip6_hop_by_hop_ioam_trace_stats \
+ _(PROCESSED, "Pkts with ip6 hop-by-hop trace options") \
+ _(PROFILE_MISS, "Pkts with ip6 hop-by-hop trace options but no profile set") \
+ _(UPDATED, "Pkts with trace updated") \
+ _(FULL, "Pkts with trace options but no space")
+
+static char *ip6_hop_by_hop_ioam_trace_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_hop_by_hop_ioam_trace_stats
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) IP6_IOAM_TRACE_##sym,
+ foreach_ip6_hop_by_hop_ioam_trace_stats
+#undef _
+ IP6_IOAM_TRACE_N_STATS,
+} ip6_ioam_trace_stats_t;
+
+
+typedef struct
+{
+ /* stats */
+ u64 counters[ARRAY_LEN (ip6_hop_by_hop_ioam_trace_stats_strings)];
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_hop_by_hop_ioam_trace_main_t;
+
+ip6_hop_by_hop_ioam_trace_main_t ip6_hop_by_hop_ioam_trace_main;
+
+always_inline void
+ip6_ioam_trace_stats_increment_counter (u32 counter_index, u64 increment)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 *
+format_ioam_data_list_element (u8 * s, va_list * args)
+{
+ u32 *elt = va_arg (*args, u32 *);
+ u8 *trace_type_p = va_arg (*args, u8 *);
+ u8 trace_type = *trace_type_p;
+
+
+ if (trace_type & BIT_TTL_NODEID)
+ {
+ u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ttl 0x%x node id 0x%x ",
+ ttl_node_id_host_byte_order >> 24,
+ ttl_node_id_host_byte_order & 0x00FFFFFF);
+
+ elt++;
+ }
+
+ if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE)
+ {
+ u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ingress 0x%x egress 0x%x ",
+ ingress_host_byte_order >> 16,
+ ingress_host_byte_order & 0xFFFF);
+ elt++;
+ }
+
+ if (trace_type & BIT_TIMESTAMP)
+ {
+ u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ts 0x%x \n", ts_in_host_byte_order);
+ elt++;
+ }
+
+ if (trace_type & BIT_APPDATA)
+ {
+ u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "app 0x%x ", appdata_in_host_byte_order);
+ elt++;
+ }
+
+ return s;
+}
+
+
+int
+ip6_ioam_trace_get_sizeof_handler (u32 * result)
+{
+ u16 size = 0;
+ u8 trace_data_size = 0;
+ trace_profile *profile = NULL;
+
+ *result = 0;
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ if (PREDICT_FALSE (trace_data_size == 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ size +=
+ sizeof (ioam_trace_option_t) + (profile->num_elts * trace_data_size);
+ *result = size;
+
+ return 0;
+}
+
+
+
+int
+ip6_hop_by_hop_ioam_trace_rewrite_handler (u8 * rewrite_string,
+ u8 * rewrite_size)
+{
+ ioam_trace_option_t *trace_option = NULL;
+ u8 trace_data_size = 0;
+ u8 trace_option_elts = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+ if (PREDICT_FALSE (!rewrite_string))
+ return -1;
+
+ trace_option_elts = profile->num_elts;
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ trace_option = (ioam_trace_option_t *) rewrite_string;
+ trace_option->hdr.type = HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST |
+ HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
+ trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ +
+ trace_option_elts * trace_data_size;
+ trace_option->ioam_trace_type = profile->trace_type & TRACE_TYPE_MASK;
+ trace_option->data_list_elts_left = trace_option_elts;
+ *rewrite_size =
+ sizeof (ioam_trace_option_t) + (trace_option_elts * trace_data_size);
+
+ return 0;
+}
+
+
+int
+ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u8 elt_index = 0;
+ ioam_trace_option_t *trace = (ioam_trace_option_t *) opt;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
+ time_u64_t time_u64;
+ u32 *elt;
+ int rv = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+
+ time_u64.as_u64 = 0;
+
+ if (PREDICT_TRUE (trace->data_list_elts_left))
+ {
+ trace->data_list_elts_left--;
+ /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
+ * to skip to this node's location.
+ */
+ elt_index =
+ trace->data_list_elts_left *
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[elt_index];
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ *elt =
+ clib_host_to_net_u32 ((ip->hop_limit << 24) | profile->node_id);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ (adj->rewrite_header.sw_if_index & 0xFFFF);
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_TIMESTAMP)
+ {
+ /* Send least significant 32 bits */
+ f64 time_f64 =
+ (f64) (((f64) hm->unix_time_0) +
+ (vlib_time_now (hm->vlib_main) - hm->vlib_time_0));
+
+ time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp];
+ *elt = clib_host_to_net_u32 (time_u64.as_u32[0]);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_APPDATA)
+ {
+ /* $$$ set elt0->app_data */
+ *elt = clib_host_to_net_u32 (profile->app_data);
+ elt++;
+ }
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_UPDATED, 1);
+ }
+ else
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_FULL, 1);
+ }
+ return (rv);
+}
+
+u8 *
+ip6_hbh_ioam_trace_data_list_trace_handler (u8 * s,
+ ip6_hop_by_hop_option_t * opt)
+{
+ ioam_trace_option_t *trace;
+ u8 trace_data_size_in_words = 0;
+ u32 *elt;
+ int elt_index = 0;
+
+ trace = (ioam_trace_option_t *) opt;
+ s =
+ format (s, " Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type,
+ trace->data_list_elts_left);
+ trace_data_size_in_words =
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[0];
+ while ((u8 *) elt < ((u8 *) (&trace->elts[0]) + trace->hdr.length - 2
+ /* -2 accounts for ioam_trace_type,elts_left */ ))
+ {
+ s = format (s, " [%d] %U\n", elt_index,
+ format_ioam_data_list_element,
+ elt, &trace->ioam_trace_type);
+ elt_index++;
+ elt += trace_data_size_in_words;
+ }
+ return (s);
+}
+
+
+static clib_error_t *
+ip6_show_ioam_trace_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for (i = 0; i < IP6_IOAM_TRACE_N_STATS; i++)
+ {
+ s =
+ format (s, " %s - %lu\n", ip6_hop_by_hop_ioam_trace_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = {
+ .path = "show ioam trace",
+ .short_help = "iOAM trace statistics",
+ .function = ip6_show_ioam_trace_cmd_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
+{
+ ip6_hop_by_hop_ioam_trace_main_t *hm = &ip6_hop_by_hop_ioam_trace_main;
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_ioam_init)))
+ return (error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ memset (hm->counters, 0, sizeof (hm->counters));
+
+
+ if (ip6_hbh_register_option
+ (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST,
+ ip6_hbh_ioam_trace_data_list_handler,
+ ip6_hbh_ioam_trace_data_list_trace_handler) < 0)
+ return (clib_error_create
+ ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST failed"));
+
+
+ if (ip6_hbh_add_register_option (HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST,
+ sizeof (ioam_trace_option_t),
+ ip6_hop_by_hop_ioam_trace_rewrite_handler)
+ < 0)
+ return (clib_error_create
+ ("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST for rewrite failed"));
+
+
+ return (0);
+}
+
+int
+ip6_trace_profile_cleanup (void)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = 0;
+
+ return 0;
+
+}
+
+
+int
+ip6_trace_profile_setup (void)
+{
+ u32 trace_size = 0;
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ ip6_ioam_trace_stats_increment_counter (IP6_IOAM_TRACE_PROFILE_MISS, 1);
+ return (-1);
+ }
+
+
+ if (ip6_ioam_trace_get_sizeof_handler (&trace_size) < 0)
+ return (-1);
+
+ hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] = trace_size;
+
+ return (0);
+}
+
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_trace_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
new file mode 100644
index 00000000000..fbd86180656
--- /dev/null
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -0,0 +1,616 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ioam_export_h__
+#define __included_ioam_export_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/udp.h>
+#include <vnet/flow/ipfix_packet.h>
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vlib/threads.h>
+
+typedef struct ioam_export_buffer
+{
+ /* Allocated buffer */
+ u32 buffer_index;
+ u64 touched_at;
+ u8 records_in_this_buffer;
+} ioam_export_buffer_t;
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* TODO: to support multiple collectors all this has to be grouped and create a vector here */
+ u8 *record_header;
+ u32 sequence_number;
+ u32 domain_id;
+
+ /* ipfix collector, our ip address */
+ ip4_address_t ipfix_collector;
+ ip4_address_t src_address;
+
+ /* Pool of ioam_export_buffer_t */
+ ioam_export_buffer_t *buffer_pool;
+ /* Vector of per thread ioam_export_buffer_t to buffer pool index */
+ u32 *buffer_per_thread;
+ /* Lock per thread to swap buffers between worker and timer process */
+ volatile u32 **lockp;
+
+ /* time scale transform */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ u32 ip4_lookup_node_index;
+
+ uword my_hbh_slot;
+ u32 export_process_node_index;
+} ioam_export_main_t;
+
+ioam_export_main_t ioam_export_main;
+ioam_export_main_t vxlan_gpe_ioam_export_main;
+
+extern vlib_node_registration_t export_node;
+
+#define DEFAULT_EXPORT_SIZE (3 * CLIB_CACHE_LINE_BYTES)
+/*
+ * Number of records in a buffer
+ * ~(MTU (1500) - [ip hdr(40) + UDP(8) + ipfix (24)]) / DEFAULT_EXPORT_SIZE
+ */
+#define DEFAULT_EXPORT_RECORDS 7
+
+always_inline ioam_export_buffer_t *
+ioam_export_get_my_buffer (ioam_export_main_t * em, u32 thread_id)
+{
+
+ if (vec_len (em->buffer_per_thread) > thread_id)
+ return (pool_elt_at_index
+ (em->buffer_pool, em->buffer_per_thread[thread_id]));
+ return (0);
+}
+
+inline static int
+ioam_export_buffer_add_header (ioam_export_main_t * em, vlib_buffer_t * b0)
+{
+ clib_memcpy (b0->data, em->record_header, vec_len (em->record_header));
+ b0->current_data = 0;
+ b0->current_length = vec_len (em->record_header);
+ b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return (1);
+}
+
+inline static int
+ioam_export_init_buffer (ioam_export_main_t * em, vlib_main_t * vm,
+ ioam_export_buffer_t * eb)
+{
+ vlib_buffer_t *b = 0;
+
+ if (!eb)
+ return (-1);
+ /* TODO: Perhaps buffer init from template here */
+ if (vlib_buffer_alloc (vm, &(eb->buffer_index), 1) != 1)
+ return (-2);
+ eb->records_in_this_buffer = 0;
+ eb->touched_at = vlib_time_now (vm);
+ b = vlib_get_buffer (vm, eb->buffer_index);
+ (void) ioam_export_buffer_add_header (em, b);
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+ return (1);
+}
+
+inline static void
+ioam_export_thread_buffer_free (ioam_export_main_t * em)
+{
+ vlib_main_t *vm = em->vlib_main;
+ ioam_export_buffer_t *eb = 0;
+ int i;
+ for (i = 0; i < vec_len (em->buffer_per_thread); i++)
+ {
+ eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]);
+ if (eb)
+ vlib_buffer_free (vm, &(eb->buffer_index), 1);
+ }
+ for (i = 0; i < vec_len (em->lockp); i++)
+ clib_mem_free ((void *) em->lockp[i]);
+ vec_free (em->buffer_per_thread);
+ pool_free (em->buffer_pool);
+ vec_free (em->lockp);
+ em->buffer_per_thread = 0;
+ em->buffer_pool = 0;
+ em->lockp = 0;
+}
+
+inline static int
+ioam_export_thread_buffer_init (ioam_export_main_t * em, vlib_main_t * vm)
+{
+ int no_of_threads = vec_len (vlib_worker_threads);
+ int i;
+ ioam_export_buffer_t *eb = 0;
+ vlib_node_t *ip4_lookup_node;
+
+ pool_alloc_aligned (em->buffer_pool,
+ no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (em->buffer_per_thread,
+ no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (em->lockp, no_of_threads - 1, CLIB_CACHE_LINE_BYTES);
+ ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
+ em->ip4_lookup_node_index = ip4_lookup_node->index;
+ if (!em->buffer_per_thread || !em->buffer_pool || !em->lockp)
+ {
+ return (-1);
+ }
+ for (i = 0; i < no_of_threads; i++)
+ {
+ eb = 0;
+ pool_get_aligned (em->buffer_pool, eb, CLIB_CACHE_LINE_BYTES);
+ memset (eb, 0, sizeof (*eb));
+ em->buffer_per_thread[i] = eb - em->buffer_pool;
+ if (ioam_export_init_buffer (em, vm, eb) != 1)
+ {
+ ioam_export_thread_buffer_free (em);
+ return (-2);
+ }
+ em->lockp[i] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+ memset ((void *) em->lockp[i], 0, CLIB_CACHE_LINE_BYTES);
+ }
+ return (1);
+}
+
+#define IPFIX_IOAM_EXPORT_ID 272
+
+/* Used to build the rewrite */
+/* data set packet */
+typedef struct
+{
+ ipfix_message_header_t h;
+ ipfix_set_header_t s;
+} ipfix_data_packet_t;
+
+typedef struct
+{
+ ip4_header_t ip4;
+ udp_header_t udp;
+ ipfix_data_packet_t ipfix;
+} ip4_ipfix_data_packet_t;
+
+
+inline static void
+ioam_export_header_cleanup (ioam_export_main_t * em,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vec_free (em->record_header);
+ em->record_header = 0;
+}
+
+inline static int
+ioam_export_header_create (ioam_export_main_t * em,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ u8 *rewrite = 0;
+ ip4_ipfix_data_packet_t *tp;
+
+
+ /* allocate rewrite space */
+ vec_validate_aligned (rewrite,
+ sizeof (ip4_ipfix_data_packet_t) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ tp = (ip4_ipfix_data_packet_t *) rewrite;
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->src_address.as_u32 = src_address->as_u32;
+ ip->dst_address.as_u32 = collector_address->as_u32;
+ udp->src_port = clib_host_to_net_u16 (4939 /* $$FIXME */ );
+ udp->dst_port = clib_host_to_net_u16 (4939);
+ /* FIXUP: UDP length */
+ udp->length = clib_host_to_net_u16 (vec_len (rewrite) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE) - sizeof (*ip));
+
+ /* FIXUP: message header export_time */
+ /* FIXUP: message header sequence_number */
+ h->domain_id = clib_host_to_net_u32 (em->domain_id);
+
+ /*FIXUP: Setid length in octets if records exported are not default */
+ s->set_id_length = ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID,
+ (sizeof (*s) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE)));
+
+ /* FIXUP: h version and length length in octets if records exported are not default */
+ h->version_length = version_length (sizeof (*h) +
+ (sizeof (*s) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE)));
+
+ /* FIXUP: ip length if records exported are not default */
+ /* FIXUP: ip checksum if records exported are not default */
+ ip->length = clib_host_to_net_u16 (vec_len (rewrite) +
+ (DEFAULT_EXPORT_RECORDS *
+ DEFAULT_EXPORT_SIZE));
+ ip->checksum = ip4_header_checksum (ip);
+ _vec_len (rewrite) = sizeof (ip4_ipfix_data_packet_t);
+ em->record_header = rewrite;
+ return (1);
+}
+
+inline static int
+ioam_export_send_buffer (ioam_export_main_t * em, vlib_main_t * vm,
+ ioam_export_buffer_t * eb)
+{
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ ipfix_message_header_t *h;
+ ipfix_set_header_t *s;
+ ip4_ipfix_data_packet_t *tp;
+ vlib_buffer_t *b0;
+ u16 new_l0, old_l0;
+ ip_csum_t sum0;
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ b0 = vlib_get_buffer (vm, eb->buffer_index);
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ /* FIXUP: message header export_time */
+ h->export_time = clib_host_to_net_u32 ((u32)
+ (((f64) em->unix_time_0) +
+ (vlib_time_now (em->vlib_main) -
+ em->vlib_time_0)));
+
+ /* FIXUP: message header sequence_number */
+ h->sequence_number = clib_host_to_net_u32 (em->sequence_number++);
+
+ /* FIXUP: lengths if different from default */
+ if (PREDICT_FALSE (eb->records_in_this_buffer != DEFAULT_EXPORT_RECORDS))
+ {
+ s->set_id_length =
+ ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID /* set_id */ ,
+ b0->current_length - (sizeof (*ip) +
+ sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length =
+ version_length (b0->current_length - (sizeof (*ip) + sizeof (*udp)));
+ sum0 = ip->checksum;
+ old_l0 = ip->length;
+ new_l0 = clib_host_to_net_u16 ((u16) b0->current_length);
+ sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+ length /* changed member */ );
+ ip->checksum = ip_csum_fold (sum0);
+ ip->length = new_l0;
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+ }
+
+ /* Enqueue pkts to ip4-lookup */
+
+ nf = vlib_get_frame_to_node (vm, em->ip4_lookup_node_index);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+ nf->n_vectors = 1;
+ to_next[0] = eb->buffer_index;
+ vlib_put_frame_to_node (vm, em->ip4_lookup_node_index, nf);
+ return (1);
+
+}
+
+#define EXPORT_TIMEOUT (20.0)
+#define THREAD_PERIOD (30.0)
+inline static uword
+ioam_export_process_common (ioam_export_main_t * em, vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f,
+ u32 index)
+{
+ f64 now;
+ f64 timeout = 30.0;
+ uword event_type;
+ uword *event_data = 0;
+ int i;
+ ioam_export_buffer_t *eb = 0, *new_eb = 0;
+ u32 *vec_buffer_indices = 0;
+ u32 *vec_buffer_to_be_sent = 0;
+ u32 *thread_index = 0;
+ u32 new_pool_index = 0;
+
+ em->export_process_node_index = index;
+ /* Wait for Godot... */
+ vlib_process_wait_for_event_or_clock (vm, 1e9);
+ event_type = vlib_process_get_events (vm, &event_data);
+ if (event_type != 1)
+ clib_warning ("bogus kickoff event received, %d", event_type);
+ vec_reset_length (event_data);
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case 2: /* Stop and Wait for kickoff again */
+ timeout = 1e9;
+ break;
+ case 1: /* kickoff : Check for unsent buffers */
+ timeout = THREAD_PERIOD;
+ break;
+ case ~0: /* timeout */
+ break;
+ }
+ vec_reset_length (event_data);
+ now = vlib_time_now (vm);
+ /*
+ * Create buffers for threads that are not active enough
+ * to send out the export records
+ */
+ for (i = 0; i < vec_len (em->buffer_per_thread); i++)
+ {
+ /* If the worker thread is processing export records ignore further checks */
+ if (*em->lockp[i] == 1)
+ continue;
+ eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]);
+ if (eb->records_in_this_buffer > 0
+ && now > (eb->touched_at + EXPORT_TIMEOUT))
+ {
+ pool_get_aligned (em->buffer_pool, new_eb,
+ CLIB_CACHE_LINE_BYTES);
+ memset (new_eb, 0, sizeof (*new_eb));
+ if (ioam_export_init_buffer (em, vm, new_eb) == 1)
+ {
+ new_pool_index = new_eb - em->buffer_pool;
+ vec_add (vec_buffer_indices, &new_pool_index, 1);
+ vec_add (vec_buffer_to_be_sent, &em->buffer_per_thread[i],
+ 1);
+ vec_add (thread_index, &i, 1);
+ }
+ else
+ {
+ pool_put (em->buffer_pool, new_eb);
+ /*Give up */
+ goto CLEANUP;
+ }
+ }
+ }
+ if (vec_len (thread_index) != 0)
+ {
+ /*
+ * Now swap the buffers out
+ */
+ for (i = 0; i < vec_len (thread_index); i++)
+ {
+ while (__sync_lock_test_and_set (em->lockp[thread_index[i]], 1))
+ ;
+ em->buffer_per_thread[thread_index[i]] =
+ vec_pop (vec_buffer_indices);
+ *em->lockp[thread_index[i]] = 0;
+ }
+
+ /* Send the buffers */
+ for (i = 0; i < vec_len (vec_buffer_to_be_sent); i++)
+ {
+ eb =
+ pool_elt_at_index (em->buffer_pool, vec_buffer_to_be_sent[i]);
+ ioam_export_send_buffer (em, vm, eb);
+ pool_put (em->buffer_pool, eb);
+ }
+ }
+
+ CLEANUP:
+ /* Free any leftover/unused buffers and everything that was allocated */
+ for (i = 0; i < vec_len (vec_buffer_indices); i++)
+ {
+ new_eb = pool_elt_at_index (em->buffer_pool, vec_buffer_indices[i]);
+ vlib_buffer_free (vm, &new_eb->buffer_index, 1);
+ pool_put (em->buffer_pool, new_eb);
+ }
+ vec_free (vec_buffer_indices);
+ vec_free (vec_buffer_to_be_sent);
+ vec_free (thread_index);
+ }
+ return 0; /* not so much */
+}
+
+#define ioam_export_node_common(EM, VM, N, F, HTYPE, L, V, NEXT) \
+do { \
+ u32 n_left_from, *from, *to_next; \
+ export_next_t next_index; \
+ u32 pkts_recorded = 0; \
+ ioam_export_buffer_t *my_buf = 0; \
+ vlib_buffer_t *eb0 = 0; \
+ u32 ebi0 = 0; \
+ from = vlib_frame_vector_args (F); \
+ n_left_from = (F)->n_vectors; \
+ next_index = (N)->cached_next_index; \
+ while (__sync_lock_test_and_set ((EM)->lockp[(VM)->cpu_index], 1)); \
+ my_buf = ioam_export_get_my_buffer (EM, (VM)->cpu_index); \
+ my_buf->touched_at = vlib_time_now (VM); \
+ while (n_left_from > 0) \
+ { \
+ u32 n_left_to_next; \
+ vlib_get_next_frame (VM, N, next_index, to_next, n_left_to_next); \
+ while (n_left_from >= 4 && n_left_to_next >= 2) \
+ { \
+ u32 next0 = NEXT; \
+ u32 next1 = NEXT; \
+ u32 bi0, bi1; \
+ HTYPE *ip0, *ip1; \
+ vlib_buffer_t *p0, *p1; \
+ u32 ip_len0, ip_len1; \
+ { \
+ vlib_buffer_t *p2, *p3; \
+ p2 = vlib_get_buffer (VM, from[2]); \
+ p3 = vlib_get_buffer (VM, from[3]); \
+ vlib_prefetch_buffer_header (p2, LOAD); \
+ vlib_prefetch_buffer_header (p3, LOAD); \
+ CLIB_PREFETCH (p2->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \
+ CLIB_PREFETCH (p3->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD); \
+ } \
+ to_next[0] = bi0 = from[0]; \
+ to_next[1] = bi1 = from[1]; \
+ from += 2; \
+ to_next += 2; \
+ n_left_from -= 2; \
+ n_left_to_next -= 2; \
+ p0 = vlib_get_buffer (VM, bi0); \
+ p1 = vlib_get_buffer (VM, bi1); \
+ ip0 = vlib_buffer_get_current (p0); \
+ ip1 = vlib_buffer_get_current (p1); \
+ ip_len0 = \
+ clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \
+ ip_len1 = \
+ clib_net_to_host_u16 (ip1->L) + sizeof (HTYPE); \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER1; \
+ ip_len0 = \
+ ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \
+ ip_len1 = \
+ ip_len1 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len1; \
+ copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER1; \
+ copy3cachelines (eb0->data + eb0->current_length, ip1, ip_len1); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ pkts_recorded += 2; \
+ if (PREDICT_FALSE (((node)->flags & VLIB_NODE_FLAG_TRACE))) \
+ { \
+ if (p0->flags & VLIB_BUFFER_IS_TRACED) \
+ { \
+ export_trace_t *t = \
+ vlib_add_trace (VM, node, p0, sizeof (*t)); \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip0->V); \
+ t->next_index = next0; \
+ } \
+ if (p1->flags & VLIB_BUFFER_IS_TRACED) \
+ { \
+ export_trace_t *t = \
+ vlib_add_trace (VM, N, p1, sizeof (*t)); \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip1->V); \
+ t->next_index = next1; \
+ } \
+ } \
+ NO_BUFFER1: \
+ vlib_validate_buffer_enqueue_x2 (VM, N, next_index, \
+ to_next, n_left_to_next, \
+ bi0, bi1, next0, next1); \
+ } \
+ while (n_left_from > 0 && n_left_to_next > 0) \
+ { \
+ u32 bi0; \
+ vlib_buffer_t *p0; \
+ u32 next0 = NEXT; \
+ HTYPE *ip0; \
+ u32 ip_len0; \
+ bi0 = from[0]; \
+ to_next[0] = bi0; \
+ from += 1; \
+ to_next += 1; \
+ n_left_from -= 1; \
+ n_left_to_next -= 1; \
+ p0 = vlib_get_buffer (VM, bi0); \
+ ip0 = vlib_buffer_get_current (p0); \
+ ip_len0 = \
+ clib_net_to_host_u16 (ip0->L) + sizeof (HTYPE); \
+ ebi0 = my_buf->buffer_index; \
+ eb0 = vlib_get_buffer (VM, ebi0); \
+ if (PREDICT_FALSE (eb0 == 0)) \
+ goto NO_BUFFER; \
+ ip_len0 = \
+ ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0; \
+ copy3cachelines (eb0->data + eb0->current_length, ip0, ip_len0); \
+ eb0->current_length += DEFAULT_EXPORT_SIZE; \
+ my_buf->records_in_this_buffer++; \
+ if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS) \
+ { \
+ ioam_export_send_buffer (EM, VM, my_buf); \
+ ioam_export_init_buffer (EM, VM, my_buf); \
+ } \
+ if (PREDICT_FALSE (((N)->flags & VLIB_NODE_FLAG_TRACE) \
+ && (p0->flags & VLIB_BUFFER_IS_TRACED))) \
+ { \
+ export_trace_t *t = vlib_add_trace (VM, (N), p0, sizeof (*t)); \
+ t->flow_label = \
+ clib_net_to_host_u32 (ip0->V); \
+ t->next_index = next0; \
+ } \
+ pkts_recorded += 1; \
+ NO_BUFFER: \
+ vlib_validate_buffer_enqueue_x1 (VM, N, next_index, \
+ to_next, n_left_to_next, \
+ bi0, next0); \
+ } \
+ vlib_put_next_frame (VM, N, next_index, n_left_to_next); \
+ } \
+ vlib_node_increment_counter (VM, export_node.index, \
+ EXPORT_ERROR_RECORDED, pkts_recorded); \
+ *(EM)->lockp[(VM)->cpu_index] = 0; \
+} while(0)
+
+#endif /* __included_ioam_export_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
new file mode 100644
index 00000000000..7b17c3f7a32
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api
@@ -0,0 +1,42 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
+define vxlan_gpe_ioam_export_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_disable;
+
+ /* Collector ip address */
+ u8 collector_address[4];
+ u8 src_address[4];
+
+ /* Src ip address */
+};
+
+define vxlan_gpe_ioam_export_enable_disable_reply {
+ /* From the request */
+ u32 context;
+
+ /* Return value, zero means all OK */
+ i32 retval;
+}; \ No newline at end of file
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
new file mode 100644
index 00000000000..bab8d977062
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_ioam_export.c - ioam export API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/export-common/ioam_export.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* define message IDs */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+
+/* List of message types that this plugin understands */
+
+
+#define foreach_vxlan_gpe_ioam_export_plugin_api_msg \
+_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE, vxlan_gpe_ioam_export_enable_disable)
+
+extern void vxlan_gpe_set_next_override (uword next);
+/* Action function shared between message handler and debug CLI */
+int
+vxlan_gpe_ioam_export_enable_disable (ioam_export_main_t * em,
+ u8 is_disable,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vlib_main_t *vm = em->vlib_main;
+ u32 node_index = export_node.index;
+ vlib_node_t *vxlan_gpe_decap_ioam_node = NULL;
+
+ if (is_disable == 0)
+ {
+ if (em->my_hbh_slot == ~0)
+ {
+ /* Hook this export node to vxlan-gpe-decap-ioam-v4 */
+ vxlan_gpe_decap_ioam_node =
+ vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-decap-ioam-v4");
+ if (!vxlan_gpe_decap_ioam_node)
+ {
+ /* node does not exist give up */
+ return (-1);
+ }
+ em->my_hbh_slot =
+ vlib_node_add_next (vm, vxlan_gpe_decap_ioam_node->index,
+ node_index);
+ }
+ if (1 == ioam_export_header_create (em, collector_address, src_address))
+ {
+ ioam_export_thread_buffer_init (em, vm);
+ vxlan_gpe_set_next_override (em->my_hbh_slot);
+ /* Turn on the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 1, 0);
+
+ }
+ else
+ {
+ return (-2);
+ }
+ }
+ else
+ {
+ vxlan_gpe_set_next_override (VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP);
+ ioam_export_header_cleanup (em, collector_address, src_address);
+ ioam_export_thread_buffer_free (em);
+ /* Turn off the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 2, 0);
+
+ }
+
+ return 0;
+}
+
+/* API message handler */
+static void vl_api_vxlan_gpe_ioam_export_enable_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_export_enable_disable_t * mp)
+{
+ vl_api_vxlan_gpe_ioam_export_enable_disable_reply_t *rmp;
+ ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main;
+ int rv;
+
+ rv = vxlan_gpe_ioam_export_enable_disable (sm, (int) (mp->is_disable),
+ (ip4_address_t *)
+ mp->collector_address,
+ (ip4_address_t *)
+ mp->src_address);
+
+ REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY);
+} /* API message handler */
+
+
+
+/* Set up the API message handling tables */
+static clib_error_t *
+vxlan_gpe_ioam_export_plugin_api_hookup (vlib_main_t * vm)
+{
+ ioam_export_main_t *sm = &vxlan_gpe_ioam_export_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vxlan_gpe_ioam_export_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+
+static clib_error_t *
+set_vxlan_gpe_ioam_export_ipfix_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ ip4_address_t collector, src;
+ u8 is_disable = 0;
+
+ collector.as_u32 = 0;
+ src.as_u32 = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ if (collector.as_u32 == 0)
+ return clib_error_return (0, "collector address required");
+
+ if (src.as_u32 == 0)
+ return clib_error_return (0, "src address required");
+
+ em->ipfix_collector.as_u32 = collector.as_u32;
+ em->src_address.as_u32 = src.as_u32;
+
+ vlib_cli_output (vm, "Collector %U, src address %U",
+ format_ip4_address, &em->ipfix_collector,
+ format_ip4_address, &em->src_address);
+
+ /* Turn on the export timer process */
+ // vlib_process_signal_event (vm, flow_report_process_node.index,
+ //1, 0);
+ if (0 !=
+ vxlan_gpe_ioam_export_enable_disable (em, is_disable, &collector, &src))
+ {
+ return clib_error_return (0, "Unable to set ioam vxlan-gpe export");
+ }
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_vxlan_gpe_ioam_ipfix_command, static) =
+{
+.path = "set vxlan-gpe-ioam export ipfix",
+.short_help = "set vxlan-gpe-ioam export ipfix collector <ip4-address> src <ip4-address>",
+.function = set_vxlan_gpe_ioam_export_ipfix_command_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+vxlan_gpe_ioam_export_init (vlib_main_t * vm)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ clib_error_t *error = 0;
+ u8 *name;
+
+ name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ em->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+ em->unix_time_0 = (u32) time (0); /* Store starting time */
+ em->vlib_time_0 = vlib_time_now (vm);
+
+ error = vxlan_gpe_ioam_export_plugin_api_hookup (vm);
+ em->my_hbh_slot = ~0;
+ em->vlib_main = vm;
+ em->vnet_main = vnet_get_main ();
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_ioam_export_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h
new file mode 100644
index 00000000000..6d93f09341a
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.h>
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h
new file mode 100644
index 00000000000..cc5698de334
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_ioam_export_msg_enum_h
+#define included_vxlan_gpe_ioam_export_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_vxlan_gpe_ioam_export_msg_enum_h */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c
new file mode 100644
index 00000000000..494263d9a1a
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_test.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_ioam_export_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+
+/* Declare message IDs */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} export_test_main_t;
+
+export_test_main_t export_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(vxlan_gpe_ioam_export_enable_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = export_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE_REPLY, vxlan_gpe_ioam_export_enable_disable_reply)
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int
+api_vxlan_gpe_ioam_export_enable_disable (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ unformat_input_t *i = vam->input;
+ f64 timeout;
+ int is_disable = 0;
+ vl_api_vxlan_gpe_ioam_export_enable_disable_t *mp;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (VXLAN_GPE_IOAM_EXPORT_ENABLE_DISABLE,
+ vxlan_gpe_ioam_export_enable_disable);
+ mp->is_disable = is_disable;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(vxlan_gpe_ioam_export_enable_disable, "<intfc> [disable]")
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "vxlan_gpe_ioam_export_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
new file mode 100644
index 00000000000..58508ebf10e
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_export_thread.c
+ */
+#include <vnet/api_errno.h>
+#include <vppinfra/pool.h>
+#include <ioam/export-common/ioam_export.h>
+
+static vlib_node_registration_t vxlan_gpe_ioam_export_process_node;
+
+static uword
+vxlan_gpe_ioam_export_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return (ioam_export_process_common (&vxlan_gpe_ioam_export_main,
+ vm, rt, f,
+ vxlan_gpe_ioam_export_process_node.index));
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_ioam_export_process_node, static) =
+{
+ .function = vxlan_gpe_ioam_export_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vxlan-gpe-ioam-export-process",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
new file mode 100644
index 00000000000..722c2b061c0
--- /dev/null
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/export-common/ioam_export.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} export_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_export_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ export_trace_t *t = va_arg (*args, export_trace_t *);
+
+ s = format (s, "EXPORT: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t export_node;
+
+#define foreach_export_error \
+_(RECORDED, "Packets recorded for export")
+
+typedef enum
+{
+#define _(sym,str) EXPORT_ERROR_##sym,
+ foreach_export_error
+#undef _
+ EXPORT_N_ERROR,
+} export_error_t;
+
+static char *export_error_strings[] = {
+#define _(sym,string) string,
+ foreach_export_error
+#undef _
+};
+
+typedef enum
+{
+ EXPORT_NEXT_VXLAN_GPE_INPUT,
+ EXPORT_N_NEXT,
+} export_next_t;
+
+always_inline void
+copy3cachelines (void *dst, const void *src, size_t n)
+{
+#if 0
+ if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+ {
+ /* Copy only the first 1/2 cache lines whatever is available */
+ if (n >= 64)
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ if (n >= 128)
+ clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64);
+ return;
+ }
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64);
+ clib_mov64 ((u8 *) dst + 128, (const u8 *) src + 128);
+#endif
+#if 1
+
+ u64 *copy_dst, *copy_src;
+ int i;
+ copy_dst = (u64 *) dst;
+ copy_src = (u64 *) src;
+ if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+ {
+ for (i = 0; i < n / 64; i++)
+ {
+ copy_dst[0] = copy_src[0];
+ copy_dst[1] = copy_src[1];
+ copy_dst[2] = copy_src[2];
+ copy_dst[3] = copy_src[3];
+ copy_dst[4] = copy_src[4];
+ copy_dst[5] = copy_src[5];
+ copy_dst[6] = copy_src[6];
+ copy_dst[7] = copy_src[7];
+ copy_dst += 8;
+ copy_src += 8;
+ }
+ return;
+ }
+ for (i = 0; i < 3; i++)
+ {
+ copy_dst[0] = copy_src[0];
+ copy_dst[1] = copy_src[1];
+ copy_dst[2] = copy_src[2];
+ copy_dst[3] = copy_src[3];
+ copy_dst[4] = copy_src[4];
+ copy_dst[5] = copy_src[5];
+ copy_dst[6] = copy_src[6];
+ copy_dst[7] = copy_src[7];
+ copy_dst += 8;
+ copy_src += 8;
+ }
+#endif
+}
+
+
+static uword
+vxlan_gpe_export_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_export_main_t *em = &vxlan_gpe_ioam_export_main;
+ ioam_export_node_common (em, vm, node, frame, ip4_header_t, length,
+ ip_version_and_header_length,
+ EXPORT_NEXT_VXLAN_GPE_INPUT);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for VXLAN-GPE export
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (export_node) =
+{
+ .function = vxlan_gpe_export_node_fn,
+ .name = "vxlan-gpe-ioam-export",
+ .vector_size = sizeof (u32),
+ .format_trace = format_export_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (export_error_strings),
+ .error_strings = export_error_strings,
+ .n_next_nodes = EXPORT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {[EXPORT_NEXT_VXLAN_GPE_INPUT] = "vxlan-gpe-pop-ioam-v4"},
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export/ioam_export.api b/src/plugins/ioam/export/ioam_export.api
new file mode 100644
index 00000000000..f22d9fc8ebe
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export.api
@@ -0,0 +1,42 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
+define ioam_export_ip6_enable_disable {
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ /* Enable / disable the feature */
+ u8 is_disable;
+
+ /* Collector ip address */
+ u8 collector_address[4];
+ u8 src_address[4];
+
+ /* Src ip address */
+};
+
+define ioam_export_ip6_enable_disable_reply {
+ /* From the request */
+ u32 context;
+
+ /* Return value, zero means all OK */
+ i32 retval;
+};
diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c
new file mode 100644
index 00000000000..b122e445b28
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export.c - ioam export API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/export-common/ioam_export.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+
+/* define message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+
+/* List of message types that this plugin understands */
+
+#define foreach_ioam_export_plugin_api_msg \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable)
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ clib_error_t *error = 0;
+
+ em->vlib_main = vm;
+ em->vnet_main = h->vnet_main;
+
+ return error;
+}
+
+/* Action function shared between message handler and debug CLI */
+
+int
+ioam_export_ip6_enable_disable (ioam_export_main_t * em,
+ u8 is_disable,
+ ip4_address_t * collector_address,
+ ip4_address_t * src_address)
+{
+ vlib_main_t *vm = em->vlib_main;
+
+ if (is_disable == 0)
+ {
+ if (1 == ioam_export_header_create (em, collector_address, src_address))
+ {
+ ioam_export_thread_buffer_init (em, vm);
+ ip6_hbh_set_next_override (em->my_hbh_slot);
+ /* Turn on the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 1, 0);
+
+ }
+ else
+ {
+ return (-2);
+ }
+ }
+ else
+ {
+ ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+ ioam_export_header_cleanup (em, collector_address, src_address);
+ ioam_export_thread_buffer_free (em);
+ /* Turn off the export buffer check process */
+ vlib_process_signal_event (vm, em->export_process_node_index, 2, 0);
+
+ }
+
+ return 0;
+}
+
+/* API message handler */
+static void vl_api_ioam_export_ip6_enable_disable_t_handler
+ (vl_api_ioam_export_ip6_enable_disable_t * mp)
+{
+ vl_api_ioam_export_ip6_enable_disable_reply_t *rmp;
+ ioam_export_main_t *sm = &ioam_export_main;
+ int rv;
+
+ rv = ioam_export_ip6_enable_disable (sm, (int) (mp->is_disable),
+ (ip4_address_t *)
+ mp->collector_address,
+ (ip4_address_t *) mp->src_address);
+
+ REPLY_MACRO (VL_API_IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+ioam_export_plugin_api_hookup (vlib_main_t * vm)
+{
+ ioam_export_main_t *sm = &ioam_export_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_ioam_export_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (ioam_export_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_ioam_export;
+#undef _
+}
+
+static clib_error_t *
+set_ioam_export_ipfix_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ ip4_address_t collector, src;
+ u8 is_disable = 0;
+
+ collector.as_u32 = 0;
+ src.as_u32 = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ ;
+ else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ ;
+ else if (unformat (input, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ if (collector.as_u32 == 0)
+ return clib_error_return (0, "collector address required");
+
+ if (src.as_u32 == 0)
+ return clib_error_return (0, "src address required");
+
+ em->ipfix_collector.as_u32 = collector.as_u32;
+ em->src_address.as_u32 = src.as_u32;
+
+ vlib_cli_output (vm, "Collector %U, src address %U",
+ format_ip4_address, &em->ipfix_collector,
+ format_ip4_address, &em->src_address);
+
+ /* Turn on the export timer process */
+ // vlib_process_signal_event (vm, flow_report_process_node.index,
+ //1, 0);
+ ioam_export_ip6_enable_disable (em, is_disable, &collector, &src);
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ipfix_command, static) =
+{
+.path = "set ioam export ipfix",.short_help =
+ "set ioam export ipfix collector <ip4-address> src <ip4-address>",.
+ function = set_ioam_export_ipfix_command_fn,};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+ioam_export_init (vlib_main_t * vm)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 node_index = export_node.index;
+ vlib_node_t *ip6_hbyh_node = NULL;
+
+ name = format (0, "ioam_export_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ em->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+ em->unix_time_0 = (u32) time (0); /* Store starting time */
+ em->vlib_time_0 = vlib_time_now (vm);
+
+ error = ioam_export_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (em, &api_main);
+
+ /* Hook this export node to ip6-hop-by-hop */
+ ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop");
+ em->my_hbh_slot = vlib_node_add_next (vm, ip6_hbyh_node->index, node_index);
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ioam_export_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/export/ioam_export_all_api_h.h b/src/plugins/ioam/export/ioam_export_all_api_h.h
new file mode 100644
index 00000000000..bc4368f2acb
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/export/ioam_export.api.h>
diff --git a/src/plugins/ioam/export/ioam_export_msg_enum.h b/src/plugins/ioam/export/ioam_export_msg_enum.h
new file mode 100644
index 00000000000..c2de798893c
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_ioam_export_msg_enum_h
+#define included_ioam_export_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/export/ioam_export_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_ioam_export_msg_enum_h */
diff --git a/src/plugins/ioam/export/ioam_export_test.c b/src/plugins/ioam/export/ioam_export_test.c
new file mode 100644
index 00000000000..f991fc0c795
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_test.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+
+/* Declare message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} export_test_main_t;
+
+export_test_main_t export_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(ioam_export_ip6_enable_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = export_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY, ioam_export_ip6_enable_disable_reply)
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int
+api_ioam_export_ip6_enable_disable (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ unformat_input_t *i = vam->input;
+ f64 timeout;
+ int is_disable = 0;
+ vl_api_ioam_export_ip6_enable_disable_t *mp;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ is_disable = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable);
+ mp->is_disable = is_disable;
+
+ /* send it... */
+ S;
+
+ /* Wait for a reply... */
+ W;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(ioam_export_ip6_enable_disable, "<intfc> [disable]")
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ export_test_main_t *sm = &export_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_export_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
diff --git a/src/plugins/ioam/export/ioam_export_thread.c b/src/plugins/ioam/export/ioam_export_thread.c
new file mode 100644
index 00000000000..d2eb200936a
--- /dev/null
+++ b/src/plugins/ioam/export/ioam_export_thread.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_export_thread.c
+ */
+#include <vnet/api_errno.h>
+#include <vppinfra/pool.h>
+#include <ioam/export-common/ioam_export.h>
+
+static vlib_node_registration_t ioam_export_process_node;
+
+static uword
+ioam_export_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ return (ioam_export_process_common(&ioam_export_main,
+ vm, rt, f,
+ ioam_export_process_node.index));
+}
+
+VLIB_REGISTER_NODE (ioam_export_process_node, static) =
+{
+ .function = ioam_export_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ioam-export-process",
+};
diff --git a/src/plugins/ioam/export/node.c b/src/plugins/ioam/export/node.c
new file mode 100644
index 00000000000..19f143dfbf3
--- /dev/null
+++ b/src/plugins/ioam/export/node.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/export-common/ioam_export.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 flow_label;
+} export_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_export_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ export_trace_t *t = va_arg (*args, export_trace_t *);
+
+ s = format (s, "EXPORT: flow_label %d, next index %d",
+ t->flow_label, t->next_index);
+ return s;
+}
+
+vlib_node_registration_t export_node;
+
+#define foreach_export_error \
+_(RECORDED, "Packets recorded for export")
+
+typedef enum
+{
+#define _(sym,str) EXPORT_ERROR_##sym,
+ foreach_export_error
+#undef _
+ EXPORT_N_ERROR,
+} export_error_t;
+
+static char *export_error_strings[] = {
+#define _(sym,string) string,
+ foreach_export_error
+#undef _
+};
+
+typedef enum
+{
+ EXPORT_NEXT_POP_HBYH,
+ EXPORT_N_NEXT,
+} export_next_t;
+
+always_inline void
+copy3cachelines (void *dst, const void *src, size_t n)
+{
+#if 0
+ if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+ {
+ /* Copy only the first 1/2 cache lines whatever is available */
+ if (n >= 64)
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ if (n >= 128)
+ clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64);
+ return;
+ }
+ clib_mov64 ((u8 *) dst, (const u8 *) src);
+ clib_mov64 ((u8 *) dst + 64, (const u8 *) src + 64);
+ clib_mov64 ((u8 *) dst + 128, (const u8 *) src + 128);
+#endif
+#if 1
+
+ u64 *copy_dst, *copy_src;
+ int i;
+ copy_dst = (u64 *) dst;
+ copy_src = (u64 *) src;
+ if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+ {
+ for (i = 0; i < n / 64; i++)
+ {
+ copy_dst[0] = copy_src[0];
+ copy_dst[1] = copy_src[1];
+ copy_dst[2] = copy_src[2];
+ copy_dst[3] = copy_src[3];
+ copy_dst[4] = copy_src[4];
+ copy_dst[5] = copy_src[5];
+ copy_dst[6] = copy_src[6];
+ copy_dst[7] = copy_src[7];
+ copy_dst += 8;
+ copy_src += 8;
+ }
+ return;
+ }
+ for (i = 0; i < 3; i++)
+ {
+ copy_dst[0] = copy_src[0];
+ copy_dst[1] = copy_src[1];
+ copy_dst[2] = copy_src[2];
+ copy_dst[3] = copy_src[3];
+ copy_dst[4] = copy_src[4];
+ copy_dst[5] = copy_src[5];
+ copy_dst[6] = copy_src[6];
+ copy_dst[7] = copy_src[7];
+ copy_dst += 8;
+ copy_src += 8;
+ }
+#endif
+}
+
+static uword
+ip6_export_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ioam_export_main_t *em = &ioam_export_main;
+ ioam_export_node_common(em, vm, node, frame, ip6_header_t, payload_length,
+ ip_version_traffic_class_and_flow_label,
+ EXPORT_NEXT_POP_HBYH);
+ return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 export
+ */
+VLIB_REGISTER_NODE (export_node) =
+{
+ .function = ip6_export_node_fn,
+ .name = "ip6-export",
+ .vector_size = sizeof (u32),
+ .format_trace = format_export_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (export_error_strings),
+ .error_strings = export_error_strings,
+ .n_next_nodes = EXPORT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes =
+ {
+ [EXPORT_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
+ },
+};
diff --git a/src/plugins/ioam/ioam_plugin_doc.md b/src/plugins/ioam/ioam_plugin_doc.md
new file mode 100644
index 00000000000..343abcf73d8
--- /dev/null
+++ b/src/plugins/ioam/ioam_plugin_doc.md
@@ -0,0 +1,464 @@
+## VPP Inband OAM (iOAM) {#ioam_plugin_doc}
+
+In-band OAM (iOAM) is an implementation study to record operational
+information in the packet while the packet traverses a path between
+two points in the network.
+
+Overview of iOAM can be found in [iOAM-Devnet] page.
+The following IETF drafts detail the motivation and mechanism for
+recording operational information:
+ - [iOAM-ietf-requirements] - Describes motivation and usecases for iOAM
+ - [iOAM-ietf-data] - Describes data records that can be collected using iOAM
+ - [iOAM-ietf-transport] - Lists out the transport protocols
+ and mechanism to carry iOAM data records
+ - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT)
+ and mechanisms to operationalize the idea
+
+## Terminology
+In-band OAM is expected to be deployed in a specific domain rather
+than on the overall Internet. The part of the network which employs in-band OAM
+is referred to as **"in-band OAM-domain"**.
+
+In-band OAM data is added to a packet on entering the in-band OAM-domain
+and is removed from the packet when exiting the domain.
+Within the in-band OAM-domain, network nodes that the packet traverses
+may update the in-band OAM data records.
+
+- The node which adds in-band OAM data to the packet is called the
+**"in-band OAM encapsulating node"**.
+
+- The node which removes the in-band OAM data is referred to as the
+**"in-band OAM decapsulating node"**.
+
+- Nodes within the domain which are aware of in-band OAM data and read
+and/or write or process the in-band OAM data are called
+**"in-band OAM transit nodes"**.
+
+## Features supported in the current release
+VPP can function as in-band OAM encapsulating, transit and decapsulating node.
+In this version of VPP in-band OAM data is transported as options in an
+IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
+for IPv6 traffic.
+
+The following iOAM features are supported:
+
+- **In-band OAM Tracing** : In-band OAM supports multiple data records to be
+recorded in the packet as the packet traverses the network.
+These data records offer insights into the operational behavior of the network.
+The following information can be collected in the tracing
+data from the nodes a packet traverses:
+ - Node ID
+ - Ingress interface ID
+ - Egress interface ID
+ - Timestamp
+ - Pre-configured application data
+
+- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
+added to every packet for verifying that a packet traverses a specific
+set of nodes.
+In-band OAM data is updated at every node that is enabled with iOAM
+proof of transit and is used to verify whether a packet traversed
+all the specified nodes. When the verifier receives each packet,
+it can validate whether the packet traversed the specified nodes.
+
+
+## Configuration
+Configuring iOAM involves:
+- Selecting the packets for which iOAM data must be inserted, updated or removed
+ - Selection of packets for iOAM data insertion on iOAM encapsulating node.
+ Selection of packets is done by 5-tuple based classification
+ - Selection of packets for updating iOAM data is implicitly done on the
+ presence of iOAM options in the packet
+ - Selection of packets for removing the iOAM data is done on 5-tuple
+ based classification
+- The kind of data to be collected
+ - Tracing data
+ - Proof of transit
+- Additional details for processing iOAM data to be collected
+ - For trace data - trace type, number of nodes to be recorded in the trace,
+ time stamp precision, etc.
+ - For POT data - configuration of POT profile required to process the POT data
+
+The CLI for configuring iOAM is explained here followed by detailed steps
+and examples to deploy iOAM on VPP as an encapsulating, transit or
+decapsulating iOAM node in the subsequent sub-sections.
+
+VPP iOAM configuration for enabling trace and POT is as follows:
+
+ set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
+ trace-elts <number of trace elements> trace-tsp <0|1|2|3>
+ node-id <node ID in hex> app-data <application data in hex> [pot]
+
+A description of each of the options of the CLI follows:
+- trace-type : An entry in the "Node data List" array of the trace option
+can have different formats, following the needs of the a deployment.
+For example: Some deployments might only be interested
+in recording the node identifiers, whereas others might be interested
+in recording node identifier and timestamp.
+The following types are currently supported:
+ - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
+ ingress and egress interface IDs (16 bits each), timestamp (32 bits),
+ application data (32 bits)
+ - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
+ ingress and egress interface IDs (16 bits each)
+ - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
+ timestamp (32 bits)
+ - 0x11: Node data to include hop limit (8 bits), node ID (24 bits),
+ application data (32 bits)
+ - 0x19: Node data to include hop limit (8 bits), node ID (24 bits),
+ timestamp (32 bits), application data (32 bits)
+- trace-elts : Defines the length of the node data array in the trace option.
+- trace-tsp : Defines the timestamp precision to use with the enumerated value
+ for precision as follows:
+ - 0 : 32bits timestamp in seconds
+ - 1 : 32bits timestamp in milliseconds
+ - 2 : 32bits timestamp in microseconds
+ - 3 : 32bits timestamp in nanoseconds
+- node-id : Unique identifier for the node, included in the node ID
+ field of the node data in trace option.
+- app-data : The value configured here is included as is in
+application data field of node data in trace option.
+- pot : Enables POT option to be included in the iOAM options.
+
+### Trace configuration
+
+#### On in-band OAM encapsulating node
+ - **Configure classifier and apply ACL** to select packets for
+ iOAM data insertion
+ - Example to enable iOAM data insertion for all the packets
+ towards IPv6 address db06::06:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+ - **Enable tracing** : Specify node ID, maximum number of nodes for which
+ trace data should be recorded, type of data to be included for recording,
+ optionally application data to be included
+ - Example to enable tracing with a maximum of 4 nodes recorded
+ and the data to be recorded to include - hop limit, node id,
+ ingress and egress interface IDs, timestamp (millisecond precision),
+ application data (0x1234):
+
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x1 app-data 0x1234
+
+
+
+#### On in-band OAM transit node
+- The transit node requires trace type, timestamp precision, node ID and
+optionally application data to be configured,
+to update its node data in the trace option.
+
+Example:
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x2 app-data 0x1234
+
+#### On the In-band OAM decapsulating node
+- The decapsulating node similar to encapsulating node requires
+**classification** of the packets to remove iOAM data from.
+ - Example to decapsulate iOAM data for packets towards
+ db06::06, configure classifier and enable it as an ACL as follows:
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+
+- Decapsulating node requires trace type, timestamp precision,
+node ID and optionally application data to be configured,
+to update its node data in the trace option before it is decapsulated.
+
+Example:
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4
+ trace-tsp 1 node-id 0x3 app-data 0x1234
+
+
+### Proof of Transit configuration
+
+For details on proof-of-transit,
+see the IETF draft [iOAM-ietf-proof-of-transit].
+To enable Proof of Transit all the nodes that participate
+and hence are verified for transit need a proof of transit profile.
+A script to generate a proof of transit profile as per the mechanism
+described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet].
+
+The Proof of transit mechanism implemented here is based on
+Shamir's Secret Sharing algorithm.
+The overall algorithm uses two polynomials
+POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
+to be verified for transit.
+POLY-1 is secret and constant. Each node gets a point on POLY-1
+at setup-time and keeps it secret.
+POLY-2 is public, random and per packet.
+Each node is assigned a point on POLY-1 and POLY-2 with the same x index.
+Each node derives its point on POLY-2 each time a packet arrives at it.
+A node then contributes its points on POLY-1 and POLY-2 to construct
+POLY-3 (POLY-3 = POLY-1 + POLY-2) using lagrange extrapolation and
+forwards it towards the verifier by updating POT data in the packet.
+The verifier constructs POLY-3 from the accumulated value from all the nodes
+and its own points on POLY-1 and POLY-2 and verifies whether
+POLY-3 = POLY-1 + POLY-2. Only the verifier knows POLY-1.
+The solution leverages finite field arithmetic in a field of size "prime number"
+for reasons explained in description of Shamir's secret sharing algorithm.
+
+Here is an explanation of POT profile list and profile configuration CLI to
+realize the above mechanism.
+It is best to use the script provided at [iOAM-Devnet] to generate
+this configuration.
+- **Create POT profile** : set pot profile name <string> id [0-1]
+[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
+lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
+ - name : Profile list name.
+ - id : Profile id, it can be 0 or 1.
+ A maximum of two profiles can be configured per profile list.
+ - validator-key : Secret key configured only on the
+ verifier/decapsulating node used to compare and verify proof of transit.
+ - prime-number : Prime number for finite field arithmetic as required by the
+ proof of transit mechanism.
+ - secret_share : Unique point for each node on the secret polynomial POLY-1.
+ - lpc : Lagrange Polynomial Constant(LPC) calculated per node based on
+ its point (x value used for evaluating the points on the polynomial)
+ on the polynomial used in lagrange extrapolation
+ for reconstructing polynomial (POLY-3).
+ - polynomial2 : Is the pre-evaluated value of the point on
+ 2nd polynomial(POLY-2). This is unique for each node.
+ It is pre-evaluated for all the coefficients of POLY-2 except
+ for the constant part of the polynomial that changes per packet
+ and is received as part of the POT data in the packet.
+ - bits-in-random : To control the size of the random number to be
+ generated. This number has to match the other numbers generated and used
+ in the profile as per the algorithm.
+
+- **Set a configured profile as active/in-use** :
+set pot profile-active name <string> ID [0-1]
+ - name : Name of the profile list to be used for computing
+ POT data per packet.
+ - ID : Identifier of the profile within the list to be used.
+
+#### On In-band OAM encapsulating node
+ - Configure the classifier and apply ACL to select packets for iOAM data insertion.
+ - Example to enable iOAM data insertion for all the packet towards
+ IPv6 address db06::06 -
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node
+ ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+
+ - Configure the proof of transit profile list with profiles.
+Each profile list referred to by a name can contain 2 profiles,
+only one is in use for updating proof of transit data at any time.
+ - Example profile list example with a profile generated from the
+ script to verify transit through 3 nodes is:
+
+
+ vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
+ secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682
+ polynomial2 0xffb543d4a9c bits-in-random 63
+
+ - Enable one of the profiles from the configured profile list as active
+ so that is will be used for calculating proof of transit
+
+Example enable profile ID 0 from profile list example configured above:
+
+
+ vpp# set pot profile-active name example ID 0
+
+
+ - Enable POT option to be inserted
+
+
+ vpp# set ioam rewrite pot
+
+
+#### On in-band OAM transit node
+ - Configure the proof of transit profile list with profiles for transit node.
+Example:
+
+
+ vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
+ secret_share 0x564cdbdec4eb625d lpc 0x1
+ polynomial2 0x23f3a227186a bits-in-random 63
+
+#### On in-band OAM decapsulating node / verifier
+- The decapsulating node, similar to the encapsulating node requires
+classification of the packets to remove iOAM data from.
+ - Example to decapsulate iOAM data for packets towards db06::06
+ configure classifier and enable it as an ACL as follows:
+
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- To update and verify the proof of transit, POT profile list should be configured.
+ - Example POT profile list configured as follows:
+
+ vpp# set pot profile name example id 0 validate-key 0x7fff0000fa88465d
+ prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3
+ polynomial2 0x3ff738597ce bits-in-random 63
+
+## Operational data
+
+Following CLIs are available to check iOAM operation:
+- To check iOAM configuration that are effective use "show ioam summary"
+
+Example:
+
+ vpp# show ioam summary
+ REWRITE FLOW CONFIGS - Not configured
+ HOP BY HOP OPTIONS - TRACE CONFIG -
+ Trace Type : 0x1f (31)
+ Trace timestamp precision : 1 (Milliseconds)
+ Num of trace nodes : 4
+ Node-id : 0x2 (2)
+ App Data : 0x1234 (4660)
+ POT OPTION - 1 (Enabled)
+ Try 'show ioam pot and show pot profile' for more information
+
+- To find statistics about packets for which iOAM options were
+added (encapsulating node) and removed (decapsulating node) execute
+*show errors*
+
+Example on encapsulating node:
+
+
+ vpp# show error
+ Count Node Reason
+ 1208804706 ip6-inacl input ACL hits
+ 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
+
+Example on decapsulating node:
+
+ vpp# show error
+ Count Node Reason
+ 69508569 ip6-inacl input ACL hits
+ 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
+
+- To check the POT profiles use "show pot profile"
+
+Example:
+
+ vpp# show pot profile
+ Profile list in use : example
+ POT Profile at index: 0
+ ID : 0
+ Validator : False (0)
+ Secret share : 0x564cdbdec4eb625d (6218586935324795485)
+ Prime number : 0x7fff0000fa884685 (9223090566081300101)
+ 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
+ LPC : 0x1 (1)
+ Bit mask : 0x7fffffffffffffff (9223372036854775807)
+ Profile index in use: 0
+ Pkts passed : 0x36 (54)
+
+- To get statistics of POT for packets use "show ioam pot"
+
+Example at encapsulating or transit node:
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 0
+ Pkts with POT out of Policy - 0
+
+
+Example at decapsulating/verification node:
+
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 54
+ Pkts with POT out of Policy - 0
+
+- Tracing - enable trace of IPv6 packets to view the data inserted and
+collected.
+
+Example when the nodes are receiving data over a DPDK interface:
+Enable tracing using "trace add dpdk-input 20" and
+execute "show trace" to view the iOAM data collected:
+
+
+ vpp# trace add dpdk-input 20
+
+ vpp# show trace
+
+ ------------------- Start of thread 0 vpp_main -------------------
+
+ Packet 1
+
+ 00:00:19:294697: dpdk-input
+ GigabitEthernetb/0/0 rx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ PKT MBUF: port 0, nb_segs 1, pkt_len 214
+ buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
+ packet_type 0x0
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294737: ethernet-input
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ 00:00:19:294753: ip6-input
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294757: ip6-lookup
+ fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294802: ip6-hop-by-hop
+ IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
+ [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
+ app 0x0
+ [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
+ app 0x1234
+ [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
+ app 0x1234
+ [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
+ app 0x1234
+ POT opt present
+ random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
+ 00:00:19:294810: ip6-rewrite
+ tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294814: GigabitEthernetb/0/0-output
+ GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294820: GigabitEthernetb/0/0-tx
+ GigabitEthernetb/0/0 tx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+
+
+[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM>
+[iOAM-ietf-requirements]:<https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>
+[iOAM-ietf-transport]:<https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>
+[iOAM-ietf-data]:<https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>
+[iOAM-ietf-proof-of-transit]:<https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>
diff --git a/src/plugins/ioam/lib-pot/math64.h b/src/plugins/ioam/lib-pot/math64.h
new file mode 100644
index 00000000000..4c608a37de4
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/math64.h
@@ -0,0 +1,159 @@
+/*
+ * math64.h provides the 64 bit unsigned integer add, multiply followed by modulo operation
+ * The linux/math64.h provides divide and multiply 64 bit integers but:
+ * 1. multiply: mul_u64_u64_shr - only returns 64 bits of the result and has to be called
+ * twice to get the complete 128 bits of the result.
+ * 2. Modulo operation of the result of addition and multiplication of u64 that may result
+ * in integers > 64 bits is not supported
+ * Hence this header to combine add/multiply followed by modulo of u64 integrers
+ * always resulting in u64.
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef include_vnet_math64_h
+#define include_vnet_math64_h
+#include <stdint.h>
+
+/*
+ * multiplies and returns result in hi and lo
+ */
+static inline void mul64by64(u64 a, u64 b, u64 * hi, u64 * lo)
+{
+ u64 a_lo = (u64) (uint32_t) a;
+ u64 a_hi = a >> 32;
+ u64 b_lo = (u64) (u32) b;
+ u64 b_hi = b >> 32;
+
+ u64 p0 = a_lo * b_lo;
+ u64 p1 = a_lo * b_hi;
+ u64 p2 = a_hi * b_lo;
+ u64 p3 = a_hi * b_hi;
+
+ u32 cy = (u32) (((p0 >> 32) + (u32) p1 + (u32) p2) >> 32);
+
+ *lo = p0 + (p1 << 32) + (p2 << 32);
+ *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
+ return;
+}
+
+#define TWO64 18446744073709551616.0
+
+static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
+{
+ u64 q1, q2, q;
+ u64 p1, p0;
+ double dq;
+
+ /* calculate quotient first pass 53 bits */
+ dq = (TWO64 * (double)x + (double)y) * di;
+
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L;
+ else
+ q1 = dq;
+
+ /* q1 * m to compare the product to the dividend. */
+ mul64by64(q1, m, &p1, &p0);
+
+ /* Adjust quotient. is it > actual result: */
+ if (x < p1 || (x == p1 && y < p0))
+ {
+ /* q1 > quotient. calculate abs remainder */
+ x = p1 - (x + (p0 < y));
+ y = p0 - y;
+
+ /* use the remainder as new dividend to adjust quotient */
+ q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
+ mul64by64(q2, m, &p1, &p0);
+
+ q = q1 - q2;
+ if (x < p1 || (x == p1 && y <= p0))
+ {
+ y = p0 - y;
+ }
+ else
+ {
+ y = p0 - y;
+ y += m;
+ q--;
+ }
+ }
+ else
+ {
+ x = x - (p1 + (y < p0));
+ y = y - p0;
+
+ q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
+ mul64by64(q2, m, &p1, &p0);
+
+ q = q1 + q2;
+ if (x < p1 || (x == p1 && y < p0))
+ {
+ y = y - p0;
+ y += m;
+ q--;
+ }
+ else
+ {
+ y = y - p0;
+ if (y >= m)
+ {
+ y -= m;
+ q++;
+ }
+ }
+ }
+
+ return y;
+}
+
+/*
+ * returns a % p
+ */
+static inline u64 mod64by64(u64 a, u64 p, u64 primeinv)
+{
+ return (mod128by64(0, a, p, primeinv));
+}
+
+static inline void add64(u64 a, u64 b, u64 * whi, u64 * wlo)
+{
+ *wlo = a + b;
+ if (*wlo < a)
+ *whi = 1;
+
+}
+
+/*
+ * returns (a + b)%p
+ */
+static inline u64 add64_mod(u64 a, u64 b, u64 p, double pi)
+{
+ u64 shi = 0, slo = 0;
+
+ add64(a, b, &shi, &slo);
+ return (mod128by64(shi, slo, p, pi));
+}
+
+/*
+ * returns (ab) % p
+ */
+static inline u64 mul64_mod(u64 a, u64 b, u64 p, double pi)
+{
+ u64 phi = 0, plo = 0;
+
+ mul64by64(a, b, &phi, &plo);
+ return (mod128by64(phi, plo, p, pi));
+}
+
+#endif
diff --git a/src/plugins/ioam/lib-pot/pot.api b/src/plugins/ioam/lib-pot/pot.api
new file mode 100644
index 00000000000..fa2fc126b7e
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot.api
@@ -0,0 +1,133 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief Proof of Transit(POT): Set POT profile
+ @param id - id of the profile
+ @param validator - True/False to indicate if this is verifier
+ @param secret_key - Verification key
+ @param secret_share - Share of the 1st polynomial
+ @param prime - Prime number used for modulo operation
+ @param max_bits - Max bits to be used for Random number generation
+ @param lpc - Lagrange basis polynomial
+ @param polynomial_public - pre-evaluated public polynomial
+ @param list_name_len - length of the name of this profile list
+ @param list_name - name of this profile list
+*/
+define pot_profile_add {
+ u32 client_index;
+ u32 context;
+ u8 id;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u8 max_bits;
+ u64 lpc;
+ u64 polynomial_public;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Proof of Transit profile add / del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define pot_profile_add_reply {
+ u32 context;
+ i32 retval;
+};
+
+
+/** \brief Proof of Transit(POT): Activate POT profile in the list
+ @param id - id of the profile
+ @param list_name_len - length of the name of this profile list
+ @param list_name - name of this profile list
+*/
+define pot_profile_activate {
+ u32 client_index;
+ u32 context;
+ u8 id;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Proof of Transit profile activate response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define pot_profile_activate_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Delete POT Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param list_name_len - length of the name of the profile list
+ @param list_name - name of profile list to delete
+*/
+define pot_profile_del {
+ u32 client_index;
+ u32 context;
+ u8 list_name_len;
+ u8 list_name[0];
+};
+
+/** \brief Proof of Transit profile add / del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define pot_profile_del_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Show POT Profiles
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - id of the profile
+*/
+define pot_profile_show_config_dump {
+ u32 client_index;
+ u32 context;
+ u8 id;
+};
+
+/** \brief Show POT profile reply
+ @param id - id of the profile
+ @param validator - True/False to indicate if this is verifier
+ @param secret_key - Verification key
+ @param secret_share - Share of the 1st polynomial
+ @param prime - Prime number used for modulo operation
+ @param max_bits - Max bits to be used for Random number generation
+ @param lpc - Lagrange basis polynomial
+ @param polynomial_public - pre-evaluated public polynomial
+ @param list_name_len - length of the name of this profile list
+ @param list_name - name of this profile list
+*/
+define pot_profile_show_config_details {
+ u32 context;
+ i32 retval;
+ u8 id;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u64 bit_mask;
+ u64 lpc;
+ u64 polynomial_public;
+};
diff --git a/src/plugins/ioam/lib-pot/pot_all_api_h.h b/src/plugins/ioam/lib-pot/pot_all_api_h.h
new file mode 100644
index 00000000000..63967c45444
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-pot/pot.api.h>
diff --git a/src/plugins/ioam/lib-pot/pot_api.c b/src/plugins/ioam/lib-pot/pot_api.c
new file mode 100644
index 00000000000..d3af7b4036a
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_api.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * pot_api.c - Proof of Transit related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-pot/pot_util.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-pot/pot_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+/* List of message types that this plugin understands */
+
+#define foreach_pot_plugin_api_msg \
+_(POT_PROFILE_ADD, pot_profile_add) \
+_(POT_PROFILE_ACTIVATE, pot_profile_activate) \
+_(POT_PROFILE_DEL, pot_profile_del) \
+_(POT_PROFILE_SHOW_CONFIG_DUMP, pot_profile_show_config_dump) \
+
+static void vl_api_pot_profile_add_t_handler
+(vl_api_pot_profile_add_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_add_reply_t * rmp;
+ u8 id;
+ pot_profile *profile = NULL;
+ u8 *name = 0;
+
+ if (mp->list_name_len)
+ name = format(0, "%s", mp->list_name);
+
+ pot_profile_list_init(name);
+ id = mp->id;
+ profile = pot_profile_find(id);
+ if (profile) {
+ rv = pot_profile_create(profile,
+ clib_net_to_host_u64(mp->prime),
+ clib_net_to_host_u64(mp->polynomial_public),
+ clib_net_to_host_u64(mp->lpc),
+ clib_net_to_host_u64(mp->secret_share));
+ if (rv != 0)
+ goto ERROROUT;
+ if (1 == mp->validator)
+ (void)pot_set_validator(profile, clib_net_to_host_u64(mp->secret_key));
+ (void)pot_profile_set_bit_mask(profile, mp->max_bits);
+ } else {
+ rv = -3;
+ }
+ ERROROUT:
+ vec_free(name);
+ REPLY_MACRO(VL_API_POT_PROFILE_ADD_REPLY);
+}
+
+static void send_pot_profile_details(vl_api_pot_profile_show_config_dump_t *mp, u8 id)
+{
+ vl_api_pot_profile_show_config_details_t * rmp;
+ pot_main_t * sm = &pot_main;
+ pot_profile *profile = pot_profile_find(id);
+ int rv = 0;
+ if(profile){
+ REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS,
+ rmp->id=id;
+ rmp->validator=profile->validator;
+ rmp->secret_key=clib_host_to_net_u64(profile->secret_key);
+ rmp->secret_share=clib_host_to_net_u64(profile->secret_share);
+ rmp->prime=clib_host_to_net_u64(profile->prime);
+ rmp->bit_mask=clib_host_to_net_u64(profile->bit_mask);
+ rmp->lpc=clib_host_to_net_u64(profile->lpc);
+ rmp->polynomial_public=clib_host_to_net_u64(profile->poly_pre_eval);
+ );
+ }
+ else{
+ REPLY_MACRO2(VL_API_POT_PROFILE_SHOW_CONFIG_DETAILS,
+ rmp->id=id;
+ rmp->validator=0;
+ rmp->secret_key=0;
+ rmp->secret_share=0;
+ rmp->prime=0;
+ rmp->bit_mask=0;
+ rmp->lpc=0;
+ rmp->polynomial_public=0;
+ );
+ }
+}
+
+static void vl_api_pot_profile_show_config_dump_t_handler
+(vl_api_pot_profile_show_config_dump_t *mp)
+{
+ u8 id = mp->id;
+ u8 dump_call_id = ~0;
+ if(dump_call_id==id){
+ for(id=0;id<MAX_POT_PROFILES;id++)
+ send_pot_profile_details(mp,id);
+ }
+ else
+ send_pot_profile_details(mp,id);
+}
+
+static void vl_api_pot_profile_activate_t_handler
+(vl_api_pot_profile_activate_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_add_reply_t * rmp;
+ u8 id;
+ u8 *name = NULL;
+
+ if (mp->list_name_len)
+ name = format(0, "%s", mp->list_name);
+ if (!pot_profile_list_is_enabled(name)) {
+ rv = -1;
+ } else {
+ id = mp->id;
+ rv = pot_profile_set_active(id);
+ }
+
+ vec_free(name);
+ REPLY_MACRO(VL_API_POT_PROFILE_ACTIVATE_REPLY);
+}
+
+
+static void vl_api_pot_profile_del_t_handler
+(vl_api_pot_profile_del_t *mp)
+{
+ pot_main_t * sm = &pot_main;
+ int rv = 0;
+ vl_api_pot_profile_del_reply_t * rmp;
+
+ clear_pot_profiles();
+
+ REPLY_MACRO(VL_API_POT_PROFILE_DEL_REPLY);
+}
+
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ pot_main_t * sm = &pot_main;
+ clib_error_t * error = 0;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = h->vnet_main;
+ return error;
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+pot_plugin_api_hookup (vlib_main_t *vm)
+{
+ pot_main_t * sm = &pot_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_pot_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (pot_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_pot;
+#undef _
+}
+
+static clib_error_t * pot_init (vlib_main_t * vm)
+{
+ pot_main_t * sm = &pot_main;
+ clib_error_t * error = 0;
+ u8 * name;
+
+ bzero(sm, sizeof(pot_main));
+ (void)pot_util_init();
+ name = format (0, "ioam_pot_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = pot_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free(name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pot_init);
diff --git a/src/plugins/ioam/lib-pot/pot_msg_enum.h b/src/plugins/ioam/lib-pot/pot_msg_enum.h
new file mode 100644
index 00000000000..a4a88bed20f
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_pot_msg_enum_h
+#define included_pot_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-pot/pot_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_pot_msg_enum_h */
diff --git a/src/plugins/ioam/lib-pot/pot_test.c b/src/plugins/ioam/lib-pot/pot_test.c
new file mode 100644
index 00000000000..2e87023896e
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_test.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * pot_test.c - test harness for pot plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+/* Declare message IDs */
+#include <ioam/lib-pot/pot_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-pot/pot_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} pot_test_main_t;
+
+pot_test_main_t pot_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(pot_profile_add_reply) \
+_(pot_profile_activate_reply) \
+_(pot_profile_del_reply)
+
+#define foreach_custom_reply_retval_handler \
+_(pot_profile_show_config_details, \
+ errmsg(" ID:%d\n",mp->id); \
+ errmsg(" Validator:%d\n",mp->validator); \
+ errmsg(" secret_key:%Lx\n",clib_net_to_host_u64(mp->secret_key)); \
+ errmsg(" secret_share:%Lx\n",clib_net_to_host_u64(mp->secret_share)); \
+ errmsg(" prime:%Lx\n",clib_net_to_host_u64(mp->prime)); \
+ errmsg(" bitmask:%Lx\n",clib_net_to_host_u64(mp->bit_mask)); \
+ errmsg(" lpc:%Lx\n",clib_net_to_host_u64(mp->lpc)); \
+ errmsg(" public poly:%Lx\n",clib_net_to_host_u64(mp->polynomial_public)); \
+ )
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = pot_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n,body) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = pot_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ do{body;}while(0); \
+ }
+foreach_custom_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(POT_PROFILE_ADD_REPLY, pot_profile_add_reply) \
+_(POT_PROFILE_ACTIVATE_REPLY, pot_profile_activate_reply) \
+_(POT_PROFILE_DEL_REPLY, pot_profile_del_reply) \
+_(POT_PROFILE_SHOW_CONFIG_DETAILS, pot_profile_show_config_details)
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+
+static int api_pot_profile_add (vat_main_t *vam)
+{
+#define MAX_BITS 64
+ pot_test_main_t * sm = &pot_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_add_t *mp;
+ u8 *name = NULL;
+ u64 prime = 0;
+ u64 secret_share = 0;
+ u64 secret_key = 0;
+ u32 bits = MAX_BITS;
+ u64 lpc = 0, poly2 = 0;
+ f64 timeout;
+ u8 id = 0;
+ int rv = 0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s", &name))
+ ;
+ else if(unformat(input, "id %d", &id))
+ ;
+ else if (unformat(input, "validator-key 0x%Lx", &secret_key))
+ ;
+ else if (unformat(input, "prime-number 0x%Lx", &prime))
+ ;
+ else if (unformat(input, "secret-share 0x%Lx", &secret_share))
+ ;
+ else if (unformat(input, "polynomial-public 0x%Lx", &poly2))
+ ;
+ else if (unformat(input, "lpc 0x%Lx", &lpc))
+ ;
+ else if (unformat(input, "bits-in-random %u", &bits))
+ {
+ if (bits > MAX_BITS)
+ bits = MAX_BITS;
+ }
+ else
+ break;
+ }
+
+ if (!name)
+ {
+ errmsg ("name required\n");
+ rv = -99;
+ goto OUT;
+ }
+
+ M2(POT_PROFILE_ADD, pot_profile_add, vec_len(name));
+
+ mp->list_name_len = vec_len(name);
+ clib_memcpy(mp->list_name, name, mp->list_name_len);
+ mp->secret_share = clib_host_to_net_u64(secret_share);
+ mp->polynomial_public = clib_host_to_net_u64(poly2);
+ mp->lpc = clib_host_to_net_u64(lpc);
+ mp->prime = clib_host_to_net_u64(prime);
+ if (secret_key != 0)
+ {
+ mp->secret_key = clib_host_to_net_u64(secret_key);
+ mp->validator = 1;
+ }
+ else
+ {
+ mp->validator = 0;
+ }
+ mp->id = id;
+ mp->max_bits = bits;
+
+ S; W;
+
+OUT:
+ vec_free(name);
+ return(rv);
+}
+
+static int api_pot_profile_activate (vat_main_t *vam)
+{
+#define MAX_BITS 64
+ pot_test_main_t * sm = &pot_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_activate_t *mp;
+ u8 *name = NULL;
+ u8 id = 0;
+ int rv = 0;
+ f64 timeout;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s", &name))
+ ;
+ else if(unformat(input, "id %d", &id))
+ ;
+ else
+ break;
+ }
+
+ if (!name)
+ {
+ errmsg ("name required\n");
+ rv = -99;
+ goto OUT;
+ }
+
+ M2(POT_PROFILE_ACTIVATE, pot_profile_activate, vec_len(name));
+
+ mp->list_name_len = vec_len(name);
+ clib_memcpy(mp->list_name, name, mp->list_name_len);
+ mp->id = id;
+
+ S; W;
+
+OUT:
+ vec_free(name);
+ return(rv);
+}
+
+
+static int api_pot_profile_del (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ vl_api_pot_profile_del_t *mp;
+ f64 timeout;
+
+ M(POT_PROFILE_DEL, pot_profile_del);
+ mp->list_name_len = 0;
+ S; W;
+ return 0;
+}
+
+static int api_pot_profile_show_config_dump (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_pot_profile_show_config_dump_t *mp;
+ f64 timeout;
+ u8 id = 0;
+
+ while(unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if(unformat(input,"id %d",&id));
+ else
+ break;
+ }
+ M(POT_PROFILE_SHOW_CONFIG_DUMP, pot_profile_show_config_dump);
+
+ mp->id = id;
+
+ S; W;
+ return 0;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(pot_profile_add, "name <name> id [0-1] " \
+ "prime-number <0xu64> bits-in-random [0-64] " \
+ "secret-share <0xu64> lpc <0xu64> polynomial-public <0xu64> " \
+ "[validator-key <0xu64>] [validity <0xu64>]") \
+_(pot_profile_activate, "name <name> id [0-1] ") \
+_(pot_profile_del, "[id <nn>]") \
+_(pot_profile_show_config_dump, "id [0-1]")
+
+void vat_api_hookup (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ pot_test_main_t * sm = &pot_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_pot_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/ioam/lib-pot/pot_util.c b/src/plugins/ioam/lib-pot/pot_util.c
new file mode 100644
index 00000000000..a253ad4130f
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_util.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <vppinfra/mem.h>
+#include "math64.h"
+#include "pot_util.h"
+
+pot_main_t pot_main;
+
+static void pot_profile_cleanup(pot_profile *profile);
+
+static void pot_main_profiles_reset (void)
+{
+ pot_main_t *sm = &pot_main;
+ int i = 0;
+
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ pot_profile_cleanup(&(sm->profile_list[i]));
+ }
+ sm->active_profile_id = 0;
+ if (sm->profile_list_name)
+ vec_free(sm->profile_list_name);
+ sm->profile_list_name = NULL;
+}
+
+int pot_util_init (void)
+{
+ pot_main_profiles_reset();
+
+ return(0);
+}
+
+static void pot_profile_init(pot_profile * new, u8 id)
+{
+ if (new)
+ {
+ memset(new, 0, sizeof(pot_profile));
+ new->id = id;
+ }
+}
+
+pot_profile *pot_profile_find(u8 id)
+{
+ pot_main_t *sm = &pot_main;
+
+ if (id < MAX_POT_PROFILES)
+ {
+ return (&(sm->profile_list[id]));
+ }
+ return (NULL);
+}
+static int pot_profile_name_equal (u8 *name0, u8 *name1)
+{
+ int len0, len1;
+
+ len0 = vec_len (name0);
+ len1 = vec_len (name1);
+ if (len0 != len1)
+ return(0);
+ return (0==strncmp ((char *) name0, (char *)name1, len0));
+}
+
+int pot_profile_list_is_enabled (u8 *name)
+{
+ pot_main_t *sm = &pot_main;
+ return (pot_profile_name_equal(sm->profile_list_name, name));
+}
+
+void pot_profile_list_init(u8 * profile_list_name)
+{
+ pot_main_t *sm = &pot_main;
+ int i = 0;
+
+ /* If it is the same profile list skip reset */
+ if (pot_profile_name_equal(sm->profile_list_name, profile_list_name))
+ {
+ return;
+ }
+
+ pot_main_profiles_reset();
+ if (vec_len(profile_list_name))
+ sm->profile_list_name = (u8 *)vec_dup(profile_list_name);
+ else
+ sm->profile_list_name = 0;
+ sm->active_profile_id = 0;
+
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ pot_profile_init(&(sm->profile_list[i]), i);
+ }
+}
+
+static void pot_profile_cleanup(pot_profile * profile)
+{
+ u16 id = profile->id;
+
+ memset(profile, 0, sizeof(pot_profile));
+ profile->id = id; /* Restore id alone */
+}
+
+int pot_profile_create(pot_profile * profile, u64 prime,
+ u64 poly2, u64 lpc, u64 secret_share)
+{
+ if (profile && !profile->in_use)
+ {
+ pot_profile_cleanup(profile);
+ profile->prime = prime;
+ profile->primeinv = 1.0 / prime;
+ profile->lpc = lpc;
+ profile->poly_pre_eval = poly2;
+ profile->secret_share = secret_share;
+ profile->total_pkts_using_this_profile = 0;
+ profile->valid = 1;
+ return(0);
+ }
+
+ return(-1);
+}
+
+int pot_set_validator(pot_profile * profile, u64 key)
+{
+ if (profile && !profile->in_use)
+ {
+ profile->validator = 1;
+ profile->secret_key = key;
+ return(0);
+ }
+ return(-1);
+}
+
+always_inline u64 pot_update_cumulative_inline(u64 cumulative, u64 random,
+ u64 secret_share, u64 prime, u64 lpc, u64 pre_split, double prime_inv)
+{
+ u64 share_random = 0;
+ u64 cumulative_new = 0;
+
+ /*
+ * calculate split share for random
+ */
+ share_random = add64_mod(pre_split, random, prime, prime_inv);
+
+ /*
+ * lpc * (share_secret + share_random)
+ */
+ share_random = add64_mod(share_random, secret_share, prime, prime_inv);
+ share_random = mul64_mod(share_random, lpc, prime, prime_inv);
+
+ cumulative_new = add64_mod(cumulative, share_random, prime, prime_inv);
+
+ return (cumulative_new);
+}
+
+u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random)
+{
+ if (profile && profile->valid != 0)
+ {
+ return (pot_update_cumulative_inline(cumulative, random, profile->secret_share,
+ profile->prime, profile->lpc, profile->poly_pre_eval,
+ profile->primeinv));
+ }
+ return (0);
+}
+
+always_inline u8 pot_validate_inline(u64 secret, u64 prime, double prime_inv,
+ u64 cumulative, u64 random)
+{
+ if (cumulative == (random + secret))
+ {
+ return (1);
+ }
+ else if (cumulative == add64_mod(random, secret, prime, prime_inv))
+ {
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * return True if the cumulative matches secret from a profile
+ */
+u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random)
+{
+ if (profile && profile->validator)
+ {
+ return (pot_validate_inline(profile->secret_key, profile->prime,
+ profile->primeinv, cumulative, random));
+ }
+ return (0);
+}
+
+/*
+ * Utility function to get random number per pack
+ */
+u64 pot_generate_random(pot_profile * profile)
+{
+ u64 random = 0;
+ int32_t second_half;
+ static u32 seed = 0;
+
+ if (PREDICT_FALSE(!seed))
+ seed = random_default_seed();
+
+ /*
+ * Upper 4 bytes seconds
+ */
+ random = (u64) time(NULL);
+
+ random &= 0xffffffff;
+ random = random << 32;
+ /*
+ * Lower 4 bytes random number
+ */
+ second_half = random_u32(&seed);
+
+ random |= second_half;
+
+ if (PREDICT_TRUE(profile != NULL))
+ {
+ random &= profile->bit_mask;
+ }
+ return (random);
+}
+
+int pot_profile_set_bit_mask(pot_profile * profile, u16 bits)
+{
+ int sizeInBits;
+
+ if (profile && !profile->in_use)
+ {
+ sizeInBits = sizeof(profile->bit_mask) * 8;
+ profile->bit_mask =
+ (bits >=
+ sizeInBits ? (u64) - 1 : (u64) ((u64) 1 << (u64) bits) - 1);
+ return(0);
+ }
+ return(-1);
+}
+
+clib_error_t *clear_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+
+ pot_main_profiles_reset();
+
+ return 0;
+}
+
+void clear_pot_profiles()
+{
+ clear_pot_profile_command_fn(0, 0, 0);
+}
+
+VLIB_CLI_COMMAND(clear_pot_profile_command) =
+{
+.path = "clear pot profile",
+.short_help = "clear pot profile [<index>|all]",
+.function = clear_pot_profile_command_fn,
+};
+
+static clib_error_t *set_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ u64 prime;
+ u64 secret_share;
+ u64 secret_key;
+ u8 validator = 0;
+ u32 profile_id = ~0;
+ u32 bits;
+ u64 lpc = 0, poly2 = 0;
+ pot_profile *profile = NULL;
+ u8 *profile_list_name = NULL;
+
+ bits = MAX_BITS;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s",
+ &profile_list_name));
+ else if (unformat(input, "id %d", &profile_id))
+ ;
+ else if (unformat(input, "validate-key 0x%Lx", &secret_key))
+ validator = 1;
+ else if (unformat(input, "prime-number 0x%Lx", &prime))
+ ;
+ else if (unformat(input, "secret_share 0x%Lx", &secret_share))
+ ;
+ else if (unformat(input, "polynomial2 0x%Lx", &poly2))
+ ;
+ else if (unformat(input, "lpc 0x%Lx", &lpc))
+ ;
+ else if (unformat(input, "bits-in-random %d", &bits))
+ {
+ if (bits > MAX_BITS)
+ bits = MAX_BITS;
+ }
+ else
+ break;
+ }
+ if (profile_list_name == 0)
+ {
+ return clib_error_return(0, "Name cannot be null");
+ }
+ pot_profile_list_init(profile_list_name);
+ profile = pot_profile_find(profile_id);
+
+ if (profile)
+ {
+ pot_profile_create(profile, prime, poly2, lpc, secret_share);
+ if (validator)
+ pot_set_validator(profile, secret_key);
+ pot_profile_set_bit_mask(profile, bits);
+ }
+ vec_free(profile_list_name);
+ return 0;
+}
+
+VLIB_CLI_COMMAND(set_pot_profile_command) =
+{
+.path = "set pot profile",
+.short_help = "set pot profile name <string> id [0-1] [validator-key 0xu64] \
+ prime-number 0xu64 secret_share 0xu64 lpc 0xu64 \
+ polynomial2 0xu64 bits-in-random [0-64] ",
+.function = set_pot_profile_command_fn,
+};
+
+static clib_error_t *set_pot_profile_activate_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pot_main_t *sm = &pot_main;
+ u8 *profile_list_name = NULL;
+ u32 id = 0;
+ clib_error_t *result = NULL;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(input, "name %s",
+ &profile_list_name));
+ else if (unformat(input, "id %d", &id))
+ ;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ if (profile_list_name == 0)
+ {
+ return clib_error_return(0, "Name cannot be null");
+ }
+
+ if (!pot_profile_list_is_enabled(profile_list_name)) {
+ result = clib_error_return(0, "%s list is not enabled, profile in use %s",
+ profile_list_name, sm->profile_list_name);
+ } else if (0 != pot_profile_set_active((u8)id)) {
+ result = clib_error_return(0, "Profile %d not defined in %s",
+ id, sm->profile_list_name);
+ }
+ vec_free(profile_list_name);
+ return result;
+}
+
+VLIB_CLI_COMMAND(set_pot_profile_activate_command) =
+{
+.path = "set pot profile-active",
+.short_help = "set pot profile-active name <string> id [0-1]",
+.function = set_pot_profile_activate_command_fn,
+};
+
+static clib_error_t *show_pot_profile_command_fn(vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *p = NULL;
+ u16 i;
+ u8 *s = 0;
+
+ if (vec_len(sm->profile_list_name) == 0)
+ {
+ s = format(s, "POT Profiles not configured\n");
+ vlib_cli_output(vm, "%v", s);
+ return 0;
+ }
+ s = format(s, "Profile list in use : %s\n",sm->profile_list_name);
+ for (i = 0; i < MAX_POT_PROFILES; i++)
+ {
+ p = pot_profile_find(i);
+ if (p->valid == 0)
+ continue;
+ s = format(s, "POT Profile at index: %d\n", i);
+ s = format(s, " Id : %d\n", p->id);
+ s = format(s, " Validator : %s (%d)\n",
+ (p->validator) ? "True" : "False", p->validator);
+ if (p->validator == 1)
+ s = format(s, " Secret key : 0x%Lx (%Ld)\n",
+ p->secret_key, p->secret_key);
+ s = format(s, " Secret share : 0x%Lx (%Ld)\n",
+ p->secret_share, p->secret_share);
+ s = format(s, " Prime number : 0x%Lx (%Ld)\n",
+ p->prime, p->prime);
+ s = format(s, "2nd polynomial(eval) : 0x%Lx (%Ld)\n",
+ p->poly_pre_eval, p->poly_pre_eval);
+ s = format(s, " LPC : 0x%Lx (%Ld)\n", p->lpc, p->lpc);
+
+ s = format(s, " Bit mask : 0x%Lx (%Ld)\n",
+ p->bit_mask, p->bit_mask);
+ }
+
+ p = pot_profile_find(sm->active_profile_id);
+
+ if (p && p->valid && p->in_use) {
+ s = format(s, "\nProfile index in use: %d\n", sm->active_profile_id);
+ s = format(s, "Pkts passed : 0x%Lx (%Ld)\n",
+ p->total_pkts_using_this_profile,
+ p->total_pkts_using_this_profile);
+ if (pot_is_decap(p))
+ s = format(s, " This is Decap node. \n");
+ } else {
+ s = format(s, "\nProfile index in use: None\n");
+ }
+ vlib_cli_output(vm, "%v", s);
+ vec_free(s);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND(show_pot_profile_command) =
+{
+.path = "show pot profile",
+.short_help = "show pot profile",
+.function = show_pot_profile_command_fn,
+};
diff --git a/src/plugins/ioam/lib-pot/pot_util.h b/src/plugins/ioam/lib-pot/pot_util.h
new file mode 100644
index 00000000000..9df31fae0df
--- /dev/null
+++ b/src/plugins/ioam/lib-pot/pot_util.h
@@ -0,0 +1,195 @@
+/*
+ * pot_util.h -- Proof Of Transit Utility Header
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef include_vnet_pot_util_h
+#define include_vnet_pot_util_h
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#define debug_ioam debug_ioam_fn
+/* Dont change this size 256. This is there across multiple components */
+#define PATH_NAME_SIZE 256
+
+/* Ring size. this should be same as the one in ODL. Do not change this
+ without change in ODL. */
+#define MAX_POT_PROFILES 2
+
+/**
+ * Usage:
+ *
+ * On any node that participates in Proof of Transit:
+ *
+ * Step 1: Initialize this library by calling pot_init()
+ * Step 2: Setup a proof of transit profile that contains all the parameters needed to compute cumulative:
+ * Call these functions:
+ * pot_profile_find
+ * pot_profile_create
+ * pot_profile_set_bit_mask - To setup how large we want the numbers used in the computation and random number <= 64 bits
+ * Step 2a: For validator do this:
+ * pot_set_validator
+ * Step 2b: On initial node enable the profile to be used:
+ * pot_profile_set_active / pot_profile_get_active will return the profile
+ * Step 3a: At the initial node to generate Random number that will be read by all other nodes:
+ * pot_generate_random
+ * Step 3b: At all nodes including initial and verifier call this to compute cumulative:
+ * pot_update_cumulative
+ * Step 4: At the verifier:
+ * pot_validate
+ *
+ */
+
+typedef struct pot_profile_
+{
+ u8 id : 1;
+ u8 valid : 1;
+ u8 in_use : 1;
+ u64 random;
+ u8 validator;
+ u64 secret_key;
+ u64 secret_share;
+ u64 prime;
+ u64 lpc;
+ u64 poly_pre_eval;
+ u64 bit_mask;
+ u64 limit;
+ double primeinv;
+ u64 total_pkts_using_this_profile;
+} pot_profile;
+
+typedef struct {
+ /* Name of the default profile list in use*/
+ u8 *profile_list_name;
+ pot_profile profile_list[MAX_POT_PROFILES];
+ /* number of profiles in the list */
+ u8 active_profile_id : 1;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} pot_main_t;
+
+extern pot_main_t pot_main;
+
+/*
+ * Initialize proof of transit
+ */
+int pot_util_init(void);
+void pot_profile_list_init(u8 * name);
+
+
+/*
+ * Find a pot profile by ID
+ */
+pot_profile *pot_profile_find(u8 id);
+
+static inline u16 pot_profile_get_id(pot_profile * profile)
+{
+ if (profile)
+ {
+ return (profile->id);
+ }
+ return (0);
+}
+
+/* setup and clean up profile */
+int pot_profile_create(pot_profile * profile, u64 prime,
+ u64 poly2, u64 lpc, u64 secret_share);
+/*
+ * Setup profile as a validator
+ */
+int pot_set_validator(pot_profile * profile, u64 key);
+
+/*
+ * Setup max bits to be used for random number generation
+ */
+#define MAX_BITS 64
+int pot_profile_set_bit_mask(pot_profile * profile, u16 bits);
+
+/*
+ * Given a random and cumulative compute the new cumulative for a given profile
+ */
+u64 pot_update_cumulative(pot_profile * profile, u64 cumulative, u64 random);
+
+/*
+ * return True if the cumulative matches secret from a profile
+ */
+u8 pot_validate(pot_profile * profile, u64 cumulative, u64 random);
+
+/*
+ * Utility function to get random number per pack
+ */
+u64 pot_generate_random(pot_profile * profile);
+
+
+extern void clear_pot_profiles();
+extern int pot_profile_list_is_enabled(u8 *name);
+
+static inline u8 pot_is_decap(pot_profile * p)
+{
+ return (p->validator == 1);
+}
+
+static inline int pot_profile_set_active (u8 id)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *profile = NULL;
+ pot_profile *current_active_prof = NULL;
+
+ current_active_prof = pot_profile_find(sm->active_profile_id);
+ profile = pot_profile_find(id);
+ if (profile && profile->valid) {
+ sm->active_profile_id = id;
+ current_active_prof->in_use = 0;
+ profile->in_use = 1;
+ return(0);
+ }
+ return(-1);
+}
+static inline u8 pot_profile_get_active_id (void)
+{
+ pot_main_t *sm = &pot_main;
+ return (sm->active_profile_id);
+}
+
+static inline pot_profile * pot_profile_get_active (void)
+{
+ pot_main_t *sm = &pot_main;
+ pot_profile *profile = NULL;
+ profile = pot_profile_find(sm->active_profile_id);
+ if (profile && profile->in_use)
+ return(profile);
+ return (NULL);
+}
+
+static inline void pot_profile_reset_usage_stats (pot_profile *pow)
+{
+ if (pow) {
+ pow->total_pkts_using_this_profile = 0;
+ }
+}
+
+static inline void pot_profile_incr_usage_stats (pot_profile *pow)
+{
+ if (pow) {
+ pow->total_pkts_using_this_profile++;
+ }
+}
+
+
+#endif
diff --git a/src/plugins/ioam/lib-trace/trace.api b/src/plugins/ioam/lib-trace/trace.api
new file mode 100644
index 00000000000..cb9583256f6
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace.api
@@ -0,0 +1,92 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief iOAM6 Trace - Set the iOAM6 trace profile
+ @param trace_type - Type of trace requested
+ @param num_elts - Number of trace elements to be inserted
+ @param node_id - Trace Node ID
+ @param trace_tsp- Timestamp resolution
+ @param app_data - Application specific opaque
+*/
+define trace_profile_add {
+ u32 client_index;
+ u32 context;
+ u8 trace_type;
+ u8 num_elts;
+ u8 trace_tsp;
+ u32 node_id;
+ u32 app_data;
+};
+
+/** \brief Trace profile add / del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define trace_profile_add_reply {
+ u32 context;
+ i32 retval;
+};
+
+
+
+/** \brief Delete trace Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define trace_profile_del {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Trace profile add / del response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define trace_profile_del_reply {
+ u32 context;
+ i32 retval;
+};
+
+
+
+/** \brief Show trace Profile
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define trace_profile_show_config {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show trace config response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param trace_type - Type of trace requested
+ @param num_elts - Number of trace elements to be inserted
+ @param node_id - Trace Node ID
+ @param trace_tsp- Timestamp resolution
+ @param app_data - Application specific opaque
+*/
+define trace_profile_show_config_reply {
+ u32 context;
+ i32 retval;
+ u8 trace_type;
+ u8 num_elts;
+ u8 trace_tsp;
+ u32 node_id;
+ u32 app_data;
+};
diff --git a/src/plugins/ioam/lib-trace/trace_all_api_h.h b/src/plugins/ioam/lib-trace/trace_all_api_h.h
new file mode 100644
index 00000000000..223f95450aa
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-trace/trace.api.h>
diff --git a/src/plugins/ioam/lib-trace/trace_api.c b/src/plugins/ioam/lib-trace/trace_api.c
new file mode 100644
index 00000000000..7e0d708e155
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_api.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * trace_api.c - iOAM Trace related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-trace/trace_util.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-trace/trace_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define TRACE_REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+/* *INDENT-OFF* */
+#define TRACE_REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+/* *INDENT-ON* */
+
+/* List of message types that this plugin understands */
+
+#define foreach_trace_plugin_api_msg \
+_(TRACE_PROFILE_ADD, trace_profile_add) \
+_(TRACE_PROFILE_DEL, trace_profile_del) \
+_(TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config)
+
+static void vl_api_trace_profile_add_t_handler
+ (vl_api_trace_profile_add_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ int rv = 0;
+ vl_api_trace_profile_add_reply_t *rmp;
+ trace_profile *profile = NULL;
+
+ profile = trace_profile_find ();
+ if (profile)
+ {
+ rv =
+ trace_profile_create (profile, mp->trace_type, mp->num_elts,
+ mp->trace_tsp, ntohl (mp->node_id),
+ ntohl (mp->app_data));
+ if (rv != 0)
+ goto ERROROUT;
+ }
+ else
+ {
+ rv = -3;
+ }
+ERROROUT:
+ TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_ADD_REPLY);
+}
+
+
+static void vl_api_trace_profile_del_t_handler
+ (vl_api_trace_profile_del_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ int rv = 0;
+ vl_api_trace_profile_del_reply_t *rmp;
+
+ clear_trace_profiles ();
+
+ TRACE_REPLY_MACRO (VL_API_TRACE_PROFILE_DEL_REPLY);
+}
+
+static void vl_api_trace_profile_show_config_t_handler
+ (vl_api_trace_profile_show_config_t * mp)
+{
+ trace_main_t *sm = &trace_main;
+ vl_api_trace_profile_show_config_reply_t *rmp;
+ int rv = 0;
+ trace_profile *profile = trace_profile_find ();
+ if (profile->valid)
+ {
+ TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY,
+ rmp->trace_type = profile->trace_type;
+ rmp->num_elts = profile->num_elts;
+ rmp->trace_tsp = profile->trace_tsp;
+ rmp->node_id = htonl (profile->node_id);
+ rmp->app_data = htonl (profile->app_data);
+ );
+ }
+ else
+ {
+ TRACE_REPLY_MACRO2 (VL_API_TRACE_PROFILE_SHOW_CONFIG_REPLY,
+ rmp->trace_type = 0;
+ rmp->num_elts = 0; rmp->trace_tsp = 0;
+ rmp->node_id = 0; rmp->app_data = 0;
+ );
+ }
+}
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ trace_main_t *sm = &trace_main;
+ clib_error_t *error = 0;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = h->vnet_main;
+ return error;
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+trace_plugin_api_hookup (vlib_main_t * vm)
+{
+ trace_main_t *sm = &trace_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_trace_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (trace_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_trace;
+#undef _
+}
+
+static clib_error_t *
+trace_init (vlib_main_t * vm)
+{
+ trace_main_t *sm = &trace_main;
+ clib_error_t *error = 0;
+ u8 *name;
+
+ bzero (sm, sizeof (trace_main));
+ (void) trace_util_init ();
+ name = format (0, "ioam_trace_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = trace_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (trace_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_msg_enum.h b/src/plugins/ioam/lib-trace/trace_msg_enum.h
new file mode 100644
index 00000000000..78c35665f3c
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_trace_msg_enum_h
+#define included_trace_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-trace/trace_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_trace_msg_enum_h */
diff --git a/src/plugins/ioam/lib-trace/trace_test.c b/src/plugins/ioam/lib-trace/trace_test.c
new file mode 100644
index 00000000000..111dd461b5b
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_test.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * trace_test.c - test harness for trace plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+/* Declare message IDs */
+#include <ioam/lib-trace/trace_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-trace/trace_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} trace_test_main_t;
+
+trace_test_main_t trace_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(trace_profile_add_reply) \
+_(trace_profile_del_reply)
+
+#define foreach_custom_reply_handler \
+_(trace_profile_show_config_reply, \
+ if(mp->trace_type) \
+ { \
+ errmsg(" Trace Type : 0x%x (%d)\n",mp->trace_type, mp->trace_type); \
+ errmsg(" Trace timestamp precision : %d \n",mp->trace_tsp); \
+ errmsg(" Node Id : 0x%x (%d)\n",htonl(mp->node_id), htonl(mp->node_id)); \
+ errmsg(" App Data : 0x%x (%d)\n",htonl(mp->app_data), htonl(mp->app_data)); \
+ } \
+ else errmsg("No valid trace profile configuration found\n");)
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = trace_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+#define _(n,body) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = trace_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ if(retval>=0)do{body;} while(0); \
+ else errmsg("Error, retval: %d",retval); \
+ }
+foreach_custom_reply_handler;
+#undef _
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(TRACE_PROFILE_ADD_REPLY, trace_profile_add_reply) \
+_(TRACE_PROFILE_DEL_REPLY, trace_profile_del_reply) \
+_(TRACE_PROFILE_SHOW_CONFIG_REPLY, trace_profile_show_config_reply)
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+
+static int
+api_trace_profile_add (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ unformat_input_t *input = vam->input;
+ vl_api_trace_profile_add_t *mp;
+ u8 trace_type = 0;
+ u8 num_elts = 0;
+ int rv = 0;
+ u32 node_id = 0;
+ u32 app_data = 0;
+ u8 trace_tsp = 0;
+ f64 timeout;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace-type 0x%x", &trace_type))
+ ;
+ else if (unformat (input, "trace-elts %d", &num_elts))
+ ;
+ else if (unformat (input, "trace-tsp %d", &trace_tsp))
+ ;
+ else if (unformat (input, "node-id 0x%x", &node_id))
+ ;
+ else if (unformat (input, "app-data 0x%x", &app_data))
+ ;
+
+ else
+ break;
+ }
+
+
+ M (TRACE_PROFILE_ADD, trace_profile_add);
+
+ mp->trace_type = trace_type;
+ mp->trace_tsp = trace_tsp;
+ mp->node_id = htonl (node_id);
+ mp->app_data = htonl (app_data);
+ mp->num_elts = num_elts;
+
+ S;
+ W;
+
+ return (rv);
+}
+
+
+
+static int
+api_trace_profile_del (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ vl_api_trace_profile_del_t *mp;
+ f64 timeout;
+
+ M (TRACE_PROFILE_DEL, trace_profile_del);
+ S;
+ W;
+ return 0;
+}
+
+static int
+api_trace_profile_show_config (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ vl_api_trace_profile_show_config_t *mp;
+ f64 timeout;
+ M (TRACE_PROFILE_SHOW_CONFIG, trace_profile_show_config);
+ S;
+ W;
+ return 0;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(trace_profile_add, ""\
+ "trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex>") \
+_(trace_profile_del, "[id <nn>]") \
+_(trace_profile_show_config, "[id <nn>]")
+
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ trace_test_main_t *sm = &trace_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_trace_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_util.c b/src/plugins/ioam/lib-trace/trace_util.c
new file mode 100644
index 00000000000..5c7f1eefd9c
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_util.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <vppinfra/mem.h>
+#include "trace_util.h"
+
+trace_main_t trace_main;
+
+static int
+trace_profile_cleanup (trace_profile * profile)
+{
+
+ memset (profile, 0, sizeof (trace_profile));
+ profile->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */
+ ip6_trace_profile_cleanup (); /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */
+ return 0;
+
+}
+
+static int
+trace_main_profiles_reset (void)
+{
+ int rv;
+
+ trace_main_t *sm = &trace_main;
+ rv = trace_profile_cleanup (&(sm->profile));
+ return (rv);
+}
+
+int
+trace_util_init (void)
+{
+ int rv;
+
+ rv = trace_main_profiles_reset ();
+ return (rv);
+}
+
+
+int
+trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
+ u32 trace_tsp, u32 node_id, u32 app_data)
+{
+
+ if (!trace_type || !num_elts || !(node_id))
+ {
+ return (-1);
+ }
+ if (profile && !profile->valid)
+ {
+ //rv = trace_profile_cleanup (profile);
+ profile->trace_type = trace_type;
+ profile->num_elts = num_elts;
+ profile->trace_tsp = trace_tsp;
+ profile->node_id = node_id;
+ profile->app_data = app_data;
+ profile->valid = 1;
+
+ /* lib-trace_TODO: Remove this once IOAM-IPv6 transport is a plugin */
+ ip6_trace_profile_setup ();
+ return (0);
+ }
+
+ return (-1);
+}
+
+
+
+clib_error_t *
+clear_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+
+ trace_main_profiles_reset ();
+ return 0;
+}
+
+void
+clear_trace_profiles (void)
+{
+ clear_trace_profile_command_fn (0, 0, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND(clear_trace_profile_command) =
+{
+.path = "clear ioam-trace profile",
+.short_help = "clear ioam-trace profile [<index>|all]",
+.function = clear_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u8 trace_type = 0;
+ u8 num_elts = 0;
+ u32 node_id = 0;
+ u32 app_data = 0;
+ u32 trace_tsp = 0;
+ trace_profile *profile = NULL;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace-type 0x%x", &trace_type));
+ else if (unformat (input, "trace-elts %d", &num_elts));
+ else if (unformat (input, "trace-tsp %d", &trace_tsp));
+ else if (unformat (input, "node-id 0x%x", &node_id));
+ else if (unformat (input, "app-data 0x%x", &app_data));
+ else
+ break;
+ }
+ profile = trace_profile_find ();
+ if (profile)
+ {
+ trace_profile_create (profile, trace_type, num_elts, trace_tsp,
+ node_id, app_data);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_trace_profile_command, static) =
+{
+.path = "set ioam-trace profile",
+.short_help = "set ioam-trace \
+ trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> \
+ node-id <node id in hex> app-data <app_data in hex>",
+.function = set_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_trace_profile_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ trace_profile *p = NULL;
+ u8 *s = 0;
+ p = trace_profile_find ();
+ if (!(p && p->valid))
+ {
+ s = format (s, "\nTrace configuration not valid\n");
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+ }
+ s = format (s, " HOP BY HOP OPTIONS - TRACE CONFIG - \n");
+ s = format (s, " Trace Type : 0x%x (%d)\n",
+ p->trace_type, p->trace_type);
+ s =
+ format (s, " Trace timestamp precision : %d (%s)\n",
+ p->trace_tsp,
+ (p->trace_tsp ==
+ TSP_SECONDS) ? "Seconds" : ((p->trace_tsp ==
+ TSP_MILLISECONDS) ?
+ "Milliseconds"
+ : (((p->trace_tsp ==
+ TSP_MICROSECONDS) ?
+ "Microseconds" :
+ "Nanoseconds"))));
+ s = format (s, " Num of trace nodes : %d\n", p->num_elts);
+ s =
+ format (s, " Node-id : 0x%x (%d)\n",
+ p->node_id, p->node_id);
+ s =
+ format (s, " App Data : 0x%x (%d)\n",
+ p->app_data, p->app_data);
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_trace_profile_command, static) =
+{
+.path = "show ioam-trace profile",
+.short_help = "show ioam-trace profile",
+.function = show_trace_profile_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h
new file mode 100644
index 00000000000..556f07ee3f1
--- /dev/null
+++ b/src/plugins/ioam/lib-trace/trace_util.h
@@ -0,0 +1,247 @@
+/*
+ * trace_util.h -- Trace Profile Utility header
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef include_vnet_trace_util_h
+#define include_vnet_trace_util_h
+
+#define debug_ioam debug_ioam_fn
+
+
+/**
+ * Usage:
+ *
+ * On any node that participates in iOAM Trace.
+ *
+ * Step 1: Initialize this library by calling trace_init()
+ * Step 2: Setup a trace profile that contains all the parameters needed to compute cumulative:
+ * Call these functions:
+ * trace_profile_find
+ * trace_profile_create
+ * Step 2a: On initial node enable the profile to be used:
+ * trace_profile_set_active / trace_profile_get_active will return the profile
+ * Step 4: TBD
+ * trace_validate
+ *
+ */
+
+typedef struct trace_profile_
+{
+ u8 valid:1;
+ u8 trace_type;
+ u8 num_elts;
+ /* Configured node-id */
+ u32 node_id;
+ u32 app_data;
+ u32 trace_tsp;
+} trace_profile;
+
+typedef struct
+{
+ /* Name of the default profile list in use */
+ trace_profile profile;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} trace_main_t;
+
+extern trace_main_t trace_main;
+
+/*
+ * Initialize Trace profile
+ */
+int trace_util_init (void);
+
+
+/*
+ * Find a trace profile
+ */
+
+always_inline trace_profile *
+trace_profile_find (void)
+{
+ trace_main_t *sm = &trace_main;
+
+ return (&(sm->profile));
+}
+
+
+/* setup and clean up profile */
+int trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
+ u32 trace_tsp, u32 node_id, u32 app_data);
+
+void clear_trace_profiles (void);
+
+
+
+#define BIT_TTL_NODEID (1<<0)
+#define BIT_ING_INTERFACE (1<<1)
+#define BIT_EGR_INTERFACE (1<<2)
+#define BIT_TIMESTAMP (1<<3)
+#define BIT_APPDATA (1<<4)
+#define TRACE_TYPE_MASK 0x1F /* Mask of all above bits */
+
+/*
+ 0x00011111 iOAM-trace-type is 0x00011111 then the format of node
+ data is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ingress_if_id | egress_if_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+#define TRACE_TYPE_IF_TS_APP 0x1f
+typedef struct
+{
+ u32 ttl_node_id;
+ u16 ingress_if;
+ u16 egress_if;
+ u32 timestamp;
+ u32 app_data;
+} ioam_trace_if_ts_app_t;
+
+/*
+ 0x00000111 iOAM-trace-type is 0x00000111 then the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ingress_if_id | egress_if_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+#define TRACE_TYPE_IF 0x03
+typedef struct
+{
+ u32 ttl_node_id;
+ u16 ingress_if;
+ u16 egress_if;
+} ioam_trace_if_t;
+
+/*
+ 0x00001001 iOAM-trace-type is 0x00001001 then the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+#define TRACE_TYPE_TS 0x09
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 timestamp;
+} ioam_trace_ts_t;
+
+/*
+ 0x00010001 iOAM-trace-type is 0x00010001 then the format is:
+
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+
+#define TRACE_TYPE_APP 0x11
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 app_data;
+} ioam_trace_app_t;
+
+/*
+
+ 0x00011001 iOAM-trace-type is 0x00011001 then the format is:
+
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Hop_Lim | node_id |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ + timestamp +
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | app_data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+#define TRACE_TYPE_TS_APP 0x19
+typedef struct
+{
+ u32 ttl_node_id;
+ u32 timestamp;
+ u32 app_data;
+} ioam_trace_ts_app_t;
+
+
+
+static inline u8
+fetch_trace_data_size (u8 trace_type)
+{
+ u8 trace_data_size = 0;
+
+ if (trace_type == TRACE_TYPE_IF_TS_APP)
+ trace_data_size = sizeof (ioam_trace_if_ts_app_t);
+ else if (trace_type == TRACE_TYPE_IF)
+ trace_data_size = sizeof (ioam_trace_if_t);
+ else if (trace_type == TRACE_TYPE_TS)
+ trace_data_size = sizeof (ioam_trace_ts_t);
+ else if (trace_type == TRACE_TYPE_APP)
+ trace_data_size = sizeof (ioam_trace_app_t);
+ else if (trace_type == TRACE_TYPE_TS_APP)
+ trace_data_size = sizeof (ioam_trace_ts_app_t);
+
+ return trace_data_size;
+}
+
+int ioam_trace_get_sizeof_handler (u32 * result);
+int ip6_trace_profile_setup (void);
+int ip6_trace_profile_cleanup (void);
+
+#define TSP_SECONDS 0
+#define TSP_MILLISECONDS 1
+#define TSP_MICROSECONDS 2
+#define TSP_NANOSECONDS 3
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
new file mode 100644
index 00000000000..fd3086571eb
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_decap_ioam_v4_error \
+_(DECAPSULATED, "good packets decapsulated")
+
+static char *vxlan_gpe_decap_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_decap_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_DECAP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_decap_ioam_v4_error
+#undef _
+ VXLAN_GPE_DECAP_IOAM_V4_N_ERROR,
+} vxlan_gpe_decap_ioam_v4_error_t;
+
+
+always_inline void
+vxlan_gpe_decap_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ next0[0] = next1[0] = hm->decap_v4_next_override;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, &next0[0],
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, &next0[1],
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+}
+
+
+
+static uword
+vxlan_gpe_decap_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_ipv6)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ next0 = next1 = hm->decap_v4_next_override;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+ vlib_buffer_advance (b1,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+
+ vxlan_gpe_decap_ioam_v4_two_inline (vm, node, ngm, b0, b1,
+ &next0, &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = hm->decap_v4_next_override;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+
+ vlib_buffer_advance (b0,
+ -(word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t)));
+
+ next0 = hm->decap_v4_next_override;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0,
+ &next0,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+static uword
+vxlan_gpe_decap_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_decap_ioam (vm, node, from_frame, 0);
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
+ .function = vxlan_gpe_decap_ioam_v4,
+ .name = "vxlan-gpe-decap-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_decap_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_decap_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_DECAP_IOAM_V4_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP] = "vxlan-gpe-pop-ioam-v4",
+ [VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
new file mode 100644
index 00000000000..4b18bfea533
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_encap_ioam_v4_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_gpe_encap_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_encap_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_ENCAP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_encap_ioam_v4_error
+#undef _
+ VXLAN_GPE_ENCAP_IOAM_V4_N_ERROR,
+} vxlan_gpe_encap_ioam_v4_error_t;
+
+typedef enum
+{
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT
+} vxlan_gpe_encap_ioam_v4_next_t;
+
+
+always_inline void
+vxlan_gpe_encap_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+ next0[0] = next1[0] = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0, next0,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b1, next1,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+}
+
+
+static uword
+vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ next0 = next1 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vxlan_gpe_encap_ioam_v4_two_inline (vm, node, ngm, b0, b1,
+ &next0, &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node, b0,
+ &next0,
+ VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP,
+ 0 /* use_adj */ );
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *tr = vlib_add_trace (vm, node, b0,
+ sizeof (*tr));
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
+ .function = vxlan_gpe_encap_ioam_v4,
+ .name = "vxlan-gpe-encap-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_encap_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_encap_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_ENCAP_IOAM_V4_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
new file mode 100644
index 00000000000..55c33b144a1
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_pop_ioam_v4_error \
+_(POPPED, "good packets popped")
+
+static char *vxlan_gpe_pop_ioam_v4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_pop_ioam_v4_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_POP_IOAM_V4_ERROR_##sym,
+ foreach_vxlan_gpe_pop_ioam_v4_error
+#undef _
+ VXLAN_GPE_POP_IOAM_V4_N_ERROR,
+} vxlan_gpe_pop_ioam_v4_error_t;
+
+typedef struct
+{
+ ioam_trace_t fmt_trace;
+} vxlan_gpe_pop_ioam_v4_trace_t;
+
+
+u8 *
+format_vxlan_gpe_pop_ioam_v4_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_pop_ioam_v4_trace_t *t1
+ = va_arg (*args, vxlan_gpe_pop_ioam_v4_trace_t *);
+ ioam_trace_t *t = &(t1->fmt_trace);
+ vxlan_gpe_ioam_option_t *fmt_trace0;
+ vxlan_gpe_ioam_option_t *opt0, *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ u8 type0;
+
+ fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data;
+
+ s = format (s, "VXLAN_GPE_IOAM_POP: next_index %d len %d traced %d",
+ t->next_index, fmt_trace0->length, t->trace_len);
+
+ opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) fmt_trace0) + t->trace_len;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad, just stop */
+ opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1;
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_option_t));
+ break;
+ }
+ }
+
+ return s;
+}
+
+always_inline void
+vxlan_gpe_ioam_pop_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_buffer_t * b0)
+{
+ ip4_header_t *ip0;
+ udp_header_t *udp_hdr0;
+ vxlan_gpe_header_t *gpe_hdr0;
+ vxlan_gpe_ioam_hdr_t *gpe_ioam0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+
+ /* Pop the iOAM data */
+ vlib_buffer_advance (b0,
+ (word) (sizeof (udp_header_t) +
+ sizeof (ip4_header_t) +
+ sizeof (vxlan_gpe_header_t) +
+ gpe_ioam0->length));
+
+ return;
+}
+
+
+
+always_inline void
+vxlan_gpe_pop_ioam_v4_one_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, u32 * next0)
+{
+ CLIB_UNUSED (ip4_header_t * ip0);
+ CLIB_UNUSED (udp_header_t * udp_hdr0);
+ CLIB_UNUSED (vxlan_gpe_header_t * gpe_hdr0);
+ CLIB_UNUSED (vxlan_gpe_ioam_hdr_t * gpe_ioam0);
+ CLIB_UNUSED (vxlan_gpe_ioam_option_t * opt0);
+ CLIB_UNUSED (vxlan_gpe_ioam_option_t * limit0);
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+
+ /* Pop the iOAM header */
+ ip0 = vlib_buffer_get_current (b0);
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+ opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length);
+
+ /*
+ * Basic validity checks
+ */
+ if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length))
+ {
+ next0[0] = VXLAN_GPE_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ u8 type0;
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1;
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->pop_options[type0])
+ {
+ if ((*hm->pop_options[type0]) (ip0, opt0) < 0)
+ {
+ next0[0] = VXLAN_GPE_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ }
+ break;
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_hdr_t));
+ }
+
+
+ next0[0] =
+ (gpe_ioam0->protocol < VXLAN_GPE_PROTOCOL_MAX) ?
+ ngm->
+ decap_next_node_list[gpe_ioam0->protocol] : VXLAN_GPE_INPUT_NEXT_DROP;
+
+trace00:
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_pop_ioam_v4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = gpe_ioam0->length;
+ t->fmt_trace.next_index = next0[0];
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->fmt_trace.
+ option_data) ? trace_len : ARRAY_LEN (t->fmt_trace.
+ option_data);
+ t->fmt_trace.trace_len = trace_len;
+ clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len);
+ }
+
+ /* Remove the iOAM header inside the VxLAN-GPE header */
+ vxlan_gpe_ioam_pop_v4 (vm, node, b0);
+ return;
+}
+
+always_inline void
+vxlan_gpe_pop_ioam_v4_two_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vxlan_gpe_main_t * ngm,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1)
+{
+
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, next0);
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b1, next1);
+}
+
+
+
+static uword
+vxlan_gpe_pop_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u8 is_ipv6)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ vxlan_gpe_pop_ioam_v4_two_inline (vm, node, ngm, b0, b1, &next0,
+ &next1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ vxlan_gpe_pop_ioam_v4_one_inline (vm, node, ngm, b0, &next0);
+
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+
+static uword
+vxlan_gpe_pop_ioam_v4 (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return vxlan_gpe_pop_ioam (vm, node, from_frame, 0);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
+ .function = vxlan_gpe_pop_ioam_v4,
+ .name = "vxlan-gpe-pop-ioam-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_pop_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_pop_ioam_v4_error_strings),
+ .error_strings = vxlan_gpe_pop_ioam_v4_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_INPUT_N_NEXT,
+
+ .next_nodes = {
+#define _(s,n) [VXLAN_GPE_INPUT_NEXT_##s] = n,
+ foreach_vxlan_gpe_input_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
new file mode 100644
index 00000000000..b42c357c79b
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
@@ -0,0 +1,188 @@
+ /*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/udp.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+
+/* Statistics (not really errors) */
+#define foreach_vxlan_gpe_transit_ioam_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_gpe_transit_ioam_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_transit_ioam_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_TRANSIT_IOAM_ERROR_##sym,
+ foreach_vxlan_gpe_transit_ioam_error
+#undef _
+ VXLAN_GPE_TRANSIT_IOAM_N_ERROR,
+} vxlan_gpe_transit_ioam_error_t;
+
+typedef enum
+{
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT,
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP,
+ VXLAN_GPE_TRANSIT_IOAM_N_NEXT
+} vxlan_gpe_transit_ioam_next_t;
+
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (vxlan_gpe_transit_ioam, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "vxlan-gpe-transit-ioam",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+/* *INDENT-ON* */
+
+static uword
+vxlan_gpe_transit_ioam (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ ip4_header_t *ip0;
+ u32 iph_offset = 0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ iph_offset = vnet_buffer (b0)->ip.save_rewrite_length;
+ ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0)
+ + iph_offset);
+
+ /* just forward non ipv4 packets */
+ if (PREDICT_FALSE
+ ((ip0->ip_version_and_header_length & 0xF0) == 0x40))
+ {
+ /* ipv4 packets */
+ udp_header_t *udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ if (PREDICT_FALSE
+ ((ip0->protocol == IP_PROTOCOL_UDP) &&
+ (clib_net_to_host_u16 (udp_hdr0->dst_port) ==
+ UDP_DST_PORT_vxlan_gpe)))
+ {
+
+ /* Check the iOAM header */
+ vxlan_gpe_header_t *gpe_hdr0 =
+ (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+
+ if (PREDICT_FALSE
+ (gpe_hdr0->protocol == VXLAN_GPE_PROTOCOL_IOAM))
+ {
+ uword *t = NULL;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ fib_prefix_t key4;
+ memset (&key4, 0, sizeof (key4));
+ key4.fp_proto = FIB_PROTOCOL_IP4;
+ key4.fp_addr.ip4.as_u32 = ip0->dst_address.as_u32;
+ t = hash_get_mem (hm->dst_by_ip4, &key4);
+ if (t)
+ {
+
+
+ vlib_buffer_advance (b0,
+ (word) (sizeof
+ (ethernet_header_t)));
+ vxlan_gpe_encap_decap_ioam_v4_one_inline (vm, node,
+ b0,
+ &next0,
+ VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP,
+ 1
+ /* use_adj */
+ );
+ vlib_buffer_advance (b0,
+ -(word) (sizeof
+ (ethernet_header_t)));
+ }
+ }
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
+ .function = vxlan_gpe_transit_ioam,
+ .name = "vxlan-gpe-transit-ioam",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_gpe_ioam_v4_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(vxlan_gpe_transit_ioam_error_strings),
+ .error_strings = vxlan_gpe_transit_ioam_error_strings,
+
+ .n_next_nodes = VXLAN_GPE_TRANSIT_IOAM_N_NEXT,
+
+ .next_nodes = {
+ [VXLAN_GPE_TRANSIT_IOAM_NEXT_OUTPUT] = "interface-output",
+ [VXLAN_GPE_TRANSIT_IOAM_NEXT_DROP] = "error-drop",
+ },
+
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
new file mode 100644
index 00000000000..056529a4e8a
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api
@@ -0,0 +1,181 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VxLAN-GPE
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - profile id
+ @param trace_ppc - Trace PPC (none/encap/decap)
+ @param pow_enable - Proof of Work enabled or not flag
+ @param trace_enable - iOAM Trace enabled or not flag
+
+*/
+define vxlan_gpe_ioam_enable {
+ u32 client_index;
+ u32 context;
+ u16 id;
+ u8 trace_ppc;
+ u8 pow_enable;
+ u8 trace_enable;
+};
+
+/** \brief iOAM Over VxLAN-GPE - Set iOAM transport for VXLAN-GPE reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define vxlan_gpe_ioam_enable_reply {
+ u32 context;
+ i32 retval;
+};
+
+
+/** \brief iOAM for VxLAN-GPE disable
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - profile id
+*/
+define vxlan_gpe_ioam_disable
+{
+ u32 client_index;
+ u32 context;
+ u16 id;
+};
+
+/** \brief vxlan_gpe_ioam disable response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define vxlan_gpe_ioam_disable_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Enable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - VXLAN-GPE VNI
+ @param local - IPv4/6 Address of the local VTEP
+ @param remote - IPv4/6 Address of the remote VTEP
+
+*/
+define vxlan_gpe_ioam_vni_enable {
+ u32 client_index;
+ u32 context;
+ u32 vni;
+ u8 local[16];
+ u8 remote[16];
+ u8 is_ipv6;
+};
+
+/** \brief Reply to enable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+
+*/
+define vxlan_gpe_ioam_vni_enable_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Disable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vni - VXLAN-GPE VNI
+ @param local - IPv4/6 Address of the local VTEP
+ @param remote - IPv4/6 Address of the remote VTEP
+
+*/
+define vxlan_gpe_ioam_vni_disable {
+ u32 client_index;
+ u32 context;
+ u32 vni;
+ u8 local[16];
+ u8 remote[16];
+ u8 is_ipv6;
+};
+
+/** \brief Reply to disable iOAM for a VNI (VXLAN-GPE)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+
+*/
+define vxlan_gpe_ioam_vni_disable_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+};
+
+
+/** \brief Enable iOAM for a VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param dst_addr - IPv4/6 Address of the local VTEP
+ @param outer_fib_index- FIB index
+
+*/
+define vxlan_gpe_ioam_transit_enable {
+ u32 client_index;
+ u32 context;
+ u32 outer_fib_index;
+ u8 dst_addr[16];
+ u8 is_ipv6;
+};
+
+/** \brief Reply to enable iOAM for VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+
+*/
+define vxlan_gpe_ioam_transit_enable_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Disable iOAM for VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param dst_addr - IPv4/6 Address of the local VTEP
+ @param outer_fib_index- FIB index
+
+*/
+define vxlan_gpe_ioam_transit_disable {
+ u32 client_index;
+ u32 context;
+ u32 outer_fib_index;
+ u8 dst_addr[16];
+ u8 is_ipv6;
+};
+
+/** \brief Reply to disable iOAM for VXLAN-GPE transit
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+
+*/
+define vxlan_gpe_ioam_transit_disable_reply {
+ u32 client_index;
+ u32 context;
+ i32 retval;
+};
+
+
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h
new file mode 100644
index 00000000000..06fa0d2cd34
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api.h>
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
new file mode 100644
index 00000000000..68752365f82
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_api.c - iOAM VxLAN-GPE related APIs to create
+ * and maintain profiles
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+/* define message IDs */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define VXLAN_GPE_REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+/* *INDENT-OFF* */
+#define VXLAN_GPE_REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q; \
+ rv = vl_msg_api_pd_handler (mp, rv); \
+ q = vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+/* *INDENT-ON* */
+
+/* List of message types that this plugin understands */
+
+#define foreach_vxlan_gpe_plugin_api_msg \
+_(VXLAN_GPE_IOAM_ENABLE, vxlan_gpe_ioam_enable) \
+_(VXLAN_GPE_IOAM_DISABLE, vxlan_gpe_ioam_disable) \
+_(VXLAN_GPE_IOAM_VNI_ENABLE, vxlan_gpe_ioam_vni_enable) \
+_(VXLAN_GPE_IOAM_VNI_DISABLE, vxlan_gpe_ioam_vni_disable) \
+_(VXLAN_GPE_IOAM_TRANSIT_ENABLE, vxlan_gpe_ioam_transit_enable) \
+_(VXLAN_GPE_IOAM_TRANSIT_DISABLE, vxlan_gpe_ioam_transit_disable) \
+
+
+static void vl_api_vxlan_gpe_ioam_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+
+ /* Ignoring the profile id as currently a single profile
+ * is supported */
+ error =
+ vxlan_gpe_ioam_enable (mp->trace_enable, mp->pow_enable, mp->trace_ppc);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_ENABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_disable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+
+ /* Ignoring the profile id as currently a single profile
+ * is supported */
+ error = vxlan_gpe_ioam_disable (0, 0, 0);
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_DISABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_vni_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan4_gpe_tunnel_key_t key4;
+ uword *p = NULL;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 vni;
+
+
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&key4.local, &mp->local, sizeof (key4.local));
+ clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
+ vni = clib_net_to_host_u32 (mp->vni);
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ return;
+ }
+
+ if (!p)
+ return;
+
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+
+ error = vxlan_gpe_ioam_set (t, hm->has_trace_option,
+ hm->has_pot_option,
+ hm->has_ppc_option, mp->is_ipv6);
+
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_ENABLE_REPLY);
+}
+
+
+static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_vni_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_vni_enable_reply_t *rmp;
+ clib_error_t *error;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ vxlan4_gpe_tunnel_key_t key4;
+ uword *p = NULL;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ u32 vni;
+
+
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&key4.local, &mp->local, sizeof (key4.local));
+ clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
+ vni = clib_net_to_host_u32 (mp->vni);
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ return;
+ }
+
+ if (!p)
+ return;
+
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+
+ error = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0);
+
+
+ if (error)
+ {
+ clib_error_report (error);
+ rv = clib_error_get_code (error);
+ }
+
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_VNI_DISABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_transit_enable_t_handler
+ (vl_api_vxlan_gpe_ioam_transit_enable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_transit_enable_reply_t *rmp;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+
+ memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4));
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4));
+ }
+ rv = vxlan_gpe_enable_disable_ioam_for_dest (sm->vlib_main,
+ dst_addr,
+ ntohl (mp->outer_fib_index),
+ mp->is_ipv6 ? 0 : 1,
+ 1 /* is_add */ );
+
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY);
+}
+
+static void vl_api_vxlan_gpe_ioam_transit_disable_t_handler
+ (vl_api_vxlan_gpe_ioam_transit_disable_t * mp)
+{
+ int rv = 0;
+ vl_api_vxlan_gpe_ioam_transit_disable_reply_t *rmp;
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+
+ memset (&dst_addr.ip4, 0, sizeof (dst_addr.ip4));
+ if (!mp->is_ipv6)
+ {
+ clib_memcpy (&dst_addr.ip4, &mp->dst_addr, sizeof (dst_addr.ip4));
+ }
+
+ rv = vxlan_gpe_ioam_disable_for_dest (sm->vlib_main,
+ dst_addr,
+ ntohl (mp->outer_fib_index),
+ mp->is_ipv6 ? 0 : 1);
+ VXLAN_GPE_REPLY_MACRO (VL_API_VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY);
+}
+
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ clib_error_t *error = 0;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = h->vnet_main;
+ sm->unix_time_0 = (u32) time (0); /* Store starting time */
+ sm->vlib_time_0 = vlib_time_now (vm);
+ return error;
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+vxlan_gpe_plugin_api_hookup (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vxlan_gpe_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+static clib_error_t *
+vxlan_gpe_init (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_main_t *sm = &vxlan_gpe_ioam_main;
+ clib_error_t *error = 0;
+ u8 *name;
+ u32 encap_node_index = vxlan_gpe_encap_ioam_v4_node.index;
+ u32 decap_node_index = vxlan_gpe_decap_ioam_v4_node.index;
+ vlib_node_t *vxlan_gpe_encap_node = NULL;
+ vlib_node_t *vxlan_gpe_decap_node = NULL;
+ uword next_node = 0;
+
+ name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ error = vxlan_gpe_plugin_api_hookup (vm);
+
+ /* Hook the ioam-encap node to vxlan-gpe-encap */
+ vxlan_gpe_encap_node = vlib_get_node_by_name (vm, (u8 *) "vxlan-gpe-encap");
+ sm->encap_v4_next_node =
+ vlib_node_add_next (vm, vxlan_gpe_encap_node->index, encap_node_index);
+
+ vxlan_gpe_decap_node =
+ vlib_get_node_by_name (vm, (u8 *) "vxlan4-gpe-input");
+ next_node =
+ vlib_node_add_next (vm, vxlan_gpe_decap_node->index, decap_node_index);
+ vxlan_gpe_register_decap_protocol (VXLAN_GPE_PROTOCOL_IOAM, next_node);
+
+ vec_new (vxlan_gpe_ioam_sw_interface_t, pool_elts (sm->sw_interfaces));
+ sm->dst_by_ip4 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword));
+
+ sm->dst_by_ip6 = hash_create_mem (0, sizeof (fib_prefix_t), sizeof (uword));
+
+ vxlan_gpe_ioam_interface_init ();
+ vec_free (name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
new file mode 100644
index 00000000000..6c04d9af210
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
@@ -0,0 +1,773 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/format.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+
+vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
+
+int
+vxlan_gpe_ioam_set_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option,
+ int has_pot_option, int has_ppc_option,
+ u8 ipv6_set)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 size;
+ vxlan_gpe_ioam_hdr_t *vxlan_gpe_ioam_hdr;
+ u8 *current;
+ u8 trace_data_size = 0;
+ u8 pot_data_size = 0;
+
+ if (has_trace_option == 0 && has_pot_option == 0)
+ return -1;
+
+ /* Work out how much space we need */
+ size = sizeof (vxlan_gpe_ioam_hdr_t);
+
+ if (has_trace_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0)
+ {
+ size += sizeof (vxlan_gpe_ioam_option_t);
+ size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE];
+ }
+ if (has_pot_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ size += sizeof (vxlan_gpe_ioam_option_t);
+ size += hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ }
+
+ t->rewrite_size = size;
+
+ if (!ipv6_set)
+ {
+ vxlan4_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM,
+ hm->encap_v4_next_node);
+ vxlan_gpe_ioam_hdr =
+ (vxlan_gpe_ioam_hdr_t *) (t->rewrite +
+ sizeof (ip4_vxlan_gpe_header_t));
+ }
+ else
+ {
+ vxlan6_gpe_rewrite (t, size, VXLAN_GPE_PROTOCOL_IOAM,
+ VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ vxlan_gpe_ioam_hdr =
+ (vxlan_gpe_ioam_hdr_t *) (t->rewrite +
+ sizeof (ip6_vxlan_gpe_header_t));
+ }
+
+
+ vxlan_gpe_ioam_hdr->type = VXLAN_GPE_PROTOCOL_IOAM;
+ /* Length of the header in octets */
+ vxlan_gpe_ioam_hdr->length = size;
+ vxlan_gpe_ioam_hdr->protocol = t->protocol;
+ current = (u8 *) vxlan_gpe_ioam_hdr + sizeof (vxlan_gpe_ioam_hdr_t);
+
+ if (has_trace_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] != 0)
+ {
+ if (0 != hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] (current,
+ &trace_data_size))
+ return -1;
+ current += trace_data_size;
+ }
+ if (has_pot_option
+ && hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+ {
+ pot_data_size =
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+ if (0 ==
+ hm->add_options[VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]
+ (current, &pot_data_size))
+ current += pot_data_size;
+ }
+
+ return 0;
+}
+
+int
+vxlan_gpe_ioam_clear_rewrite (vxlan_gpe_tunnel_t * t, int has_trace_option,
+ int has_pot_option, int has_ppc_option,
+ u8 ipv6_set)
+{
+
+ t->rewrite_size = 0;
+
+ if (!ipv6_set)
+ {
+ vxlan4_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP4_LOOKUP);
+ }
+ else
+ {
+ vxlan6_gpe_rewrite (t, 0, 0, VXLAN_GPE_ENCAP_NEXT_IP6_LOOKUP);
+ }
+
+
+ return 0;
+}
+
+clib_error_t *
+vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set)
+{
+ int rv;
+ rv = vxlan_gpe_ioam_clear_rewrite (t, 0, 0, 0, 0);
+
+ if (rv == 0)
+ {
+ return (0);
+ }
+ else
+ {
+ return clib_error_return_code (0, rv, 0,
+ "vxlan_gpe_ioam_clear_rewrite returned %d",
+ rv);
+ }
+
+}
+
+
+clib_error_t *
+vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set)
+{
+ int rv;
+ rv = vxlan_gpe_ioam_set_rewrite (t, has_trace_option,
+ has_pot_option, has_ppc_option, ipv6_set);
+
+ if (rv == 0)
+ {
+ return (0);
+ }
+ else
+ {
+ return clib_error_return_code (0, rv, 0,
+ "vxlan_gpe_ioam_set_rewrite returned %d",
+ rv);
+ }
+
+}
+
+static void
+vxlan_gpe_set_clear_output_feature_on_intf (vlib_main_t * vm,
+ u32 sw_if_index0, u8 is_add)
+{
+
+
+
+ vnet_feature_enable_disable ("ip4-output", "vxlan-gpe-transit-ioam",
+ sw_if_index0, is_add,
+ 0 /* void *feature_config */ ,
+ 0 /* u32 n_feature_config_bytes */ );
+ return;
+}
+
+void
+vxlan_gpe_clear_output_feature_on_all_intfs (vlib_main_t * vm)
+{
+ vnet_sw_interface_t *si = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ pool_foreach (si, im->sw_interfaces, (
+ {
+ vxlan_gpe_set_clear_output_feature_on_intf
+ (vm, si->sw_if_index, 0);
+ }));
+ return;
+}
+
+
+extern fib_forward_chain_type_t
+fib_entry_get_default_chain_type (const fib_entry_t * fib_entry);
+
+int
+vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm,
+ ip46_address_t dst_addr,
+ u32 outer_fib_index,
+ u8 is_ipv4, u8 is_add)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 fib_index0 = 0;
+ u32 sw_if_index0 = ~0;
+
+ fib_node_index_t fei = ~0;
+ fib_entry_t *fib_entry;
+ u32 adj_index0;
+ ip_adjacency_t *adj0;
+ fib_prefix_t fib_prefix;
+ //fib_forward_chain_type_t fct;
+ load_balance_t *lb_m, *lb_b;
+ const dpo_id_t *dpo0, *dpo1;
+ u32 i, j;
+ //vnet_hw_interface_t *hw;
+
+ if (is_ipv4)
+ {
+ memset (&fib_prefix, 0, sizeof (fib_prefix_t));
+ fib_prefix.fp_len = 32;
+ fib_prefix.fp_proto = FIB_PROTOCOL_IP4;
+ fib_prefix.fp_addr = dst_addr;
+ }
+ else
+ {
+ return 0;
+ }
+
+ fei = fib_table_lookup (fib_index0, &fib_prefix);
+ fib_entry = fib_entry_get (fei);
+
+ //fct = fib_entry_get_default_chain_type (fib_entry);
+
+ if (!dpo_id_is_valid (&fib_entry->fe_lb /*[fct] */ ))
+ {
+ return (-1);
+ }
+
+ lb_m = load_balance_get (fib_entry->fe_lb /*[fct] */ .dpoi_index);
+
+ for (i = 0; i < lb_m->lb_n_buckets; i++)
+ {
+ dpo0 = load_balance_get_bucket_i (lb_m, i);
+
+ if (dpo0->dpoi_type == DPO_LOAD_BALANCE)
+ {
+ lb_b = load_balance_get (dpo0->dpoi_index);
+
+ for (j = 0; j < lb_b->lb_n_buckets; j++)
+ {
+ dpo1 = load_balance_get_bucket_i (lb_b, j);
+
+ if (dpo1->dpoi_type == DPO_ADJACENCY)
+ {
+ adj_index0 = dpo1->dpoi_index;
+
+ if (ADJ_INDEX_INVALID == adj_index0)
+ {
+ continue;
+ }
+
+ adj0 =
+ ip_get_adjacency (&(ip4_main.lookup_main), adj_index0);
+ sw_if_index0 = adj0->rewrite_header.sw_if_index;
+
+ if (~0 == sw_if_index0)
+ {
+ continue;
+ }
+
+
+ if (is_add)
+ {
+ vnet_feature_enable_disable ("ip4-output",
+ "vxlan-gpe-transit-ioam",
+ sw_if_index0, is_add, 0
+ /* void *feature_config */
+ , 0 /* u32 n_feature_config_bytes */
+ );
+
+ vec_validate_init_empty (hm->bool_ref_by_sw_if_index,
+ sw_if_index0, ~0);
+ hm->bool_ref_by_sw_if_index[sw_if_index0] = 1;
+ }
+ else
+ {
+ hm->bool_ref_by_sw_if_index[sw_if_index0] = ~0;
+ }
+ }
+ }
+ }
+ }
+
+ if (is_ipv4)
+ {
+
+ uword *t = NULL;
+ vxlan_gpe_ioam_dest_tunnels_t *t1;
+ fib_prefix_t key4, *key4_copy;
+ hash_pair_t *hp;
+ memset (&key4, 0, sizeof (key4));
+ key4.fp_proto = FIB_PROTOCOL_IP4;
+ key4.fp_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32;
+ t = hash_get_mem (hm->dst_by_ip4, &key4);
+ if (is_add)
+ {
+ if (t)
+ {
+ return 0;
+ }
+ pool_get_aligned (hm->dst_tunnels, t1, CLIB_CACHE_LINE_BYTES);
+ memset (t1, 0, sizeof (*t1));
+ t1->fp_proto = FIB_PROTOCOL_IP4;
+ t1->dst_addr.ip4.as_u32 = fib_prefix.fp_addr.ip4.as_u32;
+ key4_copy = clib_mem_alloc (sizeof (*key4_copy));
+ clib_memcpy (key4_copy, &key4, sizeof (*key4_copy));
+ hash_set_mem (hm->dst_by_ip4, key4_copy, t1 - hm->dst_tunnels);
+ /*
+ * Attach to the FIB entry for the VxLAN-GPE destination
+ * and become its child. The dest route will invoke a callback
+ * when the fib entry changes, it can be used to
+ * re-program the output feature on the egress interface.
+ */
+
+ const fib_prefix_t tun_dst_pfx = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {.ip4 = t1->dst_addr.ip4,}
+ };
+
+ t1->fib_entry_index =
+ fib_table_entry_special_add (outer_fib_index,
+ &tun_dst_pfx,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+ t1->sibling_index =
+ fib_entry_child_add (t1->fib_entry_index,
+ hm->fib_entry_type, t1 - hm->dst_tunnels);
+ t1->outer_fib_index = outer_fib_index;
+
+ }
+ else
+ {
+ if (!t)
+ {
+ return 0;
+ }
+ t1 = pool_elt_at_index (hm->dst_tunnels, t[0]);
+ hp = hash_get_pair (hm->dst_by_ip4, &key4);
+ key4_copy = (void *) (hp->key);
+ hash_unset_mem (hm->dst_by_ip4, &key4);
+ clib_mem_free (key4_copy);
+ pool_put (hm->dst_tunnels, t1);
+ }
+ }
+ else
+ {
+ // TBD for IPv6
+ }
+
+ return 0;
+}
+
+void
+vxlan_gpe_refresh_output_feature_on_all_dest (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ vxlan_gpe_ioam_dest_tunnels_t *t;
+ u32 i;
+ if (pool_elts (hm->dst_tunnels) == 0)
+ return;
+ vxlan_gpe_clear_output_feature_on_all_intfs (hm->vlib_main);
+ i = vec_len (hm->bool_ref_by_sw_if_index);
+ vec_free (hm->bool_ref_by_sw_if_index);
+ vec_validate_init_empty (hm->bool_ref_by_sw_if_index, i, ~0);
+ pool_foreach (t, hm->dst_tunnels, (
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest
+ (hm->vlib_main,
+ t->dst_addr,
+ t->outer_fib_index,
+ (t->fp_proto == FIB_PROTOCOL_IP4), 1
+ /* is_add */
+ );
+ }
+ ));
+ return;
+}
+
+void
+vxlan_gpe_clear_output_feature_on_select_intfs (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ u32 sw_if_index0 = 0;
+ for (sw_if_index0 = 0;
+ sw_if_index0 < vec_len (hm->bool_ref_by_sw_if_index); sw_if_index0++)
+ {
+ if (hm->bool_ref_by_sw_if_index[sw_if_index0] == 0xFF)
+ {
+ vxlan_gpe_set_clear_output_feature_on_intf
+ (hm->vlib_main, sw_if_index0, 0);
+ }
+ }
+
+ return;
+}
+
+static clib_error_t *
+vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
+ vm,
+ unformat_input_t
+ * input, vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ ip46_address_t local, remote;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ u8 disable = 0;
+ clib_error_t *rv = 0;
+ vxlan4_gpe_tunnel_key_t key4;
+ vxlan6_gpe_tunnel_key_t key6;
+ uword *p;
+ vxlan_gpe_main_t *gm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "local %U", unformat_ip4_address, &local.ip4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else
+ if (unformat (input, "remote %U", unformat_ip4_address, &remote.ip4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (input, "local %U", unformat_ip6_address, &local.ip6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else
+ if (unformat (input, "remote %U", unformat_ip6_address, &remote.ip6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (input, "vni %d", &vni))
+ vni_set = 1;
+ else if (unformat (input, "disable"))
+ disable = 1;
+ else
+ break;
+ }
+
+ if (local_set == 0)
+ return clib_error_return (0, "tunnel local address not specified");
+ if (remote_set == 0)
+ return clib_error_return (0, "tunnel remote address not specified");
+ if (ipv4_set && ipv6_set)
+ return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ if ((ipv4_set
+ && memcmp (&local.ip4, &remote.ip4,
+ sizeof (local.ip4)) == 0) || (ipv6_set
+ &&
+ memcmp
+ (&local.ip6,
+ &remote.ip6,
+ sizeof (local.ip6)) == 0))
+ return clib_error_return (0, "src and dst addresses are identical");
+ if (vni_set == 0)
+ return clib_error_return (0, "vni not specified");
+ if (!ipv6_set)
+ {
+ key4.local = local.ip4.as_u32;
+ key4.remote = remote.ip4.as_u32;
+ key4.vni = clib_host_to_net_u32 (vni << 8);
+ key4.pad = 0;
+ p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
+ }
+ else
+ {
+ key6.local.as_u64[0] = local.ip6.as_u64[0];
+ key6.local.as_u64[1] = local.ip6.as_u64[1];
+ key6.remote.as_u64[0] = remote.ip6.as_u64[0];
+ key6.remote.as_u64[1] = remote.ip6.as_u64[1];
+ key6.vni = clib_host_to_net_u32 (vni << 8);
+ p = hash_get_mem (gm->vxlan6_gpe_tunnel_by_key, &key6);
+ }
+
+ if (!p)
+ return clib_error_return (0, "VxLAN Tunnel not found");
+ t = pool_elt_at_index (gm->tunnels, p[0]);
+ if (!disable)
+ {
+ rv =
+ vxlan_gpe_ioam_set (t, hm->has_trace_option,
+ hm->has_pot_option, hm->has_ppc_option, ipv6_set);
+ }
+ else
+ {
+ rv = vxlan_gpe_ioam_clear (t, 0, 0, 0, 0);
+ }
+ return rv;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_rewrite_cmd, static) = {
+ .path = "set vxlan-gpe-ioam",
+ .short_help = "set vxlan-gpe-ioam vxlan <src-ip> <dst_ip> <vnid> [disable]",
+ .function = vxlan_gpe_set_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+
+
+clib_error_t *
+vxlan_gpe_ioam_enable (int has_trace_option,
+ int has_pot_option, int has_ppc_option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->has_trace_option = has_trace_option;
+ hm->has_pot_option = has_pot_option;
+ hm->has_ppc_option = has_ppc_option;
+ if (hm->has_trace_option)
+ {
+ vxlan_gpe_trace_profile_setup ();
+ }
+
+ return 0;
+}
+
+clib_error_t *
+vxlan_gpe_ioam_disable (int
+ has_trace_option,
+ int has_pot_option, int has_ppc_option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->has_trace_option = has_trace_option;
+ hm->has_pot_option = has_pot_option;
+ hm->has_ppc_option = has_ppc_option;
+ if (!hm->has_trace_option)
+ {
+ vxlan_gpe_trace_profile_cleanup ();
+ }
+
+ return 0;
+}
+
+void
+vxlan_gpe_set_next_override (uword next)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->decap_v4_next_override = next;
+ return;
+}
+
+static clib_error_t *
+vxlan_gpe_set_ioam_flags_command_fn (vlib_main_t * vm,
+ unformat_input_t
+ * input, vlib_cli_command_t * cmd)
+{
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_ppc_option = 0;
+ clib_error_t *rv = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "ppc encap"))
+ has_ppc_option = PPC_ENCAP;
+ else if (unformat (input, "ppc decap"))
+ has_ppc_option = PPC_DECAP;
+ else if (unformat (input, "ppc none"))
+ has_ppc_option = PPC_NONE;
+ else
+ break;
+ }
+
+
+ rv =
+ vxlan_gpe_ioam_enable (has_trace_option, has_pot_option, has_ppc_option);
+ return rv;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_flags_cmd, static) =
+{
+.path = "set vxlan-gpe-ioam rewrite",
+.short_help = "set vxlan-gpe-ioam [trace] [pot] [ppc <encap|decap>]",
+.function = vxlan_gpe_set_ioam_flags_command_fn,};
+/* *INDENT-ON* */
+
+
+int vxlan_gpe_ioam_disable_for_dest
+ (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index,
+ u8 ipv4_set)
+{
+ vxlan_gpe_ioam_dest_tunnels_t *t;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main,
+ dst_addr, outer_fib_index, ipv4_set,
+ 0);
+ if (pool_elts (hm->dst_tunnels) == 0)
+ {
+ vxlan_gpe_clear_output_feature_on_select_intfs ();
+ return 0;
+ }
+
+ pool_foreach (t, hm->dst_tunnels, (
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest
+ (hm->vlib_main,
+ t->dst_addr,
+ t->outer_fib_index,
+ (t->fp_proto ==
+ FIB_PROTOCOL_IP4), 1 /* is_add */ );
+ }
+ ));
+ vxlan_gpe_clear_output_feature_on_select_intfs ();
+ return (0);
+
+}
+
+static clib_error_t *vxlan_gpe_set_ioam_transit_rewrite_command_fn
+ (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ ip46_address_t dst_addr;
+ u8 dst_addr_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u8 disable = 0;
+ clib_error_t *rv = 0;
+ u32 outer_fib_index = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dst-ip %U", unformat_ip4_address, &dst_addr.ip4))
+ {
+ dst_addr_set = 1;
+ ipv4_set = 1;
+ }
+ else
+ if (unformat
+ (input, "dst-ip %U", unformat_ip6_address, &dst_addr.ip6))
+ {
+ dst_addr_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (input, "outer-fib-index %d", &outer_fib_index))
+ {
+ }
+
+ else if (unformat (input, "disable"))
+ disable = 1;
+ else
+ break;
+ }
+
+ if (dst_addr_set == 0)
+ return clib_error_return (0, "tunnel destination address not specified");
+ if (ipv4_set && ipv6_set)
+ return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+ if (!disable)
+ {
+ vxlan_gpe_enable_disable_ioam_for_dest (hm->vlib_main,
+ dst_addr, outer_fib_index,
+ ipv4_set, 1);
+ }
+ else
+ {
+ vxlan_gpe_ioam_disable_for_dest
+ (vm, dst_addr, outer_fib_index, ipv4_set);
+ }
+ return rv;
+}
+
+ /* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_transit_rewrite_cmd, static) = {
+ .path = "set vxlan-gpe-ioam-transit",
+ .short_help = "set vxlan-gpe-ioam-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
+ .function = vxlan_gpe_set_ioam_transit_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
+ (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return (vxlan_gpe_ioam_disable (0, 0, 0));
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_clear_ioam_flags_cmd, static) =
+{
+.path = "clear vxlan-gpe-ioam rewrite",
+.short_help = "clear vxlan-gpe-ioam rewrite",
+.function = clear_vxlan_gpe_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+vxlan_gpe_ioam_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+ vxlan_gpe_refresh_output_feature_on_all_dest ();
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+vxlan_gpe_ioam_fib_node_get (fib_node_index_t index)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ return (&hm->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_gpe_ioam_last_lock_gone (fib_node_t * node)
+{
+ ASSERT (0);
+}
+
+
+/*
+ * Virtual function table registered by MPLS GRE tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_gpe_ioam_vft = {
+ .fnv_get = vxlan_gpe_ioam_fib_node_get,
+ .fnv_last_lock = vxlan_gpe_ioam_last_lock_gone,
+ .fnv_back_walk = vxlan_gpe_ioam_back_walk,
+};
+
+void
+vxlan_gpe_ioam_interface_init (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+ hm->fib_entry_type = fib_node_register_new_type (&vxlan_gpe_ioam_vft);
+ return;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
new file mode 100644
index 00000000000..0711b87abbe
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_h__
+#define __included_vxlan_gpe_ioam_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <vnet/ip/ip.h>
+
+
+typedef struct vxlan_gpe_sw_interface_
+{
+ u32 sw_if_index;
+} vxlan_gpe_ioam_sw_interface_t;
+
+typedef struct vxlan_gpe_dest_tunnels_
+{
+ ip46_address_t dst_addr;
+ u32 fp_proto;
+ u32 sibling_index;
+ fib_node_index_t fib_entry_index;
+ u32 outer_fib_index;
+} vxlan_gpe_ioam_dest_tunnels_t;
+
+typedef struct vxlan_gpe_ioam_main_
+{
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+
+ /* Trace option */
+ u8 has_trace_option;
+
+ /* Pot option */
+ u8 has_pot_option;
+
+#define PPC_NONE 0
+#define PPC_ENCAP 1
+#define PPC_DECAP 2
+ u8 has_ppc_option;
+
+#define TSP_SECONDS 0
+#define TSP_MILLISECONDS 1
+#define TSP_MICROSECONDS 2
+#define TSP_NANOSECONDS 3
+
+ /* Array of function pointers to ADD and POP VxLAN-GPE iOAM option handling routines */
+ u8 options_size[256];
+ int (*add_options[256]) (u8 * rewrite_string, u8 * rewrite_size);
+ int (*pop_options[256]) (ip4_header_t * ip, vxlan_gpe_ioam_option_t * opt);
+
+ /* Array of function pointers to iOAM option handling routines */
+ int (*options[256]) (vlib_buffer_t * b, vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj);
+ u8 *(*trace[256]) (u8 * s, vxlan_gpe_ioam_option_t * opt);
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* Override to export for iOAM */
+ uword decap_v4_next_override;
+ uword decap_v6_next_override;
+
+ /* sequence of node graph for encap */
+ uword encap_v4_next_node;
+ uword encap_v6_next_node;
+
+ /* Software interfaces. */
+ vxlan_gpe_ioam_sw_interface_t *sw_interfaces;
+
+ /* hash ip4/ip6 -> list of destinations for doing transit iOAM operation */
+ vxlan_gpe_ioam_dest_tunnels_t *dst_tunnels;
+ uword *dst_by_ip4;
+ uword *dst_by_ip6;
+
+ /** per sw_if_index, to maintain bitmap */
+ u8 *bool_ref_by_sw_if_index;
+ fib_node_type_t fib_entry_type;
+
+ /** State convenience vlib_main_t */
+ vlib_main_t *vlib_main;
+ /** State convenience vnet_main_t */
+ vnet_main_t *vnet_main;
+
+
+} vxlan_gpe_ioam_main_t;
+extern vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
+
+/*
+ * Primary h-b-h handler trace support
+ */
+typedef struct
+{
+ u32 next_index;
+ u32 trace_len;
+ u8 option_data[256];
+} ioam_trace_t;
+
+
+extern vlib_node_registration_t vxlan_gpe_encap_ioam_v4_node;
+extern vlib_node_registration_t vxlan_gpe_decap_ioam_v4_node;
+extern vlib_node_registration_t vxlan_gpe_transit_ioam_v4_node;
+
+clib_error_t *vxlan_gpe_ioam_enable (int has_trace_option, int has_pot_option,
+ int has_ppc_option);
+
+clib_error_t *vxlan_gpe_ioam_disable (int has_trace_option,
+ int has_pot_option, int has_ppc_option);
+
+clib_error_t *vxlan_gpe_ioam_set (vxlan_gpe_tunnel_t * t,
+ int has_trace_option,
+ int has_pot_option,
+ int has_ppc_option, u8 ipv6_set);
+clib_error_t *vxlan_gpe_ioam_clear (vxlan_gpe_tunnel_t * t,
+ int has_trace_option, int has_pot_option,
+ int has_ppc_option, u8 ipv6_set);
+
+int vxlan_gpe_ioam_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 *
+ rewrite_string,
+ u8 *
+ rewrite_size));
+
+int vxlan_gpe_add_unregister_option (u8 option);
+
+int vxlan_gpe_ioam_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t *
+ opt, u8 is_ipv4, u8 use_adj),
+ u8 * trace (u8 * s,
+ vxlan_gpe_ioam_option_t *
+ opt));
+int vxlan_gpe_ioam_unregister_option (u8 option);
+
+int vxlan_gpe_trace_profile_setup (void);
+
+int vxlan_gpe_trace_profile_cleanup (void);
+extern void vxlan_gpe_ioam_interface_init (void);
+int
+vxlan_gpe_enable_disable_ioam_for_dest (vlib_main_t * vm,
+ ip46_address_t dst_addr,
+ u32 outer_fib_index,
+ u8 is_ipv4, u8 is_add);
+int vxlan_gpe_ioam_disable_for_dest
+ (vlib_main_t * vm, ip46_address_t dst_addr, u32 outer_fib_index,
+ u8 ipv4_set);
+
+typedef enum
+{
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_POP,
+ VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP,
+ VXLAN_GPE_DECAP_IOAM_V4_N_NEXT
+} vxlan_gpe_decap_ioam_v4_next_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
new file mode 100644
index 00000000000..a7ef859ec58
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_packet_h__
+#define __included_vxlan_gpe_ioam_packet_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip.h>
+
+
+
+#define VXLAN_GPE_OPTION_TYPE_IOAM_TRACE 59
+#define VXLAN_GPE_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60
+
+/**
+ * @brief VXLAN GPE Extension (iOAM) Header definition
+ */
+typedef struct
+{
+ u8 type;
+ u8 length;
+ /** Reserved */
+ u8 reserved;
+ /** see vxlan_gpe_protocol_t */
+ u8 protocol;
+} vxlan_gpe_ioam_hdr_t;
+
+/*
+ * @brief VxLAN GPE iOAM Option definition
+ */
+typedef struct
+{
+ /* Option Type */
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} vxlan_gpe_ioam_option_t;
+
+
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
new file mode 100644
index 00000000000..e37b1642d96
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
@@ -0,0 +1,552 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <ioam/lib-trace/trace_util.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+/* Timestamp precision multipliers for seconds, milliseconds, microseconds
+ * and nanoseconds respectively.
+ */
+static f64 trace_tsp_mul[4] = { 1, 1e3, 1e6, 1e9 };
+
+typedef union
+{
+ u64 as_u64;
+ u32 as_u32[2];
+} time_u64_t;
+
+
+/* *INDENT-OFF* */
+typedef CLIB_PACKED(struct {
+ vxlan_gpe_ioam_option_t hdr;
+ u8 ioam_trace_type;
+ u8 data_list_elts_left;
+ u32 elts[0]; /* Variable type. So keep it generic */
+}) vxlan_gpe_ioam_trace_option_t;
+/* *INDENT-ON* */
+
+
+#define foreach_vxlan_gpe_ioam_trace_stats \
+ _(SUCCESS, "Pkts updated with TRACE records") \
+ _(FAILED, "Errors in TRACE due to lack of TRACE records")
+
+static char *vxlan_gpe_ioam_trace_stats_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_gpe_ioam_trace_stats
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_GPE_IOAM_TRACE_##sym,
+ foreach_vxlan_gpe_ioam_trace_stats
+#undef _
+ VXLAN_GPE_IOAM_TRACE_N_STATS,
+} vxlan_gpe_ioam_trace_stats_t;
+
+
+typedef struct
+{
+ /* stats */
+ u64 counters[ARRAY_LEN (vxlan_gpe_ioam_trace_stats_strings)];
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} vxlan_gpe_ioam_trace_main_t;
+
+vxlan_gpe_ioam_trace_main_t vxlan_gpe_ioam_trace_main;
+
+int
+vxlan_gpe_ioam_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * rewrite_size))
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Already registered */
+ if (hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = rewrite_options;
+ hm->options_size[option] = size;
+
+ return (0);
+}
+
+int
+vxlan_gpe_add_unregister_option (u8 option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->add_options));
+
+ /* Not registered */
+ if (!hm->add_options[option])
+ return (-1);
+
+ hm->add_options[option] = NULL;
+ hm->options_size[option] = 0;
+ return (0);
+}
+
+
+int
+vxlan_gpe_ioam_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj),
+ u8 * trace (u8 * s,
+ vxlan_gpe_ioam_option_t * opt))
+{
+ vxlan_gpe_ioam_main_t *im = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (im->options));
+
+ /* Already registered */
+ if (im->options[option])
+ return (-1);
+
+ im->options[option] = options;
+ im->trace[option] = trace;
+
+ return (0);
+}
+
+int
+vxlan_gpe_ioam_unregister_option (u8 option)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ ASSERT (option < ARRAY_LEN (hm->options));
+
+ /* Not registered */
+ if (!hm->options[option])
+ return (-1);
+
+ hm->options[option] = NULL;
+ hm->trace[option] = NULL;
+
+ return (0);
+}
+
+
+always_inline void
+vxlan_gpe_ioam_trace_stats_increment_counter (u32 counter_index,
+ u64 increment)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+
+ hm->counters[counter_index] += increment;
+}
+
+
+static u8 *
+format_ioam_data_list_element (u8 * s, va_list * args)
+{
+ u32 *elt = va_arg (*args, u32 *);
+ u8 *trace_type_p = va_arg (*args, u8 *);
+ u8 trace_type = *trace_type_p;
+
+
+ if (trace_type & BIT_TTL_NODEID)
+ {
+ u32 ttl_node_id_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ttl 0x%x node id 0x%x ",
+ ttl_node_id_host_byte_order >> 24,
+ ttl_node_id_host_byte_order & 0x00FFFFFF);
+
+ elt++;
+ }
+
+ if (trace_type & BIT_ING_INTERFACE && trace_type & BIT_ING_INTERFACE)
+ {
+ u32 ingress_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ingress 0x%x egress 0x%x ",
+ ingress_host_byte_order >> 16,
+ ingress_host_byte_order & 0xFFFF);
+ elt++;
+ }
+
+ if (trace_type & BIT_TIMESTAMP)
+ {
+ u32 ts_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "ts 0x%x \n", ts_in_host_byte_order);
+ elt++;
+ }
+
+ if (trace_type & BIT_APPDATA)
+ {
+ u32 appdata_in_host_byte_order = clib_net_to_host_u32 (*elt);
+ s = format (s, "app 0x%x ", appdata_in_host_byte_order);
+ elt++;
+ }
+
+ return s;
+}
+
+
+
+int
+vxlan_gpe_ioam_trace_rewrite_handler (u8 * rewrite_string, u8 * rewrite_size)
+{
+ vxlan_gpe_ioam_trace_option_t *trace_option = NULL;
+ u8 trace_data_size = 0;
+ u8 trace_option_elts = 0;
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+ if (PREDICT_FALSE (!rewrite_string))
+ return -1;
+
+ trace_option_elts = profile->num_elts;
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ trace_option = (vxlan_gpe_ioam_trace_option_t *) rewrite_string;
+ trace_option->hdr.type = VXLAN_GPE_OPTION_TYPE_IOAM_TRACE;
+ trace_option->hdr.length = 2 /*ioam_trace_type,data_list_elts_left */ +
+ trace_option_elts * trace_data_size;
+ trace_option->ioam_trace_type = profile->trace_type & TRACE_TYPE_MASK;
+ trace_option->data_list_elts_left = trace_option_elts;
+ *rewrite_size =
+ sizeof (vxlan_gpe_ioam_trace_option_t) +
+ (trace_option_elts * trace_data_size);
+
+ return 0;
+}
+
+
+int
+vxlan_gpe_ioam_trace_data_list_handler (vlib_buffer_t * b,
+ vxlan_gpe_ioam_option_t * opt,
+ u8 is_ipv4, u8 use_adj)
+{
+ u8 elt_index = 0;
+ vxlan_gpe_ioam_trace_option_t *trace =
+ (vxlan_gpe_ioam_trace_option_t *) opt;
+ time_u64_t time_u64;
+ u32 *elt;
+ int rv = 0;
+ trace_profile *profile = NULL;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+
+ time_u64.as_u64 = 0;
+
+ if (PREDICT_TRUE (trace->data_list_elts_left))
+ {
+ trace->data_list_elts_left--;
+ /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
+ * to skip to this node's location.
+ */
+ elt_index =
+ trace->data_list_elts_left *
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[elt_index];
+ if (is_ipv4)
+ {
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ ip4_header_t *ip0 = vlib_buffer_get_current (b);
+ /* The transit case is the only case where the TTL decrement happens
+ * before iOAM processing. For now, use the use_adj flag as an overload.
+ * We can probably use a separate flag instead of overloading the use_adj flag.
+ */
+ *elt = clib_host_to_net_u32 (((ip0->ttl - 1 + use_adj) << 24) |
+ profile->node_id);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ u16 tx_if = 0;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+ ip4_main_t *im4 = &ip4_main;
+ ip_lookup_main_t *lm = &im4->lookup_main;
+ if (use_adj)
+ {
+ ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
+ tx_if = adj->rewrite_header.sw_if_index & 0xFFFF;
+ }
+
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ tx_if;
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+ }
+ else
+ {
+ if (trace->ioam_trace_type & BIT_TTL_NODEID)
+ {
+ ip6_header_t *ip0 = vlib_buffer_get_current (b);
+ *elt = clib_host_to_net_u32 ((ip0->hop_limit << 24) |
+ profile->node_id);
+ elt++;
+ }
+ if (trace->ioam_trace_type & BIT_ING_INTERFACE)
+ {
+ u16 tx_if = 0;
+ u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm = &im6->lookup_main;
+ if (use_adj)
+ {
+ ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
+ tx_if = adj->rewrite_header.sw_if_index & 0xFFFF;
+ }
+
+ *elt =
+ (vnet_buffer (b)->sw_if_index[VLIB_RX] & 0xFFFF) << 16 |
+ tx_if;
+ *elt = clib_host_to_net_u32 (*elt);
+ elt++;
+ }
+ }
+
+ if (trace->ioam_trace_type & BIT_TIMESTAMP)
+ {
+ /* Send least significant 32 bits */
+ f64 time_f64 =
+ (f64) (((f64) hm->unix_time_0) +
+ (vlib_time_now (hm->vlib_main) - hm->vlib_time_0));
+
+ time_u64.as_u64 = time_f64 * trace_tsp_mul[profile->trace_tsp];
+ *elt = clib_host_to_net_u32 (time_u64.as_u32[0]);
+ elt++;
+ }
+
+ if (trace->ioam_trace_type & BIT_APPDATA)
+ {
+ /* $$$ set elt0->app_data */
+ *elt = clib_host_to_net_u32 (profile->app_data);
+ elt++;
+ }
+ vxlan_gpe_ioam_trace_stats_increment_counter
+ (VXLAN_GPE_IOAM_TRACE_SUCCESS, 1);
+ }
+ else
+ {
+ vxlan_gpe_ioam_trace_stats_increment_counter
+ (VXLAN_GPE_IOAM_TRACE_FAILED, 1);
+ }
+ return (rv);
+}
+
+u8 *
+vxlan_gpe_ioam_trace_data_list_trace_handler (u8 * s,
+ vxlan_gpe_ioam_option_t * opt)
+{
+ vxlan_gpe_ioam_trace_option_t *trace;
+ u8 trace_data_size_in_words = 0;
+ u32 *elt;
+ int elt_index = 0;
+
+ trace = (vxlan_gpe_ioam_trace_option_t *) opt;
+ s =
+ format (s, " Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type,
+ trace->data_list_elts_left);
+ trace_data_size_in_words =
+ fetch_trace_data_size (trace->ioam_trace_type) / 4;
+ elt = &trace->elts[0];
+ while ((u8 *) elt < ((u8 *) (&trace->elts[0]) + trace->hdr.length - 2
+ /* -2 accounts for ioam_trace_type,elts_left */ ))
+ {
+ s = format (s, " [%d] %U\n", elt_index,
+ format_ioam_data_list_element,
+ elt, &trace->ioam_trace_type);
+ elt_index++;
+ elt += trace_data_size_in_words;
+ }
+ return (s);
+}
+
+
+static clib_error_t *
+vxlan_gpe_show_ioam_trace_cmd_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+ u8 *s = 0;
+ int i = 0;
+
+ for (i = 0; i < VXLAN_GPE_IOAM_TRACE_N_STATS; i++)
+ {
+ s = format (s, " %s - %lu\n", vxlan_gpe_ioam_trace_stats_strings[i],
+ hm->counters[i]);
+ }
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vxlan_gpe_show_ioam_trace_cmd, static) = {
+ .path = "show ioam vxlan-gpe trace",
+ .short_help = "iOAM trace statistics",
+ .function = vxlan_gpe_show_ioam_trace_cmd_fn,
+};
+/* *INDENT-ON* */
+
+
+static clib_error_t *
+vxlan_gpe_ioam_trace_init (vlib_main_t * vm)
+{
+ vxlan_gpe_ioam_trace_main_t *hm = &vxlan_gpe_ioam_trace_main;
+ clib_error_t *error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return (error);
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, vxlan_gpe_init)))
+ return (error);
+
+ hm->vlib_main = vm;
+ hm->vnet_main = vnet_get_main ();
+ memset (hm->counters, 0, sizeof (hm->counters));
+
+ if (vxlan_gpe_ioam_register_option
+ (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE,
+ vxlan_gpe_ioam_trace_data_list_handler,
+ vxlan_gpe_ioam_trace_data_list_trace_handler) < 0)
+ return (clib_error_create
+ ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE failed"));
+
+
+ if (vxlan_gpe_ioam_add_register_option
+ (VXLAN_GPE_OPTION_TYPE_IOAM_TRACE,
+ sizeof (vxlan_gpe_ioam_trace_option_t),
+ vxlan_gpe_ioam_trace_rewrite_handler) < 0)
+ return (clib_error_create
+ ("registration of VXLAN_GPE_OPTION_TYPE_IOAM_TRACE for rewrite failed"));
+
+
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (vxlan_gpe_ioam_trace_init);
+
+int
+vxlan_gpe_trace_profile_cleanup (void)
+{
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = 0;
+
+ return 0;
+
+}
+
+static int
+vxlan_gpe_ioam_trace_get_sizeof_handler (u32 * result)
+{
+ u16 size = 0;
+ u8 trace_data_size = 0;
+ trace_profile *profile = NULL;
+
+ *result = 0;
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+ trace_data_size = fetch_trace_data_size (profile->trace_type);
+ if (PREDICT_FALSE (trace_data_size == 0))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (PREDICT_FALSE (profile->num_elts * trace_data_size > 254))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ size +=
+ sizeof (vxlan_gpe_ioam_trace_option_t) +
+ profile->num_elts * trace_data_size;
+ *result = size;
+
+ return 0;
+}
+
+
+int
+vxlan_gpe_trace_profile_setup (void)
+{
+ u32 trace_size = 0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ trace_profile *profile = NULL;
+
+
+ profile = trace_profile_find ();
+
+ if (PREDICT_FALSE (!profile))
+ {
+ return (-1);
+ }
+
+
+ if (vxlan_gpe_ioam_trace_get_sizeof_handler (&trace_size) < 0)
+ return (-1);
+
+ hm->options_size[VXLAN_GPE_OPTION_TYPE_IOAM_TRACE] = trace_size;
+
+ return (0);
+}
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
new file mode 100644
index 00000000000..c0ad8d9d03a
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_util.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_gpe_ioam_util_h__
+#define __included_vxlan_gpe_ioam_util_h__
+
+#include <vnet/vxlan-gpe/vxlan_gpe.h>
+#include <vnet/vxlan-gpe/vxlan_gpe_packet.h>
+#include <vnet/ip/ip.h>
+
+
+typedef struct
+{
+ u32 tunnel_index;
+ ioam_trace_t fmt_trace;
+} vxlan_gpe_ioam_v4_trace_t;
+
+
+static u8 *
+format_vxlan_gpe_ioam_v4_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_gpe_ioam_v4_trace_t *t1 = va_arg (*args, vxlan_gpe_ioam_v4_trace_t *);
+ ioam_trace_t *t = &(t1->fmt_trace);
+ vxlan_gpe_ioam_option_t *fmt_trace0;
+ vxlan_gpe_ioam_option_t *opt0, *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ u8 type0;
+
+ fmt_trace0 = (vxlan_gpe_ioam_option_t *) t->option_data;
+
+ s = format (s, "VXLAN-GPE-IOAM: next_index %d len %d traced %d",
+ t->next_index, fmt_trace0->length, t->trace_len);
+
+ opt0 = (vxlan_gpe_ioam_option_t *) (fmt_trace0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) fmt_trace0) + t->trace_len;
+
+ while (opt0 < limit0)
+ {
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad, just stop */
+ opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1;
+ break;
+
+ default:
+ if (hm->trace[type0])
+ {
+ s = (*hm->trace[type0]) (s, opt0);
+ }
+ else
+ {
+ s =
+ format (s, "\n unrecognized option %d length %d", type0,
+ opt0->length);
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_option_t));
+ break;
+ }
+ }
+
+ s = format (s, "VXLAN-GPE-IOAM: tunnel %d", t1->tunnel_index);
+ return s;
+}
+
+
+always_inline void
+vxlan_gpe_encap_decap_ioam_v4_one_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ u32 * next0, u32 drop_node_val,
+ u8 use_adj)
+{
+ ip4_header_t *ip0;
+ udp_header_t *udp_hdr0;
+ vxlan_gpe_header_t *gpe_hdr0;
+ vxlan_gpe_ioam_hdr_t *gpe_ioam0;
+ vxlan_gpe_ioam_option_t *opt0;
+ vxlan_gpe_ioam_option_t *limit0;
+ vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
+
+ /* Populate the iOAM header */
+ ip0 = vlib_buffer_get_current (b0);
+ udp_hdr0 = (udp_header_t *) (ip0 + 1);
+ gpe_hdr0 = (vxlan_gpe_header_t *) (udp_hdr0 + 1);
+ gpe_ioam0 = (vxlan_gpe_ioam_hdr_t *) (gpe_hdr0 + 1);
+ opt0 = (vxlan_gpe_ioam_option_t *) (gpe_ioam0 + 1);
+ limit0 = (vxlan_gpe_ioam_option_t *) ((u8 *) gpe_ioam0 + gpe_ioam0->length);
+
+ /*
+ * Basic validity checks
+ */
+ if (gpe_ioam0->length > clib_net_to_host_u16 (ip0->length))
+ {
+ *next0 = drop_node_val;
+ return;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ while (opt0 < limit0)
+ {
+ u8 type0;
+ type0 = opt0->type;
+ switch (type0)
+ {
+ case 0: /* Pad1 */
+ opt0 = (vxlan_gpe_ioam_option_t *) ((u8 *) opt0) + 1;
+ continue;
+ case 1: /* PadN */
+ break;
+ default:
+ if (hm->options[type0])
+ {
+ if ((*hm->options[type0]) (b0, opt0, 1 /* is_ipv4 */ ,
+ use_adj) < 0)
+ {
+ *next0 = drop_node_val;
+ return;
+ }
+ }
+ break;
+ }
+ opt0 =
+ (vxlan_gpe_ioam_option_t *) (((u8 *) opt0) + opt0->length +
+ sizeof (vxlan_gpe_ioam_hdr_t));
+ }
+
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_gpe_ioam_v4_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = gpe_ioam0->length;
+ t->fmt_trace.next_index = *next0;
+ /* Capture the ioam option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->fmt_trace.
+ option_data) ? trace_len : ARRAY_LEN (t->fmt_trace.
+ option_data);
+ t->fmt_trace.trace_len = trace_len;
+ clib_memcpy (&(t->fmt_trace.option_data), gpe_ioam0, trace_len);
+ }
+ return;
+}
+
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h
new file mode 100644
index 00000000000..cc0a10a3fec
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vxlan_gpe_msg_enum_h
+#define included_vxlan_gpe_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_vxlan_gpe_msg_enum_h */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c
new file mode 100644
index 00000000000..47253eb67ab
--- /dev/null
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_test.c
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * vxlan_gpe_test.c - test harness for vxlan_gpe plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+/* Declare message IDs */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_all_api_h.h>
+#undef vl_api_version
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam_packet.h>
+#include <ioam/lib-vxlan-gpe/vxlan_gpe_ioam.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} vxlan_gpe_test_main_t;
+
+vxlan_gpe_test_main_t vxlan_gpe_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(vxlan_gpe_ioam_enable_reply) \
+_(vxlan_gpe_ioam_disable_reply) \
+_(vxlan_gpe_ioam_vni_enable_reply) \
+_(vxlan_gpe_ioam_vni_disable_reply) \
+_(vxlan_gpe_ioam_transit_enable_reply) \
+_(vxlan_gpe_ioam_transit_disable_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = vxlan_gpe_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(VXLAN_GPE_IOAM_ENABLE_REPLY, vxlan_gpe_ioam_enable_reply) \
+_(VXLAN_GPE_IOAM_DISABLE_REPLY, vxlan_gpe_ioam_disable_reply) \
+_(VXLAN_GPE_IOAM_VNI_ENABLE_REPLY, vxlan_gpe_ioam_vni_enable_reply) \
+_(VXLAN_GPE_IOAM_VNI_DISABLE_REPLY, vxlan_gpe_ioam_vni_disable_reply) \
+_(VXLAN_GPE_IOAM_TRANSIT_ENABLE_REPLY, vxlan_gpe_ioam_transit_enable_reply) \
+_(VXLAN_GPE_IOAM_TRANSIT_DISABLE_REPLY, vxlan_gpe_ioam_transit_disable_reply) \
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+
+static int
+api_vxlan_gpe_ioam_enable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+
+ unformat_input_t *input = vam->input;
+ vl_api_vxlan_gpe_ioam_enable_t *mp;
+ f64 timeout;
+ u32 id = 0;
+ int has_trace_option = 0;
+ int has_pow_option = 0;
+ int has_ppc_option = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pow"))
+ has_pow_option = 1;
+ else if (unformat (input, "ppc encap"))
+ has_ppc_option = PPC_ENCAP;
+ else if (unformat (input, "ppc decap"))
+ has_ppc_option = PPC_DECAP;
+ else if (unformat (input, "ppc none"))
+ has_ppc_option = PPC_NONE;
+ else
+ break;
+ }
+ M (VXLAN_GPE_IOAM_ENABLE, vxlan_gpe_ioam_enable);
+ mp->id = htons (id);
+ mp->trace_ppc = has_ppc_option;
+ mp->pow_enable = has_pow_option;
+ mp->trace_enable = has_trace_option;
+
+
+ S;
+ W;
+
+ return (0);
+}
+
+
+static int
+api_vxlan_gpe_ioam_disable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+ vl_api_vxlan_gpe_ioam_disable_t *mp;
+ f64 timeout;
+
+ M (VXLAN_GPE_IOAM_DISABLE, vxlan_gpe_ioam_disable);
+ S;
+ W;
+ return 0;
+}
+
+static int
+api_vxlan_gpe_ioam_vni_enable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_vni_enable_t *mp;
+ ip4_address_t local4, remote4;
+ ip6_address_t local6, remote6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ f64 timeout;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("tunnel local address not specified\n");
+ return -99;
+ }
+ if (remote_set == 0)
+ {
+ errmsg ("tunnel remote address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (vni_set == 0)
+ {
+ errmsg ("vni not specified\n");
+ return -99;
+ }
+
+ M (VXLAN_GPE_IOAM_VNI_ENABLE, vxlan_gpe_ioam_vni_enable);
+
+
+ if (ipv6_set)
+ {
+ clib_memcpy (&mp->local, &local6, sizeof (local6));
+ clib_memcpy (&mp->remote, &remote6, sizeof (remote6));
+ }
+ else
+ {
+ clib_memcpy (&mp->local, &local4, sizeof (local4));
+ clib_memcpy (&mp->remote, &remote4, sizeof (remote4));
+ }
+
+ mp->vni = ntohl (vni);
+ mp->is_ipv6 = ipv6_set;
+
+ S;
+ W;
+
+ return (0);
+}
+
+static int
+api_vxlan_gpe_ioam_vni_disable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_vni_disable_t *mp;
+ ip4_address_t local4, remote4;
+ ip6_address_t local6, remote6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u8 remote_set = 0;
+ u32 vni;
+ u8 vni_set = 0;
+ f64 timeout;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip4_address, &remote4))
+ {
+ remote_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "local %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "remote %U",
+ unformat_ip6_address, &remote6))
+ {
+ remote_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "vni %d", &vni))
+ vni_set = 1;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("tunnel local address not specified\n");
+ return -99;
+ }
+ if (remote_set == 0)
+ {
+ errmsg ("tunnel remote address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+ if (vni_set == 0)
+ {
+ errmsg ("vni not specified\n");
+ return -99;
+ }
+
+ M (VXLAN_GPE_IOAM_VNI_DISABLE, vxlan_gpe_ioam_vni_disable);
+
+
+ if (ipv6_set)
+ {
+ clib_memcpy (&mp->local, &local6, sizeof (local6));
+ clib_memcpy (&mp->remote, &remote6, sizeof (remote6));
+ }
+ else
+ {
+ clib_memcpy (&mp->local, &local4, sizeof (local4));
+ clib_memcpy (&mp->remote, &remote4, sizeof (remote4));
+ }
+
+ mp->vni = ntohl (vni);
+ mp->is_ipv6 = ipv6_set;
+
+ S;
+ W;
+
+ return 0;
+}
+
+static int
+api_vxlan_gpe_ioam_transit_enable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_transit_enable_t *mp;
+ ip4_address_t local4;
+ ip6_address_t local6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u32 outer_fib_index = 0;
+ f64 timeout;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst-ip %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("destination address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+
+ M (VXLAN_GPE_IOAM_TRANSIT_ENABLE, vxlan_gpe_ioam_transit_enable);
+
+
+ if (ipv6_set)
+ {
+ errmsg ("IPv6 currently unsupported");
+ return -1;
+ }
+ else
+ {
+ clib_memcpy (&mp->dst_addr, &local4, sizeof (local4));
+ }
+
+ mp->outer_fib_index = htonl (outer_fib_index);
+ mp->is_ipv6 = ipv6_set;
+
+ S;
+ W;
+
+ return (0);
+}
+
+static int
+api_vxlan_gpe_ioam_transit_disable (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+
+ unformat_input_t *line_input = vam->input;
+ vl_api_vxlan_gpe_ioam_transit_disable_t *mp;
+ ip4_address_t local4;
+ ip6_address_t local6;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 local_set = 0;
+ u32 outer_fib_index;
+ f64 timeout;
+
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst-ip %U", unformat_ip4_address, &local4))
+ {
+ local_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst-ip %U",
+ unformat_ip6_address, &local6))
+ {
+ local_set = 1;
+ ipv6_set = 1;
+ }
+
+ else if (unformat (line_input, "outer-fib-index %d", &outer_fib_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'\n", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (local_set == 0)
+ {
+ errmsg ("destination address not specified\n");
+ return -99;
+ }
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+
+
+ M (VXLAN_GPE_IOAM_TRANSIT_DISABLE, vxlan_gpe_ioam_transit_disable);
+
+
+ if (ipv6_set)
+ {
+ return -1;
+ }
+ else
+ {
+ clib_memcpy (&mp->dst_addr, &local4, sizeof (local4));
+ }
+
+ mp->outer_fib_index = htonl (outer_fib_index);
+ mp->is_ipv6 = ipv6_set;
+
+ S;
+ W;
+
+
+ return (0);
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(vxlan_gpe_ioam_enable, ""\
+ "[trace] [pow] [ppc <encap|ppc decap>]") \
+_(vxlan_gpe_ioam_disable, "") \
+_(vxlan_gpe_ioam_vni_enable, ""\
+ "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \
+_(vxlan_gpe_ioam_vni_disable, ""\
+ "local <local_vtep_ip> remote <remote_vtep_ip> vni <vnid>") \
+_(vxlan_gpe_ioam_transit_enable, ""\
+ "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \
+_(vxlan_gpe_ioam_transit_disable, ""\
+ "dst-ip <dst_ip> [outer-fib-index <outer_fib_index>]") \
+
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+ vxlan_gpe_test_main_t *sm = &vxlan_gpe_test_main;
+ u8 *name;
+
+ sm->vat_main = vam;
+
+ name = format (0, "ioam_vxlan_gpe_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~ 0)
+ vat_api_hookup (vam);
+
+ vec_free (name);
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lb.am b/src/plugins/lb.am
new file mode 100644
index 00000000000..352358fa88f
--- /dev/null
+++ b/src/plugins/lb.am
@@ -0,0 +1,42 @@
+# Copyright (c) 2016 Cisco Systems, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += lb_test_plugin.la
+vppplugins_LTLIBRARIES += lb_plugin.la
+
+lb_plugin_la_SOURCES = \
+ lb/lb.c \
+ lb/node.c \
+ lb/cli.c \
+ lb/util.c \
+ lb/refcount.c \
+ lb/api.c
+
+BUILT_SOURCES += \
+ lb/lb.api.h \
+ lb/lb.api.json
+
+API_FILES += lb/lb.api
+
+noinst_HEADERS += \
+ lb/lb.h \
+ lb/util.h \
+ lb/refcount.h \
+ lb/lbhash.h \
+ lb/lb.api.h
+
+lb_test_plugin_la_SOURCES = \
+ lb/lb_test.c \
+ lb/lb_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
new file mode 100644
index 00000000000..06c53fa1005
--- /dev/null
+++ b/src/plugins/lb/api.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+
+#include <vppinfra/byte_order.h>
+#include <vlibapi/api.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <lb/lb.api.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+
+/* define message structures */
+#define vl_typedefs
+#include <lb/lb.api.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <lb/lb.api.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lb/lb.api.h>
+#undef vl_api_version
+
+#define vl_msg_name_crc_list
+#include <lb/lb.api.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (lb_main_t * lbm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + lbm->msg_id_base);
+ foreach_vl_msg_name_crc_lb;
+#undef _
+}
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+lbm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+static void
+vl_api_lb_conf_t_handler
+(vl_api_lb_conf_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t * rmp;
+ int rv = 0;
+
+ rv = lb_conf((ip4_address_t *)&mp->ip4_src_address,
+ (ip6_address_t *)mp->ip6_src_address,
+ mp->sticky_buckets_per_core,
+ mp->flow_timeout);
+
+ REPLY_MACRO (VL_API_LB_CONF_REPLY);
+}
+
+static void *vl_api_lb_conf_t_print
+(vl_api_lb_conf_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_conf ");
+ s = format (s, "%U ", format_ip4_address, (ip4_address_t *)&mp->ip4_src_address);
+ s = format (s, "%U ", format_ip6_address, (ip6_address_t *)mp->ip6_src_address);
+ s = format (s, "%u ", mp->sticky_buckets_per_core);
+ s = format (s, "%u ", mp->flow_timeout);
+ FINISH;
+}
+
+
+static void
+vl_api_lb_add_del_vip_t_handler
+(vl_api_lb_add_del_vip_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t * rmp;
+ int rv = 0;
+ ip46_address_t prefix;
+ memcpy(&prefix.ip6, mp->ip_prefix, sizeof(prefix.ip6));
+
+ if (mp->is_del) {
+ u32 vip_index;
+ if (!(rv = lb_vip_find_index(&prefix, mp->prefix_length, &vip_index)))
+ rv = lb_vip_del(vip_index);
+ } else {
+ u32 vip_index;
+ lb_vip_type_t type;
+ if (ip46_prefix_is_ip4(&prefix, mp->prefix_length)) {
+ type = mp->is_gre4?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
+ } else {
+ type = mp->is_gre4?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
+ }
+
+ rv = lb_vip_add(&prefix, mp->prefix_length, type,
+ mp->new_flows_table_length, &vip_index);
+ }
+ REPLY_MACRO (VL_API_LB_CONF_REPLY);
+}
+
+static void *vl_api_lb_add_del_vip_t_print
+(vl_api_lb_add_del_vip_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_add_del_vip ");
+ s = format (s, "%U ", format_ip46_prefix,
+ (ip46_address_t *)mp->ip_prefix, mp->prefix_length, IP46_TYPE_ANY);
+ s = format (s, "%s ", mp->is_gre4?"gre4":"gre6");
+ s = format (s, "%u ", mp->new_flows_table_length);
+ s = format (s, "%s ", mp->is_del?"del":"add");
+ FINISH;
+}
+
+static void
+vl_api_lb_add_del_as_t_handler
+(vl_api_lb_add_del_as_t * mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t * rmp;
+ int rv = 0;
+ u32 vip_index;
+ if ((rv = lb_vip_find_index((ip46_address_t *)mp->vip_ip_prefix,
+ mp->vip_prefix_length, &vip_index)))
+ goto done;
+
+ if (mp->is_del)
+ rv = lb_vip_del_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
+ else
+ rv = lb_vip_add_ass(vip_index, (ip46_address_t *)mp->as_address, 1);
+
+done:
+ REPLY_MACRO (VL_API_LB_CONF_REPLY);
+}
+
+static void *vl_api_lb_add_del_as_t_print
+(vl_api_lb_add_del_as_t *mp, void * handle)
+{
+ u8 * s;
+ s = format (0, "SCRIPT: lb_add_del_as ");
+ s = format (s, "%U ", format_ip46_prefix,
+ (ip46_address_t *)mp->vip_ip_prefix, mp->vip_prefix_length, IP46_TYPE_ANY);
+ s = format (s, "%U ", format_ip46_address,
+ (ip46_address_t *)mp->as_address, IP46_TYPE_ANY);
+ s = format (s, "%s ", mp->is_del?"del":"add");
+ FINISH;
+}
+
+/* List of message types that this plugin understands */
+#define foreach_lb_plugin_api_msg \
+_(LB_CONF, lb_conf) \
+_(LB_ADD_DEL_VIP, lb_add_del_vip) \
+_(LB_ADD_DEL_AS, lb_add_del_as)
+
+static clib_error_t * lb_api_init (vlib_main_t * vm)
+{
+ lb_main_t *lbm = &lb_main;
+ u8 *name = format (0, "lb_%08x%c", api_version, 0);
+ lbm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + lbm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_lb_plugin_api_msg;
+#undef _
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (lbm, &api_main);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lb_api_init);
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
new file mode 100644
index 00000000000..b59c6426241
--- /dev/null
+++ b/src/plugins/lb/cli.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+#include <lb/util.h>
+
+static clib_error_t *
+lb_vip_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t prefix;
+ u8 plen;
+ u32 new_len = 1024;
+ u8 del = 0;
+ int ret;
+ u32 gre4 = 0;
+ lb_vip_type_t type;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat(line_input, "%U", unformat_ip46_prefix, &prefix, &plen, IP46_TYPE_ANY, &plen))
+ return clib_error_return (0, "invalid vip prefix: '%U'",
+ format_unformat_error, line_input);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "new_len %d", &new_len))
+ ;
+ else if (unformat(line_input, "del"))
+ del = 1;
+ else if (unformat(line_input, "encap gre4"))
+ gre4 = 1;
+ else if (unformat(line_input, "encap gre6"))
+ gre4 = 0;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+
+ if (ip46_prefix_is_ip4(&prefix, plen)) {
+ type = (gre4)?LB_VIP_TYPE_IP4_GRE4:LB_VIP_TYPE_IP4_GRE6;
+ } else {
+ type = (gre4)?LB_VIP_TYPE_IP6_GRE4:LB_VIP_TYPE_IP6_GRE6;
+ }
+
+ lb_garbage_collection();
+
+ u32 index;
+ if (!del) {
+ if ((ret = lb_vip_add(&prefix, plen, type, new_len, &index))) {
+ return clib_error_return (0, "lb_vip_add error %d", ret);
+ } else {
+ vlib_cli_output(vm, "lb_vip_add ok %d", index);
+ }
+ } else {
+ if ((ret = lb_vip_find_index(&prefix, plen, &index)))
+ return clib_error_return (0, "lb_vip_find_index error %d", ret);
+ else if ((ret = lb_vip_del(index)))
+ return clib_error_return (0, "lb_vip_del error %d", ret);
+ }
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (lb_vip_command, static) =
+{
+ .path = "lb vip",
+ .short_help = "lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]",
+ .function = lb_vip_command_fn,
+};
+
+static clib_error_t *
+lb_as_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t vip_prefix, as_addr;
+ u8 vip_plen;
+ ip46_address_t *as_array = 0;
+ u32 vip_index;
+ u8 del = 0;
+ int ret;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ if (!unformat(line_input, "%U", unformat_ip46_prefix, &vip_prefix, &vip_plen, IP46_TYPE_ANY))
+ return clib_error_return (0, "invalid as address: '%U'",
+ format_unformat_error, line_input);
+
+ if ((ret = lb_vip_find_index(&vip_prefix, vip_plen, &vip_index)))
+ return clib_error_return (0, "lb_vip_find_index error %d", ret);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "%U", unformat_ip46_address, &as_addr, IP46_TYPE_ANY)) {
+ vec_add1(as_array, as_addr);
+ } else if (unformat(line_input, "del")) {
+ del = 1;
+ } else {
+ vec_free(as_array);
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+ }
+
+ if (!vec_len(as_array)) {
+ vec_free(as_array);
+ return clib_error_return (0, "No AS address provided");
+ }
+
+ lb_garbage_collection();
+ clib_warning("vip index is %d", vip_index);
+
+ if (del) {
+ if ((ret = lb_vip_del_ass(vip_index, as_array, vec_len(as_array)))) {
+ vec_free(as_array);
+ return clib_error_return (0, "lb_vip_del_ass error %d", ret);
+ }
+ } else {
+ if ((ret = lb_vip_add_ass(vip_index, as_array, vec_len(as_array)))) {
+ vec_free(as_array);
+ return clib_error_return (0, "lb_vip_add_ass error %d", ret);
+ }
+ }
+
+ vec_free(as_array);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (lb_as_command, static) =
+{
+ .path = "lb as",
+ .short_help = "lb as <vip-prefix> [<address> [<address> [...]]] [del]",
+ .function = lb_as_command_fn,
+};
+
+static clib_error_t *
+lb_conf_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ lb_main_t *lbm = &lb_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4 = lbm->ip4_src_address;
+ ip6_address_t ip6 = lbm->ip6_src_address;
+ u32 per_cpu_sticky_buckets = lbm->per_cpu_sticky_buckets;
+ u32 per_cpu_sticky_buckets_log2 = 0;
+ u32 flow_timeout = lbm->flow_timeout;
+ int ret;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat(line_input, "ip4-src-address %U", unformat_ip4_address, &ip4))
+ ;
+ else if (unformat(line_input, "ip6-src-address %U", unformat_ip6_address, &ip6))
+ ;
+ else if (unformat(line_input, "buckets %d", &per_cpu_sticky_buckets))
+ ;
+ else if (unformat(line_input, "buckets-log2 %d", &per_cpu_sticky_buckets_log2)) {
+ if (per_cpu_sticky_buckets_log2 >= 32)
+ return clib_error_return (0, "buckets-log2 value is too high");
+ per_cpu_sticky_buckets = 1 << per_cpu_sticky_buckets_log2;
+ } else if (unformat(line_input, "timeout %d", &flow_timeout))
+ ;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
+
+ lb_garbage_collection();
+
+ if ((ret = lb_conf(&ip4, &ip6, per_cpu_sticky_buckets, flow_timeout)))
+ return clib_error_return (0, "lb_conf error %d", ret);
+
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (lb_conf_command, static) =
+{
+ .path = "lb conf",
+ .short_help = "lb conf [ip4-src-address <addr>] [ip6-src-address <addr>] [buckets <n>] [timeout <s>]",
+ .function = lb_conf_command_fn,
+};
+
+static clib_error_t *
+lb_show_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_cli_output(vm, "%U", format_lb_main);
+ return NULL;
+}
+
+
+VLIB_CLI_COMMAND (lb_show_command, static) =
+{
+ .path = "show lb",
+ .short_help = "show lb",
+ .function = lb_show_command_fn,
+};
+
+static clib_error_t *
+lb_show_vips_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t line_input;
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ u8 verbose = 0;
+
+ if (!unformat_user (input, unformat_line_input, &line_input))
+ return 0;
+
+ if (unformat(&line_input, "verbose"))
+ verbose = 1;
+
+ pool_foreach(vip, lbm->vips, {
+ vlib_cli_output(vm, "%U\n", verbose?format_lb_vip_detailed:format_lb_vip, vip);
+ });
+
+ unformat_free (&line_input);
+ return NULL;
+}
+
+VLIB_CLI_COMMAND (lb_show_vips_command, static) =
+{
+ .path = "show lb vips",
+ .short_help = "show lb vips [verbose]",
+ .function = lb_show_vips_command_fn,
+};
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
new file mode 100644
index 00000000000..39ee3c8f98b
--- /dev/null
+++ b/src/plugins/lb/lb.api
@@ -0,0 +1,71 @@
+/** \brief Configure Load-Balancer global parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip4_src_address - IPv4 address to be used as source for IPv4 GRE traffic.
+ @param ip6_src_address - IPv6 address to be used as source for IPv6 GRE traffic.
+ @param n_sticky_buckets - Number of buckets *per worker thread* in the
+ established flow table (must be power of 2).
+ @param flow_timeout - Time in seconds after which, if no packet is received
+ for a given flow, the flow is removed from the established flow table.
+*/
+define lb_conf
+{
+ u32 client_index;
+ u32 context;
+ u32 ip4_src_address;
+ u8 ip6_src_address[16];
+ u32 sticky_buckets_per_core;
+ u32 flow_timeout;
+};
+
+define lb_conf_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Add a virtual address (or prefix)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_prefix - IP address (IPv4 in lower order 32 bits).
+ @param prefix_length - IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param is_gre4 - Encap is ip4 GRE (ip6 GRE otherwise).
+ @param new_flows_table_length - Size of the new connections flow table used
+ for this VIP (must be power of 2).
+ @param is_del - The VIP should be removed.
+*/
+define lb_add_del_vip {
+ u32 client_index;
+ u32 context;
+ u8 ip_prefix[16];
+ u8 prefix_length;
+ u8 is_gre4;
+ u32 new_flows_table_length;
+ u8 is_del;
+};
+
+define lb_add_del_vip_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Add an application server for a given VIP
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vip_ip_prefix - VIP IP address (IPv4 in lower order 32 bits).
+ @param vip_ip_prefix - VIP IP prefix length (96 + 'IPv4 prefix length' for IPv4).
+ @param as_address - The application server address (IPv4 in lower order 32 bits).
+ @param is_del - The AS should be removed.
+*/
+define lb_add_del_as {
+ u32 client_index;
+ u32 context;
+ u8 vip_ip_prefix[16];
+ u8 vip_prefix_length;
+ u8 as_address[16];
+ u8 is_del;
+};
+
+define lb_add_del_as_reply {
+ u32 context;
+ i32 retval;
+};
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
new file mode 100644
index 00000000000..1d9b987095b
--- /dev/null
+++ b/src/plugins/lb/lb.c
@@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/api_errno.h>
+
+//GC runs at most once every so many seconds
+#define LB_GARBAGE_RUN 60
+
+//After so many seconds. It is assumed that inter-core race condition will not occur.
+#define LB_CONCURRENCY_TIMEOUT 10
+
+lb_main_t lb_main;
+
+#define lb_get_writer_lock() do {} while(__sync_lock_test_and_set (lb_main.writer_lock, 1))
+#define lb_put_writer_lock() lb_main.writer_lock[0] = 0
+
+static void lb_as_stack (lb_as_t *as);
+
+
+const static char * const lb_dpo_gre4_ip4[] = { "lb4-gre4" , NULL };
+const static char * const lb_dpo_gre4_ip6[] = { "lb6-gre4" , NULL };
+const static char* const * const lb_dpo_gre4_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre4_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre4_ip6,
+ };
+
+const static char * const lb_dpo_gre6_ip4[] = { "lb4-gre6" , NULL };
+const static char * const lb_dpo_gre6_ip6[] = { "lb6-gre6" , NULL };
+const static char* const * const lb_dpo_gre6_nodes[DPO_PROTO_NUM] =
+ {
+ [DPO_PROTO_IP4] = lb_dpo_gre6_ip4,
+ [DPO_PROTO_IP6] = lb_dpo_gre6_ip6,
+ };
+
+u32 lb_hash_time_now(vlib_main_t * vm)
+{
+ return (u32) (vlib_time_now(vm) + 10000);
+}
+
+u8 *format_lb_main (u8 * s, va_list * args)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main();
+ lb_main_t *lbm = &lb_main;
+ s = format(s, "lb_main");
+ s = format(s, " ip4-src-address: %U \n", format_ip4_address, &lbm->ip4_src_address);
+ s = format(s, " ip6-src-address: %U \n", format_ip6_address, &lbm->ip6_src_address);
+ s = format(s, " #vips: %u\n", pool_elts(lbm->vips));
+ s = format(s, " #ass: %u\n", pool_elts(lbm->ass) - 1);
+
+ u32 cpu_index;
+ for(cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++ ) {
+ lb_hash_t *h = lbm->per_cpu[cpu_index].sticky_ht;
+ if (h) {
+ s = format(s, "core %d\n", cpu_index);
+ s = format(s, " timeout: %ds\n", h->timeout);
+ s = format(s, " usage: %d / %d\n", lb_hash_elts(h, lb_hash_time_now(vlib_get_main())), lb_hash_size(h));
+ }
+ }
+
+ return s;
+}
+
+static char *lb_vip_type_strings[] = {
+ [LB_VIP_TYPE_IP6_GRE6] = "ip6-gre6",
+ [LB_VIP_TYPE_IP6_GRE4] = "ip6-gre4",
+ [LB_VIP_TYPE_IP4_GRE6] = "ip4-gre6",
+ [LB_VIP_TYPE_IP4_GRE4] = "ip4-gre4",
+};
+
+u8 *format_lb_vip_type (u8 * s, va_list * args)
+{
+ lb_vip_type_t vipt = va_arg (*args, lb_vip_type_t);
+ u32 i;
+ for (i=0; i<LB_VIP_N_TYPES; i++)
+ if (vipt == i)
+ return format(s, lb_vip_type_strings[i]);
+ return format(s, "_WRONG_TYPE_");
+}
+
+uword unformat_lb_vip_type (unformat_input_t * input, va_list * args)
+{
+ lb_vip_type_t *vipt = va_arg (*args, lb_vip_type_t *);
+ u32 i;
+ for (i=0; i<LB_VIP_N_TYPES; i++)
+ if (unformat(input, lb_vip_type_strings[i])) {
+ *vipt = i;
+ return 1;
+ }
+ return 0;
+}
+
+u8 *format_lb_vip (u8 * s, va_list * args)
+{
+ lb_vip_t *vip = va_arg (*args, lb_vip_t *);
+ return format(s, "%U %U new_size:%u #as:%u%s",
+ format_lb_vip_type, vip->type,
+ format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
+ vip->new_flow_table_mask + 1,
+ pool_elts(vip->as_indexes),
+ (vip->flags & LB_VIP_FLAGS_USED)?"":" removed");
+}
+
+u8 *format_lb_as (u8 * s, va_list * args)
+{
+ lb_as_t *as = va_arg (*args, lb_as_t *);
+ return format(s, "%U %s", format_ip46_address,
+ &as->address, IP46_TYPE_ANY,
+ (as->flags & LB_AS_FLAGS_USED)?"used":"removed");
+}
+
+u8 *format_lb_vip_detailed (u8 * s, va_list * args)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = va_arg (*args, lb_vip_t *);
+ uword indent = format_get_indent (s);
+
+ s = format(s, "%U %U [%u] %U%s\n"
+ "%U new_size:%u\n",
+ format_white_space, indent,
+ format_lb_vip_type, vip->type,
+ vip - lbm->vips, format_ip46_prefix, &vip->prefix, vip->plen, IP46_TYPE_ANY,
+ (vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
+ format_white_space, indent,
+ vip->new_flow_table_mask + 1);
+
+ //Print counters
+ s = format(s, "%U counters:\n",
+ format_white_space, indent);
+ u32 i;
+ for (i=0; i<LB_N_VIP_COUNTERS; i++)
+ s = format(s, "%U %s: %d\n",
+ format_white_space, indent,
+ lbm->vip_counters[i].name,
+ vlib_get_simple_counter(&lbm->vip_counters[i], vip - lbm->vips));
+
+
+ s = format(s, "%U #as:%u\n",
+ format_white_space, indent,
+ pool_elts(vip->as_indexes));
+
+ //Let's count the buckets for each AS
+ u32 *count = 0;
+ vec_validate(count, pool_len(lbm->ass)); //Possibly big alloc for not much...
+ lb_new_flow_entry_t *nfe;
+ vec_foreach(nfe, vip->new_flow_table)
+ count[nfe->as_index]++;
+
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ s = format(s, "%U %U %d buckets %d flows dpo:%u %s\n",
+ format_white_space, indent,
+ format_ip46_address, &as->address, IP46_TYPE_ANY,
+ count[as - lbm->ass],
+ vlib_refcount_get(&lbm->as_refcount, as - lbm->ass),
+ as->dpo.dpoi_index,
+ (as->flags & LB_AS_FLAGS_USED)?"used":" removed");
+ });
+
+ vec_free(count);
+
+ /*
+ s = format(s, "%U new flows table:\n", format_white_space, indent);
+ lb_new_flow_entry_t *nfe;
+ vec_foreach(nfe, vip->new_flow_table) {
+ s = format(s, "%U %d: %d\n", format_white_space, indent, nfe - vip->new_flow_table, nfe->as_index);
+ }
+ */
+ return s;
+}
+
+typedef struct {
+ u32 as_index;
+ u32 last;
+ u32 skip;
+} lb_pseudorand_t;
+
+static int lb_pseudorand_compare(void *a, void *b)
+{
+ lb_as_t *asa, *asb;
+ lb_main_t *lbm = &lb_main;
+ asa = &lbm->ass[((lb_pseudorand_t *)a)->as_index];
+ asb = &lbm->ass[((lb_pseudorand_t *)b)->as_index];
+ return memcmp(&asa->address, &asb->address, sizeof(asb->address));
+}
+
+static void lb_vip_garbage_collection(lb_vip_t *vip)
+{
+ lb_main_t *lbm = &lb_main;
+ ASSERT (lbm->writer_lock[0]);
+
+ u32 now = (u32) vlib_time_now(vlib_get_main());
+ if (!clib_u32_loop_gt(now, vip->last_garbage_collection + LB_GARBAGE_RUN))
+ return;
+
+ vip->last_garbage_collection = now;
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ if (!(as->flags & LB_AS_FLAGS_USED) && //Not used
+ clib_u32_loop_gt(now, as->last_used + LB_CONCURRENCY_TIMEOUT) && //Not recently used
+ (vlib_refcount_get(&lbm->as_refcount, as - lbm->ass) == 0))
+ { //Not referenced
+ fib_entry_child_remove(as->next_hop_fib_entry_index,
+ as->next_hop_child_index);
+ fib_table_entry_delete_index(as->next_hop_fib_entry_index,
+ FIB_SOURCE_RR);
+ as->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
+
+ pool_put(vip->as_indexes, as_index);
+ pool_put(lbm->ass, as);
+ }
+ });
+}
+
+void lb_garbage_collection()
+{
+ lb_main_t *lbm = &lb_main;
+ lb_get_writer_lock();
+ lb_vip_t *vip;
+ u32 *to_be_removed_vips = 0, *i;
+ pool_foreach(vip, lbm->vips, {
+ lb_vip_garbage_collection(vip);
+
+ if (!(vip->flags & LB_VIP_FLAGS_USED) &&
+ (pool_elts(vip->as_indexes) == 0)) {
+ vec_add1(to_be_removed_vips, vip - lbm->vips);
+ }
+ });
+
+ vec_foreach(i, to_be_removed_vips) {
+ vip = &lbm->vips[*i];
+ pool_put(lbm->vips, vip);
+ pool_free(vip->as_indexes);
+ }
+
+ vec_free(to_be_removed_vips);
+ lb_put_writer_lock();
+}
+
+static void lb_vip_update_new_flow_table(lb_vip_t *vip)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_new_flow_entry_t *old_table;
+ u32 i, *as_index;
+ lb_new_flow_entry_t *new_flow_table = 0;
+ lb_as_t *as;
+ lb_pseudorand_t *pr, *sort_arr = 0;
+ u32 count;
+
+ ASSERT (lbm->writer_lock[0]); //We must have the lock
+
+ //Check if some AS is configured or not
+ i = 0;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ if (as->flags & LB_AS_FLAGS_USED) { //Not used anymore
+ i = 1;
+ goto out; //Not sure 'break' works in this macro-loop
+ }
+ });
+
+out:
+ if (i == 0) {
+ //Only the default. i.e. no AS
+ vec_validate(new_flow_table, vip->new_flow_table_mask);
+ for (i=0; i<vec_len(new_flow_table); i++)
+ new_flow_table[i].as_index = 0;
+
+ goto finished;
+ }
+
+ //First, let's sort the ASs
+ sort_arr = 0;
+ vec_alloc(sort_arr, pool_elts(vip->as_indexes));
+
+ i = 0;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ if (!(as->flags & LB_AS_FLAGS_USED)) //Not used anymore
+ continue;
+
+ sort_arr[i].as_index = as - lbm->ass;
+ i++;
+ });
+ _vec_len(sort_arr) = i;
+
+ vec_sort_with_function(sort_arr, lb_pseudorand_compare);
+
+ //Now let's pseudo-randomly generate permutations
+ vec_foreach(pr, sort_arr) {
+ lb_as_t *as = &lbm->ass[pr->as_index];
+
+ u64 seed = clib_xxhash(as->address.as_u64[0] ^
+ as->address.as_u64[1]);
+ /* We have 2^n buckets.
+ * skip must be prime with 2^n.
+ * So skip must be odd.
+ * MagLev actually state that M should be prime,
+ * but this has a big computation cost (% operation).
+ * Using 2^n is more better (& operation).
+ */
+ pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
+ pr->last = (seed >> 32) & vip->new_flow_table_mask;
+ }
+
+ //Let's create a new flow table
+ vec_validate(new_flow_table, vip->new_flow_table_mask);
+ for (i=0; i<vec_len(new_flow_table); i++)
+ new_flow_table[i].as_index = ~0;
+
+ u32 done = 0;
+ while (1) {
+ vec_foreach(pr, sort_arr) {
+ while (1) {
+ u32 last = pr->last;
+ pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
+ if (new_flow_table[last].as_index == ~0) {
+ new_flow_table[last].as_index = pr->as_index;
+ break;
+ }
+ }
+ done++;
+ if (done == vec_len(new_flow_table))
+ goto finished;
+ }
+ }
+
+ vec_free(sort_arr);
+
+finished:
+
+//Count number of changed entries
+ count = 0;
+ for (i=0; i<vec_len(new_flow_table); i++)
+ if (vip->new_flow_table == 0 ||
+ new_flow_table[i].as_index != vip->new_flow_table[i].as_index)
+ count++;
+
+ old_table = vip->new_flow_table;
+ vip->new_flow_table = new_flow_table;
+ vec_free(old_table);
+}
+
+int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
+ u32 per_cpu_sticky_buckets, u32 flow_timeout)
+{
+ lb_main_t *lbm = &lb_main;
+
+ if (!is_pow2(per_cpu_sticky_buckets))
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+
+ lb_get_writer_lock(); //Not exactly necessary but just a reminder that it exists for my future self
+ lbm->ip4_src_address = *ip4_address;
+ lbm->ip6_src_address = *ip6_address;
+ lbm->per_cpu_sticky_buckets = per_cpu_sticky_buckets;
+ lbm->flow_timeout = flow_timeout;
+ lb_put_writer_lock();
+ return 0;
+}
+
+static
+int lb_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ ip46_prefix_normalize(prefix, plen);
+ pool_foreach(vip, lbm->vips, {
+ if ((vip->flags & LB_AS_FLAGS_USED) &&
+ vip->plen == plen &&
+ vip->prefix.as_u64[0] == prefix->as_u64[0] &&
+ vip->prefix.as_u64[1] == prefix->as_u64[1]) {
+ *vip_index = vip - lbm->vips;
+ return 0;
+ }
+ });
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+}
+
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
+{
+ int ret;
+ lb_get_writer_lock();
+ ret = lb_vip_find_index_with_lock(prefix, plen, vip_index);
+ lb_put_writer_lock();
+ return ret;
+}
+
+static int lb_as_find_index_vip(lb_vip_t *vip, ip46_address_t *address, u32 *as_index)
+{
+ lb_main_t *lbm = &lb_main;
+ ASSERT (lbm->writer_lock[0]); //This must be called with the lock owned
+ lb_as_t *as;
+ u32 *asi;
+ pool_foreach(asi, vip->as_indexes, {
+ as = &lbm->ass[*asi];
+ if (as->vip_index == (vip - lbm->vips) &&
+ as->address.as_u64[0] == address->as_u64[0] &&
+ as->address.as_u64[1] == address->as_u64[1]) {
+ *as_index = as - lbm->ass;
+ return 0;
+ }
+ });
+ return -1;
+}
+
+int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_get_writer_lock();
+ lb_vip_t *vip;
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ ip46_type_t type = lb_vip_is_gre4(vip)?IP46_TYPE_IP4:IP46_TYPE_IP6;
+ u32 *to_be_added = 0;
+ u32 *to_be_updated = 0;
+ u32 i;
+ u32 *ip;
+
+ //Sanity check
+ while (n--) {
+
+ if (!lb_as_find_index_vip(vip, &addresses[n], &i)) {
+ if (lbm->ass[i].flags & LB_AS_FLAGS_USED) {
+ vec_free(to_be_added);
+ vec_free(to_be_updated);
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ vec_add1(to_be_updated, i);
+ goto next;
+ }
+
+ if (ip46_address_type(&addresses[n]) != type) {
+ vec_free(to_be_added);
+ vec_free(to_be_updated);
+ lb_put_writer_lock();
+ return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+ }
+
+ if (n) {
+ u32 n2 = n;
+ while(n2--) //Check for duplicates
+ if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
+ addresses[n2].as_u64[1] == addresses[n].as_u64[1])
+ goto next;
+ }
+
+ vec_add1(to_be_added, n);
+
+next:
+ continue;
+ }
+
+ //Update reused ASs
+ vec_foreach(ip, to_be_updated) {
+ lbm->ass[*ip].flags = LB_AS_FLAGS_USED;
+ }
+ vec_free(to_be_updated);
+
+ //Create those who have to be created
+ vec_foreach(ip, to_be_added) {
+ lb_as_t *as;
+ u32 *as_index;
+ pool_get(lbm->ass, as);
+ as->address = addresses[*ip];
+ as->flags = LB_AS_FLAGS_USED;
+ as->vip_index = vip_index;
+ pool_get(vip->as_indexes, as_index);
+ *as_index = as - lbm->ass;
+
+ /*
+ * become a child of the FIB entry
+ * so we are informed when its forwarding changes
+ */
+ fib_prefix_t nh = {};
+ if (lb_vip_is_gre4(vip)) {
+ nh.fp_addr.ip4 = as->address.ip4;
+ nh.fp_len = 32;
+ nh.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ nh.fp_addr.ip6 = as->address.ip6;
+ nh.fp_len = 128;
+ nh.fp_proto = FIB_PROTOCOL_IP6;
+ }
+
+ as->next_hop_fib_entry_index =
+ fib_table_entry_special_add(0,
+ &nh,
+ FIB_SOURCE_RR,
+ FIB_ENTRY_FLAG_NONE,
+ ADJ_INDEX_INVALID);
+ as->next_hop_child_index =
+ fib_entry_child_add(as->next_hop_fib_entry_index,
+ lbm->fib_node_type,
+ as - lbm->ass);
+
+ lb_as_stack(as);
+ }
+ vec_free(to_be_added);
+
+ //Recompute flows
+ lb_vip_update_new_flow_table(vip);
+
+ //Garbage collection maybe
+ lb_vip_garbage_collection(vip);
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+int lb_vip_del_ass_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_main_t *lbm = &lb_main;
+ u32 now = (u32) vlib_time_now(vlib_get_main());
+ u32 *ip = 0;
+
+ lb_vip_t *vip;
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ u32 *indexes = NULL;
+ while (n--) {
+ u32 i;
+ if (lb_as_find_index_vip(vip, &addresses[n], &i)) {
+ vec_free(indexes);
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ if (n) { //Check for duplicates
+ u32 n2 = n - 1;
+ while(n2--) {
+ if (addresses[n2].as_u64[0] == addresses[n].as_u64[0] &&
+ addresses[n2].as_u64[1] == addresses[n].as_u64[1])
+ goto next;
+ }
+ }
+
+ vec_add1(indexes, i);
+next:
+ continue;
+ }
+
+ //Garbage collection maybe
+ lb_vip_garbage_collection(vip);
+
+ if (indexes != NULL) {
+ vec_foreach(ip, indexes) {
+ lbm->ass[*ip].flags &= ~LB_AS_FLAGS_USED;
+ lbm->ass[*ip].last_used = now;
+ }
+
+ //Recompute flows
+ lb_vip_update_new_flow_table(vip);
+ }
+
+ vec_free(indexes);
+ return 0;
+}
+
+int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
+{
+ lb_get_writer_lock();
+ int ret = lb_vip_del_ass_withlock(vip_index, addresses, n);
+ lb_put_writer_lock();
+ return ret;
+}
+
+/**
+ * Add the VIP adjacency to the ip4 or ip6 fib
+ */
+static void lb_vip_add_adjacency(lb_main_t *lbm, lb_vip_t *vip)
+{
+ dpo_proto_t proto = 0;
+ dpo_id_t dpo = DPO_INVALID;
+ fib_prefix_t pfx = {};
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96;
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ proto = DPO_PROTO_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ proto = DPO_PROTO_IP6;
+ }
+ dpo_set(&dpo, lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type,
+ proto, vip - lbm->vips);
+ fib_table_entry_special_dpo_add(0,
+ &pfx,
+ FIB_SOURCE_PLUGIN_HI,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &dpo);
+ dpo_reset(&dpo);
+}
+
+/**
+ * Deletes the adjacency associated with the VIP
+ */
+static void lb_vip_del_adjacency(lb_main_t *lbm, lb_vip_t *vip)
+{
+ fib_prefix_t pfx = {};
+ if (lb_vip_is_ip4(vip)) {
+ pfx.fp_addr.ip4 = vip->prefix.ip4;
+ pfx.fp_len = vip->plen - 96;
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ } else {
+ pfx.fp_addr.ip6 = vip->prefix.ip6;
+ pfx.fp_len = vip->plen;
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ }
+ fib_table_entry_special_remove(0, &pfx, FIB_SOURCE_PLUGIN_HI);
+}
+
+int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u32 new_length, u32 *vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ lb_get_writer_lock();
+ ip46_prefix_normalize(prefix, plen);
+
+ if (!lb_vip_find_index_with_lock(prefix, plen, vip_index)) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ if (!is_pow2(new_length)) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_INVALID_MEMORY_SIZE;
+ }
+
+ if (ip46_prefix_is_ip4(prefix, plen) &&
+ (type != LB_VIP_TYPE_IP4_GRE4) &&
+ (type != LB_VIP_TYPE_IP4_GRE6))
+ return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+
+
+ //Allocate
+ pool_get(lbm->vips, vip);
+
+ //Init
+ vip->prefix = *prefix;
+ vip->plen = plen;
+ vip->last_garbage_collection = (u32) vlib_time_now(vlib_get_main());
+ vip->type = type;
+ vip->flags = LB_VIP_FLAGS_USED;
+ vip->as_indexes = 0;
+
+ //Validate counters
+ u32 i;
+ for (i = 0; i < LB_N_VIP_COUNTERS; i++) {
+ vlib_validate_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
+ vlib_zero_simple_counter(&lbm->vip_counters[i], vip - lbm->vips);
+ }
+
+ //Configure new flow table
+ vip->new_flow_table_mask = new_length - 1;
+ vip->new_flow_table = 0;
+
+ //Create a new flow hash table full of the default entry
+ lb_vip_update_new_flow_table(vip);
+
+ //Create adjacency to direct traffic
+ lb_vip_add_adjacency(lbm, vip);
+
+ //Return result
+ *vip_index = vip - lbm->vips;
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+int lb_vip_del(u32 vip_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip;
+ lb_get_writer_lock();
+ if (!(vip = lb_vip_get_by_index(vip_index))) {
+ lb_put_writer_lock();
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ //FIXME: This operation is actually not working
+ //We will need to remove state before performing this.
+
+ {
+ //Remove all ASs
+ ip46_address_t *ass = 0;
+ lb_as_t *as;
+ u32 *as_index;
+ pool_foreach(as_index, vip->as_indexes, {
+ as = &lbm->ass[*as_index];
+ vec_add1(ass, as->address);
+ });
+ if (vec_len(ass))
+ lb_vip_del_ass_withlock(vip_index, ass, vec_len(ass));
+ vec_free(ass);
+ }
+
+ //Delete adjacency
+ lb_vip_del_adjacency(lbm, vip);
+
+ //Set the VIP as unused
+ vip->flags &= ~LB_VIP_FLAGS_USED;
+
+ lb_put_writer_lock();
+ return 0;
+}
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm,
+ vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ clib_error_t *error = 0;
+ return error;
+}
+
+
+u8 *format_lb_dpo (u8 * s, va_list * va)
+{
+ index_t index = va_arg (*va, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*va, u32);
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = pool_elt_at_index (lbm->vips, index);
+ return format (s, "%U", format_lb_vip, vip);
+}
+
+static void lb_dpo_lock (dpo_id_t *dpo) {}
+static void lb_dpo_unlock (dpo_id_t *dpo) {}
+
+static fib_node_t *
+lb_fib_node_get_node (fib_node_index_t index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_as_t *as = pool_elt_at_index (lbm->ass, index);
+ return (&as->fib_node);
+}
+
+static void
+lb_fib_node_last_lock_gone (fib_node_t *node)
+{
+}
+
+static lb_as_t *
+lb_as_from_fib_node (fib_node_t *node)
+{
+ return ((lb_as_t*)(((char*)node) -
+ STRUCT_OFFSET_OF(lb_as_t, fib_node)));
+}
+
+static void
+lb_as_stack (lb_as_t *as)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_vip_t *vip = &lbm->vips[as->vip_index];
+ dpo_stack(lb_vip_is_gre4(vip)?lbm->dpo_gre4_type:lbm->dpo_gre6_type,
+ lb_vip_is_ip4(vip)?DPO_PROTO_IP4:DPO_PROTO_IP6,
+ &as->dpo,
+ fib_entry_contribute_ip_forwarding(
+ as->next_hop_fib_entry_index));
+}
+
+static fib_node_back_walk_rc_t
+lb_fib_node_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ lb_as_stack(lb_as_from_fib_node(node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+clib_error_t *
+lb_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ lb_main_t *lbm = &lb_main;
+ lb_as_t *default_as;
+ fib_node_vft_t lb_fib_node_vft = {
+ .fnv_get = lb_fib_node_get_node,
+ .fnv_last_lock = lb_fib_node_last_lock_gone,
+ .fnv_back_walk = lb_fib_node_back_walk_notify,
+ };
+ dpo_vft_t lb_vft = {
+ .dv_lock = lb_dpo_lock,
+ .dv_unlock = lb_dpo_unlock,
+ .dv_format = format_lb_dpo,
+ };
+
+ lbm->vips = 0;
+ lbm->per_cpu = 0;
+ vec_validate(lbm->per_cpu, tm->n_vlib_mains - 1);
+ lbm->writer_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ lbm->writer_lock[0] = 0;
+ lbm->per_cpu_sticky_buckets = LB_DEFAULT_PER_CPU_STICKY_BUCKETS;
+ lbm->flow_timeout = LB_DEFAULT_FLOW_TIMEOUT;
+ lbm->ip4_src_address.as_u32 = 0xffffffff;
+ lbm->ip6_src_address.as_u64[0] = 0xffffffffffffffffL;
+ lbm->ip6_src_address.as_u64[1] = 0xffffffffffffffffL;
+ lbm->dpo_gre4_type = dpo_register_new_type(&lb_vft, lb_dpo_gre4_nodes);
+ lbm->dpo_gre6_type = dpo_register_new_type(&lb_vft, lb_dpo_gre6_nodes);
+ lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
+
+ //Init AS reference counters
+ vlib_refcount_init(&lbm->as_refcount);
+
+ //Allocate and init default AS.
+ lbm->ass = 0;
+ pool_get(lbm->ass, default_as);
+ default_as->flags = 0;
+ default_as->dpo.dpoi_next_node = LB_NEXT_DROP;
+ default_as->vip_index = ~0;
+ default_as->address.ip6.as_u64[0] = 0xffffffffffffffffL;
+ default_as->address.ip6.as_u64[1] = 0xffffffffffffffffL;
+
+#define _(a,b,c) lbm->vip_counters[c].name = b;
+ lb_foreach_vip_counter
+#undef _
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lb_init);
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
new file mode 100644
index 00000000000..882b9b30f7e
--- /dev/null
+++ b/src/plugins/lb/lb.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * lb-plugin implements a MagLev-like load balancer.
+ * http://research.google.com/pubs/pub44824.html
+ *
+ * It hasn't been tested for interoperability with the original MagLev
+ * but intends to provide similar functionality.
+ * The load-balancer receives traffic destined to VIP (Virtual IP)
+ * addresses from one or multiple(ECMP) routers.
+ * The load-balancer tunnels the traffic toward many application servers
+ * ensuring session stickyness (i.e. that a single sessions is tunneled
+ * towards a single application server).
+ *
+ */
+
+#ifndef LB_PLUGIN_LB_LB_H_
+#define LB_PLUGIN_LB_LB_H_
+
+#include <lb/util.h>
+#include <lb/refcount.h>
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/fib/fib_table.h>
+
+#include <lb/lbhash.h>
+
+#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS 1 << 10
+#define LB_DEFAULT_FLOW_TIMEOUT 40
+
+typedef enum {
+ LB_NEXT_DROP,
+ LB_N_NEXT,
+} lb_next_t;
+
+/**
+ * Each VIP is configured with a set of
+ * application server.
+ */
+typedef struct {
+ /**
+ * Registration to FIB event.
+ */
+ fib_node_t fib_node;
+
+ /**
+ * Destination address used to tunnel traffic towards
+ * that application server.
+ * The address is also used as ID and pseudo-random
+ * seed for the load-balancing process.
+ */
+ ip46_address_t address;
+
+ /**
+ * ASs are indexed by address and VIP Index.
+ * Which means there will be duplicated if the same server
+ * address is used for multiple VIPs.
+ */
+ u32 vip_index;
+
+ /**
+ * Some per-AS flags.
+ * For now only LB_AS_FLAGS_USED is defined.
+ */
+ u8 flags;
+
+#define LB_AS_FLAGS_USED 0x1
+
+ /**
+ * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
+ *
+ * AS removal is based on garbage collection and reference counting.
+ * When an AS is removed, there is a race between configuration core
+ * and worker cores which may still add a reference while it should not
+ * be used. This timestamp is used to not remove the AS while a race condition
+ * may happen.
+ */
+ u32 last_used;
+
+ /**
+ * The FIB entry index for the next-hop
+ */
+ fib_node_index_t next_hop_fib_entry_index;
+
+ /**
+ * The child index on the FIB entry
+ */
+ u32 next_hop_child_index;
+
+ /**
+ * The next DPO in the graph to follow.
+ */
+ dpo_id_t dpo;
+
+} lb_as_t;
+
+format_function_t format_lb_as;
+
+typedef struct {
+ u32 as_index;
+} lb_new_flow_entry_t;
+
+#define lb_foreach_vip_counter \
+ _(NEXT_PACKET, "packet from existing sessions", 0) \
+ _(FIRST_PACKET, "first session packet", 1) \
+ _(UNTRACKED_PACKET, "untracked packet", 2) \
+ _(NO_SERVER, "no server configured", 3)
+
+typedef enum {
+#define _(a,b,c) LB_VIP_COUNTER_##a = c,
+ lb_foreach_vip_counter
+#undef _
+ LB_N_VIP_COUNTERS
+} lb_vip_counter_t;
+
+/**
+ * The load balancer supports IPv4 and IPv6 traffic
+ * and GRE4 and GRE6 encap.
+ */
+typedef enum {
+ LB_VIP_TYPE_IP6_GRE6,
+ LB_VIP_TYPE_IP6_GRE4,
+ LB_VIP_TYPE_IP4_GRE6,
+ LB_VIP_TYPE_IP4_GRE4,
+ LB_VIP_N_TYPES,
+} lb_vip_type_t;
+
+format_function_t format_lb_vip_type;
+unformat_function_t unformat_lb_vip_type;
+
+/**
+ * Load balancing service is provided per VIP.
+ * In this data model, a VIP can be a whole prefix.
+ * But load balancing only
+ * occurs on a per-source-address/port basis. Meaning that if a given source
+ * reuses the same port for multiple destinations within the same VIP,
+ * they will be considered as a single flow.
+ */
+typedef struct {
+
+ //Runtime
+
+ /**
+ * Vector mapping (flow-hash & new_connect_table_mask) to AS index.
+ * This is used for new flows.
+ */
+ lb_new_flow_entry_t *new_flow_table;
+
+ /**
+ * New flows table length - 1
+ * (length MUST be a power of 2)
+ */
+ u32 new_flow_table_mask;
+
+ /**
+ * Last time garbage collection was run to free the ASs.
+ */
+ u32 last_garbage_collection;
+
+ //Not runtime
+
+ /**
+ * A Virtual IP represents a given service delivered
+ * by a set of application servers. It can be a single
+ * address or a prefix.
+ * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address
+ * (i.e. ::/96 prefix).
+ */
+ ip46_address_t prefix;
+
+ /**
+ * The VIP prefix length.
+ * In case of IPv4, plen = 96 + ip4_plen.
+ */
+ u8 plen;
+
+ /**
+ * The type of traffic for this.
+ * LB_TYPE_UNDEFINED if unknown.
+ */
+ lb_vip_type_t type;
+
+ /**
+ * Flags related to this VIP.
+ * LB_VIP_FLAGS_USED means the VIP is active.
+ * When it is not set, the VIP in the process of being removed.
+ * We cannot immediately remove a VIP because the VIP index still may be stored
+ * in the adjacency index.
+ */
+ u8 flags;
+#define LB_VIP_FLAGS_USED 0x1
+
+ /**
+ * Pool of AS indexes used for this VIP.
+ * This also includes ASs that have been removed (but are still referenced).
+ */
+ u32 *as_indexes;
+} lb_vip_t;
+
+#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
+#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)
+format_function_t format_lb_vip;
+format_function_t format_lb_vip_detailed;
+
+typedef struct {
+ /**
+ * Each CPU has its own sticky flow hash table.
+ * One single table is used for all VIPs.
+ */
+ lb_hash_t *sticky_ht;
+} lb_per_cpu_t;
+
+typedef struct {
+ /**
+ * Pool of all Virtual IPs
+ */
+ lb_vip_t *vips;
+
+ /**
+ * Pool of ASs.
+ * ASs are referenced by address and vip index.
+ * The first element (index 0) is special and used only to fill
+ * new_flow_tables when no AS has been configured.
+ */
+ lb_as_t *ass;
+
+ /**
+ * Each AS has an associated reference counter.
+ * As ass[0] has a special meaning, its associated counter
+ * starts at 0 and is decremented instead. i.e. do not use it.
+ */
+ vlib_refcount_t as_refcount;
+
+ /**
+ * Some global data is per-cpu
+ */
+ lb_per_cpu_t *per_cpu;
+
+ /**
+ * Node next index for IP adjacencies, for each of the traffic types.
+ */
+ u32 ip_lookup_next_index[LB_VIP_N_TYPES];
+
+ /**
+ * Source address used in IPv6 encapsulated traffic
+ */
+ ip6_address_t ip6_src_address;
+
+ /**
+ * Source address used for IPv4 encapsulated traffic
+ */
+ ip4_address_t ip4_src_address;
+
+ /**
+ * Number of buckets in the per-cpu sticky hash table.
+ */
+ u32 per_cpu_sticky_buckets;
+
+ /**
+ * Flow timeout in seconds.
+ */
+ u32 flow_timeout;
+
+ /**
+ * Per VIP counter
+ */
+ vlib_simple_counter_main_t vip_counters[LB_N_VIP_COUNTERS];
+
+ /**
+ * DPO used to send packet from IP4/6 lookup to LB node.
+ */
+ dpo_type_t dpo_gre4_type;
+ dpo_type_t dpo_gre6_type;
+
+ /**
+ * Node type for registering to fib changes.
+ */
+ fib_node_type_t fib_node_type;
+
+ /**
+ * API dynamically registered base ID.
+ */
+ u16 msg_id_base;
+
+ volatile u32 *writer_lock;
+} lb_main_t;
+
+extern lb_main_t lb_main;
+extern vlib_node_registration_t lb6_node;
+extern vlib_node_registration_t lb4_node;
+
+/**
+ * Fix global load-balancer parameters.
+ * @param ip4_address IPv4 source address used for encapsulated traffic
+ * @param ip6_address IPv6 source address used for encapsulated traffic
+ * @return 0 on success. VNET_LB_ERR_XXX on error
+ */
+int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
+ u32 sticky_buckets, u32 flow_timeout);
+
+int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type,
+ u32 new_length, u32 *vip_index);
+int lb_vip_del(u32 vip_index);
+
+int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
+
+#define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index))
+
+int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
+int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
+
+u32 lb_hash_time_now(vlib_main_t * vm);
+
+void lb_garbage_collection();
+
+format_function_t format_lb_main;
+
+#endif /* LB_PLUGIN_LB_LB_H_ */
diff --git a/src/plugins/lb/lb_plugin_doc.md b/src/plugins/lb/lb_plugin_doc.md
new file mode 100644
index 00000000000..c7885ffb837
--- /dev/null
+++ b/src/plugins/lb/lb_plugin_doc.md
@@ -0,0 +1,141 @@
+# Load Balancer plugin for VPP {#lb_plugin_doc}
+
+## Version
+
+The load balancer plugin is currently in *beta* version.
+Both CLIs and APIs are subject to *heavy* changes.
+Wich also means feedback is really welcome regarding features, apis, etc...
+
+## Overview
+
+This plugin provides load balancing for VPP in a way that is largely inspired
+from Google's MagLev: http://research.google.com/pubs/pub44824.html
+
+The load balancer is configured with a set of Virtual IPs (VIP, which can be
+prefixes), and for each VIP, with a set of Application Server addresses (ASs).
+
+Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards
+the different ASs in a way that (tries to) ensure that a given session will
+always be tunneled to the same AS.
+
+Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using
+the same encap. type (i.e. IPv4+GRE or IPv6+GRE). Meaning that for a given VIP,
+all AS addresses must be of the same family.
+
+## Performances
+
+The load balancer has been tested up to 1 millions flows and still forwards more
+than 3Mpps per core in such circumstances.
+Although 3Mpps seems already good, it is likely that performances will be improved
+in next versions.
+
+## Configuration
+
+### Global LB parameters
+
+The load balancer needs to be configured with some parameters:
+
+ lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
+ [buckets <n>] [timeout <s>]
+
+ip4-src-address: the source address used to send encap. packets using IPv4.
+
+ip6-src-address: the source address used to send encap. packets using IPv6.
+
+buckets: the *per-thread* established-connexions-table number of buckets.
+
+timeout: the number of seconds a connection will remain in the
+ established-connexions-table while no packet for this flow
+ is received.
+
+
+### Configure the VIPs
+
+ lb vip <prefix> [encap (gre6|gre4)] [new_len <n>] [del]
+
+new_len is the size of the new-connection-table. It should be 1 or 2 orders of
+magnitude bigger than the number of ASs for the VIP in order to ensure a good
+load balancing.
+
+Examples:
+
+ lb vip 2002::/16 encap gre6 new_len 1024
+ lb vip 2003::/16 encap gre4 new_len 2048
+ lb vip 80.0.0.0/8 encap gre6 new_len 16
+ lb vip 90.0.0.0/8 encap gre4 new_len 1024
+
+### Configure the ASs (for each VIP)
+
+ lb as <vip-prefix> [<address> [<address> [...]]] [del]
+
+You can add (or delete) as many ASs at a time (for a single VIP).
+Note that the AS address family must correspond to the VIP encap. IP family.
+
+Examples:
+
+ lb as 2002::/16 2001::2 2001::3 2001::4
+ lb as 2003::/16 10.0.0.1 10.0.0.2
+ lb as 80.0.0.0/8 2001::2
+ lb as 90.0.0.0/8 10.0.0.1
+
+
+
+## Monitoring
+
+The plugin provides quite a bunch of counters and information.
+These are still subject to quite significant changes.
+
+ show lb
+ show lb vip
+ show lb vip verbose
+
+ show node counters
+
+
+## Design notes
+
+### Multi-Threading
+
+MagLev is a distributed system which pseudo-randomly generates a
+new-connections-table based on AS names such that each server configured with
+the same set of ASs ends up with the same table. Connection stickyness is then
+ensured with an established-connections-table. Using ECMP, it is assumed (but
+not relied on) that servers will mostly receive traffic for different flows.
+
+This implementation pushes the parallelism a little bit further by using
+one established-connections table per thread. This is equivalent to assuming
+that RSS will make a job similar to ECMP, and is pretty useful as threads don't
+need to get a lock in order to write in the table.
+
+### Hash Table
+
+A load balancer requires an efficient read and write hash table. The hash table
+used by ip6-forward is very read-efficient, but not so much for writing. In
+addition, it is not a big deal if writing into the hash table fails (again,
+MagLev uses a flow table but does not heaviliy relies on it).
+
+The plugin therefore uses a very specific (and stupid) hash table.
+ - Fixed (and power of 2) number of buckets (configured at runtime)
+ - Fixed (and power of 2) elements per buckets (configured at compilation time)
+
+### Reference counting
+
+When an AS is removed, there is two possible ways to react.
+ - Keep using the AS for established connections
+ - Change AS for established connections (likely to cause error for TCP)
+
+In the first case, although an AS is removed from the configuration, its
+associated state needs to stay around as long as it is used by at least one
+thread.
+
+In order to avoid locks, a specific reference counter is used. The design is quite
+similar to clib counters but:
+ - It is possible to decrease the value
+ - Summing will not zero the per-thread counters
+ - Only the thread can reallocate its own counters vector (to avoid concurrency issues)
+
+This reference counter is lock free, but reading a count of 0 does not mean
+the value can be freed unless it is ensured by *other* means that no other thread
+is concurrently referencing the object. In the case of this plugin, it is assumed
+that no concurrent event will take place after a few seconds.
+
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
new file mode 100644
index 00000000000..8c2eaa91ce9
--- /dev/null
+++ b/src/plugins/lb/lb_test.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <lb/lb.h>
+
+//TODO: Move that to vat/plugin_api.c
+//////////////////////////
+uword unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ if ((type != IP46_TYPE_IP6) &&
+ unformat(input, "%U", unformat_ip4_address, &ip46->ip4)) {
+ ip46_address_mask_ip4(ip46);
+ return 1;
+ } else if ((type != IP46_TYPE_IP4) &&
+ unformat(input, "%U", unformat_ip6_address, &ip46->ip6)) {
+ return 1;
+ }
+ return 0;
+}
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
+/////////////////////////
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <lb/lb.api.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+/* define message structures */
+#define vl_typedefs
+#include <lb/lb.api.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <lb/lb.api.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <lb/lb.api.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <lb/lb.api.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} lb_test_main_t;
+
+lb_test_main_t lb_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(lb_conf_reply) \
+_(lb_add_del_vip_reply) \
+_(lb_add_del_as_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = lb_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+ _(LB_CONF_REPLY, lb_conf_reply) \
+ _(LB_ADD_DEL_VIP_REPLY, lb_add_del_vip_reply) \
+ _(LB_ADD_DEL_AS_REPLY, lb_add_del_as_reply)
+
+/* M: construct, but don't yet send a message */
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memcpy (mp, &mps, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + lbtm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int api_lb_conf (vat_main_t * vam)
+{
+ lb_test_main_t *lbtm = &lb_test_main;
+ unformat_input_t *i = vam->input;
+ f64 timeout;
+ vl_api_lb_conf_t mps, *mp;
+
+ if (!unformat(i, "%U %U %u %u",
+ unformat_ip4_address, &mps.ip4_src_address,
+ unformat_ip6_address, mps.ip6_src_address,
+ &mps.sticky_buckets_per_core,
+ &mps.flow_timeout)) {
+ errmsg ("invalid arguments\n");
+ return -99;
+ }
+
+ M(LB_CONF, lb_conf); S; W;
+
+ /* NOTREACHED */
+ return 0;
+}
+
+static int api_lb_add_del_vip (vat_main_t * vam)
+{
+ lb_test_main_t *lbtm = &lb_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_lb_add_del_vip_t mps, *mp;
+ mps.is_del = 0;
+ mps.is_gre4 = 0;
+
+ if (!unformat(i, "%U",
+ unformat_ip46_prefix, mps.ip_prefix, &mps.prefix_length, IP46_TYPE_ANY)) {
+ errmsg ("invalid prefix\n");
+ return -99;
+ }
+
+ if (unformat(i, "gre4")) {
+ mps.is_gre4 = 1;
+ } else if (unformat(i, "gre6")) {
+ mps.is_gre4 = 0;
+ } else {
+ errmsg ("no encap\n");
+ return -99;
+ }
+
+ if (!unformat(i, "%d", &mps.new_flows_table_length)) {
+ errmsg ("no table lentgh\n");
+ return -99;
+ }
+
+ if (unformat(i, "del")) {
+ mps.is_del = 1;
+ }
+
+ M(LB_ADD_DEL_VIP, lb_add_del_vip); S; W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static int api_lb_add_del_as (vat_main_t * vam)
+{
+ lb_test_main_t *lbtm = &lb_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_lb_add_del_as_t mps, *mp;
+ mps.is_del = 0;
+
+ if (!unformat(i, "%U %U",
+ unformat_ip46_prefix, mps.vip_ip_prefix, &mps.vip_prefix_length, IP46_TYPE_ANY,
+ unformat_ip46_address, mps.as_address)) {
+ errmsg ("invalid prefix or address\n");
+ return -99;
+ }
+
+ if (unformat(i, "del")) {
+ mps.is_del = 1;
+ }
+
+ M(LB_ADD_DEL_AS, lb_add_del_as); S; W;
+ /* NOTREACHED */
+ return 0;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(lb_conf, "<ip4-src-addr> <ip6-src-address> <sticky_buckets_per_core> <flow_timeout>") \
+_(lb_add_del_vip, "<ip-prefix> [gre4|gre6] <new_table_len> [del]") \
+_(lb_add_del_as, "<vip-ip-prefix> <address> [del]")
+
+void vat_api_hookup (vat_main_t *vam)
+{
+ lb_test_main_t * lbtm = &lb_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + lbtm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ lb_test_main_t * lbtm = &lb_test_main;
+
+ u8 * name;
+
+ lbtm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "lb_%08x%c", api_version, 0);
+ lbtm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (lbtm->msg_id_base != (u16) ~0)
+ vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}
diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h
new file mode 100644
index 00000000000..ca3cc143dc2
--- /dev/null
+++ b/src/plugins/lb/lbhash.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * vppinfra already includes tons of different hash tables.
+ * MagLev flow table is a bit different. It has to be very efficient
+ * for both writing and reading operations. But it does not need to
+ * be 100% reliable (write can fail). It also needs to recycle
+ * old entries in a lazy way.
+ *
+ * This hash table is the most dummy hash table you can do.
+ * Fixed total size, fixed bucket size.
+ * Advantage is that it could be very efficient (maybe).
+ *
+ */
+
+#ifndef LB_PLUGIN_LB_LBHASH_H_
+#define LB_PLUGIN_LB_LBHASH_H_
+
+#include <vnet/vnet.h>
+
+#if defined (__SSE4_2__)
+#include <immintrin.h>
+#endif
+
+/*
+ * @brief Number of entries per bucket.
+ */
+#define LBHASH_ENTRY_PER_BUCKET 4
+
+#define LB_HASH_DO_NOT_USE_SSE_BUCKETS 0
+
+/*
+ * @brief One bucket contains 4 entries.
+ * Each bucket takes one 64B cache line in memory.
+ */
+typedef struct {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 hash[LBHASH_ENTRY_PER_BUCKET];
+ u32 timeout[LBHASH_ENTRY_PER_BUCKET];
+ u32 vip[LBHASH_ENTRY_PER_BUCKET];
+ u32 value[LBHASH_ENTRY_PER_BUCKET];
+} lb_hash_bucket_t;
+
+typedef struct {
+ u32 buckets_mask;
+ u32 timeout;
+ lb_hash_bucket_t buckets[];
+} lb_hash_t;
+
+#define lb_hash_nbuckets(h) (((h)->buckets_mask) + 1)
+#define lb_hash_size(h) ((h)->buckets_mask + LBHASH_ENTRY_PER_BUCKET)
+
+#define lb_hash_foreach_bucket(h, bucket) \
+ for (bucket = (h)->buckets; \
+ bucket < (h)->buckets + lb_hash_nbuckets(h); \
+ bucket++)
+
+#define lb_hash_foreach_entry(h, bucket, i) \
+ lb_hash_foreach_bucket(h, bucket) \
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++)
+
+#define lb_hash_foreach_valid_entry(h, bucket, i, now) \
+ lb_hash_foreach_entry(h, bucket, i) \
+ if (!clib_u32_loop_gt((now), bucket->timeout[i]))
+
+static_always_inline
+lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
+{
+ if (!is_pow2(buckets))
+ return NULL;
+
+ // Allocate 1 more bucket for prefetch
+ u32 size = ((u64)&((lb_hash_t *)(0))->buckets[0]) +
+ sizeof(lb_hash_bucket_t) * (buckets + 1);
+ u8 *mem = 0;
+ lb_hash_t *h;
+ vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
+ h = (lb_hash_t *)mem;
+ h->buckets_mask = (buckets - 1);
+ h->timeout = timeout;
+ return h;
+}
+
+static_always_inline
+void lb_hash_free(lb_hash_t *h)
+{
+ u8 *mem = (u8 *)h;
+ vec_free(mem);
+}
+
+#if __SSE4_2__
+static_always_inline
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
+{
+ u64 val = 0;
+ val = _mm_crc32_u64(val, k0);
+ val = _mm_crc32_u64(val, k1);
+ val = _mm_crc32_u64(val, k2);
+ val = _mm_crc32_u64(val, k3);
+ val = _mm_crc32_u64(val, k4);
+ return (u32) val;
+}
+#else
+static_always_inline
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
+{
+ u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4;
+ return (u32)clib_xxhash (tmp);
+}
+#endif
+
+static_always_inline
+void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash)
+{
+ lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
+ CLIB_PREFETCH(bucket, sizeof(*bucket), READ);
+}
+
+static_always_inline
+void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now,
+ u32 *available_index, u32 *found_value)
+{
+ lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
+ *found_value = ~0;
+ *available_index = ~0;
+#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0
+ u32 bitmask, found_index;
+ __m128i mask;
+
+ // mask[*] = timeout[*] > now
+ mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout),
+ _mm_set1_epi32 (time_now));
+ // bitmask[*] = now <= timeout[*/4]
+ bitmask = (~_mm_movemask_epi8(mask)) & 0xffff;
+ // Get first index with now <= timeout[*], if any.
+ *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index;
+
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->hash),
+ _mm_set1_epi32 (hash)));
+
+ // Load the array of vip values
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->vip),
+ _mm_set1_epi32 (vip)));
+
+ // mask[*] = (timeout[*x4] > now) && (hash[*x4] == hash) && (vip[*x4] == vip)
+ bitmask = _mm_movemask_epi8(mask);
+ // Get first index, if any
+ found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0;
+ ASSERT(found_index < 4);
+ *found_value = (bitmask)?bucket->value[found_index]:*found_value;
+ bucket->timeout[found_index] =
+ (bitmask)?time_now + ht->timeout:bucket->timeout[found_index];
+#else
+ u32 i;
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) {
+ u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip);
+ u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]);
+ *found_value = (cmp || timeouted)?*found_value:bucket->value[i];
+ bucket->timeout[i] = (cmp || timeouted)?time_now + ht->timeout:bucket->timeout[i];
+ *available_index = (timeouted && (*available_index == ~0))?i:*available_index;
+
+ if (!cmp)
+ return;
+ }
+#endif
+}
+
+static_always_inline
+u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index)
+{
+ return h->buckets[hash & h->buckets_mask].value[available_index];
+}
+
+static_always_inline
+void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip,
+ u32 available_index, u32 time_now)
+{
+ lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask];
+ bucket->hash[available_index] = hash;
+ bucket->value[available_index] = value;
+ bucket->timeout[available_index] = time_now + h->timeout;
+ bucket->vip[available_index] = vip;
+}
+
+static_always_inline
+u32 lb_hash_elts(lb_hash_t *h, u32 time_now)
+{
+ u32 tot = 0;
+ lb_hash_bucket_t *bucket;
+ u32 i;
+ lb_hash_foreach_valid_entry(h, bucket, i, time_now) {
+ tot++;
+ }
+ return tot;
+}
+
+#endif /* LB_PLUGIN_LB_LBHASH_H_ */
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
new file mode 100644
index 00000000000..8b763c537d5
--- /dev/null
+++ b/src/plugins/lb/node.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/lb.h>
+
+#include <vnet/gre/packet.h>
+#include <lb/lbhash.h>
+
+#define foreach_lb_error \
+ _(NONE, "no error") \
+ _(PROTO_NOT_SUPPORTED, "protocol not supported")
+
+typedef enum {
+#define _(sym,str) LB_ERROR_##sym,
+ foreach_lb_error
+#undef _
+ LB_N_ERROR,
+} lb_error_t;
+
+static char *lb_error_strings[] = {
+#define _(sym,string) string,
+ foreach_lb_error
+#undef _
+};
+
+typedef struct {
+ u32 vip_index;
+ u32 as_index;
+} lb_trace_t;
+
+u8 *
+format_lb_trace (u8 * s, va_list * args)
+{
+ lb_main_t *lbm = &lb_main;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ lb_trace_t *t = va_arg (*args, lb_trace_t *);
+ if (pool_is_free_index(lbm->vips, t->vip_index)) {
+ s = format(s, "lb vip[%d]: This VIP was freed since capture\n");
+ } else {
+ s = format(s, "lb vip[%d]: %U\n", t->vip_index, format_lb_vip, &lbm->vips[t->vip_index]);
+ }
+ if (pool_is_free_index(lbm->ass, t->as_index)) {
+ s = format(s, "lb as[%d]: This AS was freed since capture\n");
+ } else {
+ s = format(s, "lb as[%d]: %U\n", t->as_index, format_lb_as, &lbm->ass[t->as_index]);
+ }
+ return s;
+}
+
+lb_hash_t *lb_get_sticky_table(u32 cpu_index)
+{
+ lb_main_t *lbm = &lb_main;
+ lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+ //Check if size changed
+ if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
+ {
+ //Dereference everything in there
+ lb_hash_bucket_t *b;
+ u32 i;
+ lb_hash_foreach_entry(sticky_ht, b, i) {
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1);
+ }
+
+ lb_hash_free(sticky_ht);
+ sticky_ht = NULL;
+ }
+
+ //Create if necessary
+ if (PREDICT_FALSE(sticky_ht == NULL)) {
+ lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
+ sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
+ clib_warning("Regenerated sticky table %p", sticky_ht);
+ }
+
+ ASSERT(sticky_ht);
+
+ //Update timeout
+ sticky_ht->timeout = lbm->flow_timeout;
+ return sticky_ht;
+}
+
+u64
+lb_node_get_other_ports4(ip4_header_t *ip40)
+{
+ return 0;
+}
+
+u64
+lb_node_get_other_ports6(ip6_header_t *ip60)
+{
+ return 0;
+}
+
+static_always_inline u32
+lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4)
+{
+ u32 hash;
+ if (is_input_v4)
+ {
+ ip4_header_t *ip40;
+ u64 ports;
+ ip40 = vlib_buffer_get_current (p);
+ if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP ||
+ ip40->protocol == IP_PROTOCOL_UDP))
+ ports = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 16) |
+ ((u64)((udp_header_t *)(ip40 + 1))->dst_port);
+ else
+ ports = lb_node_get_other_ports4(ip40);
+
+ hash = lb_hash_hash(*((u64 *)&ip40->address_pair), ports,
+ 0, 0, 0);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p);
+ u64 ports;
+ if (PREDICT_TRUE (ip60->protocol == IP_PROTOCOL_TCP ||
+ ip60->protocol == IP_PROTOCOL_UDP))
+ ports = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 16) |
+ ((u64)((udp_header_t *)(ip60 + 1))->dst_port);
+ else
+ ports = lb_node_get_other_ports6(ip60);
+
+ hash = lb_hash_hash(ip60->src_address.as_u64[0],
+ ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0],
+ ip60->dst_address.as_u64[1],
+ ports);
+ }
+ return hash;
+}
+
+static_always_inline uword
+lb_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame,
+ u8 is_input_v4, //Compile-time parameter stating that is input is v4 (or v6)
+ u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
+{
+ lb_main_t *lbm = &lb_main;
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ u32 cpu_index = os_get_cpu_number();
+ u32 lb_time = lb_hash_time_now(vm);
+
+ lb_hash_t *sticky_ht = lb_get_sticky_table(cpu_index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ u32 nexthash0 = 0;
+ if (PREDICT_TRUE(n_left_from > 0))
+ nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ lb_vip_t *vip0;
+ u32 asindex0;
+ u16 len0;
+ u32 available_index0;
+ u8 counter = 0;
+ u32 hash0 = nexthash0;
+
+ if (PREDICT_TRUE(n_left_from > 1))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
+ //Compute next hash and prefetch bucket
+ nexthash0 = lb_node_get_hash(p1, is_input_v4);
+ lb_hash_prefetch_bucket(sticky_ht, nexthash0);
+ //Prefetch for encap, next
+ CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE);
+ }
+
+ if (PREDICT_TRUE(n_left_from > 2))
+ {
+ vlib_buffer_t *p2;
+ p2 = vlib_get_buffer(vm, from[2]);
+ /* prefetch packet header and data */
+ vlib_prefetch_buffer_header(p2, STORE);
+ CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vip0 = pool_elt_at_index (lbm->vips,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
+
+ if (is_input_v4)
+ {
+ ip4_header_t *ip40;
+ ip40 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip40->length);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+ }
+
+ lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ lb_time, &available_index0, &asindex0);
+
+ if (PREDICT_TRUE(asindex0 != ~0))
+ {
+ //Found an existing entry
+ counter = LB_VIP_COUNTER_NEXT_PACKET;
+ }
+ else if (PREDICT_TRUE(available_index0 != ~0))
+ {
+ //There is an available slot for a new flow
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_FIRST_PACKET;
+ counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter;
+
+ //TODO: There are race conditions with as0 and vip0 manipulation.
+ //Configuration may be changed, vectors resized, etc...
+
+ //Dereference previously used
+ vlib_refcount_add(&lbm->as_refcount, cpu_index,
+ lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index,
+ asindex0, 1);
+
+ //Add sticky entry
+ //Note that when there is no AS configured, an entry is configured anyway.
+ //But no configured AS is not something that should happen
+ lb_hash_put(sticky_ht, hash0, asindex0,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ available_index0, lb_time);
+ }
+ else
+ {
+ //Could not store new entry in the table
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_UNTRACKED_PACKET;
+ }
+
+ vlib_increment_simple_counter(&lbm->vip_counters[counter],
+ cpu_index,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ 1);
+
+ //Now let's encap
+ {
+ gre_header_t *gre0;
+ if (is_encap_v4)
+ {
+ ip4_header_t *ip40;
+ vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+ ip40 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip40 + 1);
+ ip40->src_address = lbm->ip4_src_address;
+ ip40->dst_address = lbm->ass[asindex0].address.ip4;
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->ttl = 128;
+ ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+ ip40->protocol = IP_PROTOCOL_GRE;
+ ip40->checksum = ip4_header_checksum (ip40);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip60 + 1);
+ ip60->dst_address = lbm->ass[asindex0].address.ip6;
+ ip60->src_address = lbm->ip6_src_address;
+ ip60->hop_limit = 128;
+ ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+ ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+ ip60->protocol = IP_PROTOCOL_GRE;
+ }
+
+ gre0->flags_and_version = 0;
+ gre0->protocol = (is_input_v4)?
+ clib_host_to_net_u16(0x0800):
+ clib_host_to_net_u16(0x86DD);
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->as_index = asindex0;
+ tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ }
+
+ //Enqueue to next
+ //Note that this is going to error if asindex0 == 0
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ lbm->ass[asindex0].dpo.dpoi_next_node);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static uword
+lb6_gre6_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 0, 0);
+}
+
+static uword
+lb6_gre4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 0, 1);
+}
+
+static uword
+lb4_gre6_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 1, 0);
+}
+
+static uword
+lb4_gre4_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return lb_node_fn(vm, node, frame, 1, 1);
+}
+
+VLIB_REGISTER_NODE (lb6_gre6_node) =
+{
+ .function = lb6_gre6_node_fn,
+ .name = "lb6-gre6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb6_gre4_node) =
+{
+ .function = lb6_gre4_node_fn,
+ .name = "lb6-gre4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb4_gre6_node) =
+{
+ .function = lb4_gre6_node_fn,
+ .name = "lb4-gre6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
+VLIB_REGISTER_NODE (lb4_gre4_node) =
+{
+ .function = lb4_gre4_node_fn,
+ .name = "lb4-gre4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lb_trace,
+
+ .n_errors = LB_N_ERROR,
+ .error_strings = lb_error_strings,
+
+ .n_next_nodes = LB_N_NEXT,
+ .next_nodes =
+ {
+ [LB_NEXT_DROP] = "error-drop"
+ },
+};
+
diff --git a/src/plugins/lb/refcount.c b/src/plugins/lb/refcount.c
new file mode 100644
index 00000000000..22415c8889e
--- /dev/null
+++ b/src/plugins/lb/refcount.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/refcount.h>
+
+void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size)
+{
+ u32 *new_counter = 0, *old_counter;
+ vec_validate(new_counter, size);
+ memcpy(new_counter, per_cpu->counters, per_cpu->length);
+ old_counter = per_cpu->counters;
+ per_cpu->counters = new_counter;
+ CLIB_MEMORY_BARRIER();
+ per_cpu->length = vec_len(new_counter);
+ vec_free(old_counter);
+}
+
+u64 vlib_refcount_get(vlib_refcount_t *r, u32 index)
+{
+ u64 count = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 cpu_index;
+ for (cpu_index = 0; cpu_index < tm->n_vlib_mains; cpu_index++) {
+ if (r->per_cpu[cpu_index].length > index)
+ count += r->per_cpu[cpu_index].counters[index];
+ }
+ return count;
+}
+
diff --git a/src/plugins/lb/refcount.h b/src/plugins/lb/refcount.h
new file mode 100644
index 00000000000..8c26e7be76f
--- /dev/null
+++ b/src/plugins/lb/refcount.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * vlib provides lock-free counters but those
+ * - Have 16bits per-CPU counter, which may overflow.
+ * - Would only increment.
+ *
+ * This is very similar to vlib counters, but may be used to count reference.
+ * Such a counter includes an arbitrary number of counters. Each counter
+ * is identified by its index. This is used to aggregate per-cpu memory.
+ *
+ * Warning:
+ * This reference counter is lock-free but is not race-condition free.
+ * The counting result is approximate and another mechanism needs to be used
+ * in order to ensure that an object may be freed.
+ *
+ */
+
+#include <vnet/vnet.h>
+
+typedef struct {
+ u32 *counters;
+ u32 length;
+ u32 *reader_lengths;
+ CLIB_CACHE_LINE_ALIGN_MARK(o);
+} vlib_refcount_per_cpu_t;
+
+typedef struct {
+ vlib_refcount_per_cpu_t *per_cpu;
+} vlib_refcount_t;
+
+void __vlib_refcount_resize(vlib_refcount_per_cpu_t *per_cpu, u32 size);
+
+static_always_inline
+void vlib_refcount_add(vlib_refcount_t *r, u32 cpu_index, u32 counter_index, i32 v)
+{
+ vlib_refcount_per_cpu_t *per_cpu = &r->per_cpu[cpu_index];
+ if (PREDICT_FALSE(counter_index >= per_cpu->length))
+ __vlib_refcount_resize(per_cpu, clib_max(counter_index + 16, per_cpu->length * 2));
+
+ per_cpu->counters[counter_index] += v;
+}
+
+u64 vlib_refcount_get(vlib_refcount_t *r, u32 index);
+
+static_always_inline
+void vlib_refcount_init(vlib_refcount_t *r)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ r->per_cpu = 0;
+ vec_validate (r->per_cpu, tm->n_vlib_mains - 1);
+}
+
+
diff --git a/src/plugins/lb/util.c b/src/plugins/lb/util.c
new file mode 100644
index 00000000000..d969d168dce
--- /dev/null
+++ b/src/plugins/lb/util.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lb/util.h>
+
+void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen)
+{
+ if (plen == 0) {
+ prefix->as_u64[0] = 0;
+ prefix->as_u64[1] = 0;
+ } else if (plen <= 64) {
+ prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen));
+ prefix->as_u64[1] = 0;
+ } else {
+ prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen));
+ }
+
+}
+
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u8 *len = va_arg (*args, u8 *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ u32 l;
+ if ((type != IP46_TYPE_IP6) && unformat(input, "%U/%u", unformat_ip4_address, &ip46->ip4, &l)) {
+ if (l > 32)
+ return 0;
+ *len = l + 96;
+ ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
+ } else if ((type != IP46_TYPE_IP4) && unformat(input, "%U/%u", unformat_ip6_address, &ip46->ip6, &l)) {
+ if (l > 128)
+ return 0;
+ *len = l;
+ } else {
+ return 0;
+ }
+ return 1;
+}
+
+u8 *format_ip46_prefix (u8 * s, va_list * args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ u32 len = va_arg (*args, u32); //va_arg cannot use u8 or u16
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+
+ int is_ip4 = 0;
+ if (type == IP46_TYPE_IP4)
+ is_ip4 = 1;
+ else if (type == IP46_TYPE_IP6)
+ is_ip4 = 0;
+ else
+ is_ip4 = (len >= 96) && ip46_address_is_ip4(ip46);
+
+ return is_ip4 ?
+ format(s, "%U/%d", format_ip4_address, &ip46->ip4, len - 96):
+ format(s, "%U/%d", format_ip6_address, &ip46->ip6, len);
+}
+
diff --git a/src/plugins/lb/util.h b/src/plugins/lb/util.h
new file mode 100644
index 00000000000..3f082310b69
--- /dev/null
+++ b/src/plugins/lb/util.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Non-LB specific stuff comes here
+ */
+
+#ifndef LB_PLUGIN_LB_UTIL_H_
+#define LB_PLUGIN_LB_UTIL_H_
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+#define ip46_address_type(ip46) (ip46_address_is_ip4(ip46)?IP46_TYPE_IP4:IP46_TYPE_IP6)
+#define ip46_prefix_is_ip4(ip46, len) ((len) >= 96 && ip46_address_is_ip4(ip46))
+#define ip46_prefix_type(ip46, len) (ip46_prefix_is_ip4(ip46, len)?IP46_TYPE_IP4:IP46_TYPE_IP6)
+
+void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen);
+uword unformat_ip46_prefix (unformat_input_t * input, va_list * args);
+u8 *format_ip46_prefix (u8 * s, va_list * args);
+
+/**
+ * 32 bits integer comparison for running values.
+ * 1 > 0 is true. But 1 > 0xffffffff also is.
+ */
+#define clib_u32_loop_gt(a, b) (((u32)(a)) - ((u32)(b)) < 0x7fffffff)
+
+#endif /* LB_PLUGIN_LB_UTIL_H_ */
diff --git a/src/plugins/snat.am b/src/plugins/snat.am
new file mode 100644
index 00000000000..7ff2386ec83
--- /dev/null
+++ b/src/plugins/snat.am
@@ -0,0 +1,33 @@
+
+# Copyright (c) <current-year> <your-organization>
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vppapitestplugins_LTLIBRARIES += snat_test_plugin.la
+vppplugins_LTLIBRARIES += snat_plugin.la
+
+snat_plugin_la_SOURCES = snat/snat.c \
+ snat/in2out.c \
+ snat/out2in.c \
+ snat/snat_plugin.api.h
+
+API_FILES += snat/snat.api
+
+nobase_apiinclude_HEADERS += \
+ snat/snat_all_api_h.h \
+ snat/snat_msg_enum.h \
+ snat/snat.api.h
+
+snat_test_plugin_la_SOURCES = \
+ snat/snat_test.c snat/snat_plugin.api.h
+
+# vi:syntax=automake
diff --git a/src/plugins/snat/in2out.c b/src/plugins/snat/in2out.c
new file mode 100644
index 00000000000..c78fdd76631
--- /dev/null
+++ b/src/plugins/snat/in2out.c
@@ -0,0 +1,1597 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/ip4_fib.h>
+#include <snat/snat.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+ u32 is_slow_path;
+} snat_in2out_trace_t;
+
+typedef struct {
+ u32 next_worker_index;
+ u8 do_handoff;
+} snat_in2out_worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 * format_snat_in2out_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
+ char * tag;
+
+ tag = t->is_slow_path ? "SNAT_IN2OUT_SLOW_PATH" : "SNAT_IN2OUT_FAST_PATH";
+
+ s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
+ t->sw_if_index, t->next_index, t->session_index);
+
+ return s;
+}
+
+static u8 * format_snat_in2out_fast_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_trace_t * t = va_arg (*args, snat_in2out_trace_t *);
+
+ s = format (s, "SANT_IN2OUT_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+
+ return s;
+}
+
+static u8 * format_snat_in2out_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_in2out_worker_handoff_trace_t * t =
+ va_arg (*args, snat_in2out_worker_handoff_trace_t *);
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker";
+ s = format (s, "SNAT_IN2OUT_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
+
+vlib_node_registration_t snat_in2out_node;
+vlib_node_registration_t snat_in2out_slowpath_node;
+vlib_node_registration_t snat_in2out_fast_node;
+vlib_node_registration_t snat_in2out_worker_handoff_node;
+
+#define foreach_snat_in2out_error \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
+_(IN2OUT_PACKETS, "Good in2out packets processed") \
+_(OUT_OF_PORTS, "Out of ports") \
+_(BAD_OUTSIDE_FIB, "Outside VRF ID not found") \
+_(BAD_ICMP_TYPE, "icmp type not echo-request") \
+_(NO_TRANSLATION, "No translation")
+
+typedef enum {
+#define _(sym,str) SNAT_IN2OUT_ERROR_##sym,
+ foreach_snat_in2out_error
+#undef _
+ SNAT_IN2OUT_N_ERROR,
+} snat_in2out_error_t;
+
+static char * snat_in2out_error_strings[] = {
+#define _(sym,string) string,
+ foreach_snat_in2out_error
+#undef _
+};
+
+typedef enum {
+ SNAT_IN2OUT_NEXT_LOOKUP,
+ SNAT_IN2OUT_NEXT_DROP,
+ SNAT_IN2OUT_NEXT_SLOW_PATH,
+ SNAT_IN2OUT_N_NEXT,
+} snat_in2out_next_t;
+
+static u32 slow_path (snat_main_t *sm, vlib_buffer_t *b0,
+ ip4_header_t * ip0,
+ u32 rx_fib_index0,
+ snat_session_key_t * key0,
+ snat_session_t ** sessionp,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 cpu_index)
+{
+ snat_user_t *u;
+ snat_user_key_t user_key;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 oldest_per_user_translation_list_index;
+ dlist_elt_t * oldest_per_user_translation_list_elt;
+ dlist_elt_t * per_user_translation_list_elt;
+ dlist_elt_t * per_user_list_head_elt;
+ u32 session_index;
+ snat_session_key_t key1;
+ u32 address_index = ~0;
+ u32 outside_fib_index;
+ uword * p;
+ snat_static_mapping_key_t worker_by_out_key;
+
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
+ if (! p)
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_OUTSIDE_FIB];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ outside_fib_index = p[0];
+
+ user_key.addr = ip0->src_address;
+ user_key.fib_index = rx_fib_index0;
+ kv0.key = user_key.as_u64;
+
+ /* Ever heard of the "user" = src ip4 address before? */
+ if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
+ {
+ /* no, make a new one */
+ pool_get (sm->per_thread_data[cpu_index].users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = ip0->src_address;
+
+ pool_get (sm->per_thread_data[cpu_index].list_pool, per_user_list_head_elt);
+
+ u->sessions_per_user_list_head_index = per_user_list_head_elt -
+ sm->per_thread_data[cpu_index].list_pool;
+
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv0.value = u - sm->per_thread_data[cpu_index].users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
+ }
+ else
+ {
+ u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+ value0.value);
+ }
+
+ /* Over quota? Recycle the least recently used dynamic translation */
+ if (u->nsessions >= sm->max_translations_per_user)
+ {
+ /* Remove the oldest dynamic translation */
+ do {
+ oldest_per_user_translation_list_index =
+ clib_dlist_remove_head (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ ASSERT (oldest_per_user_translation_list_index != ~0);
+
+ /* add it back to the end of the LRU list */
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index,
+ oldest_per_user_translation_list_index);
+ /* Get the list element */
+ oldest_per_user_translation_list_elt =
+ pool_elt_at_index (sm->per_thread_data[cpu_index].list_pool,
+ oldest_per_user_translation_list_index);
+
+ /* Get the session index from the list element */
+ session_index = oldest_per_user_translation_list_elt->value;
+
+ /* Get the session */
+ s = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ session_index);
+ } while (snat_is_session_static (s));
+
+ /* Remove in2out, out2in keys */
+ kv0.key = s->in2out.as_u64;
+ if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 0 /* is_add */))
+ clib_warning ("in2out key delete failed");
+ kv0.key = s->out2in.as_u64;
+ if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 0 /* is_add */))
+ clib_warning ("out2in key delete failed");
+
+ snat_free_outside_address_and_port
+ (sm, &s->out2in, s->outside_address_index);
+ s->outside_address_index = ~0;
+
+ if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
+ {
+ ASSERT(0);
+
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ s->outside_address_index = address_index;
+ }
+ else
+ {
+ u8 static_mapping = 1;
+
+ /* First try to match static mapping by local address and port */
+ if (snat_static_mapping_match (sm, *key0, &key1, 0))
+ {
+ static_mapping = 0;
+ /* Try to create dynamic translation */
+ if (snat_alloc_outside_address_and_port (sm, &key1, &address_index))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_OUT_OF_PORTS];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+ }
+
+ /* Create a new session */
+ pool_get (sm->per_thread_data[cpu_index].sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->outside_address_index = address_index;
+
+ if (static_mapping)
+ {
+ u->nstaticsessions++;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ }
+ else
+ {
+ u->nsessions++;
+ }
+
+ /* Create list elts */
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[cpu_index].sessions;
+ s->per_user_index = per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+ }
+
+ s->in2out = *key0;
+ s->out2in = key1;
+ s->out2in.protocol = key0->protocol;
+ s->out2in.fib_index = outside_fib_index;
+ *sessionp = s;
+
+ /* Add to translation hashes */
+ kv0.key = s->in2out.as_u64;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+ if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
+ clib_warning ("in2out key add failed");
+
+ kv0.key = s->out2in.as_u64;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+
+ if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
+ clib_warning ("out2in key add failed");
+
+ /* Add to translated packets worker lookup */
+ worker_by_out_key.addr = s->out2in.addr;
+ worker_by_out_key.port = s->out2in.port;
+ worker_by_out_key.fib_index = s->out2in.fib_index;
+ kv0.key = worker_by_out_key.as_u64;
+ kv0.value = cpu_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
+ return next0;
+}
+
+static inline u32 icmp_in2out_slow_path (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ f64 now,
+ u32 cpu_index)
+{
+ snat_session_key_t key0;
+ icmp_echo_header_t *echo0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ snat_session_t * s0;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+
+ if (PREDICT_FALSE(icmp0->type != ICMP4_echo_request))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_BAD_ICMP_TYPE];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ key0.addr = ip0->src_address;
+ key0.port = echo0->identifier;
+ key0.protocol = SNAT_PROTOCOL_ICMP;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ return next0;
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, cpu_index);
+
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ return next0;
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address = s0->out2in.addr;
+ new_addr0 = ip0->src_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_id0 = echo0->identifier;
+ new_id0 = s0->out2in.port;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
+ /* Per-user LRU list maintenance for dynamic translations */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+
+ return next0;
+}
+
+/**
+ * @brief Hairpinning
+ *
+ * Hairpinning allows two endpoints on the internal side of the NAT to
+ * communicate even if they only use each other's external IP addresses
+ * and ports.
+ *
+ * @param sm SNAT main.
+ * @param b0 Vlib buffer.
+ * @param ip0 IP header.
+ * @param udp0 UDP header.
+ * @param tcp0 TCP header.
+ * @param proto0 SNAT protocol.
+ */
+static inline void
+snat_hairpinning (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ udp_header_t * udp0,
+ tcp_header_t * tcp0,
+ u32 proto0)
+{
+ snat_session_key_t key0, sm0;
+ snat_static_mapping_key_t k0;
+ snat_session_t * s0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ ip_csum_t sum0;
+ u32 new_dst_addr0 = 0, old_dst_addr0, ti = 0, si;
+ u16 new_dst_port0, old_dst_port0;
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = sm->outside_fib_index;
+ kv0.key = key0.as_u64;
+
+ /* Check if destination is in active sessions */
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+ {
+ /* or static mappings */
+ if (!snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ new_dst_addr0 = sm0.addr.as_u32;
+ new_dst_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ }
+ }
+ else
+ {
+ si = value0.value;
+ if (sm->num_workers > 1)
+ {
+ k0.addr = ip0->dst_address;
+ k0.port = udp0->dst_port;
+ k0.fib_index = sm->outside_fib_index;
+ kv0.key = k0.as_u64;
+ if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+ ASSERT(0);
+ else
+ ti = value0.value;
+ }
+ else
+ ti = sm->num_workers;
+
+ s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ new_dst_port0 = s0->in2out.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ }
+
+ /* Destination is behind the same NAT, use internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_dst_port0 = tcp0->ports.dst;
+ if (PREDICT_TRUE(new_dst_port0 != old_dst_port0))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ tcp0->ports.dst = new_dst_port0;
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+ ip4_header_t /* cheat */, length);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ udp0->dst_port = new_dst_port0;
+ udp0->checksum = 0;
+ }
+ }
+ }
+}
+
+static inline uword
+snat_in2out_node_fn_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_slow_path)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+ f64 now = vlib_time_now (vm);
+ u32 stats_node_index;
+ u32 cpu_index = os_get_cpu_number ();
+
+ stats_node_index = is_slow_path ? snat_in2out_slowpath_node.index :
+ snat_in2out_node.index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0, next1;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, * ip1;
+ ip_csum_t sum0, sum1;
+ u32 new_addr0, old_addr0, new_addr1, old_addr1;
+ u16 old_port0, new_port0, old_port1, new_port1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ icmp46_header_t * icmp0, * icmp1;
+ snat_session_key_t key0, key1;
+ u32 rx_fib_index0, rx_fib_index1;
+ u32 proto0, proto1;
+ snat_session_t * s0 = 0, * s1 = 0;
+ clib_bihash_kv_8_8_t kv0, value0, kv1, value1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ next0 = next1 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace00;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_in2out_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0,
+ node, next0, now, cpu_index);
+ goto trace00;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace00;
+ }
+ }
+
+ key0.addr = ip0->src_address;
+ key0.port = udp0->src_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (PREDICT_FALSE (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0) != 0))
+ {
+ if (is_slow_path)
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ goto trace00;
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, cpu_index);
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace00;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace00;
+ }
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address = s0->out2in.addr;
+ new_addr0 = ip0->src_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.src;
+ tcp0->ports.src = s0->out2in.port;
+ new_port0 = tcp0->ports.src;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = s0->out2in.port;
+ udp0->checksum = 0;
+ }
+
+ /* Hairpinning */
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = is_slow_path;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ ip1 = vlib_buffer_get_current (b1);
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+ icmp1 = (icmp46_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index1);
+
+ proto1 = ~0;
+ proto1 = (ip1->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto1;
+ proto1 = (ip1->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto1;
+ proto1 = (ip1->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto1;
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto1 == ~0))
+ goto trace01;
+
+ if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = icmp_in2out_slow_path
+ (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
+ next1, now, cpu_index);
+ goto trace01;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto1 == ~0 || proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace01;
+ }
+ }
+
+ key1.addr = ip1->src_address;
+ key1.port = udp1->src_port;
+ key1.protocol = proto1;
+ key1.fib_index = rx_fib_index1;
+
+ kv1.key = key1.as_u64;
+
+ if (PREDICT_FALSE(clib_bihash_search_8_8 (&sm->in2out, &kv1, &value1) != 0))
+ {
+ if (is_slow_path)
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index1))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index1,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index1;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip1->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ goto trace01;
+
+ next1 = slow_path (sm, b1, ip1, rx_fib_index1, &key1,
+ &s1, node, next1, cpu_index);
+ if (PREDICT_FALSE (next1 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace01;
+ }
+ else
+ {
+ next1 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace01;
+ }
+ }
+ else
+ s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value1.value);
+
+ old_addr1 = ip1->src_address.as_u32;
+ ip1->src_address = s1->out2in.addr;
+ new_addr1 = ip1->src_address.as_u32;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->out2in.fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ old_port1 = tcp1->ports.src;
+ tcp1->ports.src = s1->out2in.port;
+ new_port1 = tcp1->ports.src;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ old_port1 = udp1->src_port;
+ udp1->src_port = s1->out2in.port;
+ udp1->checksum = 0;
+ }
+
+ /* Hairpinning */
+ snat_hairpinning (sm, b1, ip1, udp1, tcp1, proto1);
+
+ /* Accounting */
+ s1->last_heard = now;
+ s1->total_pkts++;
+ s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s1))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_list_head_index,
+ s1->per_user_index);
+ }
+ trace01:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (s1)
+ t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next1 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 old_port0, new_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0;
+ u32 rx_fib_index0;
+ u32 proto0;
+ snat_session_t * s0 = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ /* Next configured feature, probably ip4-lookup */
+ if (is_slow_path)
+ {
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace0;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_in2out_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, cpu_index);
+ goto trace0;
+ }
+ }
+ else
+ {
+ if (PREDICT_FALSE (proto0 == ~0 || proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace0;
+ }
+ }
+
+ key0.addr = ip0->src_address;
+ key0.port = udp0->src_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->in2out, &kv0, &value0))
+ {
+ if (is_slow_path)
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ goto trace0;
+
+ next0 = slow_path (sm, b0, ip0, rx_fib_index0, &key0,
+ &s0, node, next0, cpu_index);
+ if (PREDICT_FALSE (next0 == SNAT_IN2OUT_NEXT_DROP))
+ goto trace0;
+ }
+ else
+ {
+ next0 = SNAT_IN2OUT_NEXT_SLOW_PATH;
+ goto trace0;
+ }
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address = s0->out2in.addr;
+ new_addr0 = ip0->src_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.src;
+ tcp0->ports.src = s0->out2in.port;
+ new_port0 = tcp0->ports.src;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = s0->out2in.port;
+ udp0->checksum = 0;
+ }
+
+ /* Hairpinning */
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+
+ trace0:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->is_slow_path = is_slow_path;
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, stats_node_index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+static uword
+snat_in2out_fast_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_node) = {
+ .function = snat_in2out_fast_path_fn,
+ .name = "snat-in2out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_node, snat_in2out_fast_path_fn);
+
+static uword
+snat_in2out_slow_path_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return snat_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */);
+}
+
+VLIB_REGISTER_NODE (snat_in2out_slowpath_node) = {
+ .function = snat_in2out_slow_path_fn,
+ .name = "snat-in2out-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_slowpath_node, snat_in2out_slow_path_fn);
+
+static uword
+snat_in2out_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ snat_main_t *sm = &snat_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from, *to_next = 0;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0;
+ vlib_frame_t *f = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+ u32 cpu_index = os_get_cpu_number ();
+
+ ASSERT (vec_len (sm->workers));
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ sm->first_worker_index + sm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ ip4_header_t * ip0;
+ snat_user_key_t key0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u8 do_handoff;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ key0.addr = ip0->src_address;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ /* Ever heard of of the "user" before? */
+ if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv0, &value0))
+ {
+ /* No, assign next available worker (RR) */
+ next_worker_index = sm->first_worker_index +
+ sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+
+ /* add non-traslated packets worker lookup */
+ kv0.value = next_worker_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
+ }
+ else
+ next_worker_index = value0.value;
+
+ if (PREDICT_FALSE (next_worker_index != cpu_index))
+ {
+ do_handoff = 1;
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = vlib_get_worker_handoff_queue_elt (sm->fq_in2out_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+ }
+ else
+ {
+ do_handoff = 0;
+ /* if this is 1st frame */
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, snat_in2out_node.index);
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_worker_index = next_worker_index;
+ t->do_handoff = do_handoff;
+ }
+ }
+
+ if (f)
+ vlib_put_frame_to_node (vm, snat_in2out_node.index, f);
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] =
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_in2out_worker_handoff_node) = {
+ .function = snat_in2out_worker_handoff_fn,
+ .name = "snat-in2out-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_worker_handoff_node, snat_in2out_worker_handoff_fn);
+
+static inline u32 icmp_in2out_static_map (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 rx_fib_index0)
+{
+ snat_session_key_t key0, sm0;
+ icmp_echo_header_t *echo0;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ key0.addr = ip0->src_address;
+ key0.port = echo0->identifier;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 0))
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ return next0;
+
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ return SNAT_IN2OUT_NEXT_DROP;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_id0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_id0 != echo0->identifier))
+ {
+ old_id0 = echo0->identifier;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+
+ return next0;
+}
+
+static uword
+snat_in2out_fast_static_map_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_in2out_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+ u32 stats_node_index;
+
+ stats_node_index = snat_in2out_fast_node.index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 old_port0, new_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 proto0;
+ u32 rx_fib_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ next0 = SNAT_IN2OUT_NEXT_LOOKUP;
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace0;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ goto trace0;
+
+ next0 = icmp_in2out_static_map
+ (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
+ goto trace0;
+ }
+
+ key0.addr = ip0->src_address;
+ key0.port = udp0->src_port;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 0))
+ {
+ b0->error = node->errors[SNAT_IN2OUT_ERROR_NO_TRANSLATION];
+ next0= SNAT_IN2OUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ old_addr0 = ip0->src_address.as_u32;
+ ip0->src_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ src_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_port0 != udp0->dst_port))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.src;
+ tcp0->ports.src = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->src_port;
+ udp0->src_port = new_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ }
+
+ /* Hairpinning */
+ snat_hairpinning (sm, b0, ip0, udp0, tcp0, proto0);
+
+ trace0:
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_in2out_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ pkts_processed += next0 != SNAT_IN2OUT_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, stats_node_index,
+ SNAT_IN2OUT_ERROR_IN2OUT_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (snat_in2out_fast_node) = {
+ .function = snat_in2out_fast_static_map_fn,
+ .name = "snat-in2out-fast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_in2out_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_in2out_error_strings),
+ .error_strings = snat_in2out_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_IN2OUT_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_IN2OUT_NEXT_DROP] = "error-drop",
+ [SNAT_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_IN2OUT_NEXT_SLOW_PATH] = "snat-in2out-slowpath",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_in2out_fast_node, snat_in2out_fast_static_map_fn);
diff --git a/src/plugins/snat/out2in.c b/src/plugins/snat/out2in.c
new file mode 100644
index 00000000000..f1f4159cdce
--- /dev/null
+++ b/src/plugins/snat/out2in.c
@@ -0,0 +1,1261 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/handoff.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/ip4_fib.h>
+#include <snat/snat.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+typedef struct {
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+} snat_out2in_trace_t;
+
+typedef struct {
+ u32 next_worker_index;
+ u8 do_handoff;
+} snat_out2in_worker_handoff_trace_t;
+
+/* packet trace format function */
+static u8 * format_snat_out2in_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
+
+ s = format (s, "SNAT_OUT2IN: sw_if_index %d, next index %d, session index %d",
+ t->sw_if_index, t->next_index, t->session_index);
+ return s;
+}
+
+static u8 * format_snat_out2in_fast_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_trace_t * t = va_arg (*args, snat_out2in_trace_t *);
+
+ s = format (s, "SNAT_OUT2IN_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+static u8 * format_snat_out2in_worker_handoff_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ snat_out2in_worker_handoff_trace_t * t =
+ va_arg (*args, snat_out2in_worker_handoff_trace_t *);
+ char * m;
+
+ m = t->do_handoff ? "next worker" : "same worker";
+ s = format (s, "SNAT_OUT2IN_WORKER_HANDOFF: %s %d", m, t->next_worker_index);
+
+ return s;
+}
+
+vlib_node_registration_t snat_out2in_node;
+vlib_node_registration_t snat_out2in_fast_node;
+vlib_node_registration_t snat_out2in_worker_handoff_node;
+
+#define foreach_snat_out2in_error \
+_(UNSUPPORTED_PROTOCOL, "Unsupported protocol") \
+_(OUT2IN_PACKETS, "Good out2in packets processed") \
+_(BAD_ICMP_TYPE, "icmp type not echo-reply") \
+_(NO_TRANSLATION, "No translation")
+
+typedef enum {
+#define _(sym,str) SNAT_OUT2IN_ERROR_##sym,
+ foreach_snat_out2in_error
+#undef _
+ SNAT_OUT2IN_N_ERROR,
+} snat_out2in_error_t;
+
+static char * snat_out2in_error_strings[] = {
+#define _(sym,string) string,
+ foreach_snat_out2in_error
+#undef _
+};
+
+typedef enum {
+ SNAT_OUT2IN_NEXT_DROP,
+ SNAT_OUT2IN_NEXT_LOOKUP,
+ SNAT_OUT2IN_N_NEXT,
+} snat_out2in_next_t;
+
+/**
+ * @brief Create session for static mapping.
+ *
+ * Create NAT session initiated by host from external network with static
+ * mapping.
+ *
+ * @param sm SNAT main.
+ * @param b0 Vlib buffer.
+ * @param in2out In2out SNAT session key.
+ * @param out2in Out2in SNAT session key.
+ * @param node Vlib node.
+ *
+ * @returns SNAT session if successfully created otherwise 0.
+ */
+static inline snat_session_t *
+create_session_for_static_mapping (snat_main_t *sm,
+ vlib_buffer_t *b0,
+ snat_session_key_t in2out,
+ snat_session_key_t out2in,
+ vlib_node_runtime_t * node,
+ u32 cpu_index)
+{
+ snat_user_t *u;
+ snat_user_key_t user_key;
+ snat_session_t *s;
+ clib_bihash_kv_8_8_t kv0, value0;
+ dlist_elt_t * per_user_translation_list_elt;
+ dlist_elt_t * per_user_list_head_elt;
+
+ user_key.addr = in2out.addr;
+ user_key.fib_index = in2out.fib_index;
+ kv0.key = user_key.as_u64;
+
+ /* Ever heard of the "user" = inside ip4 address before? */
+ if (clib_bihash_search_8_8 (&sm->user_hash, &kv0, &value0))
+ {
+ /* no, make a new one */
+ pool_get (sm->per_thread_data[cpu_index].users, u);
+ memset (u, 0, sizeof (*u));
+ u->addr = in2out.addr;
+
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_list_head_elt);
+
+ u->sessions_per_user_list_head_index = per_user_list_head_elt -
+ sm->per_thread_data[cpu_index].list_pool;
+
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ u->sessions_per_user_list_head_index);
+
+ kv0.value = u - sm->per_thread_data[cpu_index].users;
+
+ /* add user */
+ clib_bihash_add_del_8_8 (&sm->user_hash, &kv0, 1 /* is_add */);
+
+ /* add non-traslated packets worker lookup */
+ kv0.value = cpu_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv0, 1);
+ }
+ else
+ {
+ u = pool_elt_at_index (sm->per_thread_data[cpu_index].users,
+ value0.value);
+ }
+
+ pool_get (sm->per_thread_data[cpu_index].sessions, s);
+ memset (s, 0, sizeof (*s));
+
+ s->outside_address_index = ~0;
+ s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
+ u->nstaticsessions++;
+
+ /* Create list elts */
+ pool_get (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt);
+ clib_dlist_init (sm->per_thread_data[cpu_index].list_pool,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+
+ per_user_translation_list_elt->value =
+ s - sm->per_thread_data[cpu_index].sessions;
+ s->per_user_index =
+ per_user_translation_list_elt - sm->per_thread_data[cpu_index].list_pool;
+ s->per_user_list_head_index = u->sessions_per_user_list_head_index;
+
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s->per_user_list_head_index,
+ per_user_translation_list_elt -
+ sm->per_thread_data[cpu_index].list_pool);
+
+ s->in2out = in2out;
+ s->out2in = out2in;
+ s->in2out.protocol = out2in.protocol;
+
+ /* Add to translation hashes */
+ kv0.key = s->in2out.as_u64;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+ if (clib_bihash_add_del_8_8 (&sm->in2out, &kv0, 1 /* is_add */))
+ clib_warning ("in2out key add failed");
+
+ kv0.key = s->out2in.as_u64;
+ kv0.value = s - sm->per_thread_data[cpu_index].sessions;
+
+ if (clib_bihash_add_del_8_8 (&sm->out2in, &kv0, 1 /* is_add */))
+ clib_warning ("out2in key add failed");
+
+ return s;
+}
+
+static inline u32 icmp_out2in_slow_path (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ u32 rx_fib_index0,
+ vlib_node_runtime_t * node,
+ u32 next0, f64 now,
+ u32 cpu_index)
+{
+ snat_session_key_t key0, sm0;
+ icmp_echo_header_t *echo0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ snat_session_t * s0;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ key0.addr = ip0->dst_address;
+ key0.port = echo0->identifier;
+ key0.protocol = SNAT_PROTOCOL_ICMP;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ return next0;
+
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ return SNAT_OUT2IN_NEXT_DROP;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0,
+ node, cpu_index);
+ if (!s0)
+ return SNAT_OUT2IN_NEXT_DROP;
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address = s0->in2out.addr;
+ new_addr0 = ip0->dst_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_id0 = echo0->identifier;
+ new_id0 = s0->in2out.port;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (sm->vlib_main, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+
+ return next0;
+}
+
+static uword
+snat_out2in_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+ f64 now = vlib_time_now (vm);
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 next1 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0, sw_if_index1;
+ ip4_header_t * ip0, *ip1;
+ ip_csum_t sum0, sum1;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ u32 new_addr1, old_addr1;
+ u16 new_port1, old_port1;
+ udp_header_t * udp0, * udp1;
+ tcp_header_t * tcp0, * tcp1;
+ icmp46_header_t * icmp0, * icmp1;
+ snat_session_key_t key0, key1, sm0, sm1;
+ u32 rx_fib_index0, rx_fib_index1;
+ u32 proto0, proto1;
+ snat_session_t * s0 = 0, * s1 = 0;
+ clib_bihash_kv_8_8_t kv0, kv1, value0, value1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace0;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, cpu_index);
+ goto trace0;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace0;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ cpu_index);
+ if (!s0)
+ goto trace0;
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address = s0->in2out.addr;
+ new_addr0 = ip0->dst_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.dst;
+ tcp0->ports.dst = s0->in2out.port;
+ new_port0 = tcp0->ports.dst;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = s0->in2out.port;
+ udp0->checksum = 0;
+ }
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace0:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+
+ ip1 = vlib_buffer_get_current (b1);
+ udp1 = ip4_next_header (ip1);
+ tcp1 = (tcp_header_t *) udp1;
+ icmp1 = (icmp46_header_t *) udp1;
+
+ sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+ rx_fib_index1 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index1);
+
+ proto1 = ~0;
+ proto1 = (ip1->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto1;
+ proto1 = (ip1->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto1;
+ proto1 = (ip1->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto1;
+
+ if (PREDICT_FALSE (proto1 == ~0))
+ goto trace1;
+
+ if (PREDICT_FALSE (proto1 == SNAT_PROTOCOL_ICMP))
+ {
+ next1 = icmp_out2in_slow_path
+ (sm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node,
+ next1, now, cpu_index);
+ goto trace1;
+ }
+
+ key1.addr = ip1->dst_address;
+ key1.port = udp1->dst_port;
+ key1.protocol = proto1;
+ key1.fib_index = rx_fib_index1;
+
+ kv1.key = key1.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv1, &value1))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key1, &sm1, 1))
+ {
+ b1->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace1;
+ }
+
+ /* Create session initiated by host from external network */
+ s1 = create_session_for_static_mapping(sm, b1, sm1, key1, node,
+ cpu_index);
+ if (!s1)
+ goto trace1;
+ }
+ else
+ s1 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value1.value);
+
+ old_addr1 = ip1->dst_address.as_u32;
+ ip1->dst_address = s1->in2out.addr;
+ new_addr1 = ip1->dst_address.as_u32;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = s1->in2out.fib_index;
+
+ sum1 = ip1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ if (PREDICT_TRUE(proto1 == SNAT_PROTOCOL_TCP))
+ {
+ old_port1 = tcp1->ports.dst;
+ tcp1->ports.dst = s1->in2out.port;
+ new_port1 = tcp1->ports.dst;
+
+ sum1 = tcp1->checksum;
+ sum1 = ip_csum_update (sum1, old_addr1, new_addr1,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum1 = ip_csum_update (sum1, old_port1, new_port1,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp1->checksum = ip_csum_fold(sum1);
+ }
+ else
+ {
+ old_port1 = udp1->dst_port;
+ udp1->dst_port = s1->in2out.port;
+ udp1->checksum = 0;
+ }
+
+ /* Accounting */
+ s1->last_heard = now;
+ s1->total_pkts++;
+ s1->total_bytes += vlib_buffer_length_in_chain (vm, b1);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s1))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s1->per_user_list_head_index,
+ s1->per_user_index);
+ }
+ trace1:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b1->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->sw_if_index = sw_if_index1;
+ t->next_index = next1;
+ t->session_index = ~0;
+ if (s1)
+ t->session_index = s1 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next1 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SNAT_OUT2IN_NEXT_LOOKUP;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 rx_fib_index0;
+ u32 proto0;
+ snat_session_t * s0 = 0;
+ clib_bihash_kv_8_8_t kv0, value0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = vec_elt (sm->ip4_main->fib_index_by_sw_if_index,
+ sw_if_index0);
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace00;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in_slow_path
+ (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
+ next0, now, cpu_index);
+ goto trace00;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.protocol = proto0;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->out2in, &kv0, &value0))
+ {
+ /* Try to match static mapping by external address and port,
+ destination address and port in packet */
+ if (snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+
+ /* Create session initiated by host from external network */
+ s0 = create_session_for_static_mapping(sm, b0, sm0, key0, node,
+ cpu_index);
+ if (!s0)
+ goto trace00;
+ }
+ else
+ s0 = pool_elt_at_index (sm->per_thread_data[cpu_index].sessions,
+ value0.value);
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address = s0->in2out.addr;
+ new_addr0 = ip0->dst_address.as_u32;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.dst;
+ tcp0->ports.dst = s0->in2out.port;
+ new_port0 = tcp0->ports.dst;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = s0->in2out.port;
+ udp0->checksum = 0;
+ }
+
+ /* Accounting */
+ s0->last_heard = now;
+ s0->total_pkts++;
+ s0->total_bytes += vlib_buffer_length_in_chain (vm, b0);
+ /* Per-user LRU list maintenance for dynamic translation */
+ if (!snat_is_session_static (s0))
+ {
+ clib_dlist_remove (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_index);
+ clib_dlist_addtail (sm->per_thread_data[cpu_index].list_pool,
+ s0->per_user_list_head_index,
+ s0->per_user_index);
+ }
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ t->session_index = ~0;
+ if (s0)
+ t->session_index = s0 - sm->per_thread_data[cpu_index].sessions;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_out2in_node.index,
+ SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_node) = {
+ .function = snat_out2in_node_fn,
+ .name = "snat-out2in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+ .error_strings = snat_out2in_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_node, snat_out2in_node_fn);
+
+static uword
+snat_out2in_worker_handoff_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ snat_main_t *sm = &snat_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u32 n_left_from, *from, *to_next = 0;
+ static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
+ static __thread vlib_frame_queue_t **congested_handoff_queue_by_worker_index
+ = 0;
+ vlib_frame_queue_elt_t *hf = 0;
+ vlib_frame_t *f = 0;
+ int i;
+ u32 n_left_to_next_worker = 0, *to_next_worker = 0;
+ u32 next_worker_index = 0;
+ u32 current_worker_index = ~0;
+ u32 cpu_index = os_get_cpu_number ();
+
+ ASSERT (vec_len (sm->workers));
+
+ if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
+ {
+ vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);
+
+ vec_validate_init_empty (congested_handoff_queue_by_worker_index,
+ sm->first_worker_index + sm->num_workers - 1,
+ (vlib_frame_queue_t *) (~0));
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 sw_if_index0;
+ u32 rx_fib_index0;
+ ip4_header_t * ip0;
+ udp_header_t * udp0;
+ snat_static_mapping_key_t key0;
+ clib_bihash_kv_8_8_t kv0, value0;
+ u8 do_handoff;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.fib_index = rx_fib_index0;
+
+ kv0.key = key0.as_u64;
+
+ /* Ever heard of of the "user" before? */
+ if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+ {
+ key0.port = 0;
+ kv0.key = key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->worker_by_out, &kv0, &value0))
+ {
+ /* No, assign next available worker (RR) */
+ next_worker_index = sm->first_worker_index +
+ sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+ }
+ else
+ {
+ /* Static mapping without port */
+ next_worker_index = value0.value;
+ }
+
+ /* Add to translated packets worker lookup */
+ kv0.value = next_worker_index;
+ clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv0, 1);
+ }
+ else
+ next_worker_index = value0.value;
+
+ if (PREDICT_FALSE (next_worker_index != cpu_index))
+ {
+ do_handoff = 1;
+
+ if (next_worker_index != current_worker_index)
+ {
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ hf = vlib_get_worker_handoff_queue_elt (sm->fq_out2in_index,
+ next_worker_index,
+ handoff_queue_elt_by_worker_index);
+
+ n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
+ to_next_worker = &hf->buffer_index[hf->n_vectors];
+ current_worker_index = next_worker_index;
+ }
+
+ /* enqueue to correct worker thread */
+ to_next_worker[0] = bi0;
+ to_next_worker++;
+ n_left_to_next_worker--;
+
+ if (n_left_to_next_worker == 0)
+ {
+ hf->n_vectors = VLIB_FRAME_SIZE;
+ vlib_put_frame_queue_elt (hf);
+ current_worker_index = ~0;
+ handoff_queue_elt_by_worker_index[next_worker_index] = 0;
+ hf = 0;
+ }
+ }
+ else
+ {
+ do_handoff = 0;
+ /* if this is 1st frame */
+ if (!f)
+ {
+ f = vlib_get_frame_to_node (vm, snat_out2in_node.index);
+ to_next = vlib_frame_vector_args (f);
+ }
+
+ to_next[0] = bi0;
+ to_next += 1;
+ f->n_vectors++;
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_worker_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_worker_index = next_worker_index;
+ t->do_handoff = do_handoff;
+ }
+ }
+
+ if (f)
+ vlib_put_frame_to_node (vm, snat_out2in_node.index, f);
+
+ if (hf)
+ hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;
+
+ /* Ship frames to the worker nodes */
+ for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
+ {
+ if (handoff_queue_elt_by_worker_index[i])
+ {
+ hf = handoff_queue_elt_by_worker_index[i];
+ /*
+ * It works better to let the handoff node
+ * rate-adapt, always ship the handoff queue element.
+ */
+ if (1 || hf->n_vectors == hf->last_n_vectors)
+ {
+ vlib_put_frame_queue_elt (hf);
+ handoff_queue_elt_by_worker_index[i] = 0;
+ }
+ else
+ hf->last_n_vectors = hf->n_vectors;
+ }
+ congested_handoff_queue_by_worker_index[i] =
+ (vlib_frame_queue_t *) (~0);
+ }
+ hf = 0;
+ current_worker_index = ~0;
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_worker_handoff_node) = {
+ .function = snat_out2in_worker_handoff_fn,
+ .name = "snat-out2in-worker-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_worker_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_worker_handoff_node, snat_out2in_worker_handoff_fn);
+
+static inline u32 icmp_out2in_fast (snat_main_t *sm,
+ vlib_buffer_t * b0,
+ ip4_header_t * ip0,
+ icmp46_header_t * icmp0,
+ u32 sw_if_index0,
+ vlib_node_runtime_t * node,
+ u32 next0,
+ u32 rx_fib_index0)
+{
+ snat_session_key_t key0, sm0;
+ icmp_echo_header_t *echo0;
+ u32 new_addr0, old_addr0;
+ u16 old_id0, new_id0;
+ ip_csum_t sum0;
+ snat_runtime_t * rt = (snat_runtime_t *)node->runtime_data;
+
+ echo0 = (icmp_echo_header_t *)(icmp0+1);
+
+ key0.addr = ip0->dst_address;
+ key0.port = echo0->identifier;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ ip4_address_t * first_int_addr;
+
+ if (PREDICT_FALSE(rt->cached_sw_if_index != sw_if_index0))
+ {
+ first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index0,
+ 0 /* just want the address */);
+ rt->cached_sw_if_index = sw_if_index0;
+ rt->cached_ip4_address = first_int_addr->as_u32;
+ }
+
+ /* Don't NAT packet aimed at the intfc address */
+ if (PREDICT_FALSE(ip0->dst_address.as_u32 ==
+ rt->cached_ip4_address))
+ return next0;
+
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ return SNAT_OUT2IN_NEXT_DROP;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_id0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_id0 != echo0->identifier))
+ {
+ old_id0 = echo0->identifier;
+ echo0->identifier = new_id0;
+
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_id0, new_id0, icmp_echo_header_t,
+ identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+
+ return next0;
+}
+
+static uword
+snat_out2in_fast_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_left_from, * from, * to_next;
+ snat_out2in_next_t next_index;
+ u32 pkts_processed = 0;
+ snat_main_t * sm = &snat_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u32 next0 = SNAT_OUT2IN_NEXT_DROP;
+ u32 sw_if_index0;
+ ip4_header_t * ip0;
+ ip_csum_t sum0;
+ u32 new_addr0, old_addr0;
+ u16 new_port0, old_port0;
+ udp_header_t * udp0;
+ tcp_header_t * tcp0;
+ icmp46_header_t * icmp0;
+ snat_session_key_t key0, sm0;
+ u32 proto0;
+ u32 rx_fib_index0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ udp0 = ip4_next_header (ip0);
+ tcp0 = (tcp_header_t *) udp0;
+ icmp0 = (icmp46_header_t *) udp0;
+
+ sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
+ rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index(sw_if_index0);
+
+ vnet_feature_next (sw_if_index0, &next0, b0);
+
+ proto0 = ~0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_UDP)
+ ? SNAT_PROTOCOL_UDP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_TCP)
+ ? SNAT_PROTOCOL_TCP : proto0;
+ proto0 = (ip0->protocol == IP_PROTOCOL_ICMP)
+ ? SNAT_PROTOCOL_ICMP : proto0;
+
+ if (PREDICT_FALSE (proto0 == ~0))
+ goto trace00;
+
+ if (PREDICT_FALSE (proto0 == SNAT_PROTOCOL_ICMP))
+ {
+ next0 = icmp_out2in_fast
+ (sm, b0, ip0, icmp0, sw_if_index0, node, next0, rx_fib_index0);
+ goto trace00;
+ }
+
+ key0.addr = ip0->dst_address;
+ key0.port = udp0->dst_port;
+ key0.fib_index = rx_fib_index0;
+
+ if (snat_static_mapping_match(sm, key0, &sm0, 1))
+ {
+ b0->error = node->errors[SNAT_OUT2IN_ERROR_NO_TRANSLATION];
+ goto trace00;
+ }
+
+ new_addr0 = sm0.addr.as_u32;
+ new_port0 = sm0.port;
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = sm0.fib_index;
+ old_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_addr0;
+
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ if (PREDICT_FALSE(new_port0 != udp0->dst_port))
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ old_port0 = tcp0->ports.dst;
+ tcp0->ports.dst = new_port0;
+
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ sum0 = ip_csum_update (sum0, old_port0, new_port0,
+ ip4_header_t /* cheat */,
+ length /* changed member */);
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ else
+ {
+ old_port0 = udp0->dst_port;
+ udp0->dst_port = new_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE(proto0 == SNAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
+ ip4_header_t,
+ dst_address /* changed member */);
+
+ tcp0->checksum = ip_csum_fold(sum0);
+ }
+ }
+
+ trace00:
+
+ if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ snat_out2in_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->next_index = next0;
+ }
+
+ pkts_processed += next0 != SNAT_OUT2IN_NEXT_DROP;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, snat_out2in_fast_node.index,
+ SNAT_OUT2IN_ERROR_OUT2IN_PACKETS,
+ pkts_processed);
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (snat_out2in_fast_node) = {
+ .function = snat_out2in_fast_node_fn,
+ .name = "snat-out2in-fast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_snat_out2in_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(snat_out2in_error_strings),
+ .error_strings = snat_out2in_error_strings,
+
+ .runtime_data_bytes = sizeof (snat_runtime_t),
+
+ .n_next_nodes = SNAT_OUT2IN_N_NEXT,
+
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [SNAT_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
+ [SNAT_OUT2IN_NEXT_DROP] = "error-drop",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (snat_out2in_fast_node, snat_out2in_fast_node_fn);
diff --git a/src/plugins/snat/snat.api b/src/plugins/snat/snat.api
new file mode 100644
index 00000000000..a191eed5944
--- /dev/null
+++ b/src/plugins/snat/snat.api
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file snat.api
+ * @brief VPP control-plane API messages.
+ *
+ * This file defines VPP control-plane API messages which are generally
+ * called through a shared memory interface.
+ */
+
+/** \brief Add/del S-NAT address range
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @first_ip_address - first IP address
+ @last_ip_address - last IP address
+ @is_add - 1 if add, 0 if delete
+*/
+define snat_add_address_range {
+ u32 client_index;
+ u32 context;
+ u8 is_ip4;
+ u8 first_ip_address[16];
+ u8 last_ip_address[16];
+ u8 is_add;
+};
+
+/** \brief Add S-NAT address range reply
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+*/
+define snat_add_address_range_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump S-NAT addresses
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_address_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief S-NAT address details response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param ip_address - IP address
+*/
+define snat_address_details {
+ u32 context;
+ u8 is_ip4;
+ u8 ip_address[16];
+};
+
+/** \brief Enable/disable S-NAT feature on the interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define snat_interface_add_del_feature {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Enable/disable S-NAT feature on the interface reply
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+*/
+define snat_interface_add_del_feature_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump interfaces with S-NAT feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_interface_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief S-NAT interface details response
+ @param context - sender context, to match reply w/ request
+ @param is_inside - 1 if inside, 0 if outside
+ @param sw_if_index - software index of the interface
+*/
+define snat_interface_details {
+ u32 context;
+ u8 is_inside;
+ u32 sw_if_index;
+};
+
+/** \brief Add/delete S-NAT static mapping
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 1 if add, 0 if delete
+ @param is_ip4 - 1 if address type is IPv4
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IP address
+ @param external_ip_address - external IP address
+ @param local_port - local port number
+ @param external_port - external port number
+ @param vfr_id - VRF ID
+*/
+define snat_add_static_mapping {
+ u32 client_index;
+ u32 context;
+ u8 is_add;
+ u8 is_ip4;
+ u8 addr_only;
+ u8 local_ip_address[16];
+ u8 external_ip_address[16];
+ u16 local_port;
+ u16 external_port;
+ u32 vrf_id;
+};
+
+/** \brief Add/delete S-NAT static mapping reply
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+*/
+define snat_add_static_mapping_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump S-NAT static mappings
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_static_mapping_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief S-NAT static mapping details response
+ @param context - sender context, to match reply w/ request
+ @param is_ip4 - 1 if address type is IPv4
+ @param addr_only - 1 if address only mapping
+ @param local_ip_address - local IP address
+ @param external_ip_address - external IP address
+ @param local_port - local port number
+ @param external_port - external port number
+ @param vfr_id - VRF ID
+*/
+define snat_static_mapping_details {
+ u32 context;
+ u8 is_ip4;
+ u8 addr_only;
+ u8 local_ip_address[16];
+ u8 external_ip_address[16];
+ u16 local_port;
+ u16 external_port;
+ u32 vrf_id;
+};
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define snat_control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+/** \brief Show S-NAT plugin startup config
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_show_config
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Show S-NAT plugin startup config reply
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param static_mapping_only - if 1 dynamic translations disabled
+ @param static_mapping_connection_tracking - if 1 create session data
+ @param translation_buckets - number of translation hash buckets
+ @param translation_memory_size - translation hash memory size
+ @param user_buckets - number of user hash buckets
+ @param user_memory_size - user hash memory size
+ @param max_translations_per_user - maximum number of translations per user
+ @param outside_vrf_id - outside VRF id
+ @param inside_vrf_id - default inside VRF id
+*/
+define snat_show_config_reply
+{
+ u32 context;
+ i32 retval;
+ u8 static_mapping_only;
+ u8 static_mapping_connection_tracking;
+ u32 translation_buckets;
+ u32 translation_memory_size;
+ u32 user_buckets;
+ u32 user_memory_size;
+ u32 max_translations_per_user;
+ u32 outside_vrf_id;
+ u32 inside_vrf_id;
+};
+
+/** \brief Set S-NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param worker_mask - S-NAT workers mask
+*/
+define snat_set_workers {
+ u32 client_index;
+ u32 context;
+ u64 worker_mask;
+};
+
+/** \brief Set S-NAT workers reply
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+*/
+define snat_set_workers_reply {
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Dump S-NAT workers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define snat_worker_dump {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief S-NAT workers details response
+ @param context - sender context, to match reply w/ request
+ @param worker_index - worker index
+ @param lcore_id - lcore ID
+ @param name - worker name
+*/
+define snat_worker_details {
+ u32 context;
+ u32 worker_index;
+ u32 lcore_id;
+ u8 name[64];
+};
diff --git a/src/plugins/snat/snat.c b/src/plugins/snat/snat.c
new file mode 100644
index 00000000000..bc9956841d2
--- /dev/null
+++ b/src/plugins/snat/snat.c
@@ -0,0 +1,1957 @@
+/*
+ * snat.c - simple nat plugin
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vlibapi/api.h>
+#include <snat/snat.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+
+snat_main_t snat_main;
+
+/* define message IDs */
+#include <snat/snat_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <snat/snat_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <snat/snat_all_api_h.h>
+#undef vl_endianfun
+
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <snat/snat_all_api_h.h>
+#undef vl_api_version
+
+/* Macro to finish up custom dump fns */
+#define FINISH \
+ vec_add1 (s, 0); \
+ vl_print (handle, (char *)s); \
+ vec_free (s); \
+ return handle;
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+#define REPLY_MACRO2(t, body) \
+do { \
+ unix_shared_memory_queue_t * q = \
+ vl_api_client_index_to_input_queue (mp->client_index); \
+ if (!q) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl(rv); \
+ do {body;} while (0); \
+ vl_msg_api_send_shmem (q, (u8 *)&rmp); \
+} while(0);
+
+
+/* Hook up input features */
+VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-in2out",
+ .runs_before = VNET_FEATURES ("snat-out2in"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-out2in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_in2out_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-in2out-worker-handoff",
+ .runs_before = VNET_FEATURES ("snat-out2in-worker-handoff"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in_worker_handoff, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-out2in-worker-handoff",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-in2out-fast",
+ .runs_before = VNET_FEATURES ("snat-out2in-fast"),
+};
+VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "snat-out2in-fast",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+
+/*
+ * This routine exists to convince the vlib plugin framework that
+ * we haven't accidentally copied a random .dll into the plugin directory.
+ *
+ * Also collects global variable pointers passed from the vpp engine
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+ int from_early_init)
+{
+ snat_main_t * sm = &snat_main;
+ clib_error_t * error = 0;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = h->vnet_main;
+ sm->ethernet_main = h->ethernet_main;
+
+ return error;
+}
+
+/*$$$$$ move to an installed header file */
+#if (1 || CLIB_DEBUG > 0) /* "trust, but verify" */
+
+#define VALIDATE_SW_IF_INDEX(mp) \
+ do { u32 __sw_if_index = ntohl(mp->sw_if_index); \
+ vnet_main_t *__vnm = vnet_get_main(); \
+ if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \
+ __sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_SW_IF_INDEX_LABEL \
+do { \
+bad_sw_if_index: \
+ ; \
+} while (0);
+
+#define VALIDATE_RX_SW_IF_INDEX(mp) \
+ do { u32 __rx_sw_if_index = ntohl(mp->rx_sw_if_index); \
+ vnet_main_t *__vnm = vnet_get_main(); \
+ if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \
+ __rx_sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_rx_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_RX_SW_IF_INDEX_LABEL \
+do { \
+bad_rx_sw_if_index: \
+ ; \
+} while (0);
+
+#define VALIDATE_TX_SW_IF_INDEX(mp) \
+ do { u32 __tx_sw_if_index = ntohl(mp->tx_sw_if_index); \
+ vnet_main_t *__vnm = vnet_get_main(); \
+ if (pool_is_free_index(__vnm->interface_main.sw_interfaces, \
+ __tx_sw_if_index)) { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_tx_sw_if_index; \
+ } \
+} while(0);
+
+#define BAD_TX_SW_IF_INDEX_LABEL \
+do { \
+bad_tx_sw_if_index: \
+ ; \
+} while (0);
+
+#else
+
+#define VALIDATE_SW_IF_INDEX(mp)
+#define BAD_SW_IF_INDEX_LABEL
+#define VALIDATE_RX_SW_IF_INDEX(mp)
+#define BAD_RX_SW_IF_INDEX_LABEL
+#define VALIDATE_TX_SW_IF_INDEX(mp)
+#define BAD_TX_SW_IF_INDEX_LABEL
+
+#endif /* CLIB_DEBUG > 0 */
+
+void snat_add_address (snat_main_t *sm, ip4_address_t *addr)
+{
+ snat_address_t * ap;
+
+ /* Check if address already exists */
+ vec_foreach (ap, sm->addresses)
+ {
+ if (ap->addr.as_u32 == addr->as_u32)
+ return;
+ }
+
+ vec_add2 (sm->addresses, ap, 1);
+ ap->addr = *addr;
+ clib_bitmap_alloc (ap->busy_port_bitmap, 65535);
+}
+
+static int is_snat_address_used_in_static_mapping (snat_main_t *sm,
+ ip4_address_t addr)
+{
+ snat_static_mapping_t *m;
+ pool_foreach (m, sm->static_mappings,
+ ({
+ if (m->external_addr.as_u32 == addr.as_u32)
+ return 1;
+ }));
+
+ return 0;
+}
+
+int snat_del_address (snat_main_t *sm, ip4_address_t addr)
+{
+ snat_address_t *a = 0;
+ snat_session_t *ses;
+ u32 *ses_to_be_removed = 0, *ses_index;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_user_key_t user_key;
+ snat_user_t *u;
+ snat_main_per_thread_data_t *tsm;
+
+ int i;
+
+ /* Find SNAT address */
+ for (i=0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == addr.as_u32)
+ {
+ a = sm->addresses + i;
+ break;
+ }
+ }
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* Check if address is used in some static mapping */
+ if (is_snat_address_used_in_static_mapping(sm, addr))
+ {
+ clib_warning ("address used in static mapping");
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
+
+ /* Delete sessions using address */
+ if (a->busy_ports)
+ {
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ pool_foreach (ses, tsm->sessions, ({
+ if (ses->out2in.addr.as_u32 == addr.as_u32)
+ {
+ vec_add1 (ses_to_be_removed, ses - tsm->sessions);
+ kv.key = ses->in2out.as_u64;
+ clib_bihash_add_del_8_8 (&sm->in2out, &kv, 0);
+ kv.key = ses->out2in.as_u64;
+ clib_bihash_add_del_8_8 (&sm->out2in, &kv, 0);
+ clib_dlist_remove (tsm->list_pool, ses->per_user_index);
+ user_key.addr = ses->in2out.addr;
+ user_key.fib_index = ses->in2out.fib_index;
+ kv.key = user_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+ {
+ u = pool_elt_at_index (tsm->users, value.value);
+ u->nsessions--;
+ }
+ }
+ }));
+
+ vec_foreach (ses_index, ses_to_be_removed)
+ pool_put_index (tsm->sessions, ses_index[0]);
+
+ vec_free (ses_to_be_removed);
+ }
+ }
+
+ vec_del1 (sm->addresses, i);
+
+ return 0;
+}
+
+static void increment_v4_address (ip4_address_t * a)
+{
+ u32 v;
+
+ v = clib_net_to_host_u32(a->as_u32) + 1;
+ a->as_u32 = clib_host_to_net_u32(v);
+}
+
+/**
+ * @brief Add static mapping.
+ *
+ * Create static mapping between local addr+port and external addr+port.
+ *
+ * @param l_addr Local IPv4 address.
+ * @param e_addr External IPv4 address.
+ * @param l_port Local port number.
+ * @param e_port External port number.
+ * @param vrf_id VRF ID.
+ * @param addr_only If 0 address port and pair mapping, otherwise address only.
+ * @param is_add If 0 delete static mapping, otherwise add.
+ *
+ * @returns
+ */
+int snat_add_static_mapping(ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, u32 vrf_id, int addr_only,
+ int is_add)
+{
+ snat_main_t * sm = &snat_main;
+ snat_static_mapping_t *m;
+ snat_static_mapping_key_t m_key;
+ clib_bihash_kv_8_8_t kv, value;
+ snat_address_t *a = 0;
+ u32 fib_index = ~0;
+ uword * p;
+ int i;
+
+ /* If outside FIB index is not resolved yet */
+ if (sm->outside_fib_index == ~0)
+ {
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->outside_vrf_id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ sm->outside_fib_index = p[0];
+ }
+
+ m_key.addr = e_addr;
+ m_key.port = addr_only ? 0 : e_port;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = 0;
+ else
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ if (is_add)
+ {
+ if (m)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ /* Convert VRF id to FIB index */
+ if (vrf_id != ~0)
+ {
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, vrf_id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ fib_index = p[0];
+ }
+ /* If not specified use inside VRF id from SNAT plugin startup config */
+ else
+ {
+ if (sm->inside_fib_index == ~0)
+ {
+ p = hash_get (sm->ip4_main->fib_index_by_table_id, sm->inside_vrf_id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ fib_index = p[0];
+ sm->inside_fib_index = fib_index;
+ }
+ else
+ fib_index = sm->inside_fib_index;
+
+ vrf_id = sm->inside_vrf_id;
+ }
+
+ /* Find external address in allocated addresses and reserve port for
+ address and port pair mapping when dynamic translations enabled */
+ if (!addr_only && !(sm->static_mapping_only))
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ /* External port must be unused */
+ if (clib_bitmap_get_no_check (a->busy_port_bitmap, e_port))
+ return VNET_API_ERROR_INVALID_VALUE;
+ clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 1);
+ if (e_port > 1024)
+ a->busy_ports++;
+
+ break;
+ }
+ }
+ /* External address must be allocated */
+ if (!a)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ pool_get (sm->static_mappings, m);
+ memset (m, 0, sizeof (*m));
+ m->local_addr = l_addr;
+ m->external_addr = e_addr;
+ m->addr_only = addr_only;
+ m->vrf_id = vrf_id;
+ m->fib_index = fib_index;
+ if (!addr_only)
+ {
+ m->local_port = l_port;
+ m->external_port = e_port;
+ }
+
+ m_key.addr = m->local_addr;
+ m_key.port = m->local_port;
+ m_key.fib_index = m->fib_index;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 1);
+
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ kv.value = m - sm->static_mappings;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 1);
+
+ /* Assign worker */
+ if (sm->workers)
+ {
+ snat_user_key_t w_key0;
+ snat_static_mapping_key_t w_key1;
+
+ w_key0.addr = m->local_addr;
+ w_key0.fib_index = m->fib_index;
+ kv.key = w_key0.as_u64;
+
+ if (clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
+ {
+ kv.value = sm->first_worker_index +
+ sm->workers[sm->next_worker++ % vec_len (sm->workers)];
+
+ clib_bihash_add_del_8_8 (&sm->worker_by_in, &kv, 1);
+ }
+ else
+ {
+ kv.value = value.value;
+ }
+
+ w_key1.addr = m->external_addr;
+ w_key1.port = clib_host_to_net_u16 (m->external_port);
+ w_key1.fib_index = sm->outside_fib_index;
+ kv.key = w_key1.as_u64;
+ clib_bihash_add_del_8_8 (&sm->worker_by_out, &kv, 1);
+ }
+ }
+ else
+ {
+ if (!m)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* Free external address port */
+ if (!addr_only && !(sm->static_mapping_only))
+ {
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ {
+ a = sm->addresses + i;
+ clib_bitmap_set_no_check (a->busy_port_bitmap, e_port, 0);
+ a->busy_ports--;
+
+ break;
+ }
+ }
+ }
+
+ m_key.addr = m->local_addr;
+ m_key.port = m->local_port;
+ m_key.fib_index = m->fib_index;
+ kv.key = m_key.as_u64;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_local, &kv, 0);
+
+ m_key.addr = m->external_addr;
+ m_key.port = m->external_port;
+ m_key.fib_index = sm->outside_fib_index;
+ kv.key = m_key.as_u64;
+ clib_bihash_add_del_8_8(&sm->static_mapping_by_external, &kv, 0);
+
+ /* Delete session(s) for static mapping if exist */
+ if (!(sm->static_mapping_only) ||
+ (sm->static_mapping_only && sm->static_mapping_connection_tracking))
+ {
+ snat_user_key_t u_key;
+ snat_user_t *u;
+ dlist_elt_t * head, * elt;
+ u32 elt_index, head_index, del_elt_index;
+ u32 ses_index;
+ u64 user_index;
+ snat_session_t * s;
+ snat_main_per_thread_data_t *tsm;
+
+ u_key.addr = m->local_addr;
+ u_key.fib_index = m->fib_index;
+ kv.key = u_key.as_u64;
+ if (!clib_bihash_search_8_8 (&sm->user_hash, &kv, &value))
+ {
+ user_index = value.value;
+ if (!clib_bihash_search_8_8 (&sm->worker_by_in, &kv, &value))
+ tsm = vec_elt_at_index (sm->per_thread_data, value.value);
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+ u = pool_elt_at_index (tsm->users, user_index);
+ if (u->nstaticsessions)
+ {
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tsm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+ while (ses_index != ~0)
+ {
+ s = pool_elt_at_index (tsm->sessions, ses_index);
+ del_elt_index = elt_index;
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tsm->list_pool, elt_index);
+ ses_index = elt->value;
+
+ if (!addr_only)
+ {
+ if ((s->out2in.addr.as_u32 != e_addr.as_u32) &&
+ (clib_net_to_host_u16 (s->out2in.port) != e_port))
+ continue;
+ }
+
+ value.key = s->in2out.as_u64;
+ clib_bihash_add_del_8_8 (&sm->in2out, &value, 0);
+ value.key = s->out2in.as_u64;
+ clib_bihash_add_del_8_8 (&sm->out2in, &value, 0);
+ pool_put (tsm->sessions, s);
+
+ clib_dlist_remove (tsm->list_pool, del_elt_index);
+ pool_put_index (tsm->list_pool, del_elt_index);
+ u->nstaticsessions--;
+
+ if (!addr_only)
+ break;
+ }
+ if (addr_only)
+ {
+ pool_put (tsm->users, u);
+ clib_bihash_add_del_8_8 (&sm->user_hash, &kv, 0);
+ }
+ }
+ }
+ }
+
+ /* Delete static mapping from pool */
+ pool_put (sm->static_mappings, m);
+ }
+
+ return 0;
+}
+
+static int snat_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+ const char * feature_name;
+
+ if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
+ feature_name = is_inside ? "snat-in2out-fast" : "snat-out2in-fast";
+ else
+ {
+ if (sm->num_workers > 1)
+ feature_name = is_inside ? "snat-in2out-worker-handoff" : "snat-out2in-worker-handoff";
+ else
+ feature_name = is_inside ? "snat-in2out" : "snat-out2in";
+ }
+
+ vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
+ !is_del, 0, 0);
+
+ if (sm->fq_in2out_index == ~0)
+ sm->fq_in2out_index = vlib_frame_queue_main_init (snat_in2out_node.index, 0);
+
+ if (sm->fq_out2in_index == ~0)
+ sm->fq_out2in_index = vlib_frame_queue_main_init (snat_out2in_node.index, 0);
+
+ pool_foreach (i, sm->interfaces,
+ ({
+ if (i->sw_if_index == sw_if_index)
+ {
+ if (is_del)
+ pool_put (sm->interfaces, i);
+ else
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ return 0;
+ }
+ }));
+
+ if (is_del)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_get (sm->interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->is_inside = is_inside;
+
+ return 0;
+}
+
+static int snat_set_workers (uword * bitmap)
+{
+ snat_main_t *sm = &snat_main;
+ int i;
+
+ if (sm->num_workers < 2)
+ return VNET_API_ERROR_FEATURE_DISABLED;
+
+ if (clib_bitmap_last_set (bitmap) >= sm->num_workers)
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ vec_free (sm->workers);
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ vec_add1(sm->workers, i);
+ }));
+
+ return 0;
+}
+
+static void
+vl_api_snat_add_address_range_t_handler
+(vl_api_snat_add_address_range_t * mp)
+{
+ snat_main_t * sm = &snat_main;
+ vl_api_snat_add_address_range_reply_t * rmp;
+ ip4_address_t this_addr;
+ u32 start_host_order, end_host_order;
+ int i, count;
+ int rv = 0;
+ u32 * tmp;
+
+ if (mp->is_ip4 != 1)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ if (sm->static_mapping_only)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ tmp = (u32 *) mp->first_ip_address;
+ start_host_order = clib_host_to_net_u32 (tmp[0]);
+ tmp = (u32 *) mp->last_ip_address;
+ end_host_order = clib_host_to_net_u32 (tmp[0]);
+
+ count = (end_host_order - start_host_order) + 1;
+
+ if (count > 1024)
+ clib_warning ("%U - %U, %d addresses...",
+ format_ip4_address, mp->first_ip_address,
+ format_ip4_address, mp->last_ip_address,
+ count);
+
+ memcpy (&this_addr.as_u8, mp->first_ip_address, 4);
+
+ for (i = 0; i < count; i++)
+ {
+ if (mp->is_add)
+ snat_add_address (sm, &this_addr);
+ else
+ rv = snat_del_address (sm, this_addr);
+
+ if (rv)
+ goto send_reply;
+
+ increment_v4_address (&this_addr);
+ }
+
+ send_reply:
+ REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY);
+}
+
+static void *vl_api_snat_add_address_range_t_print
+(vl_api_snat_add_address_range_t *mp, void * handle)
+{
+ u8 * s;
+
+ s = format (0, "SCRIPT: snat_add_address_range ");
+ s = format (s, "%U ", format_ip4_address, mp->first_ip_address);
+ if (memcmp (mp->first_ip_address, mp->last_ip_address, 4))
+ {
+ s = format (s, " - %U ", format_ip4_address, mp->last_ip_address);
+ }
+ FINISH;
+}
+
+static void
+send_snat_address_details
+(snat_address_t * a, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_address_details_t *rmp;
+ snat_main_t * sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_ADDRESS_DETAILS+sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ clib_memcpy (rmp->ip_address, &(a->addr), 4);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_address_dump_t_handler
+(vl_api_snat_address_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t * sm = &snat_main;
+ snat_address_t * a;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vec_foreach (a, sm->addresses)
+ send_snat_address_details (a, q, mp->context);
+}
+
+static void *vl_api_snat_address_dump_t_print
+(vl_api_snat_address_dump_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_address_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_interface_add_del_feature_t_handler
+(vl_api_snat_interface_add_del_feature_t * mp)
+{
+ snat_main_t * sm = &snat_main;
+ vl_api_snat_interface_add_del_feature_reply_t * rmp;
+ u8 is_del = mp->is_add == 0;
+ u32 sw_if_index = ntohl(mp->sw_if_index);
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX(mp);
+
+ rv = snat_interface_add_del (sw_if_index, mp->is_inside, is_del);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO(VL_API_SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY);
+}
+
+static void *vl_api_snat_interface_add_del_feature_t_print
+(vl_api_snat_interface_add_del_feature_t * mp, void *handle)
+{
+ u8 * s;
+
+ s = format (0, "SCRIPT: snat_interface_add_del_feature ");
+ s = format (s, "sw_if_index %d %s %s",
+ clib_host_to_net_u32(mp->sw_if_index),
+ mp->is_inside ? "in":"out",
+ mp->is_add ? "" : "del");
+
+ FINISH;
+}
+
+static void
+send_snat_interface_details
+(snat_interface_t * i, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_interface_details_t *rmp;
+ snat_main_t * sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_INTERFACE_DETAILS+sm->msg_id_base);
+ rmp->sw_if_index = ntohl (i->sw_if_index);
+ rmp->is_inside = i->is_inside;
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_interface_dump_t_handler
+(vl_api_snat_interface_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t * sm = &snat_main;
+ snat_interface_t * i;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ pool_foreach (i, sm->interfaces,
+ ({
+ send_snat_interface_details(i, q, mp->context);
+ }));
+}
+
+static void *vl_api_snat_interface_dump_t_print
+(vl_api_snat_interface_dump_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_interface_dump ");
+
+ FINISH;
+}static void
+
+vl_api_snat_add_static_mapping_t_handler
+(vl_api_snat_add_static_mapping_t * mp)
+{
+ snat_main_t * sm = &snat_main;
+ vl_api_snat_add_static_mapping_reply_t * rmp;
+ ip4_address_t local_addr, external_addr;
+ u16 local_port = 0, external_port = 0;
+ u32 vrf_id;
+ int rv = 0;
+
+ if (mp->is_ip4 != 1)
+ {
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto send_reply;
+ }
+
+ memcpy (&local_addr.as_u8, mp->local_ip_address, 4);
+ memcpy (&external_addr.as_u8, mp->external_ip_address, 4);
+ if (mp->addr_only == 0)
+ {
+ local_port = clib_net_to_host_u16 (mp->local_port);
+ external_port = clib_net_to_host_u16 (mp->external_port);
+ }
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+
+ rv = snat_add_static_mapping(local_addr, external_addr, local_port,
+ external_port, vrf_id, mp->addr_only,
+ mp->is_add);
+
+ send_reply:
+ REPLY_MACRO (VL_API_SNAT_ADD_ADDRESS_RANGE_REPLY);
+}
+
+static void *vl_api_snat_add_static_mapping_t_print
+(vl_api_snat_add_static_mapping_t *mp, void * handle)
+{
+ u8 * s;
+
+ s = format (0, "SCRIPT: snat_add_static_mapping ");
+ s = format (s, "local_addr %U external_addr %U ",
+ format_ip4_address, mp->local_ip_address,
+ format_ip4_address, mp->external_ip_address);
+
+ if (mp->addr_only == 0)
+ s = format (s, "local_port %d external_port %d ",
+ clib_net_to_host_u16 (mp->local_port),
+ clib_net_to_host_u16 (mp->external_port));
+
+ if (mp->vrf_id != ~0)
+ s = format (s, "vrf %d", clib_net_to_host_u32 (mp->vrf_id));
+
+ FINISH;
+}
+
+static void
+send_snat_static_mapping_details
+(snat_static_mapping_t * m, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_static_mapping_details_t *rmp;
+ snat_main_t * sm = &snat_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_STATIC_MAPPING_DETAILS+sm->msg_id_base);
+ rmp->is_ip4 = 1;
+ rmp->addr_only = m->addr_only;
+ clib_memcpy (rmp->local_ip_address, &(m->local_addr), 4);
+ clib_memcpy (rmp->external_ip_address, &(m->external_addr), 4);
+ rmp->local_port = htons (m->local_port);
+ rmp->external_port = htons (m->external_port);
+ rmp->vrf_id = htonl (m->vrf_id);
+ rmp->context = context;
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_static_mapping_dump_t_handler
+(vl_api_snat_static_mapping_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t * sm = &snat_main;
+ snat_static_mapping_t * m;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ pool_foreach (m, sm->static_mappings,
+ ({
+ send_snat_static_mapping_details (m, q, mp->context);
+ }));
+}
+
+static void *vl_api_snat_static_mapping_dump_t_print
+(vl_api_snat_static_mapping_dump_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_static_mapping_dump ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_control_ping_t_handler
+(vl_api_snat_control_ping_t * mp)
+{
+ vl_api_snat_control_ping_reply_t *rmp;
+ snat_main_t * sm = &snat_main;
+ int rv = 0;
+
+ REPLY_MACRO2(VL_API_SNAT_CONTROL_PING_REPLY,
+ ({
+ rmp->vpe_pid = ntohl (getpid());
+ }));
+}
+
+static void *vl_api_snat_control_ping_t_print
+(vl_api_snat_control_ping_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_control_ping ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_show_config_t_handler
+(vl_api_snat_show_config_t * mp)
+{
+ vl_api_snat_show_config_reply_t *rmp;
+ snat_main_t * sm = &snat_main;
+ int rv = 0;
+
+ REPLY_MACRO2(VL_API_SNAT_SHOW_CONFIG_REPLY,
+ ({
+ rmp->translation_buckets = htonl (sm->translation_buckets);
+ rmp->translation_memory_size = htonl (sm->translation_memory_size);
+ rmp->user_buckets = htonl (sm->user_buckets);
+ rmp->user_memory_size = htonl (sm->user_memory_size);
+ rmp->max_translations_per_user = htonl (sm->max_translations_per_user);
+ rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
+ rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
+ rmp->static_mapping_only = sm->static_mapping_only;
+ rmp->static_mapping_connection_tracking =
+ sm->static_mapping_connection_tracking;
+ }));
+}
+
+static void *vl_api_snat_show_config_t_print
+(vl_api_snat_show_config_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_show_config ");
+
+ FINISH;
+}
+
+static void
+vl_api_snat_set_workers_t_handler
+(vl_api_snat_set_workers_t * mp)
+{
+ snat_main_t * sm = &snat_main;
+ vl_api_snat_set_workers_reply_t * rmp;
+ int rv = 0;
+ uword *bitmap = 0;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ if (sm->num_workers < 2)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto send_reply;
+ }
+
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ rv = snat_set_workers(bitmap);
+ clib_bitmap_free (bitmap);
+
+ send_reply:
+ REPLY_MACRO (VL_API_SNAT_SET_WORKERS_REPLY);
+}
+
+static void *vl_api_snat_set_workers_t_print
+(vl_api_snat_set_workers_t *mp, void * handle)
+{
+ u8 * s;
+ uword *bitmap = 0;
+ u8 first = 1;
+ int i;
+ u64 mask = clib_net_to_host_u64 (mp->worker_mask);
+
+ s = format (0, "SCRIPT: snat_set_workers ");
+ bitmap = clib_bitmap_set_multiple (bitmap, 0, mask, BITS (mask));
+ clib_bitmap_foreach (i, bitmap,
+ ({
+ if (first)
+ s = format (s, "%d", i);
+ else
+ s = format (s, ",%d", i);
+ first = 0;
+ }));
+ clib_bitmap_free (bitmap);
+ FINISH;
+}
+
+static void
+send_snat_worker_details
+(u32 worker_index, unix_shared_memory_queue_t * q, u32 context)
+{
+ vl_api_snat_worker_details_t *rmp;
+ snat_main_t * sm = &snat_main;
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + worker_index + sm->first_worker_index;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_SNAT_WORKER_DETAILS+sm->msg_id_base);
+ rmp->context = context;
+ rmp->worker_index = htonl (worker_index);
+ rmp->lcore_id = htonl (w->lcore_id);
+ strncpy ((char *) rmp->name, (char *) w->name, ARRAY_LEN (rmp->name) - 1);
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+static void
+vl_api_snat_worker_dump_t_handler
+(vl_api_snat_worker_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ snat_main_t * sm = &snat_main;
+ u32 * worker_index;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ vec_foreach (worker_index, sm->workers)
+ {
+ send_snat_worker_details(*worker_index, q, mp->context);
+ }
+}
+
+static void *vl_api_snat_worker_dump_t_print
+(vl_api_snat_worker_dump_t *mp, void * handle)
+{
+ u8 *s;
+
+ s = format (0, "SCRIPT: snat_worker_dump ");
+
+ FINISH;
+}
+
+/* List of message types that this plugin understands */
+#define foreach_snat_plugin_api_msg \
+_(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range) \
+_(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature) \
+_(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping) \
+_(SNAT_CONTROL_PING, snat_control_ping) \
+_(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump) \
+_(SNAT_SHOW_CONFIG, snat_show_config) \
+_(SNAT_ADDRESS_DUMP, snat_address_dump) \
+_(SNAT_INTERFACE_DUMP, snat_interface_dump) \
+_(SNAT_SET_WORKERS, snat_set_workers) \
+_(SNAT_WORKER_DUMP, snat_worker_dump)
+
+/* Set up the API message handling tables */
+static clib_error_t *
+snat_plugin_api_hookup (vlib_main_t *vm)
+{
+ snat_main_t * sm __attribute__ ((unused)) = &snat_main;
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_snat_plugin_api_msg;
+#undef _
+
+ return 0;
+}
+
+#define vl_msg_name_crc_list
+#include <snat/snat_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (snat_main_t * sm, api_main_t * am)
+{
+#define _(id,n,crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base);
+ foreach_vl_msg_name_crc_snat;
+#undef _
+}
+
+static void plugin_custom_dump_configure (snat_main_t * sm)
+{
+#define _(n,f) sm->api_main->msg_print_handlers \
+ [VL_API_##n + sm->msg_id_base] \
+ = (void *) vl_api_##f##_t_print;
+ foreach_snat_plugin_api_msg;
+#undef _
+}
+
+static clib_error_t * snat_init (vlib_main_t * vm)
+{
+ snat_main_t * sm = &snat_main;
+ clib_error_t * error = 0;
+ ip4_main_t * im = &ip4_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u8 * name;
+ uword *p;
+ vlib_thread_registration_t *tr;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ uword *bitmap = 0;
+ u32 i;
+
+ name = format (0, "snat_%08x%c", api_version, 0);
+
+ /* Ask for a correctly-sized block of API message decode slots */
+ sm->msg_id_base = vl_msg_api_get_msg_ids
+ ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main();
+ sm->ip4_main = im;
+ sm->ip4_lookup_main = lm;
+ sm->api_main = &api_main;
+ sm->first_worker_index = 0;
+ sm->next_worker = 0;
+ sm->num_workers = 0;
+ sm->workers = 0;
+ sm->fq_in2out_index = ~0;
+ sm->fq_out2in_index = ~0;
+
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ if (p)
+ {
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr)
+ {
+ sm->num_workers = tr->count;
+ sm->first_worker_index = tr->first_index;
+ }
+ }
+
+ /* Use all available workers by default */
+ if (sm->num_workers > 1)
+ {
+ for (i=0; i < sm->num_workers; i++)
+ bitmap = clib_bitmap_set (bitmap, i, 1);
+ snat_set_workers(bitmap);
+ clib_bitmap_free (bitmap);
+ }
+
+ error = snat_plugin_api_hookup (vm);
+
+ /* Add our API messages to the global name_crc hash table */
+ setup_message_id_table (sm, &api_main);
+
+ plugin_custom_dump_configure (sm);
+ vec_free(name);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (snat_init);
+
+void snat_free_outside_address_and_port (snat_main_t * sm,
+ snat_session_key_t * k,
+ u32 address_index)
+{
+ snat_address_t *a;
+ u16 port_host_byte_order = clib_net_to_host_u16 (k->port);
+
+ ASSERT (address_index < vec_len (sm->addresses));
+
+ a = sm->addresses + address_index;
+
+ ASSERT (clib_bitmap_get_no_check (a->busy_port_bitmap,
+ port_host_byte_order) == 1);
+
+ clib_bitmap_set_no_check (a->busy_port_bitmap, port_host_byte_order, 0);
+ a->busy_ports--;
+}
+
+/**
+ * @brief Match SNAT static mapping.
+ *
+ * @param sm SNAT main.
+ * @param match Address and port to match.
+ * @param mapping External or local address and port of the matched mapping.
+ * @param by_external If 0 match by local address otherwise match by external
+ * address.
+ *
+ * @returns 0 if match found otherwise 1.
+ */
+int snat_static_mapping_match (snat_main_t * sm,
+ snat_session_key_t match,
+ snat_session_key_t * mapping,
+ u8 by_external)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ snat_static_mapping_t *m;
+ snat_static_mapping_key_t m_key;
+ clib_bihash_8_8_t *mapping_hash = &sm->static_mapping_by_local;
+
+ if (by_external)
+ mapping_hash = &sm->static_mapping_by_external;
+
+ m_key.addr = match.addr;
+ m_key.port = clib_net_to_host_u16 (match.port);
+ m_key.fib_index = match.fib_index;
+
+ kv.key = m_key.as_u64;
+
+ if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
+ {
+ /* Try address only mapping */
+ m_key.port = 0;
+ kv.key = m_key.as_u64;
+ if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
+ return 1;
+ }
+
+ m = pool_elt_at_index (sm->static_mappings, value.value);
+
+ if (by_external)
+ {
+ mapping->addr = m->local_addr;
+ /* Address only mapping doesn't change port */
+ mapping->port = m->addr_only ? match.port
+ : clib_host_to_net_u16 (m->local_port);
+ mapping->fib_index = m->fib_index;
+ }
+ else
+ {
+ mapping->addr = m->external_addr;
+ /* Address only mapping doesn't change port */
+ mapping->port = m->addr_only ? match.port
+ : clib_host_to_net_u16 (m->external_port);
+ mapping->fib_index = sm->outside_fib_index;
+ }
+
+ return 0;
+}
+
+int snat_alloc_outside_address_and_port (snat_main_t * sm,
+ snat_session_key_t * k,
+ u32 * address_indexp)
+{
+ int i;
+ snat_address_t *a;
+ u32 portnum;
+
+ for (i = 0; i < vec_len (sm->addresses); i++)
+ {
+ if (sm->addresses[i].busy_ports < (65535-1024))
+ {
+ a = sm->addresses + i;
+
+ while (1)
+ {
+ portnum = random_u32 (&sm->random_seed);
+ portnum &= 0xFFFF;
+ if (portnum < 1024)
+ continue;
+ if (clib_bitmap_get_no_check (a->busy_port_bitmap, portnum))
+ continue;
+ clib_bitmap_set_no_check (a->busy_port_bitmap, portnum, 1);
+ a->busy_ports++;
+ /* Caller sets protocol and fib index */
+ k->addr = a->addr;
+ k->port = clib_host_to_net_u16(portnum);
+ *address_indexp = i;
+ return 0;
+ }
+ }
+ }
+ /* Totally out of translations to use... */
+ return 1;
+}
+
+
+static clib_error_t *
+add_address_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ snat_main_t * sm = &snat_main;
+ ip4_address_t start_addr, end_addr, this_addr;
+ u32 start_host_order, end_host_order;
+ int i, count;
+ int is_add = 1;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U - %U",
+ unformat_ip4_address, &start_addr,
+ unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (line_input, "%U", unformat_ip4_address, &start_addr))
+ end_addr = start_addr;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (sm->static_mapping_only)
+ return clib_error_return (0, "static mapping only mode");
+
+ start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
+ end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
+
+ if (end_host_order < start_host_order)
+ return clib_error_return (0, "end address less than start address");
+
+ count = (end_host_order - start_host_order) + 1;
+
+ if (count > 1024)
+ clib_warning ("%U - %U, %d addresses...",
+ format_ip4_address, &start_addr,
+ format_ip4_address, &end_addr,
+ count);
+
+ this_addr = start_addr;
+
+ for (i = 0; i < count; i++)
+ {
+ if (is_add)
+ snat_add_address (sm, &this_addr);
+ else
+ rv = snat_del_address (sm, this_addr);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "S-NAT address not exist.");
+ break;
+ case VNET_API_ERROR_UNSPECIFIED:
+ return clib_error_return (0, "S-NAT address used in static mapping.");
+ break;
+ default:
+ break;
+ }
+
+ increment_v4_address (&this_addr);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (add_address_command, static) = {
+ .path = "snat add address",
+ .short_help = "snat add addresses <ip4-range-start> [- <ip4-range-end>] [del]",
+ .function = add_address_command_fn,
+};
+
+static clib_error_t *
+snat_feature_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index;
+ u32 * inside_sw_if_indices = 0;
+ u32 * outside_sw_if_indices = 0;
+ int is_del = 0;
+ int i;
+
+ sw_if_index = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "in %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (inside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "out %U", unformat_vnet_sw_interface,
+ vnm, &sw_if_index))
+ vec_add1 (outside_sw_if_indices, sw_if_index);
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (vec_len (inside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len(inside_sw_if_indices); i++)
+ {
+ sw_if_index = inside_sw_if_indices[i];
+ snat_interface_add_del (sw_if_index, 1, is_del);
+ }
+ }
+
+ if (vec_len (outside_sw_if_indices))
+ {
+ for (i = 0; i < vec_len(outside_sw_if_indices); i++)
+ {
+ sw_if_index = outside_sw_if_indices[i];
+ snat_interface_add_del (sw_if_index, 0, is_del);
+ }
+ }
+
+ vec_free (inside_sw_if_indices);
+ vec_free (outside_sw_if_indices);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_interface_snat_command, static) = {
+ .path = "set interface snat",
+ .function = snat_feature_command_fn,
+ .short_help = "set interface snat in <intfc> out <intfc> [del]",
+};
+
+static clib_error_t *
+add_static_mapping_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t * error = 0;
+ ip4_address_t l_addr, e_addr;
+ u32 l_port = 0, e_port = 0, vrf_id = ~0;
+ int is_add = 1;
+ int addr_only = 1;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr,
+ &l_port))
+ addr_only = 0;
+ else if (unformat (line_input, "local %U", unformat_ip4_address, &l_addr))
+ ;
+ else if (unformat (line_input, "external %U %u", unformat_ip4_address,
+ &e_addr, &e_port))
+ addr_only = 0;
+ else if (unformat (line_input, "external %U", unformat_ip4_address,
+ &e_addr))
+ ;
+ else if (unformat (line_input, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else
+ return clib_error_return (0, "unknown input: '%U'",
+ format_unformat_error, line_input);
+ }
+ unformat_free (line_input);
+
+ rv = snat_add_static_mapping(l_addr, e_addr, (u16) l_port, (u16) e_port,
+ vrf_id, addr_only, is_add);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ return clib_error_return (0, "External port already in use.");
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ if (is_add)
+ return clib_error_return (0, "External addres must be allocated.");
+ else
+ return clib_error_return (0, "Mapping not exist.");
+ break;
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "No such VRF id.");
+ case VNET_API_ERROR_VALUE_EXIST:
+ return clib_error_return (0, "Mapping already exist.");
+ default:
+ break;
+ }
+
+ return error;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{snat add static mapping}
+ * Static mapping allows hosts on the external network to initiate connection
+ * to to the local network host.
+ * To create static mapping between local host address 10.0.0.3 port 6303 and
+ * external address 4.4.4.4 port 3606 use:
+ * vpp# snat add static mapping local 10.0.0.3 6303 external 4.4.4.4 3606
+ * If not runnig "static mapping only" S-NAT plugin mode use before:
+ * vpp# snat add address 4.4.4.4
+ * To create static mapping between local and external address use:
+ * vpp# snat add static mapping local 10.0.0.3 external 4.4.4.4
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (add_static_mapping_command, static) = {
+ .path = "snat add static mapping",
+ .function = add_static_mapping_command_fn,
+ .short_help =
+ "snat add static mapping local <addr> [<port>] external <addr> [<port>] [vrf <table-id>] [del]",
+};
+
+static clib_error_t *
+set_workers_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ uword *bitmap = 0;
+ int rv = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_bitmap_list, &bitmap))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (bitmap == 0)
+ return clib_error_return (0, "List of workers must be specified.");
+
+ rv = snat_set_workers(bitmap);
+
+ clib_bitmap_free (bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_WORKER:
+ return clib_error_return (0, "Invalid worker(s).");
+ break;
+ case VNET_API_ERROR_FEATURE_DISABLED:
+ return clib_error_return (0,
+ "Supported only if 2 or more workes available.");
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * @cliexpar
+ * @cliexstart{set snat workers}
+ * Set SNAT workers if 2 or more workers available, use:
+ * vpp# set snat workers 0-2,5
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (set_workers_command, static) = {
+ .path = "set snat workers",
+ .function = set_workers_command_fn,
+ .short_help =
+ "set snat workers <workers-list>",
+};
+
+static clib_error_t *
+snat_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ snat_main_t * sm = &snat_main;
+ u32 translation_buckets = 1024;
+ u32 translation_memory_size = 128<<20;
+ u32 user_buckets = 128;
+ u32 user_memory_size = 64<<20;
+ u32 max_translations_per_user = 100;
+ u32 outside_vrf_id = 0;
+ u32 inside_vrf_id = 0;
+ u32 static_mapping_buckets = 1024;
+ u32 static_mapping_memory_size = 64<<20;
+ u8 static_mapping_only = 0;
+ u8 static_mapping_connection_tracking = 0;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "translation hash buckets %d", &translation_buckets))
+ ;
+ else if (unformat (input, "translation hash memory %d",
+ &translation_memory_size));
+ else if (unformat (input, "user hash buckets %d", &user_buckets))
+ ;
+ else if (unformat (input, "user hash memory %d",
+ &user_memory_size))
+ ;
+ else if (unformat (input, "max translations per user %d",
+ &max_translations_per_user))
+ ;
+ else if (unformat (input, "outside VRF id %d",
+ &outside_vrf_id))
+ ;
+ else if (unformat (input, "inside VRF id %d",
+ &inside_vrf_id))
+ ;
+ else if (unformat (input, "static mapping only"))
+ {
+ static_mapping_only = 1;
+ if (unformat (input, "connection tracking"))
+ static_mapping_connection_tracking = 1;
+ }
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ /* for show commands, etc. */
+ sm->translation_buckets = translation_buckets;
+ sm->translation_memory_size = translation_memory_size;
+ sm->user_buckets = user_buckets;
+ sm->user_memory_size = user_memory_size;
+ sm->max_translations_per_user = max_translations_per_user;
+ sm->outside_vrf_id = outside_vrf_id;
+ sm->outside_fib_index = ~0;
+ sm->inside_vrf_id = inside_vrf_id;
+ sm->inside_fib_index = ~0;
+ sm->static_mapping_only = static_mapping_only;
+ sm->static_mapping_connection_tracking = static_mapping_connection_tracking;
+
+ if (!static_mapping_only ||
+ (static_mapping_only && static_mapping_connection_tracking))
+ {
+ clib_bihash_init_8_8 (&sm->worker_by_in, "worker-by-in", user_buckets,
+ user_memory_size);
+
+ clib_bihash_init_8_8 (&sm->worker_by_out, "worker-by-out", user_buckets,
+ user_memory_size);
+
+ vec_validate (sm->per_thread_data, tm->n_vlib_mains - 1);
+
+ clib_bihash_init_8_8 (&sm->in2out, "in2out", translation_buckets,
+ translation_memory_size);
+
+ clib_bihash_init_8_8 (&sm->out2in, "out2in", translation_buckets,
+ translation_memory_size);
+
+ clib_bihash_init_8_8 (&sm->user_hash, "users", user_buckets,
+ user_memory_size);
+ }
+ clib_bihash_init_8_8 (&sm->static_mapping_by_local,
+ "static_mapping_by_local", static_mapping_buckets,
+ static_mapping_memory_size);
+
+ clib_bihash_init_8_8 (&sm->static_mapping_by_external,
+ "static_mapping_by_external", static_mapping_buckets,
+ static_mapping_memory_size);
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (snat_config, "snat");
+
+u8 * format_snat_key (u8 * s, va_list * args)
+{
+ snat_session_key_t * key = va_arg (*args, snat_session_key_t *);
+ char * protocol_string = "unknown";
+ static char *protocol_strings[] = {
+ "UDP",
+ "TCP",
+ "ICMP",
+ };
+
+ if (key->protocol < ARRAY_LEN(protocol_strings))
+ protocol_string = protocol_strings[key->protocol];
+
+ s = format (s, "%U proto %s port %d fib %d",
+ format_ip4_address, &key->addr, protocol_string,
+ clib_net_to_host_u16 (key->port), key->fib_index);
+ return s;
+}
+
+u8 * format_snat_session (u8 * s, va_list * args)
+{
+ snat_main_t * sm __attribute__((unused)) = va_arg (*args, snat_main_t *);
+ snat_session_t * sess = va_arg (*args, snat_session_t *);
+
+ s = format (s, " i2o %U\n", format_snat_key, &sess->in2out);
+ s = format (s, " o2i %U\n", format_snat_key, &sess->out2in);
+ s = format (s, " last heard %.2f\n", sess->last_heard);
+ s = format (s, " total pkts %d, total bytes %lld\n",
+ sess->total_pkts, sess->total_bytes);
+ if (snat_is_session_static (sess))
+ s = format (s, " static translation\n");
+ else
+ s = format (s, " dynamic translation\n");
+
+ return s;
+}
+
+u8 * format_snat_user (u8 * s, va_list * args)
+{
+ snat_main_per_thread_data_t * sm = va_arg (*args, snat_main_per_thread_data_t *);
+ snat_user_t * u = va_arg (*args, snat_user_t *);
+ int verbose = va_arg (*args, int);
+ dlist_elt_t * head, * elt;
+ u32 elt_index, head_index;
+ u32 session_index;
+ snat_session_t * sess;
+
+ s = format (s, "%U: %d dynamic translations, %d static translations\n",
+ format_ip4_address, &u->addr, u->nsessions, u->nstaticsessions);
+
+ if (verbose == 0)
+ return s;
+
+ if (u->nsessions || u->nstaticsessions)
+ {
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (sm->list_pool, head_index);
+
+ elt_index = head->next;
+ elt = pool_elt_at_index (sm->list_pool, elt_index);
+ session_index = elt->value;
+
+ while (session_index != ~0)
+ {
+ sess = pool_elt_at_index (sm->sessions, session_index);
+
+ s = format (s, " %U\n", format_snat_session, sm, sess);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (sm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+ }
+
+ return s;
+}
+
+u8 * format_snat_static_mapping (u8 * s, va_list * args)
+{
+ snat_static_mapping_t *m = va_arg (*args, snat_static_mapping_t *);
+
+ if (m->addr_only)
+ s = format (s, "local %U external %U vrf %d",
+ format_ip4_address, &m->local_addr,
+ format_ip4_address, &m->external_addr,
+ m->vrf_id);
+ else
+ s = format (s, "local %U:%d external %U:%d vrf %d",
+ format_ip4_address, &m->local_addr, m->local_port,
+ format_ip4_address, &m->external_addr, m->external_port,
+ m->vrf_id);
+
+ return s;
+}
+
+static clib_error_t *
+show_snat_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int verbose = 0;
+ snat_main_t * sm = &snat_main;
+ snat_user_t * u;
+ snat_static_mapping_t *m;
+ snat_interface_t *i;
+ snat_address_t * ap;
+ vnet_main_t *vnm = vnet_get_main();
+ snat_main_per_thread_data_t *tsm;
+ u32 users_num = 0, sessions_num = 0, *worker;
+ uword j = 0;
+
+ if (unformat (input, "detail"))
+ verbose = 1;
+ else if (unformat (input, "verbose"))
+ verbose = 2;
+
+ if (sm->static_mapping_only)
+ {
+ if (sm->static_mapping_connection_tracking)
+ vlib_cli_output (vm, "SNAT mode: static mapping only connection "
+ "tracking");
+ else
+ vlib_cli_output (vm, "SNAT mode: static mapping only");
+ }
+ else
+ {
+ vlib_cli_output (vm, "SNAT mode: dynamic translations enabled");
+ }
+
+ if (verbose > 0)
+ {
+ pool_foreach (i, sm->interfaces,
+ ({
+ vlib_cli_output (vm, "%U %s", format_vnet_sw_interface_name, vnm,
+ vnet_get_sw_interface (vnm, i->sw_if_index),
+ i->is_inside ? "in" : "out");
+ }));
+
+ vec_foreach (ap, sm->addresses)
+ {
+ u8 * s = format (0, "");
+ vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr);
+ clib_bitmap_foreach (j, ap->busy_port_bitmap,
+ ({
+ s = format (s, " %d", j);
+ }));
+ vlib_cli_output (vm, " %d busy ports:%v", ap->busy_ports, s);
+ }
+ }
+
+ if (sm->num_workers > 1)
+ {
+ vlib_cli_output (vm, "%d workers", vec_len (sm->workers));
+ if (verbose > 0)
+ {
+ vec_foreach (worker, sm->workers)
+ {
+ vlib_worker_thread_t *w =
+ vlib_worker_threads + *worker + sm->first_worker_index;
+ vlib_cli_output (vm, " %v", w->name);
+ }
+ }
+ }
+
+ if (sm->static_mapping_only && !(sm->static_mapping_connection_tracking))
+ {
+ vlib_cli_output (vm, "%d static mappings",
+ pool_elts (sm->static_mappings));
+
+ if (verbose > 0)
+ {
+ pool_foreach (m, sm->static_mappings,
+ ({
+ vlib_cli_output (vm, "%U", format_snat_static_mapping, m);
+ }));
+ }
+ }
+ else
+ {
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ users_num += pool_elts (tsm->users);
+ sessions_num += pool_elts (tsm->sessions);
+ }
+
+ vlib_cli_output (vm, "%d users, %d outside addresses, %d active sessions,"
+ " %d static mappings",
+ users_num,
+ vec_len (sm->addresses),
+ sessions_num,
+ pool_elts (sm->static_mappings));
+
+ if (verbose > 0)
+ {
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->in2out,
+ verbose - 1);
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->out2in,
+ verbose - 1);
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_in,
+ verbose - 1);
+ vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->worker_by_out,
+ verbose - 1);
+ vec_foreach_index (j, sm->per_thread_data)
+ {
+ tsm = vec_elt_at_index (sm->per_thread_data, j);
+
+ if (pool_elts (tsm->users) == 0)
+ continue;
+
+ vlib_worker_thread_t *w = vlib_worker_threads + j;
+ vlib_cli_output (vm, "Thread %d (%v at lcore %u):", j, w->name,
+ w->lcore_id);
+ vlib_cli_output (vm, " %d list pool elements",
+ pool_elts (tsm->list_pool));
+
+ pool_foreach (u, tsm->users,
+ ({
+ vlib_cli_output (vm, " %U", format_snat_user, tsm, u,
+ verbose - 1);
+ }));
+ }
+
+ if (pool_elts (sm->static_mappings))
+ {
+ vlib_cli_output (vm, "static mappings:");
+ pool_foreach (m, sm->static_mappings,
+ ({
+ vlib_cli_output (vm, "%U", format_snat_static_mapping, m);
+ }));
+ }
+ }
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_snat_command, static) = {
+ .path = "show snat",
+ .short_help = "show snat",
+ .function = show_snat_command_fn,
+};
diff --git a/src/plugins/snat/snat.h b/src/plugins/snat/snat.h
new file mode 100644
index 00000000000..cb31dc51423
--- /dev/null
+++ b/src/plugins/snat/snat.h
@@ -0,0 +1,259 @@
+
+/*
+ * snat.h - simple nat definitions
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_snat_h__
+#define __included_snat_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/icmp46_packet.h>
+#include <vnet/api_errno.h>
+#include <vppinfra/bihash_8_8.h>
+#include <vppinfra/dlist.h>
+#include <vppinfra/error.h>
+#include <vlibapi/api.h>
+
+/* Key */
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t addr;
+ u16 port;
+ u16 protocol:3,
+ fib_index:13;
+ };
+ u64 as_u64;
+ };
+} snat_session_key_t;
+
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t addr;
+ u32 fib_index;
+ };
+ u64 as_u64;
+ };
+} snat_user_key_t;
+
+typedef struct {
+ union
+ {
+ struct
+ {
+ ip4_address_t addr;
+ u16 port;
+ u16 fib_index;
+ };
+ u64 as_u64;
+ };
+} snat_static_mapping_key_t;
+
+
+typedef enum {
+ SNAT_PROTOCOL_UDP = 0,
+ SNAT_PROTOCOL_TCP,
+ SNAT_PROTOCOL_ICMP,
+} snat_protocol_t;
+
+
+#define SNAT_SESSION_FLAG_STATIC_MAPPING 1
+
+typedef CLIB_PACKED(struct {
+ snat_session_key_t out2in; /* 0-15 */
+
+ snat_session_key_t in2out; /* 16-31 */
+
+ u32 flags; /* 32-35 */
+
+ /* per-user translations */
+ u32 per_user_index; /* 36-39 */
+
+ u32 per_user_list_head_index; /* 40-43 */
+
+ /* Last heard timer */
+ f64 last_heard; /* 44-51 */
+
+ u64 total_bytes; /* 52-59 */
+
+ u32 total_pkts; /* 60-63 */
+
+ /* Outside address */
+ u32 outside_address_index; /* 64-67 */
+
+}) snat_session_t;
+
+
+typedef struct {
+ ip4_address_t addr;
+ u32 sessions_per_user_list_head_index;
+ u32 nsessions;
+ u32 nstaticsessions;
+} snat_user_t;
+
+typedef struct {
+ ip4_address_t addr;
+ u32 busy_ports;
+ uword * busy_port_bitmap;
+} snat_address_t;
+
+typedef struct {
+ ip4_address_t local_addr;
+ ip4_address_t external_addr;
+ u16 local_port;
+ u16 external_port;
+ u8 addr_only;
+ u32 vrf_id;
+ u32 fib_index;
+} snat_static_mapping_t;
+
+typedef struct {
+ u32 sw_if_index;
+ u8 is_inside;
+} snat_interface_t;
+
+typedef struct {
+ /* User pool */
+ snat_user_t * users;
+
+ /* Session pool */
+ snat_session_t * sessions;
+
+ /* Pool of doubly-linked list elements */
+ dlist_elt_t * list_pool;
+} snat_main_per_thread_data_t;
+
+typedef struct {
+ /* Main lookup tables */
+ clib_bihash_8_8_t out2in;
+ clib_bihash_8_8_t in2out;
+
+ /* Find-a-user => src address lookup */
+ clib_bihash_8_8_t user_hash;
+
+ /* Non-translated packets worker lookup => src address + VRF */
+ clib_bihash_8_8_t worker_by_in;
+
+ /* Translated packets worker lookup => IP address + port number */
+ clib_bihash_8_8_t worker_by_out;
+
+ u32 num_workers;
+ u32 first_worker_index;
+ u32 next_worker;
+ u32 * workers;
+
+ /* Per thread data */
+ snat_main_per_thread_data_t * per_thread_data;
+
+ /* Find a static mapping by local */
+ clib_bihash_8_8_t static_mapping_by_local;
+
+ /* Find a static mapping by external */
+ clib_bihash_8_8_t static_mapping_by_external;
+
+ /* Static mapping pool */
+ snat_static_mapping_t * static_mappings;
+
+ /* Interface pool */
+ snat_interface_t * interfaces;
+
+ /* Vector of outside addresses */
+ snat_address_t * addresses;
+
+ /* Randomize port allocation order */
+ u32 random_seed;
+
+ /* Worker handoff index */
+ u32 fq_in2out_index;
+ u32 fq_out2in_index;
+
+ /* Config parameters */
+ u8 static_mapping_only;
+ u8 static_mapping_connection_tracking;
+ u32 translation_buckets;
+ u32 translation_memory_size;
+ u32 user_buckets;
+ u32 user_memory_size;
+ u32 max_translations_per_user;
+ u32 outside_vrf_id;
+ u32 outside_fib_index;
+ u32 inside_vrf_id;
+ u32 inside_fib_index;
+
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+ ip4_main_t * ip4_main;
+ ip_lookup_main_t * ip4_lookup_main;
+ ethernet_main_t * ethernet_main;
+ api_main_t * api_main;
+} snat_main_t;
+
+extern snat_main_t snat_main;
+extern vlib_node_registration_t snat_in2out_node;
+extern vlib_node_registration_t snat_out2in_node;
+extern vlib_node_registration_t snat_in2out_fast_node;
+extern vlib_node_registration_t snat_out2in_fast_node;
+extern vlib_node_registration_t snat_in2out_worker_handoff_node;
+extern vlib_node_registration_t snat_out2in_worker_handoff_node;
+
+void snat_free_outside_address_and_port (snat_main_t * sm,
+ snat_session_key_t * k,
+ u32 address_index);
+
+int snat_alloc_outside_address_and_port (snat_main_t * sm,
+ snat_session_key_t * k,
+ u32 * address_indexp);
+
+int snat_static_mapping_match (snat_main_t * sm,
+ snat_session_key_t match,
+ snat_session_key_t * mapping,
+ u8 by_external);
+
+format_function_t format_snat_user;
+
+typedef struct {
+ u32 cached_sw_if_index;
+ u32 cached_ip4_address;
+} snat_runtime_t;
+
+/** \brief Check if SNAT session is created from static mapping.
+ @param s SNAT session
+ @return 1 if SNAT session is created from static mapping otherwise 0
+*/
+#define snat_is_session_static(s) s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING
+
+/*
+ * Why is this here? Because we don't need to touch this layer to
+ * simply reply to an icmp. We need to change id to a unique
+ * value to NAT an echo request/reply.
+ */
+
+typedef struct {
+ u16 identifier;
+ u16 sequence;
+} icmp_echo_header_t;
+
+#endif /* __included_snat_h__ */
diff --git a/src/plugins/snat/snat_all_api_h.h b/src/plugins/snat/snat_all_api_h.h
new file mode 100644
index 00000000000..490177008e0
--- /dev/null
+++ b/src/plugins/snat/snat_all_api_h.h
@@ -0,0 +1,19 @@
+
+/*
+ * snat_all_api_h.h - skeleton vpp engine plug-in api #include file
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <snat/snat.api.h>
diff --git a/src/plugins/snat/snat_msg_enum.h b/src/plugins/snat/snat_msg_enum.h
new file mode 100644
index 00000000000..2c76fd51158
--- /dev/null
+++ b/src/plugins/snat/snat_msg_enum.h
@@ -0,0 +1,31 @@
+
+/*
+ * snat_msg_enum.h - skeleton vpp engine plug-in message enumeration
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_snat_msg_enum_h
+#define included_snat_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <snat/snat_all_api_h.h>
+ /* We'll want to know how many messages IDs we need... */
+ VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_snat_msg_enum_h */
diff --git a/src/plugins/snat/snat_test.c b/src/plugins/snat/snat_test.c
new file mode 100644
index 00000000000..2a003ba60c6
--- /dev/null
+++ b/src/plugins/snat/snat_test.c
@@ -0,0 +1,602 @@
+
+/*
+ * snat.c - skeleton vpp-api-test plug-in
+ *
+ * Copyright (c) <current-year> <your-organization>
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+
+uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
+
+/* Declare message IDs */
+#include <snat/snat_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <snat/snat_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun /* define message structures */
+#include <snat/snat_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <snat/snat_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <snat/snat_all_api_h.h>
+#undef vl_api_version
+
+typedef struct {
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} snat_test_main_t;
+
+snat_test_main_t snat_test_main;
+
+#define foreach_standard_reply_retval_handler \
+_(snat_add_address_range_reply) \
+_(snat_interface_add_del_feature_reply) \
+_(snat_add_static_mapping_reply) \
+_(snat_set_workers_reply)
+
+#define _(n) \
+ static void vl_api_##n##_t_handler \
+ (vl_api_##n##_t * mp) \
+ { \
+ vat_main_t * vam = snat_test_main.vat_main; \
+ i32 retval = ntohl(mp->retval); \
+ if (vam->async_mode) { \
+ vam->async_errors += (retval < 0); \
+ } else { \
+ vam->retval = retval; \
+ vam->result_ready = 1; \
+ } \
+ }
+foreach_standard_reply_retval_handler;
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated
+ */
+#define foreach_vpe_api_reply_msg \
+_(SNAT_ADD_ADDRESS_RANGE_REPLY, snat_add_address_range_reply) \
+_(SNAT_INTERFACE_ADD_DEL_FEATURE_REPLY, \
+ snat_interface_add_del_feature_reply) \
+_(SNAT_ADD_STATIC_MAPPING_REPLY, snat_add_static_mapping_reply) \
+_(SNAT_CONTROL_PING_REPLY, snat_control_ping_reply) \
+_(SNAT_STATIC_MAPPING_DETAILS, snat_static_mapping_details) \
+_(SNAT_SHOW_CONFIG_REPLY, snat_show_config_reply) \
+_(SNAT_ADDRESS_DETAILS, snat_address_details) \
+_(SNAT_INTERFACE_DETAILS, snat_interface_details) \
+_(SNAT_SET_WORKERS_REPLY, snat_set_workers_reply) \
+_(SNAT_WORKER_DETAILS, snat_worker_details)
+
+/* M: construct, but don't yet send a message */
+#define M(T,t) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+#define M2(T,t,n) \
+do { \
+ vam->result_ready = 0; \
+ mp = vl_msg_api_alloc(sizeof(*mp)+(n)); \
+ memset (mp, 0, sizeof (*mp)); \
+ mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base); \
+ mp->client_index = vam->my_client_index; \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W \
+do { \
+ timeout = vat_time_now (vam) + 1.0; \
+ \
+ while (vat_time_now (vam) < timeout) { \
+ if (vam->result_ready == 1) { \
+ return (vam->retval); \
+ } \
+ } \
+ return -99; \
+} while(0);
+
+static int api_snat_add_address_range (vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ ip4_address_t start_addr, end_addr;
+ u32 start_host_order, end_host_order;
+ vl_api_snat_add_address_range_t * mp;
+ u8 is_add = 1;
+ int count;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U - %U",
+ unformat_ip4_address, &start_addr,
+ unformat_ip4_address, &end_addr))
+ ;
+ else if (unformat (i, "%U", unformat_ip4_address, &start_addr))
+ end_addr = start_addr;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
+ end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
+
+ if (end_host_order < start_host_order)
+ {
+ errmsg ("end address less than start address\n");
+ return -99;
+ }
+
+ count = (end_host_order - start_host_order) + 1;
+
+ if (count > 1024)
+ {
+ errmsg ("%U - %U, %d addresses...\n",
+ format_ip4_address, &start_addr,
+ format_ip4_address, &end_addr,
+ count);
+ }
+
+ M(SNAT_ADD_ADDRESS_RANGE, snat_add_address_range);
+
+ memcpy (mp->first_ip_address, &start_addr, 4);
+ memcpy (mp->last_ip_address, &end_addr, 4);
+ mp->is_ip4 = 1;
+ mp->is_add = is_add;
+
+ S; W;
+
+ /* NOTREACHED */
+ return 0;
+}
+
+static int api_snat_interface_add_del_feature (vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_snat_interface_add_del_feature_t * mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_inside = 1;
+ u8 is_add = 1;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "out"))
+ is_inside = 0;
+ else if (unformat (i, "in"))
+ is_inside = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("interface / sw_if_index required\n");
+ return -99;
+ }
+
+ M(SNAT_INTERFACE_ADD_DEL_FEATURE, snat_interface_add_del_feature);
+ mp->sw_if_index = ntohl(sw_if_index);
+ mp->is_add = is_add;
+ mp->is_inside = is_inside;
+
+ S; W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static int api_snat_add_static_mapping(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_snat_add_static_mapping_t * mp;
+ u8 addr_set_n = 0;
+ u8 is_add = 1;
+ u8 addr_only = 1;
+ ip4_address_t local_addr, external_addr;
+ u32 local_port = 0, external_port = 0, vrf_id = ~0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "local_addr %U", unformat_ip4_address, &local_addr))
+ addr_set_n++;
+ else if (unformat (i, "external_addr %U", unformat_ip4_address,
+ &external_addr))
+ addr_set_n++;
+ else if (unformat (i, "local_port %u", &local_port))
+ addr_only = 0;
+ else if (unformat (i, "external_port %u", &external_port))
+ addr_only = 0;
+ else if (unformat (i, "vrf %u", &vrf_id))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (addr_set_n != 2)
+ {
+ errmsg ("local_addr and remote_addr required\n");
+ return -99;
+ }
+
+ M(SNAT_ADD_STATIC_MAPPING, snat_add_static_mapping);
+ mp->is_add = is_add;
+ mp->is_ip4 = 1;
+ mp->addr_only = addr_only;
+ mp->local_port = ntohs ((u16) local_port);
+ mp->external_port = ntohs ((u16) external_port);
+ mp->vrf_id = ntohl (vrf_id);
+ memcpy (mp->local_ip_address, &local_addr, 4);
+ memcpy (mp->external_ip_address, &external_addr, 4);
+
+ S; W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static void vl_api_snat_control_ping_reply_t_handler
+ (vl_api_snat_control_ping_reply_t * mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static void vl_api_snat_static_mapping_details_t_handler
+ (vl_api_snat_static_mapping_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ if (mp->addr_only)
+ fformat (vam->ofp, "%15U%6s%15U%6s%11d\n",
+ format_ip4_address, &mp->local_ip_address, "",
+ format_ip4_address, &mp->external_ip_address, "",
+ ntohl (mp->vrf_id));
+ else
+ fformat (vam->ofp, "%15U%6d%15U%6d%11d\n",
+ format_ip4_address, &mp->local_ip_address,
+ ntohs (mp->local_port),
+ format_ip4_address, &mp->external_ip_address,
+ ntohs (mp->external_port),
+ ntohl (mp->vrf_id));
+
+}
+
+static int api_snat_static_mapping_dump(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ f64 timeout;
+ vl_api_snat_static_mapping_dump_t * mp;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_static_mapping_dump");
+ return -99;
+ }
+
+ fformat (vam->ofp, "%21s%21s\n", "local", "external");
+ fformat (vam->ofp, "%15s%6s%15s%6s%11s\n", "address", "port", "address",
+ "port", "vrf");
+
+ M(SNAT_STATIC_MAPPING_DUMP, snat_static_mapping_dump);
+ S;
+ /* Use a control ping for synchronization */
+ {
+ vl_api_snat_control_ping_t *mp;
+ M (SNAT_CONTROL_PING, snat_control_ping);
+ S;
+ }
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static void vl_api_snat_show_config_reply_t_handler
+ (vl_api_snat_show_config_reply_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ fformat (vam->ofp, "translation hash buckets %d\n",
+ ntohl (mp->translation_buckets));
+ fformat (vam->ofp, "translation hash memory %d\n",
+ ntohl (mp->translation_memory_size));
+ fformat (vam->ofp, "user hash buckets %d\n", ntohl (mp->user_buckets));
+ fformat (vam->ofp, "user hash memory %d\n", ntohl (mp->user_memory_size));
+ fformat (vam->ofp, "max translations per user %d\n",
+ ntohl (mp->max_translations_per_user));
+ fformat (vam->ofp, "outside VRF id %d\n", ntohl (mp->outside_vrf_id));
+ fformat (vam->ofp, "inside VRF id %d\n", ntohl (mp->inside_vrf_id));
+ if (mp->static_mapping_only)
+ {
+ fformat (vam->ofp, "static mapping only");
+ if (mp->static_mapping_connection_tracking)
+ fformat (vam->ofp, " connection tracking");
+ fformat (vam->ofp, "\n");
+ }
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int api_snat_show_config(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ f64 timeout;
+ vl_api_snat_show_config_t * mp;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_show_config");
+ return -99;
+ }
+
+ M(SNAT_SHOW_CONFIG, snat_show_config);
+ S; W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static void vl_api_snat_address_details_t_handler
+ (vl_api_snat_address_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "%U\n", format_ip4_address, &mp->ip_address);
+}
+
+static int api_snat_address_dump(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ f64 timeout;
+ vl_api_snat_address_dump_t * mp;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_ADDRESS_DUMP, snat_address_dump);
+ S;
+ /* Use a control ping for synchronization */
+ {
+ vl_api_snat_control_ping_t *mp;
+ M (SNAT_CONTROL_PING, snat_control_ping);
+ S;
+ }
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static void vl_api_snat_interface_details_t_handler
+ (vl_api_snat_interface_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "sw_if_index %d %s\n", ntohl (mp->sw_if_index),
+ mp->is_inside ? "in" : "out");
+}
+
+static int api_snat_interface_dump(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ f64 timeout;
+ vl_api_snat_interface_dump_t * mp;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_INTERFACE_DUMP, snat_interface_dump);
+ S;
+ /* Use a control ping for synchronization */
+ {
+ vl_api_snat_control_ping_t *mp;
+ M (SNAT_CONTROL_PING, snat_control_ping);
+ S;
+ }
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+static int api_snat_set_workers (vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ unformat_input_t * i = vam->input;
+ f64 timeout;
+ vl_api_snat_set_workers_t * mp;
+ uword *bitmap;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_bitmap_list, &bitmap))
+ ;
+ else
+ {
+ clib_warning("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M(SNAT_SET_WORKERS, snat_set_workers);
+ mp->worker_mask = clib_host_to_net_u64 (bitmap[0]);
+
+ S; W;
+
+ /* NOTREACHED */
+ return 0;
+}
+
+static void vl_api_snat_worker_details_t_handler
+ (vl_api_snat_worker_details_t *mp)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ vat_main_t *vam = sm->vat_main;
+
+ fformat (vam->ofp, "worker_index %d (%s at lcore %u)\n",
+ ntohl (mp->worker_index), mp->name, ntohl (mp->lcore_id));
+}
+
+static int api_snat_worker_dump(vat_main_t * vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ f64 timeout;
+ vl_api_snat_worker_dump_t * mp;
+
+ if (vam->json_output)
+ {
+ clib_warning ("JSON output not supported for snat_address_dump");
+ return -99;
+ }
+
+ M(SNAT_WORKER_DUMP, snat_worker_dump);
+ S;
+ /* Use a control ping for synchronization */
+ {
+ vl_api_snat_control_ping_t *mp;
+ M (SNAT_CONTROL_PING, snat_control_ping);
+ S;
+ }
+ W;
+ /* NOTREACHED */
+ return 0;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes
+ */
+#define foreach_vpe_api_msg \
+_(snat_add_address_range, "<start-addr> [- <end-addr] [del]") \
+_(snat_interface_add_del_feature, \
+ "<intfc> | sw_if_index <id> [in] [out] [del]") \
+_(snat_add_static_mapping, "local_addr <ip> external_addr <ip> " \
+ "[local_port <n>] [external_port <n>] [vrf <table-id>] [del]") \
+_(snat_set_workers, "<wokrers_bitmap>") \
+_(snat_static_mapping_dump, "") \
+_(snat_show_config, "") \
+_(snat_address_dump, "") \
+_(snat_interface_dump, "") \
+_(snat_worker_dump, "")
+
+void vat_api_hookup (vat_main_t *vam)
+{
+ snat_test_main_t * sm __attribute__((unused)) = &snat_test_main;
+ /* Hook up handlers for replies from the data plane plug-in */
+#define _(N,n) \
+ vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \
+ #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_reply_msg;
+#undef _
+
+ /* API messages we can send */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+ foreach_vpe_api_msg;
+#undef _
+
+ /* Help strings */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+ foreach_vpe_api_msg;
+#undef _
+}
+
+clib_error_t * vat_plugin_register (vat_main_t *vam)
+{
+ snat_test_main_t * sm = &snat_test_main;
+ u8 * name;
+
+ sm->vat_main = vam;
+
+ /* Ask the vpp engine for the first assigned message-id */
+ name = format (0, "snat_%08x%c", api_version, 0);
+ sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+ if (sm->msg_id_base != (u16) ~0)
+ vat_api_hookup (vam);
+
+ vec_free(name);
+
+ return 0;
+}