aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/mpls
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/mpls')
-rw-r--r--src/vnet/mpls/error.def31
-rw-r--r--src/vnet/mpls/interface.c132
-rw-r--r--src/vnet/mpls/mpls.api246
-rw-r--r--src/vnet/mpls/mpls.c627
-rw-r--r--src/vnet/mpls/mpls.h110
-rw-r--r--src/vnet/mpls/mpls_api.c582
-rw-r--r--src/vnet/mpls/mpls_features.c154
-rw-r--r--src/vnet/mpls/mpls_input.c324
-rw-r--r--src/vnet/mpls/mpls_lookup.c723
-rw-r--r--src/vnet/mpls/mpls_lookup.h102
-rw-r--r--src/vnet/mpls/mpls_output.c498
-rw-r--r--src/vnet/mpls/mpls_tunnel.c1070
-rw-r--r--src/vnet/mpls/mpls_tunnel.h137
-rw-r--r--src/vnet/mpls/mpls_types.h60
-rw-r--r--src/vnet/mpls/packet.h125
-rw-r--r--src/vnet/mpls/pg.c71
16 files changed, 4992 insertions, 0 deletions
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
new file mode 100644
index 00000000..34a46522
--- /dev/null
+++ b/src/vnet/mpls/error.def
@@ -0,0 +1,31 @@
+/*
+ * mpls_error.def: mpls errors
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+mpls_error (NONE, "no error")
+mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
+mpls_error (UNSUPPORTED_VERSION, "unsupported version")
+mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
+mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (NO_LABEL, "MPLS no label for fib/dst")
+mpls_error (TTL_EXPIRED, "MPLS ttl expired")
+mpls_error (S_NOT_SET, "MPLS s-bit not set")
+mpls_error (BAD_LABEL, "invalid FIB id in label")
+mpls_error (NOT_IP4, "non-ip4 packets dropped")
+mpls_error (DISALLOWED_FIB, "disallowed FIB id")
+mpls_error (NOT_ENABLED, "MPLS not enabled")
+mpls_error (DROP, "MPLS DROP DPO")
+mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
new file mode 100644
index 00000000..d7c8e7d3
--- /dev/null
+++ b/src/vnet/mpls/interface.c
@@ -0,0 +1,132 @@
+/*
+ * interface.c: mpls interfaces
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/dpo/classify_dpo.h>
+
+
+u8
+mpls_sw_interface_is_enabled (u32 sw_if_index)
+{
+ mpls_main_t * mm = &mpls_main;
+
+ if (vec_len(mm->mpls_enabled_by_sw_if_index) < sw_if_index)
+ return (0);
+
+ return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
+}
+
+int
+mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable,
+ u8 is_api)
+{
+ fib_node_index_t lfib_index;
+
+ vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+
+ lfib_index = fib_table_find(FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID);
+
+ if (~0 == lfib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ /*
+ * enable/disable only on the 1<->0 transition
+ */
+ if (is_enable)
+ {
+ if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return (0);
+
+ fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS,
+ (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+
+ vec_validate(mm->fib_index_by_sw_if_index, 0);
+ mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
+ }
+ else
+ {
+ ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0);
+ if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
+ return (0);
+
+ fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_MPLS,
+ (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+ }
+
+ vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
+ sw_if_index, !is_enable, 0, 0);
+
+ return (0);
+}
+
+static clib_error_t *
+mpls_interface_enable_disable (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t * vnm = vnet_get_main();
+ clib_error_t * error = 0;
+ u32 sw_if_index, enable;
+
+ sw_if_index = ~0;
+
+ if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ {
+ error = clib_error_return (0, "expected 'enable' or 'disable'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0);
+
+ done:
+ return error;
+}
+
+/*?
+ * This command enables an interface to accpet MPLS packets
+ *
+ * @cliexpar
+ * @cliexstart{set interface mpls}
+ * set interface mpls GigEthernet0/8/0 enable
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
+ .path = "set interface mpls",
+ .function = mpls_interface_enable_disable,
+ .short_help = "Enable/Disable an interface for MPLS forwarding",
+};
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
new file mode 100644
index 00000000..36488d0c
--- /dev/null
+++ b/src/vnet/mpls/mpls.api
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create
+ a per-prefix label entry.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in
+ @param mb_label - The MPLS label value to bind
+ @param mb_ip_table_id - The IP table-id of the IP prefix to bind to.
+ @param mb_create_table_if_needed - Create either/both tables if required.
+ @param mb_is_bind - Bind or unbind
+ @param mb_is_ip4 - The prefix to bind to is IPv4
+ @param mb_address_length - Length of IP prefix
+ @param mb_address[16] - IP prefix/
+*/
+autoreply define mpls_ip_bind_unbind
+{
+ u32 client_index;
+ u32 context;
+ u32 mb_mpls_table_id;
+ u32 mb_label;
+ u32 mb_ip_table_id;
+ u8 mb_create_table_if_needed;
+ u8 mb_is_bind;
+ u8 mb_is_ip4;
+ u8 mb_address_length;
+ u8 mb_address[16];
+};
+
+/** \brief MPLS tunnel Add / del route
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mt_is_add - Is this a route add or delete
+ @param mt_sw_if_index - The SW interface index of the tunnel to delete
+ @param mt_is_multicast - Is the tunnel's underlying LSP multicast
+ @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4
+ @param mt_next_hop_weight - The weight, for UCMP
+ @param mt_next_hop_preference - The preference
+ @param mt_next_hop[16] - the nextop address
+ @param mt_next_hop_sw_if_index - the next-hop SW interface
+ @param mt_next_hop_table_id - the next-hop table-id (if appropriate)
+ @param mt_next_hop_n_out_labels - the number of next-hop output labels
+ @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first
+*/
+define mpls_tunnel_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mt_sw_if_index;
+ u8 mt_is_add;
+ u8 mt_l2_only;
+ u8 mt_is_multicast;
+ u8 mt_next_hop_proto_is_ip4;
+ u8 mt_next_hop_weight;
+ u8 mt_next_hop_preference;
+ u8 mt_next_hop[16];
+ u8 mt_next_hop_n_out_labels;
+ u32 mt_next_hop_sw_if_index;
+ u32 mt_next_hop_table_id;
+ u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels];
+};
+
+/** \brief Reply for MPLS tunnel add / del request
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - SW interface index of the tunnel created
+*/
+define mpls_tunnel_add_del_reply
+{
+ u32 context;
+ i32 retval;
+ u32 sw_if_index;
+};
+
+/** \brief Dump mpls eth tunnel table
+ @param client_index - opaque cookie to identify the sender
+ @param tunnel_index - eth tunnel identifier or -1 in case of all tunnels
+*/
+define mpls_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ i32 tunnel_index;
+};
+
+/** \brief FIB path
+ @param sw_if_index - index of the interface
+ @param weight - The weight, for UCMP
+ @param is_local - local if non-zero, else remote
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+ @param next_hop[16] - the next hop address
+
+ WARNING: this type is replicated, pending cleanup completion
+
+*/
+typeonly manual_print manual_endian define fib_path2
+{
+ u32 sw_if_index;
+ u8 weight;
+ u8 preference;
+ u8 is_local;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 afi;
+ u8 next_hop[16];
+ u32 labels[16];
+};
+
+/** \brief mpls tunnel details
+*/
+manual_endian manual_print define mpls_tunnel_details
+{
+ u32 context;
+ u8 mt_sw_if_index;
+ u8 mt_tunnel_index;
+ u8 mt_l2_only;
+ u8 mt_is_multicast;
+ u32 mt_count;
+ vl_api_fib_path2_t mt_paths[mt_count];
+};
+
+/** \brief MPLS Route Add / del route
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mt_table_id - The MPLS table-id the route is added in
+ @param mt_is_add - Is this a route add or delete
+ @param mt_name - A client provided name/tag for the table. If this
+ is not set by the client, then VPP will generate
+ something meaningfull.
+*/
+autoreply define mpls_table_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mt_table_id;
+ u8 mt_is_add;
+ u8 mt_name[64];
+};
+
+/** \brief MPLS Route Add / del route
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mr_label - The MPLS label value
+ @param mr_eos - The End of stack bit
+ @param mr_table_id - The MPLS table-id the route is added in
+ @param mr_classify_table_index - If this is a classify route,
+ this is the classify table index
+ @param mr_create_table_if_needed - If the MPLS or IP tables do not exist,
+ create them
+ @param mr_is_add - Is this a route add or delete
+ @param mr_is_classify - Is this route result a classify
+ @param mr_is_multicast - Is this a multicast route
+ @param mr_is_multipath - Is this route update a multipath - i.e. is this
+ a path addition to an existing route
+ @param mr_is_resolve_host - Recurse resolution constraint via a host prefix
+ @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix
+ @param mr_is_interface_rx - Interface Receive path
+ @param mr_is_interface_rx - RPF-ID Receive path. The next-hop interface
+ is used as the RPF-ID
+ @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t
+ @param mr_next_hop_weight - The weight, for UCMP
+ @param mr_next_hop[16] - the nextop address
+ @param mr_next_hop_sw_if_index - the next-hop SW interface
+ @param mr_next_hop_table_id - the next-hop table-id (if appropriate)
+ @param mr_next_hop_n_out_labels - the number of labels in the label stack
+ @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first
+ @param next_hop_via_label - The next-hop is a resolved via a local label
+*/
+autoreply define mpls_route_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 mr_label;
+ u8 mr_eos;
+ u32 mr_table_id;
+ u32 mr_classify_table_index;
+ u8 mr_create_table_if_needed;
+ u8 mr_is_add;
+ u8 mr_is_classify;
+ u8 mr_is_multicast;
+ u8 mr_is_multipath;
+ u8 mr_is_resolve_host;
+ u8 mr_is_resolve_attached;
+ u8 mr_is_interface_rx;
+ u8 mr_is_rpf_id;
+ u8 mr_next_hop_proto;
+ u8 mr_next_hop_weight;
+ u8 mr_next_hop_preference;
+ u8 mr_next_hop[16];
+ u8 mr_next_hop_n_out_labels;
+ u32 mr_next_hop_sw_if_index;
+ u32 mr_next_hop_table_id;
+ u32 mr_next_hop_via_label;
+ u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels];
+};
+
+/** \brief Dump MPLS fib table
+ @param client_index - opaque cookie to identify the sender
+*/
+define mpls_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief mpls FIB table response
+ @param table_id - MPLS fib table id
+ @param s_bit - End-of-stack bit
+ @param label - MPLS label value
+ @param count - the number of fib_path in path
+ @param path - array of of fib_path structures
+*/
+manual_endian manual_print define mpls_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 eos_bit;
+ u32 label;
+ u32 count;
+ vl_api_fib_path2_t path[count];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
new file mode 100644
index 00000000..ed24f75f
--- /dev/null
+++ b/src/vnet/mpls/mpls.c
@@ -0,0 +1,627 @@
+/*
+ * mpls.c: mpls
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS;
+
+mpls_main_t mpls_main;
+
+u8 * format_mpls_unicast_label (u8 * s, va_list * args)
+{
+ mpls_label_t label = va_arg (*args, mpls_label_t);
+
+ switch (label) {
+ case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_ROUTER_ALERT_LABEL:
+ s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING);
+ break;
+ case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IPV6_EXPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_IMPLICIT_NULL_LABEL:
+ s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING);
+ break;
+ case MPLS_IETF_ELI_LABEL:
+ s = format (s, "%s", MPLS_IETF_ELI_STRING);
+ break;
+ case MPLS_IETF_GAL_LABEL:
+ s = format (s, "%s", MPLS_IETF_GAL_STRING);
+ break;
+ default:
+ s = format (s, "%d", label);
+ break;
+ }
+ return s;
+}
+
+uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args)
+{
+ mpls_label_t *label = va_arg (*args, mpls_label_t*);
+
+ if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING))
+ *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING))
+ *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+ else if (unformat (input, MPLS_IETF_ROUTER_ALERT_BRIEF_STRING))
+ *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+ else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING))
+ *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+ else if (unformat (input, "%d", label))
+ ;
+ else
+ return (0);
+
+ return (1);
+}
+
+u8 * format_mpls_eos_bit (u8 * s, va_list * args)
+{
+ mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t);
+
+ ASSERT(eb <= MPLS_EOS);
+
+ s = format(s, "%s", mpls_eos_bit_names[eb]);
+
+ return (s);
+}
+
+u8 * format_mpls_header (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t);
+
+ return (format(s, "[%U:%d:%d:%U]",
+ format_mpls_unicast_label,
+ vnet_mpls_uc_get_label(hdr.label_exp_s_ttl),
+ vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl),
+ vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl),
+ format_mpls_eos_bit,
+ vnet_mpls_uc_get_s(hdr.label_exp_s_ttl)));
+}
+
+uword
+unformat_mpls_header (unformat_input_t * input, va_list * args)
+{
+ u8 ** result = va_arg (*args, u8 **);
+ mpls_unicast_header_t _h, * h = &_h;
+ u32 label, label_exp_s_ttl;
+
+ if (! unformat (input, "MPLS %d", &label))
+ return 0;
+
+ label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF;
+ h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl);
+
+ /* Add gre, mpls headers to result. */
+ {
+ void * p;
+ u32 h_n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, h_n_bytes);
+ clib_memcpy (p, h, h_n_bytes);
+ }
+
+ return 1;
+}
+
+uword
+unformat_mpls_label_net_byte_order (unformat_input_t * input,
+ va_list * args)
+{
+ u32 * result = va_arg (*args, u32 *);
+ u32 label;
+
+ if (!unformat (input, "MPLS: label %d", &label))
+ return 0;
+
+ label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */;
+
+ *result = clib_host_to_net_u32 (label);
+ return 1;
+}
+
+u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+ u32 label = h->label_exp_s_ttl;
+
+ s = format (s, "label %d exp %d, s %d, ttl %d",
+ vnet_mpls_uc_get_label (label),
+ vnet_mpls_uc_get_exp (label),
+ vnet_mpls_uc_get_s (label),
+ vnet_mpls_uc_get_ttl (label));
+ return s;
+}
+
+u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args)
+{
+ mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+ mpls_unicast_header_t h_host;
+
+ h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl);
+
+ return format (s, "%U", format_mpls_unicast_header_host_byte_order,
+ &h_host);
+}
+
+typedef struct {
+ u32 fib_index;
+ u32 entry_index;
+ u32 dest;
+ u32 s_bit;
+ u32 label;
+} show_mpls_fib_t;
+
+int
+mpls_dest_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return clib_net_to_host_u32(r1->dest) - clib_net_to_host_u32(r2->dest);
+}
+
+int
+mpls_fib_index_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return r1->fib_index - r2->fib_index;
+}
+
+int
+mpls_label_cmp(void * a1, void * a2)
+{
+ show_mpls_fib_t * r1 = a1;
+ show_mpls_fib_t * r2 = a2;
+
+ return r1->label - r2->label;
+}
+
+static clib_error_t *
+vnet_mpls_local_label (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ fib_route_path_t *rpaths = NULL, rpath;
+ u32 table_id, is_del, is_ip;
+ mpls_label_t local_label;
+ mpls_label_t out_label;
+ clib_error_t * error;
+ mpls_eos_bit_t eos;
+ vnet_main_t * vnm;
+ fib_prefix_t pfx;
+
+ vnm = vnet_get_main();
+ error = NULL;
+ is_ip = 0;
+ table_id = 0;
+ eos = MPLS_EOS;
+ is_del = 0;
+ local_label = MPLS_LABEL_INVALID;
+ memset(&pfx, 0, sizeof(pfx));
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ memset(&rpath, 0, sizeof(rpath));
+
+ if (unformat (line_input, "table %d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "eos"))
+ pfx.fp_eos = MPLS_EOS;
+ else if (unformat (line_input, "non-eos"))
+ pfx.fp_eos = MPLS_NON_EOS;
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip4_address,
+ &pfx.fp_addr.ip4,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ is_ip = 1;
+ }
+ else if (unformat (line_input, "%U/%d",
+ unformat_ip6_address,
+ &pfx.fp_addr.ip6,
+ &pfx.fp_len))
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ is_ip = 1;
+ }
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U weight %u",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index,
+ &rpath.frp_weight))
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "rx-ip4 %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U next-hop-table %d",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the same table
+ * as the prefix
+ */
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = table_id;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "%d", &local_label))
+ ;
+ else if (unformat (line_input,
+ "ip4-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ pfx.fp_payload_proto = DPO_PROTO_IP4;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "ip6-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ vec_add1(rpaths, rpath);
+ pfx.fp_payload_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input,
+ "mpls-lookup-in-table %d",
+ &rpath.frp_fib_index))
+ {
+ rpath.frp_proto = DPO_PROTO_MPLS;
+ rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+ pfx.fp_payload_proto = DPO_PROTO_MPLS;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input,
+ "l2-input-on %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_proto = DPO_PROTO_ETHERNET;
+ pfx.fp_payload_proto = DPO_PROTO_ETHERNET;
+ rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ vec_add1(rpaths, rpath);
+ }
+ else if (unformat (line_input, "out-labels"))
+ {
+ if (vec_len (rpaths) == 0)
+ {
+ error = clib_error_return (0, "Paths then labels");
+ goto done;
+ }
+ else
+ {
+ while (unformat (line_input, "%U",
+ unformat_mpls_unicast_label,
+ &out_label))
+ {
+ vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack,
+ out_label);
+ }
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unkown input: %U",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ }
+
+ if (MPLS_LABEL_INVALID == local_label)
+ {
+ error = clib_error_return (0, "local-label required: %U",
+ format_unformat_error, input);
+ goto done;
+ }
+
+
+ if (is_ip)
+ {
+ u32 fib_index = fib_table_find(pfx.fp_proto, table_id);
+
+ if (FIB_NODE_INDEX_INVALID == fib_index)
+ {
+ error = clib_error_return (0, "%U table-id %d does not exist",
+ format_fib_protocol, pfx.fp_proto, table_id);
+ goto done;
+ }
+
+ if (is_del)
+ {
+ fib_table_entry_local_label_remove(fib_index, &pfx, local_label);
+ }
+ else
+ {
+ fib_table_entry_local_label_add(fib_index, &pfx, local_label);
+ }
+ }
+ else
+ {
+ fib_node_index_t fib_index;
+ u32 fi;
+
+ if (NULL == rpaths)
+ {
+ error = clib_error_return(0 , "no paths");
+ goto done;
+ }
+
+ pfx.fp_proto = FIB_PROTOCOL_MPLS;
+ pfx.fp_len = 21;
+ pfx.fp_label = local_label;
+ pfx.fp_payload_proto = rpaths[0].frp_proto;
+
+ /*
+ * the CLI parsing stored table Ids, swap to FIB indicies
+ */
+ if (FIB_NODE_INDEX_INVALID == rpath.frp_sw_if_index)
+ {
+ fi = fib_table_find(dpo_proto_to_fib(pfx.fp_payload_proto),
+ rpaths[0].frp_fib_index);
+
+ if (~0 == fi)
+ {
+ error = clib_error_return(0 , "%U Via table %d does not exist",
+ format_dpo_proto, pfx.fp_payload_proto,
+ rpaths[0].frp_fib_index);
+ goto done;
+ }
+ rpaths[0].frp_fib_index = fi;
+ }
+
+ fib_index = mpls_fib_index_from_table_id(table_id);
+
+ if (FIB_NODE_INDEX_INVALID == fib_index)
+ {
+ error = clib_error_return (0, "MPLS table-id %d does not exist",
+ table_id);
+ goto done;
+ }
+
+ if (is_del)
+ {
+ fib_table_entry_path_remove2(fib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ rpaths);
+ }
+ else
+ {
+ fib_node_index_t lfe;
+
+ lfe = fib_table_entry_path_add2(fib_index,
+ &pfx,
+ FIB_SOURCE_CLI,
+ FIB_ENTRY_FLAG_NONE,
+ rpaths);
+
+ if (FIB_NODE_INDEX_INVALID == lfe)
+ {
+ error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d",
+ format_mpls_unicast_label, local_label,
+ format_mpls_eos_bit, eos,
+ table_id);
+ goto done;
+ }
+ }
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
+ .path = "mpls local-label",
+ .function = vnet_mpls_local_label,
+ .short_help = "Create/Delete MPL local labels",
+};
+
+clib_error_t *
+vnet_mpls_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input,
+ vlib_cli_command_t * cmdo)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ u32 table_id, is_add;
+ u8 *name = NULL;
+
+ is_add = 1;
+ table_id = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &table_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "name %s", &name))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ error = clib_error_return (0, "No table id");
+ goto done;
+ }
+ else
+ {
+ if (is_add)
+ {
+ mpls_table_create (table_id, 0, name);
+ }
+ else
+ {
+ mpls_table_delete (table_id, 0);
+ }
+ }
+
+ done:
+ unformat_free (line_input);
+ return error;
+}
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete MPLS Tables. All
+ * Tables must be explicitly added before that can be used,
+ * Including the default table.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mpls_table_command, static) = {
+ .path = "mpls table",
+ .short_help = "mpls table [add|del] <table-id>",
+ .function = vnet_mpls_table_cmd,
+ .is_mp_safe = 1,
+};
+
+int
+mpls_fib_reset_labels (u32 fib_id)
+{
+ // FIXME
+ return 0;
+}
+
+static clib_error_t *
+mpls_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ return vlib_call_init_function (vm, mpls_input_init);
+}
+
+VLIB_INIT_FUNCTION (mpls_init);
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
new file mode 100644
index 00000000..cc3eeed0
--- /dev/null
+++ b/src/vnet/mpls/mpls.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_mpls_h
+#define included_vnet_mpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+
+typedef enum
+{
+#define mpls_error(n,s) MPLS_ERROR_##n,
+#include <vnet/mpls/error.def>
+#undef mpls_error
+ MPLS_N_ERROR,
+} mpls_error_t;
+
+/**
+ * @brief Definition of a callback for receiving MPLS interface state change
+ * notifications
+ */
+typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
+ u32 is_enable);
+
+typedef struct
+{
+ /* MPLS FIB index for each software interface */
+ u32 *fib_index_by_sw_if_index;
+
+ /** A pool of all the MPLS FIBs */
+ struct fib_table_t_ *fibs;
+
+ /** A pool of all the MPLS FIBs */
+ struct mpls_fib_t_ *mpls_fibs;
+
+ /** A hash table to lookup the mpls_fib by table ID */
+ uword *fib_index_by_table_id;
+
+ /* Feature arc indices */
+ u8 input_feature_arc_index;
+ u8 output_feature_arc_index;
+
+ /* IP4 enabled count by software interface */
+ u8 *mpls_enabled_by_sw_if_index;
+} mpls_main_t;
+
+extern mpls_main_t mpls_main;
+
+extern clib_error_t *mpls_feature_init (vlib_main_t * vm);
+
+format_function_t format_mpls_eos_bit;
+format_function_t format_mpls_unicast_header_net_byte_order;
+format_function_t format_mpls_unicast_label;
+format_function_t format_mpls_header;
+
+extern vlib_node_registration_t mpls_input_node;
+extern vlib_node_registration_t mpls_output_node;
+extern vlib_node_registration_t mpls_midchain_node;
+
+/* Parse mpls protocol as 0xXXXX or protocol name.
+ In either host or network byte order. */
+unformat_function_t unformat_mpls_label_net_byte_order;
+unformat_function_t unformat_mpls_unicast_label;
+
+/* Parse mpls header. */
+unformat_function_t unformat_mpls_header;
+unformat_function_t unformat_pg_mpls_header;
+
+int mpls_sw_interface_enable_disable (mpls_main_t * mm,
+ u32 sw_if_index,
+ u8 is_enable, u8 is_api);
+
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
+int mpls_fib_reset_labels (u32 fib_id);
+
+int mpls_dest_cmp (void *a1, void *a2);
+
+int mpls_fib_index_cmp (void *a1, void *a2);
+
+int mpls_label_cmp (void *a1, void *a2);
+
+void mpls_table_create (u32 table_id, u8 is_api, const u8 * name);
+void mpls_table_delete (u32 table_id, u8 is_api);
+
+#endif /* included_vnet_mpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
new file mode 100644
index 00000000..762c40ff
--- /dev/null
+++ b/src/vnet/mpls/mpls_api.c
@@ -0,0 +1,582 @@
+/*
+ *------------------------------------------------------------------
+ * mpls_api.c - mpls api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/mpls_tunnel.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/fib_path_list.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \
+_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \
+_(MPLS_TABLE_ADD_DEL, mpls_table_add_del) \
+_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \
+_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \
+_(MPLS_FIB_DUMP, mpls_fib_dump)
+
+extern void stats_dslock_with_hint (int hint, int tag);
+extern void stats_dsunlock (void);
+
+void
+mpls_table_delete (u32 table_id, u8 is_api)
+{
+ u32 fib_index;
+
+ /*
+ * The MPLS defult table must also be explicitly created via the API.
+ * So in contrast to IP, it gets no special treatment here.
+ *
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ */
+ fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+ if (~0 != fib_index)
+ {
+ fib_table_unlock (fib_index,
+ FIB_PROTOCOL_MPLS,
+ (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+ }
+}
+
+void
+vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp)
+{
+ vl_api_mpls_table_add_del_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv = 0;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ if (mp->mt_is_add)
+ mpls_table_create (ntohl (mp->mt_table_id), 1, mp->mt_name);
+ else
+ mpls_table_delete (ntohl (mp->mt_table_id), 1);
+
+ // NB: Nothing sets rv; none of the above returns an error
+
+ REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY);
+}
+
+static int
+mpls_ip_bind_unbind_handler (vnet_main_t * vnm,
+ vl_api_mpls_ip_bind_unbind_t * mp)
+{
+ u32 mpls_fib_index, ip_fib_index;
+
+ mpls_fib_index =
+ fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id));
+
+ if (~0 == mpls_fib_index)
+ {
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+
+ ip_fib_index = fib_table_find ((mp->mb_is_ip4 ?
+ FIB_PROTOCOL_IP4 :
+ FIB_PROTOCOL_IP6),
+ ntohl (mp->mb_ip_table_id));
+ if (~0 == ip_fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ fib_prefix_t pfx = {
+ .fp_len = mp->mb_address_length,
+ };
+
+ if (mp->mb_is_ip4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address,
+ sizeof (pfx.fp_addr.ip4));
+ }
+ else
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address,
+ sizeof (pfx.fp_addr.ip6));
+ }
+
+ if (mp->mb_is_bind)
+ fib_table_entry_local_label_add (ip_fib_index, &pfx,
+ ntohl (mp->mb_label));
+ else
+ fib_table_entry_local_label_remove (ip_fib_index, &pfx,
+ ntohl (mp->mb_label));
+
+ return (0);
+}
+
+void
+vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp)
+{
+ vl_api_mpls_ip_bind_unbind_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ rv = mpls_ip_bind_unbind_handler (vnm, mp);
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_MPLS_IP_BIND_UNBIND_REPLY);
+}
+
+static int
+mpls_route_add_del_t_handler (vnet_main_t * vnm,
+ vl_api_mpls_route_add_del_t * mp)
+{
+ u32 fib_index, next_hop_fib_index;
+ mpls_label_t *label_stack = NULL;
+ int rv, ii, n_labels;;
+
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_eos = mp->mr_eos,
+ .fp_label = ntohl (mp->mr_label),
+ };
+ if (pfx.fp_eos)
+ {
+ pfx.fp_payload_proto = mp->mr_next_hop_proto;
+ }
+ else
+ {
+ pfx.fp_payload_proto = DPO_PROTO_MPLS;
+ }
+
+ rv = add_del_route_check (FIB_PROTOCOL_MPLS,
+ mp->mr_table_id,
+ mp->mr_next_hop_sw_if_index,
+ pfx.fp_payload_proto,
+ mp->mr_next_hop_table_id,
+ mp->mr_is_rpf_id,
+ &fib_index, &next_hop_fib_index);
+
+ if (0 != rv)
+ return (rv);
+
+ ip46_address_t nh;
+ memset (&nh, 0, sizeof (nh));
+
+ if (DPO_PROTO_IP4 == mp->mr_next_hop_proto)
+ memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4));
+ else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto)
+ memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6));
+
+ n_labels = mp->mr_next_hop_n_out_labels;
+ if (n_labels == 0)
+ ;
+ else if (1 == n_labels)
+ vec_add1 (label_stack, ntohl (mp->mr_next_hop_out_label_stack[0]));
+ else
+ {
+ vec_validate (label_stack, n_labels - 1);
+ for (ii = 0; ii < n_labels; ii++)
+ label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]);
+ }
+
+ return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop,
+ 0, // mp->is_unreach,
+ 0, // mp->is_prohibit,
+ 0, // mp->is_local,
+ mp->mr_is_multicast,
+ mp->mr_is_classify,
+ mp->mr_classify_table_index,
+ mp->mr_is_resolve_host,
+ mp->mr_is_resolve_attached,
+ mp->mr_is_interface_rx,
+ mp->mr_is_rpf_id,
+ fib_index, &pfx,
+ mp->mr_next_hop_proto,
+ &nh, ntohl (mp->mr_next_hop_sw_if_index),
+ next_hop_fib_index,
+ mp->mr_next_hop_weight,
+ mp->mr_next_hop_preference,
+ ntohl (mp->mr_next_hop_via_label),
+ label_stack));
+}
+
+void
+vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
+{
+ vl_api_mpls_route_add_del_reply_t *rmp;
+ vnet_main_t *vnm;
+ int rv;
+
+ vnm = vnet_get_main ();
+ vnm->api_errno = 0;
+
+ rv = mpls_route_add_del_t_handler (vnm, mp);
+
+ rv = (rv == 0) ? vnm->api_errno : rv;
+
+ REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY);
+}
+
+void
+mpls_table_create (u32 table_id, u8 is_api, const u8 * name)
+{
+ u32 fib_index;
+
+ /*
+ * The MPLS defult table must also be explicitly created via the API.
+ * So in contrast to IP, it gets no special treatment here.
+ */
+
+ /*
+ * The API holds only one lock on the table.
+ * i.e. it can be added many times via the API but needs to be
+ * deleted only once.
+ */
+ fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
+
+ if (~0 == fib_index)
+ {
+ fib_table_find_or_create_and_lock_w_name (FIB_PROTOCOL_MPLS,
+ table_id,
+ (is_api ?
+ FIB_SOURCE_API :
+ FIB_SOURCE_CLI), name);
+ }
+}
+
+static void
+vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
+{
+ vl_api_mpls_tunnel_add_del_reply_t *rmp;
+ int rv = 0;
+ u32 tunnel_sw_if_index;
+ int ii;
+ fib_route_path_t rpath, *rpaths = NULL;
+
+ memset (&rpath, 0, sizeof (rpath));
+
+ stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ );
+
+ if (mp->mt_next_hop_proto_is_ip4)
+ {
+ rpath.frp_proto = DPO_PROTO_IP4;
+ clib_memcpy (&rpath.frp_addr.ip4,
+ mp->mt_next_hop, sizeof (rpath.frp_addr.ip4));
+ }
+ else
+ {
+ rpath.frp_proto = DPO_PROTO_IP6;
+ clib_memcpy (&rpath.frp_addr.ip6,
+ mp->mt_next_hop, sizeof (rpath.frp_addr.ip6));
+ }
+ rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index);
+ rpath.frp_weight = 1;
+
+ if (mp->mt_is_add)
+ {
+ for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++)
+ vec_add1 (rpath.frp_label_stack,
+ ntohl (mp->mt_next_hop_out_label_stack[ii]));
+ }
+
+ vec_add1 (rpaths, rpath);
+
+ tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
+
+ if (mp->mt_is_add)
+ {
+ if (~0 == tunnel_sw_if_index)
+ tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only,
+ mp->mt_is_multicast);
+ vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths);
+ }
+ else
+ {
+ tunnel_sw_if_index = ntohl (mp->mt_sw_if_index);
+ if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths))
+ vnet_mpls_tunnel_del (tunnel_sw_if_index);
+ }
+
+ vec_free (rpaths);
+
+ stats_dsunlock ();
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl(tunnel_sw_if_index);
+ }));
+ /* *INDENT-ON* */
+}
+
+typedef struct mpls_tunnel_send_walk_ctx_t_
+{
+ unix_shared_memory_queue_t *q;
+ u32 index;
+ u32 context;
+} mpls_tunnel_send_walk_ctx_t;
+
+static void
+send_mpls_tunnel_entry (u32 mti, void *arg)
+{
+ fib_route_path_encode_t *api_rpaths, *api_rpath;
+ mpls_tunnel_send_walk_ctx_t *ctx;
+ vl_api_mpls_tunnel_details_t *mp;
+ const mpls_tunnel_t *mt;
+ vl_api_fib_path2_t *fp;
+ u32 n;
+
+ ctx = arg;
+
+ if (~0 != ctx->index && mti != ctx->index)
+ return;
+
+ mt = mpls_tunnel_get (mti);
+ n = fib_path_list_get_n_paths (mt->mt_path_list);
+
+ mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
+ memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t));
+
+ mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS);
+ mp->context = ctx->context;
+
+ mp->mt_tunnel_index = ntohl (mti);
+ mp->mt_count = ntohl (n);
+
+ fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths);
+
+ fp = mp->mt_paths;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ // FIXME
+ // memcpy (mp->mt_next_hop_out_labels,
+ // mt->mt_label_stack, nlabels * sizeof (u32));
+
+
+ vl_msg_api_send_shmem (ctx->q, (u8 *) & mp);
+}
+
+static void
+vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ mpls_tunnel_send_walk_ctx_t ctx = {
+ .q = q,
+ .index = ntohl (mp->tunnel_index),
+ .context = mp->context,
+ };
+ mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx);
+}
+
+static void
+send_mpls_fib_details (vpe_api_main_t * am,
+ unix_shared_memory_queue_t * q,
+ const fib_table_t * table,
+ u32 label, u32 eos,
+ fib_route_path_encode_t * api_rpaths, u32 context)
+{
+ vl_api_mpls_fib_details_t *mp;
+ fib_route_path_encode_t *api_rpath;
+ vl_api_fib_path2_t *fp;
+ int path_count;
+
+ path_count = vec_len (api_rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+ if (!mp)
+ return;
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MPLS_FIB_DETAILS);
+ mp->context = context;
+
+ mp->table_id = htonl (table->ft_table_id);
+ memcpy (mp->table_name, table->ft_desc,
+ clib_min (vec_len (table->ft_desc), sizeof (mp->table_name)));
+ mp->eos_bit = eos;
+ mp->label = htonl (label);
+
+ mp->count = htonl (path_count);
+ fp = mp->path;
+ vec_foreach (api_rpath, api_rpaths)
+ {
+ memset (fp, 0, sizeof (*fp));
+ fp->weight = api_rpath->rpath.frp_weight;
+ fp->preference = api_rpath->rpath.frp_preference;
+ fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+ copy_fib_next_hop (api_rpath, fp);
+ fp++;
+ }
+
+ vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_
+{
+ fib_node_index_t *lfeis;
+} vl_api_mpls_fib_dump_table_walk_ctx_t;
+
+static int
+vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg)
+{
+ vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->lfeis, fei);
+
+ return (1);
+}
+
+static void
+vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ unix_shared_memory_queue_t *q;
+ mpls_main_t *mm = &mpls_main;
+ fib_table_t *fib_table;
+ mpls_fib_t *mpls_fib;
+ fib_node_index_t *lfeip = NULL;
+ fib_prefix_t pfx;
+ u32 fib_index;
+ fib_route_path_encode_t *api_rpaths;
+ vl_api_mpls_fib_dump_table_walk_ctx_t ctx = {
+ .lfeis = NULL,
+ };
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ return;
+
+ /* *INDENT-OFF* */
+ pool_foreach (mpls_fib, mm->mpls_fibs,
+ ({
+ mpls_fib_table_walk (mpls_fib,
+ vl_api_mpls_fib_dump_table_walk,
+ &ctx);
+ }));
+ /* *INDENT-ON* */
+ vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort);
+
+ vec_foreach (lfeip, ctx.lfeis)
+ {
+ fib_entry_get_prefix (*lfeip, &pfx);
+ fib_index = fib_entry_get_fib_index (*lfeip);
+ fib_table = fib_table_get (fib_index, pfx.fp_proto);
+ api_rpaths = NULL;
+ fib_entry_encode (*lfeip, &api_rpaths);
+ send_mpls_fib_details (am, q,
+ fib_table, pfx.fp_label,
+ pfx.fp_eos, api_rpaths, mp->context);
+ vec_free (api_rpaths);
+ }
+
+ vec_free (ctx.lfeis);
+}
+
+/*
+ * mpls_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has alread mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_mpls;
+#undef _
+}
+
+static clib_error_t *
+mpls_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Trace space for 8 MPLS encap labels
+ */
+ am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (mpls_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c
new file mode 100644
index 00000000..0281d0c2
--- /dev/null
+++ b/src/vnet/mpls/mpls_features.c
@@ -0,0 +1,154 @@
+/*
+ * mpls_features.c: MPLS input and output features
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/mpls/mpls.h>
+
+always_inline uword
+mpls_terminate (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int error_code)
+{
+ u32 * buffers = vlib_frame_vector_args (frame);
+ uword n_packets = frame->n_vectors;
+
+ vlib_error_drop_buffers (vm, node,
+ buffers,
+ /* stride */ 1,
+ n_packets,
+ /* next */ 0,
+ mpls_input_node.index,
+ error_code);
+
+ return n_packets;
+}
+
+static uword
+mpls_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT));
+}
+
+VLIB_REGISTER_NODE (mpls_punt_node) = {
+ .function = mpls_punt,
+ .name = "mpls-punt",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt)
+
+static uword
+mpls_drop (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP));
+}
+
+VLIB_REGISTER_NODE (mpls_drop_node) = {
+ .function = mpls_drop,
+ .name = "mpls-drop",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop)
+
+static uword
+mpls_not_enabled (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED));
+}
+
+VLIB_REGISTER_NODE (mpls_not_enabled_node) = {
+ .function = mpls_not_enabled,
+ .name = "mpls-not-enabled",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled)
+
+VNET_FEATURE_ARC_INIT (mpls_input, static) =
+{
+ .arc_name = "mpls-input",
+ .start_nodes = VNET_FEATURES ("mpls-input"),
+ .arc_index_ptr = &mpls_main.input_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (mpls_not_enabled, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "mpls-not-enabled",
+ .runs_before = VNET_FEATURES ("mpls-lookup"),
+};
+
+VNET_FEATURE_INIT (mpls_lookup, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "mpls-lookup",
+ .runs_before = VNET_FEATURES (0), /* not before any other features */
+};
+
+VNET_FEATURE_ARC_INIT (mpls_output, static) =
+{
+ .arc_name = "mpls-output",
+ .start_nodes = VNET_FEATURES ("mpls-output", "mpls-midchain"),
+ .arc_index_ptr = &mpls_main.output_feature_arc_index,
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_INIT (mpls_interface_output, static) = {
+ .arc_name = "mpls-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+
+static clib_error_t *
+mpls_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index,
+ u32 is_add)
+{
+ mpls_main_t * mm = &mpls_main;
+
+ vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+ vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0);
+
+ vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index,
+ is_add, 0, 0);
+
+ return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del);
+
+
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
new file mode 100644
index 00000000..86ad8bba
--- /dev/null
+++ b/src/vnet/mpls/mpls_input.c
@@ -0,0 +1,324 @@
+/*
+ * node.c: MPLS input
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/feature/feature.h>
+
+typedef struct {
+ u32 next_index;
+ u32 label_net_byte_order;
+} mpls_input_trace_t;
+
+#define foreach_mpls_input_next \
+_(DROP, "error-drop") \
+_(LOOKUP, "mpls-lookup")
+
+typedef enum {
+#define _(s,n) MPLS_INPUT_NEXT_##s,
+ foreach_mpls_input_next
+#undef _
+ MPLS_INPUT_N_NEXT,
+} mpls_input_next_t;
+
+static u8 *
+format_mpls_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
+ char * next_name;
+ u32 label;
+ next_name = "BUG!";
+ label = clib_net_to_host_u32(t->label_net_byte_order);
+
+#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
+ foreach_mpls_input_next;
+#undef _
+
+ s = format (s, "MPLS: next %s[%d] label %d ttl %d",
+ next_name, t->next_index,
+ vnet_mpls_uc_get_label(label),
+ vnet_mpls_uc_get_ttl(label));
+
+ return s;
+}
+
+vlib_node_registration_t mpls_input_node;
+
+typedef struct {
+ u32 last_label;
+ u32 last_inner_fib_index;
+ u32 last_outer_fib_index;
+ mpls_main_t * mpls_main;
+} mpls_input_runtime_t;
+
+static inline uword
+mpls_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_input_runtime_t * rt;
+ mpls_main_t * mm;
+ u32 thread_index = vlib_get_thread_index();
+ vlib_simple_counter_main_t * cm;
+ vnet_main_t * vnm = vnet_get_main();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ mm = rt->mpls_main;
+ /*
+ * Force an initial lookup every time, in case the control-plane
+ * changed the label->FIB mapping.
+ */
+ rt->last_label = ~0;
+
+ next_index = node->cached_next_index;
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_MPLS);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, next0, sw_if_index0;
+ u32 bi1, next1, sw_if_index1;
+ vlib_buffer_t *b0, *b1;
+ char *h0, *h1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+
+ /* TTL expired? */
+ if (PREDICT_FALSE(h0[3] == 0))
+ {
+ next0 = MPLS_INPUT_NEXT_DROP;
+ b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next0 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index0, &next0, b0);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ }
+
+ if (PREDICT_FALSE(h1[3] == 0))
+ {
+ next1 = MPLS_INPUT_NEXT_DROP;
+ b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next1 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index1, &next1, b1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->label_net_byte_order = *((u32*)h0);
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->label_net_byte_order = *((u32*)h1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1,
+ next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 sw_if_index0, next0, bi0;
+ vlib_buffer_t * b0;
+ char * h0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* TTL expired? */
+ if (PREDICT_FALSE(h0[3] == 0))
+ {
+ next0 = MPLS_INPUT_NEXT_DROP;
+ b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
+ }
+ else
+ {
+ next0 = MPLS_INPUT_NEXT_LOOKUP;
+ vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_input_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->label_net_byte_order = *(u32*)h0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_input_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static uword
+mpls_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return mpls_input_inline (vm, node, from_frame);
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+VLIB_REGISTER_NODE (mpls_input_node) = {
+ .function = mpls_input,
+ .name = "mpls-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof(mpls_input_runtime_t),
+
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
+ foreach_mpls_input_next
+#undef _
+ },
+
+ .format_buffer = format_mpls_unicast_header_net_byte_order,
+ .format_trace = format_mpls_input_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)
+
+static void
+mpls_setup_nodes (vlib_main_t * vm)
+{
+ mpls_input_runtime_t * rt;
+ pg_node_t * pn;
+
+ pn = pg_get_node (mpls_input_node.index);
+ pn->unformat_edit = unformat_pg_mpls_header;
+
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ rt->last_label = (u32) ~0;
+ rt->last_inner_fib_index = 0;
+ rt->last_outer_fib_index = 0;
+ rt->mpls_main = &mpls_main;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS,
+ mpls_input_node.index);
+}
+
+static clib_error_t * mpls_input_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ error = vlib_call_init_function (vm, mpls_init);
+ if (error)
+ clib_error_report (error);
+
+ mpls_setup_nodes (vm);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (mpls_input_init);
+
+static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
+{
+ mpls_input_runtime_t * rt;
+ rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
+ rt->last_label = (u32) ~0;
+ rt->last_inner_fib_index = 0;
+ rt->last_outer_fib_index = 0;
+ rt->mpls_main = &mpls_main;
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
new file mode 100644
index 00000000..30031e51
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -0,0 +1,723 @@
+/*
+ * mpls_lookup.c: MPLS lookup
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls_lookup.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+/**
+ * Static MPLS VLIB forwarding node
+ */
+static vlib_node_registration_t mpls_lookup_node;
+
+/**
+ * The arc/edge from the MPLS lookup node to the MPLS replicate node
+ */
+u32 mpls_lookup_to_replicate_edge;
+
+typedef struct {
+ u32 next_index;
+ u32 lb_index;
+ u32 lfib_index;
+ u32 label_net_byte_order;
+ u32 hash;
+} mpls_lookup_trace_t;
+
+static u8 *
+format_mpls_lookup_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
+
+ s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label(
+ clib_net_to_host_u32(t->label_net_byte_order)),
+ vnet_mpls_uc_get_s(
+ clib_net_to_host_u32(t->label_net_byte_order)));
+ return s;
+}
+
+static inline uword
+mpls_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, next_index, * from, * to_next;
+ mpls_main_t * mm = &mpls_main;
+ u32 thread_index = vlib_get_thread_index();
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+ const mpls_unicast_header_t * h0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+ u32 lbi1, next1, lfib_index1, bi1, hash_c1;
+ const mpls_unicast_header_t * h1;
+ const load_balance_t *lb1;
+ const dpo_id_t *dpo1;
+ vlib_buffer_t * b1;
+ u32 lbi2, next2, lfib_index2, bi2, hash_c2;
+ const mpls_unicast_header_t * h2;
+ const load_balance_t *lb2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 lbi3, next3, lfib_index3, bi3, hash_c3;
+ const mpls_unicast_header_t * h3;
+ const load_balance_t *lb3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3, *p4, *p5;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
+
+ from += 4;
+ n_left_from -= 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
+
+ lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+ lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b2)->sw_if_index[VLIB_RX]);
+ lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b3)->sw_if_index[VLIB_RX]);
+
+ lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
+ lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+ lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
+ lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
+
+ hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
+
+ if (MPLS_IS_REPLICATE & lbi0)
+ {
+ next0 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ (lbi0 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb0 = load_balance_get(lbi0);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0,
+ (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+ if (MPLS_IS_REPLICATE & lbi1)
+ {
+ next1 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+ (lbi1 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb1 = load_balance_get(lbi1);
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
+ {
+ hash_c1 = vnet_buffer (b1)->ip.flow_hash =
+ mpls_compute_flow_hash(h1, lb1->lb_hash_config);
+ dpo1 = load_balance_get_fwd_bucket
+ (lb1,
+ (hash_c1 & (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, b1));
+ }
+ if (MPLS_IS_REPLICATE & lbi2)
+ {
+ next2 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+ (lbi2 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb2 = load_balance_get(lbi2);
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+ {
+ hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+ mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+ dpo2 = load_balance_get_fwd_bucket
+ (lb2,
+ (hash_c2 & (lb2->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo2 = load_balance_get_bucket_i (lb2, 0);
+ }
+ next2 = dpo2->dpoi_next_node;
+
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi2, 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ }
+ if (MPLS_IS_REPLICATE & lbi3)
+ {
+ next3 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+ (lbi3 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb3 = load_balance_get(lbi3);
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+ {
+ hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+ mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+ dpo3 = load_balance_get_fwd_bucket
+ (lb3,
+ (hash_c3 & (lb3->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo3 = load_balance_get_bucket_i (lb3, 0);
+ }
+ next3 = dpo3->dpoi_next_node;
+
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi3, 1,
+ vlib_buffer_length_in_chain (vm, b3));
+ }
+
+ /*
+ * before we pop the label copy th values we need to maintain.
+ * The label header is in network byte order.
+ * last byte is the TTL.
+ * bits 2 to 4 inclusive are the EXP bits
+ */
+ vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3];
+ vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1;
+ vnet_buffer (b0)->mpls.first = 1;
+ vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3];
+ vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1;
+ vnet_buffer (b1)->mpls.first = 1;
+ vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3];
+ vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1;
+ vnet_buffer (b2)->mpls.first = 1;
+ vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3];
+ vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1;
+ vnet_buffer (b3)->mpls.first = 1;
+
+ /*
+ * pop the label that was just used in the lookup
+ */
+ vlib_buffer_advance(b0, sizeof(*h0));
+ vlib_buffer_advance(b1, sizeof(*h1));
+ vlib_buffer_advance(b2, sizeof(*h2));
+ vlib_buffer_advance(b3, sizeof(*h3));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->lfib_index = lfib_index0;
+ tr->hash = hash_c0;
+ tr->label_net_byte_order = h0->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->lfib_index = lfib_index1;
+ tr->hash = hash_c1;
+ tr->label_net_byte_order = h1->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->next_index = next2;
+ tr->lb_index = lbi2;
+ tr->lfib_index = lfib_index2;
+ tr->hash = hash_c2;
+ tr->label_net_byte_order = h2->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->next_index = next3;
+ tr->lb_index = lbi3;
+ tr->lfib_index = lfib_index3;
+ tr->hash = hash_c3;
+ tr->label_net_byte_order = h3->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 lbi0, next0, lfib_index0, bi0, hash_c0;
+ const mpls_unicast_header_t * h0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ vlib_buffer_t * b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ h0 = vlib_buffer_get_current (b0);
+
+ lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
+
+ lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0);
+ hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
+
+ if (MPLS_IS_REPLICATE & lbi0)
+ {
+ next0 = mpls_lookup_to_replicate_edge;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ (lbi0 & ~MPLS_IS_REPLICATE);
+ }
+ else
+ {
+ lb0 = load_balance_get(lbi0);
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
+ {
+ hash_c0 = vnet_buffer (b0)->ip.flow_hash =
+ mpls_compute_flow_hash(h0, lb0->lb_hash_config);
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0,
+ (hash_c0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
+ /*
+ * before we pop the label copy, values we need to maintain.
+ * The label header is in network byte order.
+ * last byte is the TTL.
+ * bits 2 to 4 inclusive are the EXP bits
+ */
+ vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3];
+ vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1;
+ vnet_buffer (b0)->mpls.first = 1;
+
+ /*
+ * pop the label that was just used in the lookup
+ */
+ vlib_buffer_advance(b0, sizeof(*h0));
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->lfib_index = lfib_index0;
+ tr->hash = hash_c0;
+ tr->label_net_byte_order = h0->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_lookup_node.index,
+ MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+VLIB_REGISTER_NODE (mpls_lookup_node, static) = {
+ .function = mpls_lookup,
+ .name = "mpls-lookup",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .sibling_of = "mpls-load-balance",
+
+ .format_buffer = format_mpls_header,
+ .format_trace = format_mpls_lookup_trace,
+ .unformat_buffer = unformat_mpls_header,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup)
+
+typedef struct {
+ u32 next_index;
+ u32 lb_index;
+ u32 hash;
+} mpls_load_balance_trace_t;
+
+static u8 *
+format_mpls_load_balance_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
+
+ s = format (s, "MPLS: next [%d], LB index %d hash %d",
+ t->next_index, t->lb_index, t->hash);
+ return s;
+}
+
+always_inline uword
+mpls_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, * from, * to_next;
+ u32 thread_index = vlib_get_thread_index();
+ u32 next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next,
+ to_next, n_left_to_next);
+
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t * p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1;
+ const mpls_unicast_header_t *mpls0, *mpls1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ mpls0 = vlib_buffer_get_current (p0);
+ mpls1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+ lb1 = load_balance_get(lbi1);
+
+ /*
+ * this node is for via FIBs we can re-use the hash value from the
+ * to node if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation
+ */
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
+ }
+ dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
+ }
+ dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const load_balance_t *lb0;
+ vlib_buffer_t * p0;
+ u32 pi0, lbi0, hc0, next0;
+ const mpls_unicast_header_t *mpls0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ mpls0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get(lbi0);
+
+ hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
+ }
+ dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (mpls_load_balance_node) = {
+ .function = mpls_load_balance,
+ .name = "mpls-load-balance",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_load_balance_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "mpls-drop",
+ },
+
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance)
+
+
+static clib_error_t *
+mpls_lookup_init (vlib_main_t * vm)
+{
+ clib_error_t * error;
+
+ if ((error = vlib_call_init_function (vm, mpls_init)))
+ return error;
+
+ mpls_lookup_to_replicate_edge =
+ vlib_node_add_named_next(vm,
+ mpls_lookup_node.index,
+ "mpls-replicate");
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (mpls_lookup_init);
diff --git a/src/vnet/mpls/mpls_lookup.h b/src/vnet/mpls/mpls_lookup.h
new file mode 100644
index 00000000..28c9124f
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_LOOKUP_H__
+#define __MPLS_LOOKUP_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/ip/ip.h>
+
+/**
+ * The arc/edge from the MPLS lookup node to the MPLS replicate node
+ */
+u32 mpls_lookup_to_replicate_edge;
+
+/*
+ * Compute flow hash.
+ * We'll use it to select which adjacency to use for this flow. And other things.
+ */
+always_inline u32
+mpls_compute_flow_hash (const mpls_unicast_header_t * hdr,
+ flow_hash_config_t flow_hash_config)
+{
+ /*
+ * We need to byte swap so we use the numerical value. i.e. an odd label
+ * leads to an odd bucket. as opposed to a label above and below value X.
+ */
+ u8 next_label_is_entropy;
+ mpls_label_t ho_label;
+ u32 hash, value;
+
+ ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+ hash = vnet_mpls_uc_get_label(ho_label);
+ next_label_is_entropy = 0;
+
+ while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label))
+ {
+ hdr++;
+ ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl);
+ value = vnet_mpls_uc_get_label(ho_label);
+
+ if (1 == next_label_is_entropy)
+ {
+ /*
+ * The label is an entropy value, use it alone as the hash
+ */
+ return (ho_label);
+ }
+ if (MPLS_IETF_ENTROPY_LABEL == value)
+ {
+ /*
+ * we've met a label in the stack indicating that tha next
+ * label is an entropy value
+ */
+ next_label_is_entropy = 1;
+ }
+ else
+ {
+ /*
+ * XOR the label values in the stack together to
+ * build up the hash value
+ */
+ hash ^= value;
+ }
+ }
+
+ /*
+ * check the top nibble for v4 and v6
+ */
+ hdr++;
+
+ switch (((u8*)hdr)[0] >> 4)
+ {
+ case 4:
+ /* incorporate the v4 flow-hash */
+ hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr,
+ IP_FLOW_HASH_DEFAULT);
+ break;
+ case 6:
+ /* incorporate the v6 flow-hash */
+ hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr,
+ IP_FLOW_HASH_DEFAULT);
+ break;
+ default:
+ break;
+ }
+
+ return (hash);
+}
+
+#endif /* __MPLS_LOOKUP_H__ */
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
new file mode 100644
index 00000000..241a7835
--- /dev/null
+++ b/src/vnet/mpls/mpls_output.c
@@ -0,0 +1,498 @@
+/*
+ * mpls_output.c: MPLS Adj rewrite
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/mpls/mpls.h>
+
+typedef struct {
+ /* Adjacency taken. */
+ u32 adj_index;
+ u32 flow_hash;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[64 - 1*sizeof(u32)];
+} mpls_output_trace_t;
+
+#define foreach_mpls_output_next \
+_(DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) MPLS_OUTPUT_NEXT_##s,
+ foreach_mpls_output_next
+#undef _
+ MPLS_OUTPUT_N_NEXT,
+} mpls_output_next_t;
+
+static u8 *
+format_mpls_output_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
+ t->adj_index,
+ format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
+ t->flow_hash);
+ s = format (s, "\n%U%U",
+ format_white_space, indent,
+ format_ip_adjacency_packet_data,
+ t->adj_index, t->packet_data, sizeof (t->packet_data));
+ return s;
+}
+
+static inline uword
+mpls_output_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame,
+ int is_midchain)
+{
+ u32 n_left_from, next_index, * from, * to_next, thread_index;
+ vlib_node_runtime_t * error_node;
+ u32 n_left_to_next;
+ mpls_main_t *mm;
+
+ thread_index = vlib_get_thread_index();
+ error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ mm = &mpls_main;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_adjacency_t * adj0;
+ mpls_unicast_header_t *hdr0;
+ vlib_buffer_t * p0;
+ u32 pi0, rw_len0, adj_index0, next0, error0;
+
+ ip_adjacency_t * adj1;
+ mpls_unicast_header_t *hdr1;
+ vlib_buffer_t * p1;
+ u32 pi1, rw_len1, adj_index1, next1, error1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ adj1 = adj_get(adj_index1);
+ hdr0 = vlib_buffer_get_current (p0);
+ hdr1 = vlib_buffer_get_current (p1);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+ rw_len1 = adj1[0].rewrite_header.data_bytes;
+
+ /* Bump the adj counters for packet and bytes */
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0,
+ 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index1,
+ 1,
+ vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ next0 = adj0[0].rewrite_header.next_index;
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj0[0].rewrite_header.sw_if_index,
+ &next0, p0);
+ }
+ else
+ {
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = MPLS_OUTPUT_NEXT_DROP;
+ }
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
+ adj1[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p1->current_data -= rw_len1;
+ p1->current_length += rw_len1;
+
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] =
+ adj1[0].rewrite_header.sw_if_index;
+ next1 = adj1[0].rewrite_header.next_index;
+ error1 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj1[0].rewrite_header.sw_if_index,
+ &next1, p1);
+ }
+ else
+ {
+ error1 = IP4_ERROR_MTU_EXCEEDED;
+ next1 = MPLS_OUTPUT_NEXT_DROP;
+ }
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+ clib_memcpy (tr->packet_data,
+ vlib_buffer_get_current (p0),
+ sizeof (tr->packet_data));
+ }
+ if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p1, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
+ clib_memcpy (tr->packet_data,
+ vlib_buffer_get_current (p1),
+ sizeof (tr->packet_data));
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_adjacency_t * adj0;
+ mpls_unicast_header_t *hdr0;
+ vlib_buffer_t * p0;
+ u32 pi0, rw_len0, adj_index0, next0, error0;
+
+ pi0 = to_next[0] = from[0];
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+ hdr0 = vlib_buffer_get_current (p0);
+
+ /* Guess we are only writing on simple Ethernet header. */
+ vnet_rewrite_one_header (adj0[0], hdr0,
+ sizeof (ethernet_header_t));
+
+ /* Update packet buffer attributes/set output interface. */
+ rw_len0 = adj0[0].rewrite_header.data_bytes;
+
+ vlib_increment_combined_counter
+ (&adjacency_counters,
+ thread_index,
+ adj_index0,
+ 1,
+ vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+ /* Check MTU of outgoing interface. */
+ if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
+ adj0[0].rewrite_header.max_l3_packet_bytes))
+ {
+ p0->current_data -= rw_len0;
+ p0->current_length += rw_len0;
+
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ adj0[0].rewrite_header.sw_if_index;
+ next0 = adj0[0].rewrite_header.next_index;
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+ vnet_feature_arc_start (mm->output_feature_arc_index,
+ adj0[0].rewrite_header.sw_if_index,
+ &next0, p0);
+ }
+ else
+ {
+ error0 = IP4_ERROR_MTU_EXCEEDED;
+ next0 = MPLS_OUTPUT_NEXT_DROP;
+ }
+ if (is_midchain)
+ {
+ adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
+ }
+
+ p0->error = error_node->errors[error0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_output_trace_t *tr = vlib_add_trace (vm, node,
+ p0, sizeof (*tr));
+ tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
+ tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, mpls_output_node.index,
+ MPLS_ERROR_PKTS_ENCAP,
+ from_frame->n_vectors);
+
+ return from_frame->n_vectors;
+}
+
+static char * mpls_error_strings[] = {
+#define mpls_error(n,s) s,
+#include "error.def"
+#undef mpls_error
+};
+
+static inline uword
+mpls_output (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0));
+}
+
+VLIB_REGISTER_NODE (mpls_output_node) = {
+ .function = mpls_output,
+ .name = "mpls-output",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n,
+ foreach_mpls_output_next
+#undef _
+ },
+
+ .format_trace = format_mpls_output_trace,
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output)
+
+static inline uword
+mpls_midchain (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1));
+}
+
+VLIB_REGISTER_NODE (mpls_midchain_node) = {
+ .function = mpls_midchain,
+ .name = "mpls-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_output_trace,
+
+ .sibling_of = "mpls-output",
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain)
+
+/**
+ * @brief Next index values from the MPLS incomplete adj node
+ */
+#define foreach_mpls_adj_incomplete_next \
+_(DROP, "error-drop") \
+_(IP4, "ip4-arp") \
+_(IP6, "ip6-discover-neighbor")
+
+typedef enum {
+#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ MPLS_ADJ_INCOMPLETE_N_NEXT,
+} mpls_adj_incomplete_next_t;
+
+/**
+ * @brief A struct to hold tracing information for the MPLS label imposition
+ * node.
+ */
+typedef struct mpls_adj_incomplete_trace_t_
+{
+ u32 next;
+} mpls_adj_incomplete_trace_t;
+
+
+/**
+ * @brief Graph node for incomplete MPLS adjacency.
+ * This node will push traffic to either the v4-arp or v6-nd node
+ * based on the next-hop proto of the adj.
+ * We pay a cost for this 'routing' node, but an incomplete adj is the
+ * exception case.
+ */
+static inline uword
+mpls_adj_incomplete (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, next0, adj_index0;
+ ip_adjacency_t * adj0;
+ vlib_buffer_t * p0;
+
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ adj0 = adj_get(adj_index0);
+
+ if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto))
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4;
+ }
+ else
+ {
+ next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6;
+ }
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_adj_incomplete_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next = next0;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+static u8 *
+format_mpls_adj_incomplete_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_adj_incomplete_trace_t * t;
+ uword indent;
+
+ t = va_arg (*args, mpls_adj_incomplete_trace_t *);
+ indent = format_get_indent (s);
+
+ s = format (s, "%Unext:%d",
+ format_white_space, indent,
+ t->next);
+ return (s);
+}
+
+VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
+ .function = mpls_adj_incomplete,
+ .name = "mpls-adj-incomplete",
+ .format_trace = format_mpls_adj_incomplete_trace,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = MPLS_N_ERROR,
+ .error_strings = mpls_error_strings,
+
+ .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n,
+ foreach_mpls_adj_incomplete_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node,
+ mpls_adj_incomplete)
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
new file mode 100644
index 00000000..2d5521f4
--- /dev/null
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -0,0 +1,1070 @@
+/*
+ * mpls_tunnel.c: MPLS tunnel interfaces (i.e. for RSVP-TE)
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls_tunnel.h>
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/fib/mpls_fib.h>
+
+/**
+ * @brief pool of tunnel instances
+ */
+static mpls_tunnel_t *mpls_tunnel_pool;
+
+/**
+ * @brief Pool of free tunnel SW indices - i.e. recycled indices
+ */
+static u32 * mpls_tunnel_free_hw_if_indices;
+
+/**
+ * @brief DB of SW index to tunnel index
+ */
+static u32 *mpls_tunnel_db;
+
+/**
+ * @brief MPLS tunnel flags strings
+ */
+static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES;
+
+/**
+ * @brief Get a tunnel object from a SW interface index
+ */
+static mpls_tunnel_t*
+mpls_tunnel_get_from_sw_if_index (u32 sw_if_index)
+{
+ if ((vec_len(mpls_tunnel_db) < sw_if_index) ||
+ (~0 == mpls_tunnel_db[sw_if_index]))
+ return (NULL);
+
+ return (pool_elt_at_index(mpls_tunnel_pool,
+ mpls_tunnel_db[sw_if_index]));
+}
+
+/**
+ * @brief Build a rewrite string for the MPLS tunnel.
+ */
+static u8*
+mpls_tunnel_build_rewrite_i (void)
+{
+ /*
+ * passing the adj code a NULL rewirte means 'i don't have one cos
+ * t'other end is unresolved'. That's not the case here. For the mpls
+ * tunnel there are just no bytes of encap to apply in the adj. We'll impose
+ * the label stack once we choose a path. So return a zero length rewrite.
+ */
+ u8 *rewrite = NULL;
+
+ vec_validate(rewrite, 0);
+ vec_reset_length(rewrite);
+
+ return (rewrite);
+}
+
+/**
+ * @brief Build a rewrite string for the MPLS tunnel.
+ */
+static u8*
+mpls_tunnel_build_rewrite (vnet_main_t * vnm,
+ u32 sw_if_index,
+ vnet_link_t link_type,
+ const void *dst_address)
+{
+ return (mpls_tunnel_build_rewrite_i());
+}
+
+typedef struct mpls_tunnel_collect_forwarding_ctx_t_
+{
+ load_balance_path_t * next_hops;
+ const mpls_tunnel_t *mt;
+ fib_forward_chain_type_t fct;
+} mpls_tunnel_collect_forwarding_ctx_t;
+
+static fib_path_list_walk_rc_t
+mpls_tunnel_collect_forwarding (fib_node_index_t pl_index,
+ fib_node_index_t path_index,
+ void *arg)
+{
+ mpls_tunnel_collect_forwarding_ctx_t *ctx;
+ fib_path_ext_t *path_ext;
+
+ ctx = arg;
+
+ /*
+ * if the path is not resolved, don't include it.
+ */
+ if (!fib_path_is_resolved(path_index))
+ {
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+ }
+
+ /*
+ * get the matching path-extension for the path being visited.
+ */
+ path_ext = fib_path_ext_list_find_by_path_index(&ctx->mt->mt_path_exts,
+ path_index);
+
+ if (NULL != path_ext)
+ {
+ /*
+ * found a matching extension. stack it to obtain the forwarding
+ * info for this path.
+ */
+ ctx->next_hops = fib_path_ext_stack(path_ext,
+ ctx->fct,
+ ctx->fct,
+ ctx->next_hops);
+ }
+ else
+ ASSERT(0);
+ /*
+ * else
+ * There should be a path-extenios associated with each path
+ */
+
+ return (FIB_PATH_LIST_WALK_CONTINUE);
+}
+
+static void
+mpls_tunnel_mk_lb (mpls_tunnel_t *mt,
+ vnet_link_t linkt,
+ fib_forward_chain_type_t fct,
+ dpo_id_t *dpo_lb)
+{
+ dpo_proto_t lb_proto;
+
+ /*
+ * If the entry has path extensions then we construct a load-balance
+ * by stacking the extensions on the forwarding chains of the paths.
+ * Otherwise we use the load-balance of the path-list
+ */
+ mpls_tunnel_collect_forwarding_ctx_t ctx = {
+ .mt = mt,
+ .next_hops = NULL,
+ .fct = fct,
+ };
+
+ /*
+ * As an optimisation we allocate the vector of next-hops to be sized
+ * equal to the maximum nuber of paths we will need, which is also the
+ * most likely number we will need, since in most cases the paths are 'up'.
+ */
+ vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list));
+ vec_reset_length(ctx.next_hops);
+
+ lb_proto = fib_forw_chain_type_to_dpo_proto(fct);
+
+ fib_path_list_walk(mt->mt_path_list,
+ mpls_tunnel_collect_forwarding,
+ &ctx);
+
+ if (!dpo_id_is_valid(dpo_lb))
+ {
+ /*
+ * first time create
+ */
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+ {
+ dpo_set(dpo_lb,
+ DPO_REPLICATE,
+ lb_proto,
+ replicate_create(0, lb_proto));
+ }
+ else
+ {
+ flow_hash_config_t fhc;
+
+ switch (linkt)
+ {
+ case VNET_LINK_MPLS:
+ fhc = MPLS_FLOW_HASH_DEFAULT;
+ break;
+ case VNET_LINK_IP4:
+ case VNET_LINK_IP6:
+ fhc = IP_FLOW_HASH_DEFAULT;
+ break;
+ default:
+ fhc = 0;
+ break;
+ }
+
+ dpo_set(dpo_lb,
+ DPO_LOAD_BALANCE,
+ lb_proto,
+ load_balance_create(0, lb_proto, fhc));
+ }
+ }
+
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST)
+ {
+ /*
+ * MPLS multicast
+ */
+ replicate_multipath_update(dpo_lb, ctx.next_hops);
+ }
+ else
+ {
+ load_balance_multipath_update(dpo_lb,
+ ctx.next_hops,
+ LOAD_BALANCE_FLAG_NONE);
+ vec_free(ctx.next_hops);
+ }
+}
+
+/**
+ * mpls_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+static void
+mpls_tunnel_stack (adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+ mpls_tunnel_t *mt;
+ u32 sw_if_index;
+
+ adj = adj_get(ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ /*
+ * while we're stacking the adj, remove the tunnel from the child list
+ * of the path list. this breaks a circular dependency of walk updates
+ * where the create of adjacencies in the children can lead to walks
+ * that get back here.
+ */
+ fib_path_list_lock(mt->mt_path_list);
+
+ fib_path_list_child_remove(mt->mt_path_list,
+ mt->mt_sibling_index);
+
+ /*
+ * Construct the DPO (load-balance or replicate) that we can stack
+ * the tunnel's midchain on
+ */
+ if (vnet_hw_interface_get_flags(vnet_get_main(),
+ mt->mt_hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ mpls_tunnel_mk_lb(mt,
+ adj->ia_link,
+ (VNET_LINK_MPLS == adj_get_link_type(ai) ?
+ FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS),
+ &dpo);
+
+ adj_nbr_midchain_stack(ai, &dpo);
+ dpo_reset(&dpo);
+ }
+ else
+ {
+ adj_nbr_midchain_unstack(ai);
+ }
+
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mt - mpls_tunnel_pool);
+
+ fib_path_list_unlock(mt->mt_path_list);
+}
+
+/**
+ * @brief Call back when restacking all adjacencies on a MPLS interface
+ */
+static adj_walk_rc_t
+mpls_adj_walk_cb (adj_index_t ai,
+ void *ctx)
+{
+ mpls_tunnel_stack(ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+mpls_tunnel_restack (mpls_tunnel_t *mt)
+{
+ fib_protocol_t proto;
+
+ /*
+ * walk all the adjacencies on the MPLS interface and restack them
+ */
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
+ {
+ /*
+ * Stack a load-balance that drops, whilst we have no paths
+ */
+ vnet_hw_interface_t * hi;
+ dpo_id_t dpo = DPO_INVALID;
+
+ mpls_tunnel_mk_lb(mt,
+ VNET_LINK_MPLS,
+ FIB_FORW_CHAIN_TYPE_ETHERNET,
+ &dpo);
+
+ hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index);
+ dpo_stack_from_node(hi->tx_node_index,
+ &mt->mt_l2_lb,
+ &dpo);
+ dpo_reset(&dpo);
+ }
+ else
+ {
+ FOR_EACH_FIB_PROTOCOL(proto)
+ {
+ adj_nbr_walk(mt->mt_sw_if_index,
+ proto,
+ mpls_adj_walk_cb,
+ NULL);
+ }
+ }
+}
+
+static clib_error_t *
+mpls_tunnel_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index,
+ u32 flags)
+{
+ vnet_hw_interface_t * hi;
+ mpls_tunnel_t *mt;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index);
+
+ if (NULL == mt)
+ return (NULL);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+
+ mpls_tunnel_restack(mt);
+
+ return (NULL);
+}
+
+/**
+ * @brief Fixup the adj rewrite post encap. This is a no-op since the
+ * rewrite is a stack of labels.
+ */
+static void
+mpls_tunnel_fixup (vlib_main_t *vm,
+ ip_adjacency_t *adj,
+ vlib_buffer_t *b0)
+{
+ /*
+ * A no-op w.r.t. the header. but reset the 'have we pushed any
+ * MPLS labels onto the packet' flag. That way when we enter the
+ * tunnel we'll get a TTL set to 255
+ */
+ vnet_buffer(b0)->mpls.first = 0;
+}
+
+static void
+mpls_tunnel_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index,
+ adj_index_t ai)
+{
+ ip_adjacency_t *adj;
+
+ ASSERT(ADJ_INDEX_INVALID != ai);
+
+ adj = adj_get(ai);
+
+ switch (adj->lookup_next_index)
+ {
+ case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_GLEAN:
+ adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+ ADJ_FLAG_NONE,
+ mpls_tunnel_build_rewrite_i());
+ break;
+ case IP_LOOKUP_NEXT_MCAST:
+ /*
+ * Construct a partial rewrite from the known ethernet mcast dest MAC
+ * There's no MAC fixup, so the last 2 parameters are 0
+ */
+ adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup,
+ ADJ_FLAG_NONE,
+ mpls_tunnel_build_rewrite_i(),
+ 0, 0);
+ break;
+
+ case IP_LOOKUP_NEXT_DROP:
+ case IP_LOOKUP_NEXT_PUNT:
+ case IP_LOOKUP_NEXT_LOCAL:
+ case IP_LOOKUP_NEXT_REWRITE:
+ case IP_LOOKUP_NEXT_MIDCHAIN:
+ case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+ case IP_LOOKUP_NEXT_ICMP_ERROR:
+ case IP_LOOKUP_N_NEXT:
+ ASSERT (0);
+ break;
+ }
+
+ mpls_tunnel_stack(ai);
+}
+
+static u8 *
+format_mpls_tunnel_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "mpls-tunnel%d", dev_instance);
+}
+
+static u8 *
+format_mpls_tunnel_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+
+ return (format (s, "MPLS-tunnel: id %d\n", dev_instance));
+}
+
+/**
+ * @brief Packet trace structure
+ */
+typedef struct mpls_tunnel_trace_t_
+{
+ /**
+ * Tunnel-id / index in tunnel vector
+ */
+ u32 tunnel_id;
+} mpls_tunnel_trace_t;
+
+static u8 *
+format_mpls_tunnel_tx_trace (u8 * s,
+ va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ mpls_tunnel_trace_t * t = va_arg (*args, mpls_tunnel_trace_t *);
+
+ s = format (s, "MPLS: tunnel %d", t->tunnel_id);
+ return s;
+}
+
+/**
+ * @brief TX function. Only called L2. L3 traffic uses the adj-midchains
+ */
+static uword
+mpls_tunnel_tx (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 next_index;
+ u32 * from, * to_next, n_left_from, n_left_to_next;
+ vnet_interface_output_runtime_t * rd = (void *) node->runtime_data;
+ const mpls_tunnel_t *mt;
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, rd->dev_instance);
+
+ /* Vector of buffer / pkt indices we're supposed to process */
+ from = vlib_frame_vector_args (frame);
+
+ /* Number of buffers / pkts */
+ n_left_from = frame->n_vectors;
+
+ /* Speculatively send the first buffer to the last disposition we used */
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ /* set up to enqueue to our disposition with index = next_index */
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * FIXME DUAL LOOP
+ */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * b0;
+ u32 bi0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer(vm, bi0);
+
+ vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index;
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_id = rd->dev_instance;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, mt->mt_l2_lb.dpoi_next_node);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VNET_DEVICE_CLASS (mpls_tunnel_class) = {
+ .name = "MPLS tunnel device",
+ .format_device_name = format_mpls_tunnel_name,
+ .format_device = format_mpls_tunnel_device,
+ .format_tx_trace = format_mpls_tunnel_tx_trace,
+ .tx_function = mpls_tunnel_tx,
+ .admin_up_down_function = mpls_tunnel_admin_up_down,
+};
+
+VNET_HW_INTERFACE_CLASS (mpls_tunnel_hw_interface_class) = {
+ .name = "MPLS-Tunnel",
+ .update_adjacency = mpls_tunnel_update_adj,
+ .build_rewrite = mpls_tunnel_build_rewrite,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+const mpls_tunnel_t *
+mpls_tunnel_get (u32 mti)
+{
+ return (pool_elt_at_index(mpls_tunnel_pool, mti));
+}
+
+/**
+ * @brief Walk all the MPLS tunnels
+ */
+void
+mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb,
+ void *ctx)
+{
+ u32 mti;
+
+ pool_foreach_index(mti, mpls_tunnel_pool,
+ ({
+ cb(mti, ctx);
+ }));
+}
+
+void
+vnet_mpls_tunnel_del (u32 sw_if_index)
+{
+ mpls_tunnel_t *mt;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ if (FIB_NODE_INDEX_INVALID != mt->mt_path_list)
+ fib_path_list_child_remove(mt->mt_path_list,
+ mt->mt_sibling_index);
+ dpo_reset(&mt->mt_l2_lb);
+
+ vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index);
+ pool_put(mpls_tunnel_pool, mt);
+ mpls_tunnel_db[sw_if_index] = ~0;
+}
+
+u32
+vnet_mpls_tunnel_create (u8 l2_only,
+ u8 is_multicast)
+{
+ vnet_hw_interface_t * hi;
+ mpls_tunnel_t *mt;
+ vnet_main_t * vnm;
+ u32 mti;
+
+ vnm = vnet_get_main();
+ pool_get(mpls_tunnel_pool, mt);
+ memset (mt, 0, sizeof (*mt));
+ mti = mt - mpls_tunnel_pool;
+ fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL);
+ mt->mt_path_list = FIB_NODE_INDEX_INVALID;
+ mt->mt_sibling_index = FIB_NODE_INDEX_INVALID;
+
+ if (is_multicast)
+ mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST;
+ if (l2_only)
+ mt->mt_flags |= MPLS_TUNNEL_FLAG_L2;
+
+ /*
+ * Create a new, or re=use and old, tunnel HW interface
+ */
+ if (vec_len (mpls_tunnel_free_hw_if_indices) > 0)
+ {
+ mt->mt_hw_if_index =
+ mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1];
+ _vec_len (mpls_tunnel_free_hw_if_indices) -= 1;
+ hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
+ hi->hw_instance = mti;
+ hi->dev_instance = mti;
+ }
+ else
+ {
+ mt->mt_hw_if_index = vnet_register_interface(
+ vnm,
+ mpls_tunnel_class.index,
+ mti,
+ mpls_tunnel_hw_interface_class.index,
+ mti);
+ hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index);
+ }
+
+ /*
+ * Add the new tunnel to the tunnel DB - key:SW if index
+ */
+ mt->mt_sw_if_index = hi->sw_if_index;
+ vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0);
+ mpls_tunnel_db[mt->mt_sw_if_index] = mti;
+
+ return (mt->mt_sw_if_index);
+}
+
+void
+vnet_mpls_tunnel_path_add (u32 sw_if_index,
+ fib_route_path_t *rpaths)
+{
+ mpls_tunnel_t *mt;
+ u32 mti;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return;
+
+ mti = mt - mpls_tunnel_pool;
+
+ /*
+ * construct a path-list from the path provided
+ */
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+ mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths);
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ }
+ else
+ {
+ fib_node_index_t old_pl_index;
+
+ old_pl_index = mt->mt_path_list;
+
+ mt->mt_path_list =
+ fib_path_list_copy_and_path_add(old_pl_index,
+ FIB_PATH_LIST_FLAG_SHARED,
+ rpaths);
+
+ fib_path_list_child_remove(old_pl_index,
+ mt->mt_sibling_index);
+ mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ /*
+ * re-resolve all the path-extensions with the new path-list
+ */
+ fib_path_ext_list_resolve(&mt->mt_path_exts, mt->mt_path_list);
+ }
+ fib_path_ext_list_insert(&mt->mt_path_exts,
+ mt->mt_path_list,
+ FIB_PATH_EXT_MPLS,
+ rpaths);
+ mpls_tunnel_restack(mt);
+}
+
+int
+vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+ fib_route_path_t *rpaths)
+{
+ mpls_tunnel_t *mt;
+ u32 mti;
+
+ mt = mpls_tunnel_get_from_sw_if_index(sw_if_index);
+
+ if (NULL == mt)
+ return (0);
+
+ mti = mt - mpls_tunnel_pool;
+
+ /*
+ * construct a path-list from the path provided
+ */
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+ /* can't remove a path if we have onoe */
+ return (0);
+ }
+ else
+ {
+ fib_node_index_t old_pl_index;
+
+ old_pl_index = mt->mt_path_list;
+
+ mt->mt_path_list =
+ fib_path_list_copy_and_path_remove(old_pl_index,
+ FIB_PATH_LIST_FLAG_SHARED,
+ rpaths);
+
+ fib_path_list_child_remove(old_pl_index,
+ mt->mt_sibling_index);
+
+ if (FIB_NODE_INDEX_INVALID == mt->mt_path_list)
+ {
+ /* no paths left */
+ return (0);
+ }
+ else
+ {
+ mt->mt_sibling_index =
+ fib_path_list_child_add(mt->mt_path_list,
+ FIB_NODE_TYPE_MPLS_TUNNEL,
+ mti);
+ }
+ /*
+ * find the matching path extension and remove it
+ */
+ fib_path_ext_list_remove(&mt->mt_path_exts,
+ FIB_PATH_EXT_MPLS,
+ rpaths);
+
+ /*
+ * re-resolve all the path-extensions with the new path-list
+ */
+ fib_path_ext_list_resolve(&mt->mt_path_exts,
+ mt->mt_path_list);
+
+ mpls_tunnel_restack(mt);
+ }
+
+ return (fib_path_list_get_n_paths(mt->mt_path_list));
+}
+
+
+static clib_error_t *
+vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, * line_input = &_line_input;
+ vnet_main_t * vnm = vnet_get_main();
+ u8 is_del = 0, l2_only = 0, is_multicast =0;
+ fib_route_path_t rpath, *rpaths = NULL;
+ mpls_label_t out_label = MPLS_LABEL_INVALID;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
+
+ memset(&rpath, 0, sizeof(rpath));
+
+ /* Get a line of input. */
+ if (! unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del %U",
+ unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ is_del = 1;
+ else if (unformat (line_input, "add %U",
+ unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ is_del = 0;
+ else if (unformat (line_input, "add"))
+ is_del = 0;
+ else if (unformat (line_input, "out-labels"))
+ {
+ while (unformat (line_input, "%U",
+ unformat_mpls_unicast_label,
+ &out_label))
+ {
+ vec_add1 (rpath.frp_label_stack, out_label);
+ }
+ }
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+
+ else if (unformat (line_input, "via %U %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6,
+ unformat_vnet_sw_interface, vnm,
+ &rpath.frp_sw_if_index))
+ {
+ rpath.frp_weight = 1;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip6_address,
+ &rpath.frp_addr.ip6))
+ {
+ rpath.frp_fib_index = 0;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP6;
+ }
+ else if (unformat (line_input, "via %U",
+ unformat_ip4_address,
+ &rpath.frp_addr.ip4))
+ {
+ rpath.frp_fib_index = 0;
+ rpath.frp_weight = 1;
+ rpath.frp_sw_if_index = ~0;
+ rpath.frp_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (line_input, "l2-only"))
+ l2_only = 1;
+ else if (unformat (line_input, "multicast"))
+ is_multicast = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vec_add1(rpaths, rpath);
+
+ if (is_del)
+ {
+ if (!vnet_mpls_tunnel_path_remove(sw_if_index, rpaths))
+ {
+ vnet_mpls_tunnel_del(sw_if_index);
+ }
+ }
+ else
+ {
+ if (0 == vec_len(rpath.frp_label_stack))
+ {
+ error = clib_error_return (0, "No Output Labels '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ if (~0 == sw_if_index)
+ {
+ sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast);
+ }
+ vnet_mpls_tunnel_path_add(sw_if_index, rpaths);
+ }
+
+done:
+ vec_free(rpaths);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * This command create a uni-directional MPLS tunnel
+ *
+ * @cliexpar
+ * @cliexstart{create mpls tunnel}
+ * create mpls tunnel via 10.0.0.1 GigEthernet0/8/0 out-label 33 out-label 34
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
+ .path = "mpls tunnel",
+ .short_help =
+ "mpls tunnel via [addr] [interface] [out-labels]",
+ .function = vnet_create_mpls_tunnel_command_fn,
+};
+
+static u8 *
+format_mpls_tunnel (u8 * s, va_list * args)
+{
+ mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *);
+ mpls_tunnel_attribute_t attr;
+
+ s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d",
+ mt - mpls_tunnel_pool,
+ mt->mt_sw_if_index,
+ mt->mt_hw_if_index);
+ if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) {
+ s = format(s, " \n flags:");
+ FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) {
+ if ((1<<attr) & mt->mt_flags) {
+ s = format (s, "%s,", mpls_tunnel_attribute_names[attr]);
+ }
+ }
+ }
+ s = format(s, "\n via:\n");
+ s = fib_path_list_format(mt->mt_path_list, s);
+ s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts);
+ s = format(s, "\n");
+
+ if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
+ {
+ s = format(s, " forwarding: %U\n",
+ format_fib_forw_chain_type,
+ FIB_FORW_CHAIN_TYPE_ETHERNET);
+ s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2);
+ }
+
+ return (s);
+}
+
+static clib_error_t *
+show_mpls_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_tunnel_t * mt;
+ u32 mti = ~0;
+
+ if (pool_elts (mpls_tunnel_pool) == 0)
+ vlib_cli_output (vm, "No MPLS tunnels configured...");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &mti))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == mti)
+ {
+ pool_foreach (mt, mpls_tunnel_pool,
+ ({
+ vlib_cli_output (vm, "[@%d] %U",
+ mt - mpls_tunnel_pool,
+ format_mpls_tunnel, mt);
+ }));
+ }
+ else
+ {
+ if (pool_is_free_index(mpls_tunnel_pool, mti))
+ return clib_error_return (0, "Not atunnel index %d", mti);
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, mti);
+
+ vlib_cli_output (vm, "[@%d] %U",
+ mt - mpls_tunnel_pool,
+ format_mpls_tunnel, mt);
+ }
+
+ return 0;
+}
+
+/*?
+ * This command to show MPLS tunnels
+ *
+ * @cliexpar
+ * @cliexstart{sh mpls tunnel 2}
+ * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5
+ * label-stack:
+ * 3,
+ * via:
+ * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ]
+ * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved,
+ * 10.0.0.2 loop0
+ * [@0]: ipv4 via 10.0.0.2 loop0: IP4: de:ad:00:00:00:00 -> 00:00:11:aa:bb:cc
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = {
+ .path = "show mpls tunnel",
+ .function = show_mpls_tunnel_command_fn,
+};
+
+static mpls_tunnel_t *
+mpls_tunnel_from_fib_node (fib_node_t *node)
+{
+#if (CLIB_DEBUG > 0)
+ ASSERT(FIB_NODE_TYPE_MPLS_TUNNEL == node->fn_type);
+#endif
+ return ((mpls_tunnel_t*) (((char*)node) -
+ STRUCT_OFFSET_OF(mpls_tunnel_t, mt_node)));
+}
+
+/**
+ * Function definition to backwalk a FIB node
+ */
+static fib_node_back_walk_rc_t
+mpls_tunnel_back_walk (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ mpls_tunnel_restack(mpls_tunnel_from_fib_node(node));
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t*
+mpls_tunnel_fib_node_get (fib_node_index_t index)
+{
+ mpls_tunnel_t * mt;
+
+ mt = pool_elt_at_index(mpls_tunnel_pool, index);
+
+ return (&mt->mt_node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+mpls_tunnel_last_lock_gone (fib_node_t *node)
+{
+ /*
+ * The MPLS MPLS tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT(0);
+}
+
+/*
+ * Virtual function table registered by MPLS MPLS tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t mpls_vft = {
+ .fnv_get = mpls_tunnel_fib_node_get,
+ .fnv_last_lock = mpls_tunnel_last_lock_gone,
+ .fnv_back_walk = mpls_tunnel_back_walk,
+};
+
+static clib_error_t *
+mpls_tunnel_init (vlib_main_t *vm)
+{
+ fib_node_register_type(FIB_NODE_TYPE_MPLS_TUNNEL, &mpls_vft);
+
+ return 0;
+}
+VLIB_INIT_FUNCTION(mpls_tunnel_init);
diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h
new file mode 100644
index 00000000..285817c3
--- /dev/null
+++ b/src/vnet/mpls/mpls_tunnel.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MPLS_TUNNEL_H__
+#define __MPLS_TUNNEL_H__
+
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/fib_path_ext.h>
+
+typedef enum mpls_tunnel_attribute_t_
+{
+ MPLS_TUNNEL_ATTRIBUTE_FIRST = 0,
+ /**
+ * @brief The tunnel is L2 only
+ */
+ MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST,
+ /**
+ * @brief The tunnel has an underlying multicast LSP
+ */
+ MPLS_TUNNEL_ATTRIBUTE_MCAST,
+ MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST,
+} mpls_tunnel_attribute_t;
+
+#define MPLS_TUNNEL_ATTRIBUTES { \
+ [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \
+ [MPLS_TUNNEL_ATTRIBUTE_L2] = "L2", \
+}
+#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \
+ for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \
+ _item <= MPLS_TUNNEL_ATTRIBUTE_LAST; \
+ _item++)
+
+typedef enum mpls_tunnel_flag_t_ {
+ MPLS_TUNNEL_FLAG_NONE = 0,
+ MPLS_TUNNEL_FLAG_L2 = (1 << MPLS_TUNNEL_ATTRIBUTE_L2),
+ MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST),
+} __attribute__ ((packed)) mpls_tunnel_flags_t;
+
+
+/**
+ * @brief A uni-directional MPLS tunnel
+ */
+typedef struct mpls_tunnel_t_
+{
+ /**
+ * @brief The tunnel hooks into the FIB control plane graph.
+ */
+ fib_node_t mt_node;
+
+ /**
+ * @brief Tunnel flags
+ */
+ mpls_tunnel_flags_t mt_flags;
+
+ /**
+ * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET
+ * load-balance
+ */
+ dpo_id_t mt_l2_lb;
+
+ /**
+ * @brief The HW interface index of the tunnel interfaces
+ */
+ u32 mt_hw_if_index;
+
+ /**
+ * @brief The SW interface index of the tunnel interfaces
+ */
+ u32 mt_sw_if_index;
+
+ /**
+ * @brief The path-list over which the tunnel's destination is reachable
+ */
+ fib_node_index_t mt_path_list;
+
+ /**
+ * @brief sibling index on the path-list so notifications are received.
+ */
+ u32 mt_sibling_index;
+
+ /**
+ * A vector of path extensions o hold the label stack for each path
+ */
+ fib_path_ext_list_t mt_path_exts;
+} mpls_tunnel_t;
+
+/**
+ * @brief Create a new MPLS tunnel
+ * @return the SW Interface index of the newly created tuneel
+ */
+extern u32 vnet_mpls_tunnel_create (u8 l2_only,
+ u8 is_multicast);
+
+/**
+ * @brief Add a path to an MPLS tunnel
+ */
+extern void vnet_mpls_tunnel_path_add (u32 sw_if_index,
+ fib_route_path_t *rpath);
+
+/**
+ * @brief remove a path from a tunnel.
+ * @return the number of remaining paths. 0 implies the tunnel can be deleted
+ */
+extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index,
+ fib_route_path_t *rpath);
+
+/**
+ * @brief Delete an MPLS tunnel
+ */
+extern void vnet_mpls_tunnel_del (u32 sw_if_index);
+
+extern const mpls_tunnel_t *mpls_tunnel_get(u32 index);
+
+/**
+ * @brief Callback function invoked while walking MPLS tunnels
+ */
+typedef void (*mpls_tunnel_walk_cb_t)(u32 index, void *ctx);
+
+/**
+ * @brief Walk all the MPLS tunnels
+ */
+extern void mpls_tunnel_walk(mpls_tunnel_walk_cb_t cb,
+ void *ctx);
+
+#endif
diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h
new file mode 100644
index 00000000..f1c3191e
--- /dev/null
+++ b/src/vnet/mpls/mpls_types.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __MPLS_TYPES_H__
+#define __MPLS_TYPES_H__
+
+#define MPLS_IETF_MIN_LABEL 0x00000
+#define MPLS_IETF_MAX_LABEL 0xfffff
+
+#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000
+#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f
+
+#define MPLS_IETF_MIN_UNRES_LABEL 0x00010
+#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff
+
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000
+#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002
+#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003
+#define MPLS_IETF_ELI_LABEL 0x00007
+#define MPLS_IETF_GAL_LABEL 0x0000D
+#define MPLS_IETF_ENTROPY_LABEL 0x0000E
+
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null"
+#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul"
+#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null"
+#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul"
+#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert"
+#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null"
+#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl"
+#define MPLS_IETF_ELI_STRING "entropy-label-indicator"
+#define MPLS_IETF_ELI_BRIEF_STRING "eli"
+#define MPLS_IETF_GAL_STRING "gal"
+#define MPLS_IETF_GAL_BRIEF_STRING "gal"
+
+#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1)
+
+#define MPLS_LABEL_IS_REAL(_lbl) \
+ (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \
+ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL))
+
+/**
+ * The top bit of the index, which is the result of the MPLS lookup
+ * is used to determine if the DPO is a load-balance or a replicate
+ */
+#define MPLS_IS_REPLICATE 0x80000000
+
+#endif
diff --git a/src/vnet/mpls/packet.h b/src/vnet/mpls/packet.h
new file mode 100644
index 00000000..bc67445b
--- /dev/null
+++ b/src/vnet/mpls/packet.h
@@ -0,0 +1,125 @@
+#ifndef included_vnet_mpls_packet_h
+#define included_vnet_mpls_packet_h
+
+/*
+ * MPLS packet format
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A label value only, i.e. 20bits.
+ */
+typedef u32 mpls_label_t;
+
+typedef struct {
+ /* Label: top 20 bits [in network byte order] */
+ /* Experimental: 3 bits ... */
+ /* S (bottom of label stack): 1 bit */
+ /* TTL: 8 bits */
+ mpls_label_t label_exp_s_ttl;
+} mpls_unicast_header_t;
+
+typedef enum mpls_eos_bit_t_
+{
+ MPLS_NON_EOS = 0,
+ MPLS_EOS = 1,
+} mpls_eos_bit_t;
+
+#define MPLS_EOS_BITS { \
+ [MPLS_NON_EOS] = "neos", \
+ [MPLS_EOS] = "eos", \
+}
+
+#define FOR_EACH_MPLS_EOS_BIT(_eos) \
+ for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++)
+
+#define MPLS_ENTRY_LABEL_OFFSET 0
+#define MPLS_ENTRY_LABEL_SHIFT 12
+#define MPLS_ENTRY_LABEL_MASK 0x000fffff
+#define MPLS_ENTRY_LABEL_BITS \
+ (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT)
+
+#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */
+#define MPLS_ENTRY_EXP_SHIFT 9
+#define MPLS_ENTRY_EXP_MASK 0x07
+#define MPLS_ENTRY_EXP(mpls) \
+ (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK)
+#define MPLS_ENTRY_EXP_BITS \
+ (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT)
+
+#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */
+#define MPLS_ENTRY_EOS_SHIFT 8
+#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */
+#define MPLS_ENTRY_EOS(mpls) \
+ (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK)
+#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT)
+
+#define MPLS_ENTRY_TTL_OFFSET 3 /* byte offset to ttl field */
+#define MPLS_ENTRY_TTL_SHIFT 0
+#define MPLS_ENTRY_TTL_MASK 0xff
+#define MPLS_ENTRY_TTL(mpls) \
+ (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK)
+#define MPLS_ENTRY_TTL_BITS \
+ (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT)
+
+static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl)
+{
+ return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT);
+}
+
+static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_EXP(label_exp_s_ttl));
+}
+
+static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_EOS(label_exp_s_ttl));
+}
+
+static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl)
+{
+ return (MPLS_ENTRY_TTL(label_exp_s_ttl));
+}
+
+static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl,
+ u32 value)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) |
+ ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl,
+ u32 exp)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) |
+ ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl,
+ u32 eos)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) |
+ ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT));
+}
+
+static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl,
+ u32 ttl)
+{
+ *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) |
+ ((ttl & MPLS_ENTRY_TTL_MASK)));
+}
+
+#endif /* included_vnet_mpls_packet_h */
diff --git a/src/vnet/mpls/pg.c b/src/vnet/mpls/pg.c
new file mode 100644
index 00000000..6ff86e32
--- /dev/null
+++ b/src/vnet/mpls/pg.c
@@ -0,0 +1,71 @@
+/*
+ * pg.c: packet generator mpls interface
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+#include <vnet/mpls/mpls.h>
+
+typedef struct {
+ pg_edit_t label;
+} pg_mpls_header_t;
+
+static inline void
+pg_mpls_header_init (pg_mpls_header_t * e)
+{
+ pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl);
+}
+
+uword
+unformat_pg_mpls_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t * s = va_arg (*args, pg_stream_t *);
+ pg_mpls_header_t * h;
+ vlib_main_t * vm = vlib_get_main();
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t),
+ &group_index);
+ pg_mpls_header_init (h);
+
+ error = 1;
+ if (! unformat (input, "%U",
+ unformat_pg_edit,
+ unformat_mpls_label_net_byte_order, &h->label))
+ goto done;
+
+ {
+ pg_node_t * pg_node = 0;
+ vlib_node_t * ip_lookup_node;
+
+ ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input");
+ ASSERT (ip_lookup_node);
+
+ pg_node = pg_get_node (ip_lookup_node->index);
+
+ if (pg_node && pg_node->unformat_edit
+ && unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ }
+
+ error = 0;
+ done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+