diff options
Diffstat (limited to 'src/vnet/mpls')
-rw-r--r-- | src/vnet/mpls/error.def | 31 | ||||
-rw-r--r-- | src/vnet/mpls/interface.c | 132 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.api | 246 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.c | 627 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.h | 110 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_api.c | 582 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_features.c | 154 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_input.c | 324 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_lookup.c | 723 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_lookup.h | 102 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_output.c | 498 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_tunnel.c | 1070 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_tunnel.h | 137 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_types.h | 60 | ||||
-rw-r--r-- | src/vnet/mpls/packet.h | 125 | ||||
-rw-r--r-- | src/vnet/mpls/pg.c | 71 |
16 files changed, 4992 insertions, 0 deletions
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
new file mode 100644
index 00000000..34a46522
--- /dev/null
+++ b/src/vnet/mpls/error.def
@@ -0,0 +1,31 @@
+/*
+ * error.def: mpls errors (X-macro list; the includer defines mpls_error())
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+mpls_error (NONE, "no error")
+mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
+mpls_error (UNSUPPORTED_VERSION, "unsupported version")
+mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
+mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (NO_LABEL, "MPLS no label for fib/dst")
+mpls_error (TTL_EXPIRED, "MPLS ttl expired")
+mpls_error (S_NOT_SET, "MPLS s-bit not set")
+mpls_error (BAD_LABEL, "invalid FIB id in label")
+mpls_error (NOT_IP4, "non-ip4 packets dropped")
+mpls_error (DISALLOWED_FIB, "disallowed FIB id")
+mpls_error (NOT_ENABLED, "MPLS not enabled")
+mpls_error (DROP, "MPLS DROP DPO")
+mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
new file mode 100644
index 00000000..d7c8e7d3
--- /dev/null
+++ b/src/vnet/mpls/interface.c
@@ -0,0 +1,132 @@
+/*
+ * interface.c: mpls interfaces
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/dpo/classify_dpo.h>
+
+
+/**
+ * @brief Report whether MPLS is enabled on a software interface.
+ *
+ * @param sw_if_index - the software interface to query
+ * @return the per-interface enable count (non-zero means enabled), or 0
+ *         when the interface has no slot in the enable vector.
+ */
+u8
+mpls_sw_interface_is_enabled (u32 sw_if_index)
+{
+  mpls_main_t * mm = &mpls_main;
+
+  /* valid indices are [0, vec_len - 1]; an index equal to the length is
+   * already out of range */
+  if (vec_len(mm->mpls_enabled_by_sw_if_index) <= sw_if_index)
+    return (0);
+
+  return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
+}
+
+/**
+ * @brief Enable or disable MPLS on a software interface.
+ *
+ * Enables are counted per interface; the FIB lock/unlock and the
+ * "mpls-not-enabled" feature toggle only happen on the 0<->1 transition.
+ *
+ * @param mm - the MPLS main structure
+ * @param sw_if_index - the software interface to act on
+ * @param is_enable - non-zero to enable, zero to disable
+ * @param is_api - non-zero selects FIB_SOURCE_API for the table lock,
+ *                 zero selects FIB_SOURCE_CLI
+ * @return 0 on success (including the no-transition cases), or
+ *         VNET_API_ERROR_NO_SUCH_FIB if the default MPLS table is missing.
+ */
+int
+mpls_sw_interface_enable_disable (mpls_main_t * mm,
+                                  u32 sw_if_index,
+                                  u8 is_enable,
+                                  u8 is_api)
+{
+  fib_node_index_t lfib_index;
+
+  vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
+
+  lfib_index = fib_table_find(FIB_PROTOCOL_MPLS,
+                              MPLS_FIB_DEFAULT_TABLE_ID);
+
+  if (~0 == lfib_index)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  /*
+   * enable/disable only on the 1<->0 transition
+   */
+  if (is_enable)
+    {
+      if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
+        return (0);
+
+      fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS,
+                     (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+
+      /* the vector is indexed by sw_if_index below, so it must be
+       * grown to cover that index, not just index 0 */
+      vec_validate(mm->fib_index_by_sw_if_index, sw_if_index);
+      mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
+    }
+  else
+    {
+      ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0);
+
+      /* unbalanced disable: if the ASSERT above is compiled out, do not
+       * let the u8 count wrap around to 255 */
+      if (0 == mm->mpls_enabled_by_sw_if_index[sw_if_index])
+        return (0);
+
+      if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
+        return (0);
+
+      fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
+                       FIB_PROTOCOL_MPLS,
+                       (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+    }
+
+  /* the "not-enabled" feature is on exactly when MPLS is disabled */
+  vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
+                               sw_if_index, !is_enable, 0, 0);
+
+  return (0);
+}
+
+/**
+ * @brief CLI handler for "set interface mpls <interface> enable|disable".
+ *
+ * @return NULL on success, otherwise a clib error describing the parse
+ *         failure or the reason the enable/disable was refused.
+ */
+static clib_error_t *
+mpls_interface_enable_disable (vlib_main_t * vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  vnet_main_t * vnm = vnet_get_main();
+  clib_error_t * error = 0;
+  u32 sw_if_index, enable;
+  int rv;
+
+  sw_if_index = ~0;
+
+  if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  if (unformat (input, "enable"))
+    enable = 1;
+  else if (unformat (input, "disable"))
+    enable = 0;
+  else
+    {
+      /* the format string needs a %U to consume the two trailing args */
+      error = clib_error_return (0, "expected 'enable' or 'disable': `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  rv = mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0);
+
+  /* surface the failure instead of silently reporting success */
+  if (VNET_API_ERROR_NO_SUCH_FIB == rv)
+    error = clib_error_return (0, "default MPLS table must be created first");
+  else if (0 != rv)
+    error = clib_error_return (0, "MPLS enable/disable failed: %d", rv);
+
+ done:
+  return error;
+}
+
+/*?
+ * This command enables an interface to accept MPLS packets
+ *
+ * @cliexpar
+ * @cliexstart{set interface mpls}
+ * set interface mpls GigEthernet0/8/0 enable
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
+  .path = "set interface mpls",
+  .function = mpls_interface_enable_disable,
+  .short_help = "Enable/Disable an interface for MPLS forwarding",
+};
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
new file mode 100644
index 00000000..36488d0c
--- /dev/null
+++ b/src/vnet/mpls/mpls.api
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief Bind/Unbind an MPLS local label to an IP prefix. i.e. create
+           a per-prefix label entry.
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mb_mpls_table_id - The MPLS table-id the MPLS entry will be added in
+    @param mb_label - The MPLS label value to bind
+    @param mb_ip_table_id - The IP table-id of the IP prefix to bind to.
+    @param mb_create_table_if_needed - Create either/both tables if required.
+    @param mb_is_bind - Bind or unbind
+    @param mb_is_ip4 - The prefix to bind to is IPv4
+    @param mb_address_length - Length of IP prefix
+    @param mb_address[16] - IP prefix address
+*/
+autoreply define mpls_ip_bind_unbind
+{
+  u32 client_index;
+  u32 context;
+  u32 mb_mpls_table_id;
+  u32 mb_label;
+  u32 mb_ip_table_id;
+  u8 mb_create_table_if_needed;
+  u8 mb_is_bind;
+  u8 mb_is_ip4;
+  u8 mb_address_length;
+  u8 mb_address[16];
+};
+
+/** \brief MPLS tunnel Add / del route
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mt_is_add - Is this a route add or delete
+    @param mt_sw_if_index - The SW interface index of the tunnel to delete
+    @param mt_l2_only - presumably marks the tunnel as L2-only; confirm
+                        against the handler
+    @param mt_is_multicast - Is the tunnel's underlying LSP multicast
+    @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4
+    @param mt_next_hop_weight - The weight, for UCMP
+    @param mt_next_hop_preference - The preference
+    @param mt_next_hop[16] - the next-hop address
+    @param mt_next_hop_sw_if_index - the next-hop SW interface
+    @param mt_next_hop_table_id - the next-hop table-id (if appropriate)
+    @param mt_next_hop_n_out_labels - the number of next-hop output labels
+    @param mt_next_hop_out_label_stack - the next-hop output label stack, outer most first
+*/
+define mpls_tunnel_add_del
+{
+  u32 client_index;
+  u32 context;
+  u32 mt_sw_if_index;
+  u8 mt_is_add;
+  u8 mt_l2_only;
+  u8 mt_is_multicast;
+  u8 mt_next_hop_proto_is_ip4;
+  u8 mt_next_hop_weight;
+  u8 mt_next_hop_preference;
+  u8 mt_next_hop[16];
+  u8 mt_next_hop_n_out_labels;
+  u32 mt_next_hop_sw_if_index;
+  u32 mt_next_hop_table_id;
+  u32 mt_next_hop_out_label_stack[mt_next_hop_n_out_labels];
+};
+
+/** \brief Reply for MPLS tunnel add / del request
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+    @param sw_if_index - SW interface index of the tunnel created
+*/
+define mpls_tunnel_add_del_reply
+{
+  u32 context;
+  i32 retval;
+  u32 sw_if_index;
+};
+
+/** \brief Dump mpls eth tunnel table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tunnel_index - eth tunnel identifier or -1 in case of all tunnels
+*/
+define mpls_tunnel_dump
+{
+  u32 client_index;
+  u32 context;
+  i32 tunnel_index;
+};
+
+/** \brief FIB path
+    @param sw_if_index - index of the interface
+    @param weight - The weight, for UCMP
+    @param preference - The preference of the path
+    @param is_local - local if non-zero, else remote
+    @param is_drop - Drop the packet
+    @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+    @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+    @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+    @param next_hop[16] - the next hop address
+    @param labels[16] - presumably the path's output label stack; confirm
+                        against the code that fills this type
+
+    WARNING: this type is replicated, pending cleanup completion
+
+*/
+typeonly manual_print manual_endian define fib_path2
+{
+  u32 sw_if_index;
+  u8 weight;
+  u8 preference;
+  u8 is_local;
+  u8 is_drop;
+  u8 is_unreach;
+  u8 is_prohibit;
+  u8 afi;
+  u8 next_hop[16];
+  u32 labels[16];
+};
+
+/** \brief mpls tunnel details
+    @param context - sender context, to match reply w/ request
+    @param mt_sw_if_index - SW interface index of the tunnel
+    @param mt_tunnel_index - index of the tunnel
+    @param mt_l2_only - see mt_l2_only in mpls_tunnel_add_del
+    @param mt_is_multicast - Is the tunnel's underlying LSP multicast
+    @param mt_count - the number of entries in mt_paths
+    @param mt_paths - array of fib_path2 structures
+
+    NOTE(review): mt_sw_if_index and mt_tunnel_index are u8 here while
+    the add/del request and reply carry a u32 sw_if_index - values above
+    255 would not fit. Confirm whether this width is intended.
+*/
+manual_endian manual_print define mpls_tunnel_details
+{
+  u32 context;
+  u8 mt_sw_if_index;
+  u8 mt_tunnel_index;
+  u8 mt_l2_only;
+  u8 mt_is_multicast;
+  u32 mt_count;
+  vl_api_fib_path2_t mt_paths[mt_count];
+};
+
+/** \brief MPLS Table Add / del
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mt_table_id - The MPLS table-id of the table to add or delete
+    @param mt_is_add - Is this a table add or delete
+    @param mt_name - A client provided name/tag for the table. If this
+                     is not set by the client, then VPP will generate
+                     something meaningful.
+*/
+autoreply define mpls_table_add_del
+{
+  u32 client_index;
+  u32 context;
+  u32 mt_table_id;
+  u8 mt_is_add;
+  u8 mt_name[64];
+};
+
+/** \brief MPLS Route Add / del route
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param mr_label - The MPLS label value
+    @param mr_eos - The End of stack bit
+    @param mr_table_id - The MPLS table-id the route is added in
+    @param mr_classify_table_index - If this is a classify route,
+                                     this is the classify table index
+    @param mr_create_table_if_needed - If the MPLS or IP tables do not exist,
+                                       create them
+    @param mr_is_add - Is this a route add or delete
+    @param mr_is_classify - Is this route result a classify
+    @param mr_is_multicast - Is this a multicast route
+    @param mr_is_multipath - Is this route update a multipath - i.e. is this
+                             a path addition to an existing route
+    @param mr_is_resolve_host - Recurse resolution constraint via a host prefix
+    @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix
+    @param mr_is_interface_rx - Interface Receive path
+    @param mr_is_rpf_id - RPF-ID Receive path. The next-hop interface
+                          is used as the RPF-ID
+    @param mr_next_hop_proto - The next-hop protocol, of type dpo_proto_t
+    @param mr_next_hop_weight - The weight, for UCMP
+    @param mr_next_hop_preference - The preference
+    @param mr_next_hop[16] - the next-hop address
+    @param mr_next_hop_sw_if_index - the next-hop SW interface
+    @param mr_next_hop_table_id - the next-hop table-id (if appropriate)
+    @param mr_next_hop_n_out_labels - the number of labels in the label stack
+    @param mr_next_hop_out_label_stack - the next-hop output label stack, outer most first
+    @param mr_next_hop_via_label - The next-hop is resolved via a local label
+*/
+autoreply define mpls_route_add_del
+{
+  u32 client_index;
+  u32 context;
+  u32 mr_label;
+  u8 mr_eos;
+  u32 mr_table_id;
+  u32 mr_classify_table_index;
+  u8 mr_create_table_if_needed;
+  u8 mr_is_add;
+  u8 mr_is_classify;
+  u8 mr_is_multicast;
+  u8 mr_is_multipath;
+  u8 mr_is_resolve_host;
+  u8 mr_is_resolve_attached;
+  u8 mr_is_interface_rx;
+  u8 mr_is_rpf_id;
+  u8 mr_next_hop_proto;
+  u8 mr_next_hop_weight;
+  u8 mr_next_hop_preference;
+  u8 mr_next_hop[16];
+  u8 mr_next_hop_n_out_labels;
+  u32 mr_next_hop_sw_if_index;
+  u32 mr_next_hop_table_id;
+  u32 mr_next_hop_via_label;
+  u32 mr_next_hop_out_label_stack[mr_next_hop_n_out_labels];
+};
+
+/** \brief Dump MPLS fib table
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define mpls_fib_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief mpls FIB table response
+    @param context - sender context, to match reply w/ request
+    @param table_id - MPLS fib table id
+    @param table_name - name of the MPLS fib table
+    @param eos_bit - End-of-stack bit
+    @param label - MPLS label value
+    @param count - the number of fib_path2 entries in path
+    @param path - array of fib_path2 structures
+*/
+manual_endian manual_print define mpls_fib_details
+{
+  u32 context;
+  u32 table_id;
+  u8 table_name[64];
+  u8 eos_bit;
+  u32 label;
+  u32 count;
+  vl_api_fib_path2_t path[count];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
+
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
new file mode 100644
index
00000000..ed24f75f
--- /dev/null
+++ b/src/vnet/mpls/mpls.c
@@ -0,0 +1,627 @@
+/*
+ * mpls.c: mpls
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/mpls_fib.h>
+
+const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS;
+
+mpls_main_t mpls_main;
+
+/**
+ * @brief Format an MPLS label; the IETF reserved labels handled in the
+ * switch below are printed by name, anything else as a decimal number.
+ * va_args: mpls_label_t label
+ */
+u8 * format_mpls_unicast_label (u8 * s, va_list * args)
+{
+  mpls_label_t label = va_arg (*args, mpls_label_t);
+
+  switch (label) {
+  case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL:
+      s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING);
+      break;
+  case MPLS_IETF_ROUTER_ALERT_LABEL:
+      s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING);
+      break;
+  case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL:
+      s = format (s, "%s", MPLS_IETF_IPV6_EXPLICIT_NULL_STRING);
+      break;
+  case MPLS_IETF_IMPLICIT_NULL_LABEL:
+      s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING);
+      break;
+  case MPLS_IETF_ELI_LABEL:
+      s = format (s, "%s", MPLS_IETF_ELI_STRING);
+      break;
+  case MPLS_IETF_GAL_LABEL:
+      s = format (s, "%s", MPLS_IETF_GAL_STRING);
+      break;
+  default:
+      s = format (s, "%d", label);
+      break;
+  }
+  return s;
+}
+
+/**
+ * @brief Parse an MPLS label given either as one of the reserved-label
+ * names (full or brief form) or as a decimal number.
+ * va_args: mpls_label_t *label (output)
+ * @return 1 if a label was parsed, 0 otherwise
+ */
+uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args)
+{
+  mpls_label_t *label = va_arg (*args, mpls_label_t*);
+
+  if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING))
+      *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+  else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING))
+      *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+  else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING))
+      *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+  else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING))
+      *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+  else if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING))
+      *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL;
+  else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING))
+      *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL;
+  else if (unformat (input, MPLS_IETF_ROUTER_ALERT_BRIEF_STRING))
+      *label = MPLS_IETF_ROUTER_ALERT_LABEL;
+  else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING))
+      *label = MPLS_IETF_IMPLICIT_NULL_LABEL;
+  else if (unformat (input, "%d", label))
+      ;
+  else
+    return (0);
+
+  return (1);
+}
+
+/**
+ * @brief Format an end-of-stack bit using the MPLS_EOS_BITS name table.
+ * va_args: mpls_eos_bit_t eb
+ */
+u8 * format_mpls_eos_bit (u8 * s, va_list * args)
+{
+  mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t);
+
+  ASSERT(eb <= MPLS_EOS);
+
+  s = format(s, "%s", mpls_eos_bit_names[eb]);
+
+  return (s);
+}
+
+/**
+ * @brief Format an MPLS header as "[label:ttl:exp:eos]".
+ * va_args: mpls_unicast_header_t hdr (passed by value)
+ */
+u8 * format_mpls_header (u8 * s, va_list * args)
+{
+  mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t);
+
+  return (format(s, "[%U:%d:%d:%U]",
+                 format_mpls_unicast_label,
+                 vnet_mpls_uc_get_label(hdr.label_exp_s_ttl),
+                 vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl),
+                 vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl),
+                 format_mpls_eos_bit,
+                 vnet_mpls_uc_get_s(hdr.label_exp_s_ttl)));
+}
+
+/**
+ * @brief Parse "MPLS <label>" and append the resulting header, in network
+ * byte order with the s-bit set and TTL 0xFF, to a byte vector.
+ * va_args: u8 **result (vector to append to)
+ * @return 1 on success, 0 if the input did not match
+ */
+uword
+unformat_mpls_header (unformat_input_t * input, va_list * args)
+{
+  u8 ** result = va_arg (*args, u8 **);
+  mpls_unicast_header_t _h, * h = &_h;
+  u32 label, label_exp_s_ttl;
+
+  if (! unformat (input, "MPLS %d", &label))
+    return 0;
+
+  label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF;
+  h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl);
+
+  /* Add the mpls header to result. */
+  {
+    void * p;
+    u32 h_n_bytes = sizeof (h[0]);
+
+    vec_add2 (*result, p, h_n_bytes);
+    clib_memcpy (p, h, h_n_bytes);
+  }
+
+  return 1;
+}
+
+/**
+ * @brief Parse "MPLS: label <n>" into a label word in network byte order,
+ * with the s-bit set and TTL 0xFF.
+ * va_args: u32 *result (output)
+ * @return 1 on success, 0 if the input did not match
+ */
+uword
+unformat_mpls_label_net_byte_order (unformat_input_t * input,
+                                    va_list * args)
+{
+  u32 * result = va_arg (*args, u32 *);
+  u32 label;
+
+  if (!unformat (input, "MPLS: label %d", &label))
+    return 0;
+
+  label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */;
+
+  *result = clib_host_to_net_u32 (label);
+  return 1;
+}
+
+/**
+ * @brief Format the fields of an MPLS header held in host byte order.
+ * va_args: mpls_unicast_header_t *h
+ */
+u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args)
+{
+  mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+  u32 label = h->label_exp_s_ttl;
+
+  s = format (s, "label %d exp %d, s %d, ttl %d",
+              vnet_mpls_uc_get_label (label),
+              vnet_mpls_uc_get_exp (label),
+              vnet_mpls_uc_get_s (label),
+              vnet_mpls_uc_get_ttl (label));
+  return s;
+}
+
+/**
+ * @brief Format the fields of an MPLS header held in network byte order;
+ * converts to host order and defers to the host-byte-order formatter.
+ * va_args: mpls_unicast_header_t *h
+ */
+u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args)
+{
+  mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *);
+  mpls_unicast_header_t h_host;
+
+  h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl);
+
+  return format (s, "%U", format_mpls_unicast_header_host_byte_order,
+                 &h_host);
+}
+
+typedef struct {
+  u32 fib_index;
+  u32 entry_index;
+  u32 dest;
+  u32 s_bit;
+  u32 label;
+} show_mpls_fib_t;
+
+/*
+ * The comparators below return -1/0/1 from explicit comparisons. Returning
+ * the difference of two u32 values as an int gives the wrong sign whenever
+ * the operands are more than INT_MAX apart.
+ */
+
+/** @brief Order two show_mpls_fib_t records by dest (network byte order). */
+int
+mpls_dest_cmp(void * a1, void * a2)
+{
+  show_mpls_fib_t * r1 = a1;
+  show_mpls_fib_t * r2 = a2;
+  u32 d1 = clib_net_to_host_u32(r1->dest);
+  u32 d2 = clib_net_to_host_u32(r2->dest);
+
+  return (d1 > d2) - (d1 < d2);
+}
+
+/** @brief Order two show_mpls_fib_t records by fib_index. */
+int
+mpls_fib_index_cmp(void * a1, void * a2)
+{
+  show_mpls_fib_t * r1 = a1;
+  show_mpls_fib_t * r2 = a2;
+
+  return (r1->fib_index > r2->fib_index) - (r1->fib_index < r2->fib_index);
+}
+
+/** @brief Order two show_mpls_fib_t records by label. */
+int
+mpls_label_cmp(void * a1, void * a2)
+{
+  show_mpls_fib_t * r1 = a1;
+  show_mpls_fib_t * r2 = a2;
+
+  return (r1->label > r2->label) - (r1->label < r2->label);
+}
+
+/**
+ * @brief CLI handler for "mpls local-label".
+ *
+ * With an IP prefix on the line, the local label is bound to (or unbound
+ * from) that prefix in the IP table given by "table". Otherwise the parsed
+ * paths are added to (or removed from) the entry for the local label in
+ * the MPLS table given by "table".
+ *
+ * @return NULL on success, otherwise a clib error describing the failure.
+ */
+static clib_error_t *
+vnet_mpls_local_label (vlib_main_t * vm,
+                       unformat_input_t * input,
+                       vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, * line_input = &_line_input;
+  fib_route_path_t *rpaths = NULL, rpath;
+  u32 table_id, is_del, is_ip;
+  mpls_label_t local_label;
+  mpls_label_t out_label;
+  clib_error_t * error;
+  vnet_main_t * vnm;
+  fib_prefix_t pfx;
+
+  vnm = vnet_get_main();
+  error = NULL;
+  is_ip = 0;
+  table_id = 0;
+  is_del = 0;
+  local_label = MPLS_LABEL_INVALID;
+  memset(&pfx, 0, sizeof(pfx));
+
+  /* Get a line of input. */
+  if (! unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* rpath is scratch space for the path parsed in this iteration */
+      memset(&rpath, 0, sizeof(rpath));
+
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "add"))
+        is_del = 0;
+      else if (unformat (line_input, "eos"))
+        pfx.fp_eos = MPLS_EOS;
+      else if (unformat (line_input, "non-eos"))
+        pfx.fp_eos = MPLS_NON_EOS;
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address,
+                         &pfx.fp_addr.ip4,
+                         &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          is_ip = 1;
+        }
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip6_address,
+                         &pfx.fp_addr.ip6,
+                         &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          is_ip = 1;
+        }
+      else if (unformat (line_input, "via %U %U weight %u",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index,
+                         &rpath.frp_weight))
+        {
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1(rpaths, rpath);
+        }
+
+      else if (unformat (line_input, "via %U %U weight %u",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index,
+                         &rpath.frp_weight))
+        {
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1(rpaths, rpath);
+        }
+
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "rx-ip4 %U",
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U next-hop-table %d",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4,
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U next-hop-table %d",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6,
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4))
+        {
+          /*
+           * the recursive next-hops are by default in the same table
+           * as the prefix
+           */
+          rpath.frp_fib_index = table_id;
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6))
+        {
+          rpath.frp_fib_index = table_id;
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "%d", &local_label))
+        ;
+      else if (unformat (line_input,
+                         "ip4-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_proto = DPO_PROTO_IP4;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          pfx.fp_payload_proto = DPO_PROTO_IP4;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input,
+                         "ip6-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_proto = DPO_PROTO_IP6;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          vec_add1(rpaths, rpath);
+          pfx.fp_payload_proto = DPO_PROTO_IP6;
+        }
+      else if (unformat (line_input,
+                         "mpls-lookup-in-table %d",
+                         &rpath.frp_fib_index))
+        {
+          rpath.frp_proto = DPO_PROTO_MPLS;
+          rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID;
+          pfx.fp_payload_proto = DPO_PROTO_MPLS;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input,
+                         "l2-input-on %U",
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_proto = DPO_PROTO_ETHERNET;
+          pfx.fp_payload_proto = DPO_PROTO_ETHERNET;
+          rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX;
+          vec_add1(rpaths, rpath);
+        }
+      else if (unformat (line_input, "out-labels"))
+        {
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then labels");
+              goto done;
+            }
+          else
+            {
+              /* labels attach to the most recently parsed path */
+              while (unformat (line_input, "%U",
+                               unformat_mpls_unicast_label,
+                               &out_label))
+                {
+                  vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack,
+                            out_label);
+                }
+            }
+        }
+      else
+        {
+          error = clib_error_return (0, "unknown input: %U",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+
+    }
+
+  if (MPLS_LABEL_INVALID == local_label)
+    {
+      error = clib_error_return (0, "local-label required: %U",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+
+  if (is_ip)
+    {
+      u32 fib_index = fib_table_find(pfx.fp_proto, table_id);
+
+      if (FIB_NODE_INDEX_INVALID == fib_index)
+        {
+          error = clib_error_return (0, "%U table-id %d does not exist",
+                                     format_fib_protocol, pfx.fp_proto, table_id);
+          goto done;
+        }
+
+      if (is_del)
+        {
+          fib_table_entry_local_label_remove(fib_index, &pfx, local_label);
+        }
+      else
+        {
+          fib_table_entry_local_label_add(fib_index, &pfx, local_label);
+        }
+    }
+  else
+    {
+      fib_node_index_t fib_index;
+      u32 fi;
+
+      if (NULL == rpaths)
+        {
+          error = clib_error_return(0 , "no paths");
+          goto done;
+        }
+
+      pfx.fp_proto = FIB_PROTOCOL_MPLS;
+      pfx.fp_len = 21;
+      pfx.fp_label = local_label;
+      pfx.fp_payload_proto = rpaths[0].frp_proto;
+
+      /*
+       * the CLI parsing stored table Ids, swap to FIB indices.
+       * Test the stored path: the scratch rpath is zeroed at the top of
+       * every parse iteration, so it only reflects the last token parsed
+       * (e.g. "out-labels"), not the path being programmed.
+       */
+      if (FIB_NODE_INDEX_INVALID == rpaths[0].frp_sw_if_index)
+        {
+          fi = fib_table_find(dpo_proto_to_fib(pfx.fp_payload_proto),
+                              rpaths[0].frp_fib_index);
+
+          if (~0 == fi)
+            {
+              error = clib_error_return(0 , "%U Via table %d does not exist",
+                                        format_dpo_proto, pfx.fp_payload_proto,
+                                        rpaths[0].frp_fib_index);
+              goto done;
+            }
+          rpaths[0].frp_fib_index = fi;
+        }
+
+      fib_index = mpls_fib_index_from_table_id(table_id);
+
+      if (FIB_NODE_INDEX_INVALID == fib_index)
+        {
+          error = clib_error_return (0, "MPLS table-id %d does not exist",
+                                     table_id);
+          goto done;
+        }
+
+      if (is_del)
+        {
+          fib_table_entry_path_remove2(fib_index,
+                                       &pfx,
+                                       FIB_SOURCE_CLI,
+                                       rpaths);
+        }
+      else
+        {
+          fib_node_index_t lfe;
+
+          lfe = fib_table_entry_path_add2(fib_index,
+                                          &pfx,
+                                          FIB_SOURCE_CLI,
+                                          FIB_ENTRY_FLAG_NONE,
+                                          rpaths);
+
+          if (FIB_NODE_INDEX_INVALID == lfe)
+            {
+              /* report the EOS bit that was actually programmed */
+              error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d",
+                                         format_mpls_unicast_label, local_label,
+                                         format_mpls_eos_bit, pfx.fp_eos,
+                                         table_id);
+              goto done;
+            }
+        }
+    }
+
+done:
+  /*
+   * NOTE(review): assumes the FIB copies the paths it is handed and does
+   * not keep a reference to this vector - confirm against
+   * fib_table_entry_path_add2. The per-path frp_label_stack vectors are
+   * left alone because their ownership is not visible from here.
+   */
+  vec_free (rpaths);
+  unformat_free (line_input);
+
+  return error;
+}
+
+VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
+  .path = "mpls local-label",
+  .function = vnet_mpls_local_label,
+  .short_help = "Create/Delete MPLS local labels",
+};
+
+/**
+ * @brief CLI handler for "mpls table [add|del] <table-id> [name <name>]".
+ *
+ * @return NULL on success, otherwise a clib error (parse failure or no
+ *         table id given).
+ */
+clib_error_t *
+vnet_mpls_table_cmd (vlib_main_t * vm,
+                     unformat_input_t * main_input,
+                     vlib_cli_command_t * cmdo)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 table_id, is_add;
+  u8 *name = NULL;
+
+  is_add = 1;
+  table_id = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else if (unformat (line_input, "add"))
+        is_add = 1;
+      else if (unformat (line_input, "name %s", &name))
+        ;
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (~0 == table_id)
+    {
+      error = clib_error_return (0, "No table id");
+      goto done;
+    }
+  else
+    {
+      if (is_add)
+        {
+          mpls_table_create (table_id, 0, name);
+        }
+      else
+        {
+          mpls_table_delete (table_id, 0);
+        }
+    }
+
+ done:
+  /*
+   * "name %s" allocates a vector; release it on every exit path.
+   * NOTE(review): assumes mpls_table_create copies the name rather than
+   * keeping this pointer - confirm against its definition.
+   */
+  vec_free (name);
+  unformat_free (line_input);
+  return error;
+}
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete MPLS Tables. All
+ * Tables must be explicitly added before they can be used,
+ * including the default table.
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (mpls_table_command, static) = {
+  .path = "mpls table",
+  .short_help = "mpls table [add|del] <table-id>",
+  .function = vnet_mpls_table_cmd,
+  .is_mp_safe = 1,
+};
+
+/** @brief Unimplemented stub (see FIXME); always returns 0. */
+int
+mpls_fib_reset_labels (u32 fib_id)
+{
+  // FIXME
+  return 0;
+}
+
+/**
+ * @brief MPLS init: runs ip_main_init, then mpls_input_init, returning
+ * the first error encountered.
+ */
+static clib_error_t *
+mpls_init (vlib_main_t * vm)
+{
+  clib_error_t * error;
+
+  if ((error = vlib_call_init_function (vm, ip_main_init)))
+    return error;
+
+  return vlib_call_init_function (vm, mpls_input_init);
+}
+
+VLIB_INIT_FUNCTION (mpls_init);
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
new file mode 100644
index 00000000..cc3eeed0
--- /dev/null
+++ b/src/vnet/mpls/mpls.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_mpls_h
+#define included_vnet_mpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/mpls/mpls_types.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+
+/* One MPLS_ERROR_<name> enumerator per mpls_error() entry in error.def */
+typedef enum
+{
+#define mpls_error(n,s) MPLS_ERROR_##n,
+#include <vnet/mpls/error.def>
+#undef mpls_error
+  MPLS_N_ERROR,
+} mpls_error_t;
+
+/**
+ * @brief Definition of a callback for receiving MPLS interface state change
+ * notifications
+ */
+typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
+							 u32 is_enable);
+
+typedef struct
+{
+  /* MPLS FIB index for each software interface */
+  u32 *fib_index_by_sw_if_index;
+
+  /** A pool of all the MPLS FIBs */
+  struct fib_table_t_ *fibs;
+
+  /** The MPLS-specific FIB structures (struct mpls_fib_t_);
+      NOTE(review): presumably parallel to 'fibs' above - confirm */
+  struct mpls_fib_t_ *mpls_fibs;
+
+  /** A hash table to lookup the mpls_fib by table ID */
+  uword *fib_index_by_table_id;
+
+  /* Feature arc indices */
+  u8 input_feature_arc_index;
+  u8 output_feature_arc_index;
+
+  /* MPLS enable count by software interface; non-zero means enabled */
+  u8 *mpls_enabled_by_sw_if_index;
+} mpls_main_t;
+
+extern mpls_main_t mpls_main;
+
+extern clib_error_t *mpls_feature_init (vlib_main_t * vm);
+
+format_function_t format_mpls_eos_bit;
+format_function_t format_mpls_unicast_header_net_byte_order;
+format_function_t format_mpls_unicast_label;
+format_function_t format_mpls_header;
+
+extern vlib_node_registration_t mpls_input_node;
+extern vlib_node_registration_t mpls_output_node;
+extern vlib_node_registration_t mpls_midchain_node;
+
+/* Parse an MPLS label: "MPLS: label <n>" into a network byte order label
+   word, or a reserved-label name / decimal number into an mpls_label_t. */
+unformat_function_t unformat_mpls_label_net_byte_order;
+unformat_function_t unformat_mpls_unicast_label;
+
+/* Parse mpls header. */
+unformat_function_t unformat_mpls_header;
+unformat_function_t unformat_pg_mpls_header;
+
+/* Enable/disable MPLS on an interface. Returns 0 on success or
+   VNET_API_ERROR_NO_SUCH_FIB when the default MPLS table does not exist.
+   is_api selects the FIB source (API vs CLI) used for the table lock. */
+int mpls_sw_interface_enable_disable (mpls_main_t * mm,
+				      u32 sw_if_index,
+				      u8 is_enable, u8 is_api);
+
+/* Non-zero if MPLS is enabled on the interface */
+u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+
+/* Currently an unimplemented stub that returns 0 */
+int mpls_fib_reset_labels (u32 fib_id);
+
+/* Comparators over show_mpls_fib_t records (by dest, fib_index, label) */
+int mpls_dest_cmp (void *a1, void *a2);
+
+int mpls_fib_index_cmp (void *a1, void *a2);
+
+int mpls_label_cmp (void *a1, void *a2);
+
+/* Create/delete an MPLS table; is_api selects the FIB source (API vs CLI) */
+void mpls_table_create (u32 table_id, u8 is_api, const u8 * name);
+void mpls_table_delete (u32 table_id, u8 is_api);
+
+#endif /* included_vnet_mpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
new file mode 100644
index 00000000..762c40ff
--- /dev/null
+++ b/src/vnet/mpls/mpls_api.c
@@ -0,0 +1,582 @@
+/*
+ *------------------------------------------------------------------
+ * mpls_api.c - mpls api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/mpls/mpls.h> +#include <vnet/mpls/mpls_tunnel.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_api.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/fib/fib_path_list.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(MPLS_IP_BIND_UNBIND, mpls_ip_bind_unbind) \ +_(MPLS_ROUTE_ADD_DEL, mpls_route_add_del) \ +_(MPLS_TABLE_ADD_DEL, mpls_table_add_del) \ +_(MPLS_TUNNEL_ADD_DEL, mpls_tunnel_add_del) \ +_(MPLS_TUNNEL_DUMP, mpls_tunnel_dump) \ +_(MPLS_FIB_DUMP, mpls_fib_dump) + +extern void stats_dslock_with_hint (int hint, int tag); +extern void stats_dsunlock (void); + +void +mpls_table_delete (u32 table_id, u8 is_api) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + * + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, + FIB_PROTOCOL_MPLS, + (is_api ? 
FIB_SOURCE_API : FIB_SOURCE_CLI)); + } +} + +void +vl_api_mpls_table_add_del_t_handler (vl_api_mpls_table_add_del_t * mp) +{ + vl_api_mpls_table_add_del_reply_t *rmp; + vnet_main_t *vnm; + int rv = 0; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + if (mp->mt_is_add) + mpls_table_create (ntohl (mp->mt_table_id), 1, mp->mt_name); + else + mpls_table_delete (ntohl (mp->mt_table_id), 1); + + // NB: Nothing sets rv; none of the above returns an error + + REPLY_MACRO (VL_API_MPLS_TABLE_ADD_DEL_REPLY); +} + +static int +mpls_ip_bind_unbind_handler (vnet_main_t * vnm, + vl_api_mpls_ip_bind_unbind_t * mp) +{ + u32 mpls_fib_index, ip_fib_index; + + mpls_fib_index = + fib_table_find (FIB_PROTOCOL_MPLS, ntohl (mp->mb_mpls_table_id)); + + if (~0 == mpls_fib_index) + { + return VNET_API_ERROR_NO_SUCH_FIB; + } + + ip_fib_index = fib_table_find ((mp->mb_is_ip4 ? + FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6), + ntohl (mp->mb_ip_table_id)); + if (~0 == ip_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_prefix_t pfx = { + .fp_len = mp->mb_address_length, + }; + + if (mp->mb_is_ip4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&pfx.fp_addr.ip4, mp->mb_address, + sizeof (pfx.fp_addr.ip4)); + } + else + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&pfx.fp_addr.ip6, mp->mb_address, + sizeof (pfx.fp_addr.ip6)); + } + + if (mp->mb_is_bind) + fib_table_entry_local_label_add (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + else + fib_table_entry_local_label_remove (ip_fib_index, &pfx, + ntohl (mp->mb_label)); + + return (0); +} + +void +vl_api_mpls_ip_bind_unbind_t_handler (vl_api_mpls_ip_bind_unbind_t * mp) +{ + vl_api_mpls_ip_bind_unbind_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_ip_bind_unbind_handler (vnm, mp); + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_IP_BIND_UNBIND_REPLY); +} + +static int +mpls_route_add_del_t_handler (vnet_main_t * vnm, + vl_api_mpls_route_add_del_t * mp) +{ + u32 fib_index, next_hop_fib_index; + mpls_label_t *label_stack = NULL; + int rv, ii, n_labels;; + + fib_prefix_t pfx = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = mp->mr_eos, + .fp_label = ntohl (mp->mr_label), + }; + if (pfx.fp_eos) + { + pfx.fp_payload_proto = mp->mr_next_hop_proto; + } + else + { + pfx.fp_payload_proto = DPO_PROTO_MPLS; + } + + rv = add_del_route_check (FIB_PROTOCOL_MPLS, + mp->mr_table_id, + mp->mr_next_hop_sw_if_index, + pfx.fp_payload_proto, + mp->mr_next_hop_table_id, + mp->mr_is_rpf_id, + &fib_index, &next_hop_fib_index); + + if (0 != rv) + return (rv); + + ip46_address_t nh; + memset (&nh, 0, sizeof (nh)); + + if (DPO_PROTO_IP4 == mp->mr_next_hop_proto) + memcpy (&nh.ip4, mp->mr_next_hop, sizeof (nh.ip4)); + else if (DPO_PROTO_IP6 == mp->mr_next_hop_proto) + memcpy (&nh.ip6, mp->mr_next_hop, sizeof (nh.ip6)); + + n_labels = mp->mr_next_hop_n_out_labels; + if (n_labels == 0) + ; + else if (1 == n_labels) + vec_add1 (label_stack, ntohl (mp->mr_next_hop_out_label_stack[0])); + else + { + vec_validate (label_stack, n_labels - 1); + for (ii = 0; ii < n_labels; ii++) + label_stack[ii] = ntohl (mp->mr_next_hop_out_label_stack[ii]); + } + + return (add_del_route_t_handler (mp->mr_is_multipath, mp->mr_is_add, 0, // mp->is_drop, + 0, // mp->is_unreach, + 0, // mp->is_prohibit, + 0, // mp->is_local, + mp->mr_is_multicast, + mp->mr_is_classify, + mp->mr_classify_table_index, + mp->mr_is_resolve_host, + mp->mr_is_resolve_attached, + mp->mr_is_interface_rx, + mp->mr_is_rpf_id, + fib_index, &pfx, + mp->mr_next_hop_proto, + &nh, ntohl (mp->mr_next_hop_sw_if_index), + next_hop_fib_index, + mp->mr_next_hop_weight, + mp->mr_next_hop_preference, + ntohl (mp->mr_next_hop_via_label), + label_stack)); +} + +void +vl_api_mpls_route_add_del_t_handler 
(vl_api_mpls_route_add_del_t * mp) +{ + vl_api_mpls_route_add_del_reply_t *rmp; + vnet_main_t *vnm; + int rv; + + vnm = vnet_get_main (); + vnm->api_errno = 0; + + rv = mpls_route_add_del_t_handler (vnm, mp); + + rv = (rv == 0) ? vnm->api_errno : rv; + + REPLY_MACRO (VL_API_MPLS_ROUTE_ADD_DEL_REPLY); +} + +void +mpls_table_create (u32 table_id, u8 is_api, const u8 * name) +{ + u32 fib_index; + + /* + * The MPLS defult table must also be explicitly created via the API. + * So in contrast to IP, it gets no special treatment here. + */ + + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + */ + fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock_w_name (FIB_PROTOCOL_MPLS, + table_id, + (is_api ? + FIB_SOURCE_API : + FIB_SOURCE_CLI), name); + } +} + +static void +vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) +{ + vl_api_mpls_tunnel_add_del_reply_t *rmp; + int rv = 0; + u32 tunnel_sw_if_index; + int ii; + fib_route_path_t rpath, *rpaths = NULL; + + memset (&rpath, 0, sizeof (rpath)); + + stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ ); + + if (mp->mt_next_hop_proto_is_ip4) + { + rpath.frp_proto = DPO_PROTO_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = DPO_PROTO_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_weight = 1; + + if (mp->mt_is_add) + { + for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) + vec_add1 (rpath.frp_label_stack, + ntohl (mp->mt_next_hop_out_label_stack[ii])); + } + + vec_add1 (rpaths, rpath); + + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + + if (mp->mt_is_add) + { + if (~0 == tunnel_sw_if_index) + tunnel_sw_if_index = vnet_mpls_tunnel_create 
(mp->mt_l2_only, + mp->mt_is_multicast); + vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths); + } + else + { + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths)) + vnet_mpls_tunnel_del (tunnel_sw_if_index); + } + + vec_free (rpaths); + + stats_dsunlock (); + + /* *INDENT-OFF* */ + REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY, + ({ + rmp->sw_if_index = ntohl(tunnel_sw_if_index); + })); + /* *INDENT-ON* */ +} + +typedef struct mpls_tunnel_send_walk_ctx_t_ +{ + unix_shared_memory_queue_t *q; + u32 index; + u32 context; +} mpls_tunnel_send_walk_ctx_t; + +static void +send_mpls_tunnel_entry (u32 mti, void *arg) +{ + fib_route_path_encode_t *api_rpaths, *api_rpath; + mpls_tunnel_send_walk_ctx_t *ctx; + vl_api_mpls_tunnel_details_t *mp; + const mpls_tunnel_t *mt; + vl_api_fib_path2_t *fp; + u32 n; + + ctx = arg; + + if (~0 != ctx->index && mti != ctx->index) + return; + + mt = mpls_tunnel_get (mti); + n = fib_path_list_get_n_paths (mt->mt_path_list); + + mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); + memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); + + mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); + mp->context = ctx->context; + + mp->mt_tunnel_index = ntohl (mti); + mp->mt_count = ntohl (n); + + fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths); + + fp = mp->mt_paths; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = api_rpath->rpath.frp_weight; + fp->preference = api_rpath->rpath.frp_preference; + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + // FIXME + // memcpy (mp->mt_next_hop_out_labels, + // mt->mt_label_stack, nlabels * sizeof (u32)); + + + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); +} + +static void +vl_api_mpls_tunnel_dump_t_handler (vl_api_mpls_tunnel_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = 
vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + mpls_tunnel_send_walk_ctx_t ctx = { + .q = q, + .index = ntohl (mp->tunnel_index), + .context = mp->context, + }; + mpls_tunnel_walk (send_mpls_tunnel_entry, &ctx); +} + +static void +send_mpls_fib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + const fib_table_t * table, + u32 label, u32 eos, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_mpls_fib_details_t *mp; + fib_route_path_encode_t *api_rpath; + vl_api_fib_path2_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MPLS_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table->ft_table_id); + memcpy (mp->table_name, table->ft_desc, + clib_min (vec_len (table->ft_desc), sizeof (mp->table_name))); + mp->eos_bit = eos; + mp->label = htonl (label); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + fp->weight = api_rpath->rpath.frp_weight; + fp->preference = api_rpath->rpath.frp_preference; + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct vl_api_mpls_fib_dump_table_walk_ctx_t_ +{ + fib_node_index_t *lfeis; +} vl_api_mpls_fib_dump_table_walk_ctx_t; + +static int +vl_api_mpls_fib_dump_table_walk (fib_node_index_t fei, void *arg) +{ + vl_api_mpls_fib_dump_table_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->lfeis, fei); + + return (1); +} + +static void +vl_api_mpls_fib_dump_t_handler (vl_api_mpls_fib_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + mpls_main_t *mm = &mpls_main; + fib_table_t *fib_table; + mpls_fib_t *mpls_fib; + fib_node_index_t *lfeip = NULL; + fib_prefix_t pfx; + u32 
fib_index; + fib_route_path_encode_t *api_rpaths; + vl_api_mpls_fib_dump_table_walk_ctx_t ctx = { + .lfeis = NULL, + }; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (mpls_fib, mm->mpls_fibs, + ({ + mpls_fib_table_walk (mpls_fib, + vl_api_mpls_fib_dump_table_walk, + &ctx); + })); + /* *INDENT-ON* */ + vec_sort_with_function (ctx.lfeis, fib_entry_cmp_for_sort); + + vec_foreach (lfeip, ctx.lfeis) + { + fib_entry_get_prefix (*lfeip, &pfx); + fib_index = fib_entry_get_fib_index (*lfeip); + fib_table = fib_table_get (fib_index, pfx.fp_proto); + api_rpaths = NULL; + fib_entry_encode (*lfeip, &api_rpaths); + send_mpls_fib_details (am, q, + fib_table, pfx.fp_label, + pfx.fp_eos, api_rpaths, mp->context); + vec_free (api_rpaths); + } + + vec_free (ctx.lfeis); +} + +/* + * mpls_api_hookup + * Add vpe's API message handlers to the table. + * vlib has alread mapped shared memory and + * added the client registration handlers. 
 * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
 */
#define vl_msg_name_crc_list
#include <vnet/vnet_all_api_h.h>
#undef vl_msg_name_crc_list

/*
 * Register a "<name>_<crc>" string against its message id for every
 * entry of foreach_vl_msg_name_crc_mpls.
 */
static void
setup_message_id_table (api_main_t * am)
{
#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
  foreach_vl_msg_name_crc_mpls;
#undef _
}

/*
 * API init function: installs the handler, endian and print functions for
 * every message listed in foreach_vpe_api_msg, enlarges the trace record
 * of MPLS_TUNNEL_ADD_DEL and fills the message-id table.
 * Always returns 0 (no error).
 */
static clib_error_t *
mpls_api_hookup (vlib_main_t * vm)
{
  api_main_t *am = &api_main;

  /* NOTE(review): the trailing 1 is presumably the "traced" flag of
     vl_msg_api_set_handlers - confirm against its prototype */
#define _(N,n)                                                  \
    vl_msg_api_set_handlers(VL_API_##N, #n,                     \
                           vl_api_##n##_t_handler,              \
                           vl_noop_handler,                     \
                           vl_api_##n##_t_endian,               \
                           vl_api_##n##_t_print,                \
                           sizeof(vl_api_##n##_t), 1);
  foreach_vpe_api_msg;
#undef _

  /*
   * Trace space for 8 MPLS encap labels
   */
  am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32);

  /*
   * Set up the (msg_name, crc, message-id) table
   */
  setup_message_id_table (am);

  return 0;
}

VLIB_API_INIT_FUNCTION (mpls_api_hookup);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c
new file mode 100644
index 00000000..0281d0c2
--- /dev/null
+++ b/src/vnet/mpls/mpls_features.c
@@ -0,0 +1,154 @@
/*
 * mpls_features.c: MPLS input and output features
 *
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/mpls/mpls.h>

/*
 * Common body of the punt/drop/not-enabled nodes: hand every buffer of the
 * frame to vlib_error_drop_buffers() with the given MPLS error code,
 * using next index 0 and mpls_input_node's index for the error.
 * Returns the number of packets in the frame.
 */
always_inline uword
mpls_terminate (vlib_main_t * vm,
                vlib_node_runtime_t * node,
                vlib_frame_t * frame,
                int error_code)
{
  u32 * buffers = vlib_frame_vector_args (frame);
  uword n_packets = frame->n_vectors;

  vlib_error_drop_buffers (vm, node,
                           buffers,
                           /* stride */ 1,
                           n_packets,
                           /* next */ 0,
                           mpls_input_node.index,
                           error_code);

  return n_packets;
}

/* "mpls-punt" node function: terminate the frame with MPLS_ERROR_PUNT */
static uword
mpls_punt (vlib_main_t * vm,
           vlib_node_runtime_t * node,
           vlib_frame_t * frame)
{
    return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT));
}

/* next 0 of this node is "error-punt" */
VLIB_REGISTER_NODE (mpls_punt_node) = {
  .function = mpls_punt,
  .name = "mpls-punt",
  .vector_size = sizeof (u32),

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-punt",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt)

/* "mpls-drop" node function: terminate the frame with MPLS_ERROR_DROP */
static uword
mpls_drop (vlib_main_t * vm,
           vlib_node_runtime_t * node,
           vlib_frame_t * frame)
{
    return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP));
}

/* next 0 of this node is "error-drop" */
VLIB_REGISTER_NODE (mpls_drop_node) = {
  .function = mpls_drop,
  .name = "mpls-drop",
  .vector_size = sizeof (u32),

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop)

/* "mpls-not-enabled" node function: terminate the frame with
   MPLS_ERROR_NOT_ENABLED */
static uword
mpls_not_enabled (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
{
    return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED));
}

/* next 0 of this node is "error-drop" */
VLIB_REGISTER_NODE (mpls_not_enabled_node) = {
  .function = mpls_not_enabled,
  .name = "mpls-not-enabled",
  .vector_size = sizeof (u32),

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-drop",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled)

/* The "mpls-input" feature arc; its index is stored in mpls_main */
VNET_FEATURE_ARC_INIT (mpls_input, static) =
{
  .arc_name  = "mpls-input",
  .start_nodes = VNET_FEATURES ("mpls-input"),
  .arc_index_ptr = &mpls_main.input_feature_arc_index,
};

/* "mpls-not-enabled" is ordered before "mpls-lookup" on the input arc */
VNET_FEATURE_INIT (mpls_not_enabled, static) = {
  .arc_name = "mpls-input",
  .node_name = "mpls-not-enabled",
  .runs_before = VNET_FEATURES ("mpls-lookup"),
};

VNET_FEATURE_INIT (mpls_lookup, static) = {
  .arc_name = "mpls-input",
  .node_name = "mpls-lookup",
  .runs_before = VNET_FEATURES (0), /* not before any other features */
};

/* The "mpls-output" feature arc; its index is stored in mpls_main */
VNET_FEATURE_ARC_INIT (mpls_output, static) =
{
  .arc_name  = "mpls-output",
  .start_nodes = VNET_FEATURES ("mpls-output", "mpls-midchain"),
  .arc_index_ptr = &mpls_main.output_feature_arc_index,
};

/* Built-in MPLS tx feature path definition
   (this comment previously said "ip4"; the arc below is "mpls-output") */
VNET_FEATURE_INIT (mpls_interface_output, static) = {
  .arc_name = "mpls-output",
  .node_name = "interface-output",
  .runs_before = 0, /* not before any other features */
};

/*
 * Software-interface add/delete callback: make sure the per-interface
 * vectors in mpls_main cover sw_if_index (new slots are initialised to 0)
 * and pass is_add to vnet_feature_enable_disable() for the
 * "mpls-not-enabled" feature on the "mpls-input" arc
 * (presumably is_add acts as the enable flag - confirm against the
 * prototype). Always returns 0.
 */
static clib_error_t *
mpls_sw_interface_add_del (vnet_main_t * vnm,
                           u32 sw_if_index,
                           u32 is_add)
{
  mpls_main_t * mm = &mpls_main;

  vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0);
  vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0);

  vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index,
                               is_add, 0, 0);

  return /* no error */ 0;
}

VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del);


diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
new file mode 100644
index 00000000..86ad8bba
--- /dev/null
+++ b/src/vnet/mpls/mpls_input.c
@@ -0,0 +1,324 @@
/*
 * node.c: MPLS input
 *
 * Copyright (c) 2012-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/mpls/mpls.h>
#include <vnet/feature/feature.h>

/* Per-packet trace record written by the mpls-input node */
typedef struct {
  u32 next_index;
  u32 label_net_byte_order;
} mpls_input_trace_t;

/* (enum suffix, next node name) for each next node of mpls-input */
#define foreach_mpls_input_next \
_(DROP, "error-drop")           \
_(LOOKUP, "mpls-lookup")

typedef enum {
#define _(s,n) MPLS_INPUT_NEXT_##s,
  foreach_mpls_input_next
#undef _
  MPLS_INPUT_N_NEXT,
} mpls_input_next_t;

/*
 * Trace formatter: prints the next node's name and index plus the label
 * and TTL extracted from the recorded header word. The name stays "BUG!"
 * when next_index matches none of the foreach_mpls_input_next entries.
 */
static u8 *
format_mpls_input_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
  char * next_name;
  u32 label;
  next_name = "BUG!";
  label = clib_net_to_host_u32(t->label_net_byte_order);

#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
  foreach_mpls_input_next;
#undef _

  s = format (s, "MPLS: next %s[%d] label %d ttl %d",
              next_name, t->next_index,
              vnet_mpls_uc_get_label(label),
              vnet_mpls_uc_get_ttl(label));

  return s;
}

vlib_node_registration_t mpls_input_node;

/*
 * Runtime data of the mpls-input node.
 * NOTE(review): in this file the last_* fields are only ever written,
 * never read - confirm whether they are still needed.
 */
typedef struct {
  u32 last_label;
  u32 last_inner_fib_index;
  u32 last_outer_fib_index;
  mpls_main_t * mpls_main;
} mpls_input_runtime_t;

/*
 * mpls-input worker: for every packet of the frame, either drop it with
 * MPLS_ERROR_TTL_EXPIRED (4th header byte is zero) or start the MPLS input
 * feature arc for the RX interface and bump that interface's MPLS counter.
 * Packets are handled two at a time while at least four remain (so the
 * following two can be prefetched), then one at a time.
 * Returns the number of packets in the frame.
 */
static inline uword
mpls_input_inline (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * from_frame)
{
  u32 n_left_from, next_index, * from, * to_next;
  mpls_input_runtime_t * rt;
  mpls_main_t * mm;
  u32 thread_index = vlib_get_thread_index();
  vlib_simple_counter_main_t * cm;
  vnet_main_t * vnm = vnet_get_main();

  from = vlib_frame_vector_args (from_frame);
  n_left_from = from_frame->n_vectors;
  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
  mm = rt->mpls_main;
  /*
   * Force an initial lookup every time, in case the control-plane
   * changed the label->FIB mapping.
   */
  rt->last_label = ~0;

  next_index = node->cached_next_index;

  /* per-interface MPLS packet counter */
  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
                         VNET_INTERFACE_COUNTER_MPLS);

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);

      /* dual loop: needs 4 packets left because from[2] and from[3]
         are prefetched */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, next0, sw_if_index0;
          u32 bi1, next1, sw_if_index1;
          vlib_buffer_t *b0, *b1;
          char *h0, *h1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, STORE);
            vlib_prefetch_buffer_header (p3, STORE);

            CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
            CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
          }

          /* speculatively enqueue both packets to the current next frame */
          bi0 = to_next[0] = from[0];
          bi1 = to_next[1] = from[1];

          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          h0 = vlib_buffer_get_current (b0);
          h1 = vlib_buffer_get_current (b1);

          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];

          /* TTL expired? */
          if (PREDICT_FALSE(h0[3] == 0))
            {
              next0 = MPLS_INPUT_NEXT_DROP;
              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
            }
          else
            {
              next0 = MPLS_INPUT_NEXT_LOOKUP;
              vnet_feature_arc_start(mm->input_feature_arc_index,
                                     sw_if_index0, &next0, b0);
              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
            }

          if (PREDICT_FALSE(h1[3] == 0))
            {
              next1 = MPLS_INPUT_NEXT_DROP;
              b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
            }
          else
            {
              next1 = MPLS_INPUT_NEXT_LOOKUP;
              vnet_feature_arc_start(mm->input_feature_arc_index,
                                     sw_if_index1, &next1, b1);
              vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
            }

          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
                                                       b0, sizeof (*tr));
              tr->next_index = next0;
              tr->label_net_byte_order = *((u32*)h0);
            }
          if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
            {
              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
                                                       b1, sizeof (*tr));
              tr->next_index = next1;
              tr->label_net_byte_order = *((u32*)h1);
            }

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1,
                                           next0, next1);
        }

      /* single loop: whatever the dual loop left over */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 sw_if_index0, next0, bi0;
          vlib_buffer_t * b0;
          char * h0;

          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);
          h0 = vlib_buffer_get_current (b0);
          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];

          /* TTL expired? */
          if (PREDICT_FALSE(h0[3] == 0))
            {
              next0 = MPLS_INPUT_NEXT_DROP;
              b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
            }
          else
            {
              next0 = MPLS_INPUT_NEXT_LOOKUP;
              vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
              vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
            }

          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
            {
              mpls_input_trace_t *tr = vlib_add_trace (vm, node,
                                                       b0, sizeof (*tr));
              tr->next_index = next0;
              tr->label_net_byte_order = *(u32*)h0;
            }

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }
  /* counts every packet of the frame, including TTL-expired drops */
  vlib_node_increment_counter (vm, mpls_input_node.index,
                               MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors);
  return from_frame->n_vectors;
}

/* Node function of "mpls-input": thin wrapper around mpls_input_inline() */
static uword
mpls_input (vlib_main_t * vm,
            vlib_node_runtime_t * node,
            vlib_frame_t * from_frame)
{
  return mpls_input_inline (vm, node, from_frame);
}

/* Error strings, in the same order as the mpls_error_t enumerators */
static char * mpls_error_strings[] = {
#define mpls_error(n,s) s,
#include "error.def"
#undef mpls_error
};

VLIB_REGISTER_NODE (mpls_input_node) = {
  .function = mpls_input,
  .name = "mpls-input",
  /* Takes a vector of packets. */
  .vector_size = sizeof (u32),

  .runtime_data_bytes = sizeof(mpls_input_runtime_t),

  .n_errors = MPLS_N_ERROR,
  .error_strings = mpls_error_strings,

  .n_next_nodes = MPLS_INPUT_N_NEXT,
  .next_nodes = {
#define _(s,n) [MPLS_INPUT_NEXT_##s] = n,
    foreach_mpls_input_next
#undef _
  },

  .format_buffer = format_mpls_unicast_header_net_byte_order,
  .format_trace = format_mpls_input_trace,
};

VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input)

/*
 * Hook the mpls-input node up: install the packet-generator header
 * parser, initialise the node's runtime data and register the node as the
 * ethernet input handler for ETHERNET_TYPE_MPLS.
 */
static void
mpls_setup_nodes (vlib_main_t * vm)
{
  mpls_input_runtime_t * rt;
  pg_node_t * pn;

  pn = pg_get_node (mpls_input_node.index);
  pn->unformat_edit = unformat_pg_mpls_header;

  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
  rt->last_label = (u32) ~0;
  rt->last_inner_fib_index = 0;
  rt->last_outer_fib_index = 0;
  rt->mpls_main = &mpls_main;

  ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS,
                                mpls_input_node.index);
}

/*
 * Init function: runs mpls_init first, then sets up the nodes.
 * An error from mpls_init is reported via clib_error_report() but not
 * propagated; this function always returns 0.
 */
static clib_error_t * mpls_input_init (vlib_main_t * vm)
{
  clib_error_t * error;

  error = vlib_call_init_function (vm, mpls_init);
  if (error)
    clib_error_report (error);

  mpls_setup_nodes (vm);

  return 0;
}

VLIB_INIT_FUNCTION (mpls_input_init);

/*
 * Worker init function: initialise the mpls-input runtime data obtained
 * through the given vlib_main_t, with the same values mpls_setup_nodes()
 * uses. Always returns 0.
 */
static clib_error_t * mpls_input_worker_init (vlib_main_t * vm)
{
  mpls_input_runtime_t * rt;
  rt = vlib_node_get_runtime_data (vm, mpls_input_node.index);
  rt->last_label = (u32) ~0;
  rt->last_inner_fib_index = 0;
  rt->last_outer_fib_index = 0;
  rt->mpls_main = &mpls_main;
  return 0;
}

VLIB_WORKER_INIT_FUNCTION (mpls_input_worker_init);
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
new file mode 100644
index 00000000..30031e51
--- /dev/null
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -0,0 +1,723 @@
/*
 * mpls_lookup.c: MPLS lookup
 *
 * Copyright (c) 2012-2014 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls_lookup.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/replicate_dpo.h> + +/** + * Static MPLS VLIB forwarding node + */ +static vlib_node_registration_t mpls_lookup_node; + +/** + * The arc/edge from the MPLS lookup node to the MPLS replicate node + */ +u32 mpls_lookup_to_replicate_edge; + +typedef struct { + u32 next_index; + u32 lb_index; + u32 lfib_index; + u32 label_net_byte_order; + u32 hash; +} mpls_lookup_trace_t; + +static u8 * +format_mpls_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); + + s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x " + "label %d eos %d", + t->next_index, t->lfib_index, t->lb_index, t->hash, + vnet_mpls_uc_get_label( + clib_net_to_host_u32(t->label_net_byte_order)), + vnet_mpls_uc_get_s( + clib_net_to_host_u32(t->label_net_byte_order))); + return s; +} + +static inline uword +mpls_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + u32 thread_index = vlib_get_thread_index(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + u32 lbi1, next1, lfib_index1, bi1, hash_c1; + const mpls_unicast_header_t * h1; + const load_balance_t *lb1; + const dpo_id_t *dpo1; + vlib_buffer_t * b1; + u32 lbi2, next2, lfib_index2, bi2, hash_c2; + const mpls_unicast_header_t * h2; + const load_balance_t *lb2; + const dpo_id_t *dpo2; + vlib_buffer_t * b2; + u32 lbi3, next3, lfib_index3, bi3, hash_c3; + const mpls_unicast_header_t * h3; + const load_balance_t *lb3; + const dpo_id_t *dpo3; + vlib_buffer_t * b3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3, *p4, *p5; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + vlib_prefetch_buffer_header (p4, STORE); + vlib_prefetch_buffer_header (p5, STORE); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE); + } + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + bi2 = to_next[2] = from[2]; + bi3 = to_next[3] = from[3]; + + from += 4; + n_left_from -= 4; + to_next += 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + h2 = vlib_buffer_get_current (b2); + h3 = vlib_buffer_get_current (b3); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + 
vnet_buffer(b0)->sw_if_index[VLIB_RX]); + lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b1)->sw_if_index[VLIB_RX]); + lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b2)->sw_if_index[VLIB_RX]); + lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b3)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); + lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1); + lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); + lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); + + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; + hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0; + hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; + + if (MPLS_IS_REPLICATE & lbi0) + { + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); + } + else + { + lb0 = load_balance_get(lbi0); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + dpo0 = load_balance_get_fwd_bucket + (lb0, + (hash_c0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + next0 = dpo0->dpoi_next_node; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + } + if (MPLS_IS_REPLICATE & lbi1) + { + next1 = mpls_lookup_to_replicate_edge; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + (lbi1 & ~MPLS_IS_REPLICATE); + } + else + { + lb1 = load_balance_get(lbi1); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + { + hash_c1 = vnet_buffer (b1)->ip.flow_hash = + mpls_compute_flow_hash(h1, lb1->lb_hash_config); + dpo1 = 
load_balance_get_fwd_bucket + (lb1, + (hash_c1 & (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + next1 = dpo1->dpoi_next_node; + + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, b1)); + } + if (MPLS_IS_REPLICATE & lbi2) + { + next2 = mpls_lookup_to_replicate_edge; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + (lbi2 & ~MPLS_IS_REPLICATE); + } + else + { + lb2 = load_balance_get(lbi2); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + dpo2 = load_balance_get_fwd_bucket + (lb2, + (hash_c2 & (lb2->lb_n_buckets_minus_1))); + } + else + { + dpo2 = load_balance_get_bucket_i (lb2, 0); + } + next2 = dpo2->dpoi_next_node; + + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + } + if (MPLS_IS_REPLICATE & lbi3) + { + next3 = mpls_lookup_to_replicate_edge; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + (lbi3 & ~MPLS_IS_REPLICATE); + } + else + { + lb3 = load_balance_get(lbi3); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + dpo3 = load_balance_get_fwd_bucket + (lb3, + (hash_c3 & (lb3->lb_n_buckets_minus_1))); + } + else + { + dpo3 = load_balance_get_bucket_i (lb3, 0); + } + next3 = dpo3->dpoi_next_node; + + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); + } + + /* + * before we pop the label copy th values we need to maintain. 
+ * The label header is in network byte order. + * last byte is the TTL. + * bits 2 to 4 inclusive are the EXP bits + */ + vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3]; + vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1; + vnet_buffer (b0)->mpls.first = 1; + vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3]; + vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1; + vnet_buffer (b1)->mpls.first = 1; + vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3]; + vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1; + vnet_buffer (b2)->mpls.first = 1; + vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3]; + vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1; + vnet_buffer (b3)->mpls.first = 1; + + /* + * pop the label that was just used in the lookup + */ + vlib_buffer_advance(b0, sizeof(*h0)); + vlib_buffer_advance(b1, sizeof(*h1)); + vlib_buffer_advance(b2, sizeof(*h2)); + vlib_buffer_advance(b3, sizeof(*h3)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->hash = hash_c0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->next_index = next1; + tr->lb_index = lbi1; + tr->lfib_index = lfib_index1; + tr->hash = hash_c1; + tr->label_net_byte_order = h1->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b2, sizeof (*tr)); + tr->next_index = next2; + tr->lb_index = lbi2; + tr->lfib_index = lfib_index2; + tr->hash = hash_c2; + tr->label_net_byte_order = h2->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b3, sizeof (*tr)); + tr->next_index = next3; + tr->lb_index = lbi3; + 
tr->lfib_index = lfib_index3; + tr->hash = hash_c3; + tr->label_net_byte_order = h3->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0); + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + + if (MPLS_IS_REPLICATE & lbi0) + { + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); + } + else + { + lb0 = load_balance_get(lbi0); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + dpo0 = load_balance_get_fwd_bucket + (lb0, + (hash_c0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + } + + /* + * before we pop the label copy, values we need to maintain. + * The label header is in network byte order. + * last byte is the TTL. 
+ * bits 2 to 4 inclusive are the EXP bits + */ + vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3]; + vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1; + vnet_buffer (b0)->mpls.first = 1; + + /* + * pop the label that was just used in the lookup + */ + vlib_buffer_advance(b0, sizeof(*h0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->hash = hash_c0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_lookup_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_lookup_node, static) = { + .function = mpls_lookup, + .name = "mpls-lookup", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .sibling_of = "mpls-load-balance", + + .format_buffer = format_mpls_header, + .format_trace = format_mpls_lookup_trace, + .unformat_buffer = unformat_mpls_header, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup) + +typedef struct { + u32 next_index; + u32 lb_index; + u32 hash; +} mpls_load_balance_trace_t; + +static u8 * +format_mpls_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *); + + s = format (s, "MPLS: next [%d], LB index %d hash %d", + t->next_index, t->lb_index, t->hash); + return s; +} + +always_inline uword +mpls_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + u32 thread_index = vlib_get_thread_index(); + u32 next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + const load_balance_t *lb0, *lb1; + vlib_buffer_t * p0, *p1; + u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1; + const mpls_unicast_header_t *mpls0, *mpls1; + const dpo_id_t *dpo0, *dpo1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + mpls0 = vlib_buffer_get_current (p0); + mpls1 = vlib_buffer_get_current (p1); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + lb1 = load_balance_get(lbi1); + + /* + * this node is for via FIBs we can re-use the hash value from the + * to node if present. + * We don't want to use the same hash value at each level in the recursion + * graph as that would lead to polarisation + */ + hc0 = vnet_buffer (p0)->ip.flow_hash = 0; + hc1 = vnet_buffer (p1)->ip.flow_hash = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) + { + hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); + } + dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash)) + { + hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1; + } + else + { + hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1); + } + dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1)); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + 
vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, p1)); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->hash = hc0; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0, next0; + const mpls_unicast_header_t *mpls0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + mpls0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + + hc0 = vnet_buffer (p0)->ip.flow_hash = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) + { + hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); + } + dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + 
+VLIB_REGISTER_NODE (mpls_load_balance_node) = { + .function = mpls_load_balance, + .name = "mpls-load-balance", + .vector_size = sizeof (u32), + .format_trace = format_mpls_load_balance_trace, + .n_next_nodes = 1, + .next_nodes = + { + [0] = "mpls-drop", + }, + +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) + + +static clib_error_t * +mpls_lookup_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mpls_lookup_to_replicate_edge = + vlib_node_add_named_next(vm, + mpls_lookup_node.index, + "mpls-replicate"); + + return (NULL); +} + +VLIB_INIT_FUNCTION (mpls_lookup_init); diff --git a/src/vnet/mpls/mpls_lookup.h b/src/vnet/mpls/mpls_lookup.h new file mode 100644 index 00000000..28c9124f --- /dev/null +++ b/src/vnet/mpls/mpls_lookup.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MPLS_LOOKUP_H__ +#define __MPLS_LOOKUP_H__ + +#include <vnet/mpls/mpls.h> +#include <vnet/ip/ip.h> + +/** + * The arc/edge from the MPLS lookup node to the MPLS replicate node + */ +u32 mpls_lookup_to_replicate_edge; + +/* + * Compute flow hash. + * We'll use it to select which adjacency to use for this flow. And other things. 
+ */ +always_inline u32 +mpls_compute_flow_hash (const mpls_unicast_header_t * hdr, + flow_hash_config_t flow_hash_config) +{ + /* + * We need to byte swap so we use the numerical value. i.e. an odd label + * leads to an odd bucket. as opposed to a label above and below value X. + */ + u8 next_label_is_entropy; + mpls_label_t ho_label; + u32 hash, value; + + ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl); + hash = vnet_mpls_uc_get_label(ho_label); + next_label_is_entropy = 0; + + while (MPLS_EOS != vnet_mpls_uc_get_s(ho_label)) + { + hdr++; + ho_label = clib_net_to_host_u32(hdr->label_exp_s_ttl); + value = vnet_mpls_uc_get_label(ho_label); + + if (1 == next_label_is_entropy) + { + /* + * The label is an entropy value, use it alone as the hash + */ + return (ho_label); + } + if (MPLS_IETF_ENTROPY_LABEL == value) + { + /* + * we've met a label in the stack indicating that tha next + * label is an entropy value + */ + next_label_is_entropy = 1; + } + else + { + /* + * XOR the label values in the stack together to + * build up the hash value + */ + hash ^= value; + } + } + + /* + * check the top nibble for v4 and v6 + */ + hdr++; + + switch (((u8*)hdr)[0] >> 4) + { + case 4: + /* incorporate the v4 flow-hash */ + hash ^= ip4_compute_flow_hash ((const ip4_header_t *)hdr, + IP_FLOW_HASH_DEFAULT); + break; + case 6: + /* incorporate the v6 flow-hash */ + hash ^= ip6_compute_flow_hash ((const ip6_header_t *)hdr, + IP_FLOW_HASH_DEFAULT); + break; + default: + break; + } + + return (hash); +} + +#endif /* __MPLS_LOOKUP_H__ */ diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c new file mode 100644 index 00000000..241a7835 --- /dev/null +++ b/src/vnet/mpls/mpls_output.c @@ -0,0 +1,498 @@ +/* + * mpls_output.c: MPLS Adj rewrite + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[64 - 1*sizeof(u32)]; +} mpls_output_trace_t; + +#define foreach_mpls_output_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_OUTPUT_NEXT_##s, + foreach_mpls_output_next +#undef _ + MPLS_OUTPUT_N_NEXT, +} mpls_output_next_t; + +static u8 * +format_mpls_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "adj-idx %d : %U flow hash: 0x%08x", + t->adj_index, + format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + t->adj_index, t->packet_data, sizeof (t->packet_data)); + return s; +} + +static inline uword +mpls_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int is_midchain) +{ + u32 n_left_from, next_index, * from, * to_next, thread_index; + vlib_node_runtime_t * error_node; + u32 n_left_to_next; + mpls_main_t *mm; + + thread_index = vlib_get_thread_index(); + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (from_frame); + 
n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + mm = &mpls_main; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + ip_adjacency_t * adj1; + mpls_unicast_header_t *hdr1; + vlib_buffer_t * p1; + u32 pi1, rw_len1, adj_index1, next1, error1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + adj0 = adj_get(adj_index0); + adj1 = adj_get(adj_index1); + hdr0 = vlib_buffer_get_current (p0); + hdr1 = vlib_buffer_get_current (p1); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. 
*/ + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + + /* Bump the adj counters for packet and bytes */ + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index0, + 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index1, + 1, + vlib_buffer_length_in_chain (vm, p1) + rw_len1); + + /* Check MTU of outgoing interface. */ + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= + adj0[0].rewrite_header.max_l3_packet_bytes)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + error0 = IP4_ERROR_NONE; + + if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (mm->output_feature_arc_index, + adj0[0].rewrite_header.sw_if_index, + &next0, p0); + } + else + { + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_DROP; + } + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <= + adj1[0].rewrite_header.max_l3_packet_bytes)) + { + p1->current_data -= rw_len1; + p1->current_length += rw_len1; + + vnet_buffer (p1)->sw_if_index[VLIB_TX] = + adj1[0].rewrite_header.sw_if_index; + next1 = adj1[0].rewrite_header.next_index; + error1 = IP4_ERROR_NONE; + + if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (mm->output_feature_arc_index, + adj1[0].rewrite_header.sw_if_index, + &next1, p1); + } + else + { + error1 = IP4_ERROR_MTU_EXCEEDED; + next1 = MPLS_OUTPUT_NEXT_DROP; + } + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + adj1->sub_type.midchain.fixup_func(vm, adj1, p1); + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = 
vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + clib_memcpy (tr->packet_data, + vlib_buffer_get_current (p0), + sizeof (tr->packet_data)); + } + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p1, sizeof (*tr)); + tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p1)->ip.flow_hash; + clib_memcpy (tr->packet_data, + vlib_buffer_get_current (p1), + sizeof (tr->packet_data)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + adj0 = adj_get(adj_index0); + hdr0 = vlib_buffer_get_current (p0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], hdr0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index0, + 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + + /* Check MTU of outgoing interface. 
*/ + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= + adj0[0].rewrite_header.max_l3_packet_bytes)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + error0 = IP4_ERROR_NONE; + + if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (mm->output_feature_arc_index, + adj0[0].rewrite_header.sw_if_index, + &next0, p0); + } + else + { + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_DROP; + } + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + + p0->error = error_node->errors[error0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +static inline uword +mpls_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0)); +} + +VLIB_REGISTER_NODE (mpls_output_node) = { + .function = mpls_output, + .name = "mpls-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, + foreach_mpls_output_next +#undef _ + }, + + .format_trace = format_mpls_output_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output) + +static inline uword +mpls_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1)); +} + +VLIB_REGISTER_NODE (mpls_midchain_node) = { + .function = mpls_midchain, + .name = "mpls-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_output_trace, + + .sibling_of = "mpls-output", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain) + +/** + * @brief Next index values from the MPLS incomplete adj node + */ +#define foreach_mpls_adj_incomplete_next \ +_(DROP, "error-drop") \ +_(IP4, "ip4-arp") \ +_(IP6, "ip6-discover-neighbor") + +typedef enum { +#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s, + foreach_mpls_adj_incomplete_next +#undef _ + MPLS_ADJ_INCOMPLETE_N_NEXT, +} mpls_adj_incomplete_next_t; + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_adj_incomplete_trace_t_ +{ + u32 next; +} mpls_adj_incomplete_trace_t; + + +/** + * @brief Graph node for incomplete MPLS adjacency. + * This node will push traffic to either the v4-arp or v6-nd node + * based on the next-hop proto of the adj. + * We pay a cost for this 'routing' node, but an incomplete adj is the + * exception case. 
+ */ +static inline uword +mpls_adj_incomplete (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, next0, adj_index0; + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + adj0 = adj_get(adj_index0); + + if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto)) + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4; + } + else + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_adj_incomplete_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static u8 * +format_mpls_adj_incomplete_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_adj_incomplete_trace_t * t; + uword indent; + + t = va_arg (*args, mpls_adj_incomplete_trace_t *); + indent = format_get_indent (s); + + s = format (s, "%Unext:%d", + format_white_space, indent, + t->next); + return (s); +} + +VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { + .function = mpls_adj_incomplete, + .name = "mpls-adj-incomplete", + .format_trace = format_mpls_adj_incomplete_trace, + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n, + foreach_mpls_adj_incomplete_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node, + mpls_adj_incomplete) diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c new file mode 100644 index 00000000..2d5521f4 --- /dev/null +++ b/src/vnet/mpls/mpls_tunnel.c @@ -0,0 +1,1070 @@ +/* + * mpls_tunnel.c: MPLS tunnel interfaces (i.e. for RSVP-TE) + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls_tunnel.h> +#include <vnet/mpls/mpls_types.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib_path_list.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/adj/adj_mcast.h> +#include <vnet/dpo/replicate_dpo.h> +#include <vnet/fib/mpls_fib.h> + +/** + * @brief pool of tunnel instances + */ +static mpls_tunnel_t *mpls_tunnel_pool; + +/** + * @brief Pool of free tunnel SW indices - i.e. 
recycled indices + */ +static u32 * mpls_tunnel_free_hw_if_indices; + +/** + * @brief DB of SW index to tunnel index + */ +static u32 *mpls_tunnel_db; + +/** + * @brief MPLS tunnel flags strings + */ +static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES; + +/** + * @brief Get a tunnel object from a SW interface index + */ +static mpls_tunnel_t* +mpls_tunnel_get_from_sw_if_index (u32 sw_if_index) +{ + if ((vec_len(mpls_tunnel_db) < sw_if_index) || + (~0 == mpls_tunnel_db[sw_if_index])) + return (NULL); + + return (pool_elt_at_index(mpls_tunnel_pool, + mpls_tunnel_db[sw_if_index])); +} + +/** + * @brief Build a rewrite string for the MPLS tunnel. + */ +static u8* +mpls_tunnel_build_rewrite_i (void) +{ + /* + * passing the adj code a NULL rewirte means 'i don't have one cos + * t'other end is unresolved'. That's not the case here. For the mpls + * tunnel there are just no bytes of encap to apply in the adj. We'll impose + * the label stack once we choose a path. So return a zero length rewrite. + */ + u8 *rewrite = NULL; + + vec_validate(rewrite, 0); + vec_reset_length(rewrite); + + return (rewrite); +} + +/** + * @brief Build a rewrite string for the MPLS tunnel. + */ +static u8* +mpls_tunnel_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) +{ + return (mpls_tunnel_build_rewrite_i()); +} + +typedef struct mpls_tunnel_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const mpls_tunnel_t *mt; + fib_forward_chain_type_t fct; +} mpls_tunnel_collect_forwarding_ctx_t; + +static fib_path_list_walk_rc_t +mpls_tunnel_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + mpls_tunnel_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + + ctx = arg; + + /* + * if the path is not resolved, don't include it. 
+ */ + if (!fib_path_is_resolved(path_index)) + { + return (FIB_PATH_LIST_WALK_CONTINUE); + } + + /* + * get the matching path-extension for the path being visited. + */ + path_ext = fib_path_ext_list_find_by_path_index(&ctx->mt->mt_path_exts, + path_index); + + if (NULL != path_ext) + { + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, + ctx->fct, + ctx->fct, + ctx->next_hops); + } + else + ASSERT(0); + /* + * else + * There should be a path-extenios associated with each path + */ + + return (FIB_PATH_LIST_WALK_CONTINUE); +} + +static void +mpls_tunnel_mk_lb (mpls_tunnel_t *mt, + vnet_link_t linkt, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. + * Otherwise we use the load-balance of the path-list + */ + mpls_tunnel_collect_forwarding_ctx_t ctx = { + .mt = mt, + .next_hops = NULL, + .fct = fct, + }; + + /* + * As an optimisation we allocate the vector of next-hops to be sized + * equal to the maximum nuber of paths we will need, which is also the + * most likely number we will need, since in most cases the paths are 'up'. 
+ */ + vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); + vec_reset_length(ctx.next_hops); + + lb_proto = fib_forw_chain_type_to_dpo_proto(fct); + + fib_path_list_walk(mt->mt_path_list, + mpls_tunnel_collect_forwarding, + &ctx); + + if (!dpo_id_is_valid(dpo_lb)) + { + /* + * first time create + */ + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + switch (linkt) + { + case VNET_LINK_MPLS: + fhc = MPLS_FLOW_HASH_DEFAULT; + break; + case VNET_LINK_IP4: + case VNET_LINK_IP6: + fhc = IP_FLOW_HASH_DEFAULT; + break; + default: + fhc = 0; + break; + } + + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } + } + + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); + } + else + { + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + LOAD_BALANCE_FLAG_NONE); + vec_free(ctx.next_hops); + } +} + +/** + * mpls_tunnel_stack + * + * 'stack' (resolve the recursion for) the tunnel's midchain adjacency + */ +static void +mpls_tunnel_stack (adj_index_t ai) +{ + ip_adjacency_t *adj; + mpls_tunnel_t *mt; + u32 sw_if_index; + + adj = adj_get(ai); + sw_if_index = adj->rewrite_header.sw_if_index; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + /* + * while we're stacking the adj, remove the tunnel from the child list + * of the path list. this breaks a circular dependency of walk updates + * where the create of adjacencies in the children can lead to walks + * that get back here. 
+ */ + fib_path_list_lock(mt->mt_path_list); + + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + + /* + * Construct the DPO (load-balance or replicate) that we can stack + * the tunnel's midchain on + */ + if (vnet_hw_interface_get_flags(vnet_get_main(), + mt->mt_hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + dpo_id_t dpo = DPO_INVALID; + + mpls_tunnel_mk_lb(mt, + adj->ia_link, + (VNET_LINK_MPLS == adj_get_link_type(ai) ? + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + FIB_FORW_CHAIN_TYPE_MPLS_EOS), + &dpo); + + adj_nbr_midchain_stack(ai, &dpo); + dpo_reset(&dpo); + } + else + { + adj_nbr_midchain_unstack(ai); + } + + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mt - mpls_tunnel_pool); + + fib_path_list_unlock(mt->mt_path_list); +} + +/** + * @brief Call back when restacking all adjacencies on a MPLS interface + */ +static adj_walk_rc_t +mpls_adj_walk_cb (adj_index_t ai, + void *ctx) +{ + mpls_tunnel_stack(ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +static void +mpls_tunnel_restack (mpls_tunnel_t *mt) +{ + fib_protocol_t proto; + + /* + * walk all the adjacencies on the MPLS interface and restack them + */ + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) + { + /* + * Stack a load-balance that drops, whilst we have no paths + */ + vnet_hw_interface_t * hi; + dpo_id_t dpo = DPO_INVALID; + + mpls_tunnel_mk_lb(mt, + VNET_LINK_MPLS, + FIB_FORW_CHAIN_TYPE_ETHERNET, + &dpo); + + hi = vnet_get_hw_interface(vnet_get_main(), mt->mt_hw_if_index); + dpo_stack_from_node(hi->tx_node_index, + &mt->mt_l2_lb, + &dpo); + dpo_reset(&dpo); + } + else + { + FOR_EACH_FIB_PROTOCOL(proto) + { + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); + } + } +} + +static clib_error_t * +mpls_tunnel_admin_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + vnet_hw_interface_t * hi; + mpls_tunnel_t *mt; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + mt = 
mpls_tunnel_get_from_sw_if_index(hi->sw_if_index); + + if (NULL == mt) + return (NULL); + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + vnet_hw_interface_set_flags (vnm, hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + else + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + + mpls_tunnel_restack(mt); + + return (NULL); +} + +/** + * @brief Fixup the adj rewrite post encap. This is a no-op since the + * rewrite is a stack of labels. + */ +static void +mpls_tunnel_fixup (vlib_main_t *vm, + ip_adjacency_t *adj, + vlib_buffer_t *b0) +{ + /* + * A no-op w.r.t. the header. but reset the 'have we pushed any + * MPLS labels onto the packet' flag. That way when we enter the + * tunnel we'll get a TTL set to 255 + */ + vnet_buffer(b0)->mpls.first = 0; +} + +static void +mpls_tunnel_update_adj (vnet_main_t * vnm, + u32 sw_if_index, + adj_index_t ai) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != ai); + + adj = adj_get(ai); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i()); + break; + case IP_LOOKUP_NEXT_MCAST: + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + * There's no MAC fixup, so the last 2 parameters are 0 + */ + adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i(), + 0, 0); + break; + + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } + + mpls_tunnel_stack(ai); +} + +static u8 * +format_mpls_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-tunnel%d", dev_instance); +} + +static u8 * +format_mpls_tunnel_device (u8 * s, va_list * args) 
+{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + return (format (s, "MPLS-tunnel: id %d\n", dev_instance)); +} + +/** + * @brief Packet trace structure + */ +typedef struct mpls_tunnel_trace_t_ +{ + /** + * Tunnel-id / index in tunnel vector + */ + u32 tunnel_id; +} mpls_tunnel_trace_t; + +static u8 * +format_mpls_tunnel_tx_trace (u8 * s, + va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_tunnel_trace_t * t = va_arg (*args, mpls_tunnel_trace_t *); + + s = format (s, "MPLS: tunnel %d", t->tunnel_id); + return s; +} + +/** + * @brief TX function. Only called L2. L3 traffic uses the adj-midchains + */ +static uword +mpls_tunnel_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + const mpls_tunnel_t *mt; + + mt = pool_elt_at_index(mpls_tunnel_pool, rd->dev_instance); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * FIXME DUAL LOOP + */ + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_lb.dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_tunnel_trace_t *tr = 
vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = rd->dev_instance; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, mt->mt_l2_lb.dpoi_next_node); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (mpls_tunnel_class) = { + .name = "MPLS tunnel device", + .format_device_name = format_mpls_tunnel_name, + .format_device = format_mpls_tunnel_device, + .format_tx_trace = format_mpls_tunnel_tx_trace, + .tx_function = mpls_tunnel_tx, + .admin_up_down_function = mpls_tunnel_admin_up_down, +}; + +VNET_HW_INTERFACE_CLASS (mpls_tunnel_hw_interface_class) = { + .name = "MPLS-Tunnel", + .update_adjacency = mpls_tunnel_update_adj, + .build_rewrite = mpls_tunnel_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; + +const mpls_tunnel_t * +mpls_tunnel_get (u32 mti) +{ + return (pool_elt_at_index(mpls_tunnel_pool, mti)); +} + +/** + * @brief Walk all the MPLS tunnels + */ +void +mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb, + void *ctx) +{ + u32 mti; + + pool_foreach_index(mti, mpls_tunnel_pool, + ({ + cb(mti, ctx); + })); +} + +void +vnet_mpls_tunnel_del (u32 sw_if_index) +{ + mpls_tunnel_t *mt; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + dpo_reset(&mt->mt_l2_lb); + + vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); + pool_put(mpls_tunnel_pool, mt); + mpls_tunnel_db[sw_if_index] = ~0; +} + +u32 +vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast) +{ + vnet_hw_interface_t * hi; + mpls_tunnel_t *mt; + vnet_main_t * vnm; + u32 mti; + + vnm = vnet_get_main(); + pool_get(mpls_tunnel_pool, mt); + memset (mt, 0, sizeof (*mt)); + mti = mt - mpls_tunnel_pool; + fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); + mt->mt_path_list = FIB_NODE_INDEX_INVALID; + 
mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; + + if (is_multicast) + mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; + if (l2_only) + mt->mt_flags |= MPLS_TUNNEL_FLAG_L2; + + /* + * Create a new, or re=use and old, tunnel HW interface + */ + if (vec_len (mpls_tunnel_free_hw_if_indices) > 0) + { + mt->mt_hw_if_index = + mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; + _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + hi->hw_instance = mti; + hi->dev_instance = mti; + } + else + { + mt->mt_hw_if_index = vnet_register_interface( + vnm, + mpls_tunnel_class.index, + mti, + mpls_tunnel_hw_interface_class.index, + mti); + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + } + + /* + * Add the new tunnel to the tunnel DB - key:SW if index + */ + mt->mt_sw_if_index = hi->sw_if_index; + vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); + mpls_tunnel_db[mt->mt_sw_if_index] = mti; + + return (mt->mt_sw_if_index); +} + +void +vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + mti = mt - mpls_tunnel_pool; + + /* + * construct a path-list from the path provided + */ + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + else + { + fib_node_index_t old_pl_index; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_add(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + /* + * re-resolve all the path-extensions with the new path-list + */ + 
fib_path_ext_list_resolve(&mt->mt_path_exts, mt->mt_path_list); + } + fib_path_ext_list_insert(&mt->mt_path_exts, + mt->mt_path_list, + FIB_PATH_EXT_MPLS, + rpaths); + mpls_tunnel_restack(mt); +} + +int +vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return (0); + + mti = mt - mpls_tunnel_pool; + + /* + * construct a path-list from the path provided + */ + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + /* can't remove a path if we have onoe */ + return (0); + } + else + { + fib_node_index_t old_pl_index; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_remove(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + /* no paths left */ + return (0); + } + else + { + mt->mt_sibling_index = + fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + /* + * find the matching path extension and remove it + */ + fib_path_ext_list_remove(&mt->mt_path_exts, + FIB_PATH_EXT_MPLS, + rpaths); + + /* + * re-resolve all the path-extensions with the new path-list + */ + fib_path_ext_list_resolve(&mt->mt_path_exts, + mt->mt_path_list); + + mpls_tunnel_restack(mt); + } + + return (fib_path_list_get_n_paths(mt->mt_path_list)); +} + + +static clib_error_t * +vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + u8 is_del = 0, l2_only = 0, is_multicast =0; + fib_route_path_t rpath, *rpaths = NULL; + mpls_label_t out_label = MPLS_LABEL_INVALID; + u32 sw_if_index = ~0; + clib_error_t *error = NULL; + + memset(&rpath, 0, sizeof(rpath)); + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "del %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 1; + else if (unformat (line_input, "add %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 0; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "out-labels")) + { + while (unformat (line_input, "%U", + unformat_mpls_unicast_label, + &out_label)) + { + vec_add1 (rpath.frp_label_stack, out_label); + } + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP4; + } + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "multicast")) + is_multicast = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + vec_add1(rpaths, rpath); + + if (is_del) + { + if (!vnet_mpls_tunnel_path_remove(sw_if_index, rpaths)) + { + vnet_mpls_tunnel_del(sw_if_index); + } + } + else + { + if (0 == vec_len(rpath.frp_label_stack)) + { + error = clib_error_return (0, "No Output Labels 
'%U'", + format_unformat_error, line_input); + goto done; + } + + if (~0 == sw_if_index) + { + sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast); + } + vnet_mpls_tunnel_path_add(sw_if_index, rpaths); + } + +done: + vec_free(rpaths); + unformat_free (line_input); + + return error; +} + +/*? + * This command creates a uni-directional MPLS tunnel + * + * @cliexpar + * @cliexstart{mpls tunnel} + * mpls tunnel add via 10.0.0.1 GigEthernet0/8/0 out-labels 33 34 + * @cliexend + ?*/ +VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { + .path = "mpls tunnel", + .short_help = + "mpls tunnel via [addr] [interface] [out-labels]", + .function = vnet_create_mpls_tunnel_command_fn, +}; + +/** + * @brief Format an MPLS tunnel: its pool, SW and HW indices, any flags set, + * the path-list, the path extensions and, for L2 tunnels, the L2 DPO. 
+ */ +static u8 * +format_mpls_tunnel (u8 * s, va_list * args) +{ + mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *); + mpls_tunnel_attribute_t attr; + + s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", + mt - mpls_tunnel_pool, + mt->mt_sw_if_index, + mt->mt_hw_if_index); + if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) { + s = format(s, " \n flags:"); + FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) { + if ((1<<attr) & mt->mt_flags) { + s = format (s, "%s,", mpls_tunnel_attribute_names[attr]); + } + } + } + s = format(s, "\n via:\n"); + s = fib_path_list_format(mt->mt_path_list, s); + s = format(s, "%U", format_fib_path_ext_list, &mt->mt_path_exts); + s = format(s, "\n"); + + if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2) + { + s = format(s, " forwarding: %U\n", + format_fib_forw_chain_type, + FIB_FORW_CHAIN_TYPE_ETHERNET); + s = format(s, " %U\n", format_dpo_id, &mt->mt_l2_lb, 2); + } + + return (s); +} + +static clib_error_t * +show_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_tunnel_t * mt; + u32 mti = ~0; + + if (pool_elts (mpls_tunnel_pool) == 0) + vlib_cli_output (vm, "No MPLS tunnels configured..."); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", 
&mti)) + ; + else + break; + } + + if (~0 == mti) + { + pool_foreach (mt, mpls_tunnel_pool, + ({ + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + })); + } + else + { + if (pool_is_free_index(mpls_tunnel_pool, mti)) + return clib_error_return (0, "Not atunnel index %d", mti); + + mt = pool_elt_at_index(mpls_tunnel_pool, mti); + + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + } + + return 0; +} + +/*? + * This command to show MPLS tunnels + * + * @cliexpar + * @cliexstart{sh mpls tunnel 2} + * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5 + * label-stack: + * 3, + * via: + * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ] + * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved, + * 10.0.0.2 loop0 + * [@0]: ipv4 via 10.0.0.2 loop0: IP4: de:ad:00:00:00:00 -> 00:00:11:aa:bb:cc + * @cliexend + ?*/ +VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { + .path = "show mpls tunnel", + .function = show_mpls_tunnel_command_fn, +}; + +static mpls_tunnel_t * +mpls_tunnel_from_fib_node (fib_node_t *node) +{ +#if (CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_MPLS_TUNNEL == node->fn_type); +#endif + return ((mpls_tunnel_t*) (((char*)node) - + STRUCT_OFFSET_OF(mpls_tunnel_t, mt_node))); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +mpls_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + mpls_tunnel_restack(mpls_tunnel_from_fib_node(node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t* +mpls_tunnel_fib_node_get (fib_node_index_t index) +{ + mpls_tunnel_t * mt; + + mt = pool_elt_at_index(mpls_tunnel_pool, index); + + return (&mt->mt_node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. 
+ */ +static void +mpls_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The MPLS MPLS tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by MPLS MPLS tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t mpls_vft = { + .fnv_get = mpls_tunnel_fib_node_get, + .fnv_last_lock = mpls_tunnel_last_lock_gone, + .fnv_back_walk = mpls_tunnel_back_walk, +}; + +static clib_error_t * +mpls_tunnel_init (vlib_main_t *vm) +{ + fib_node_register_type(FIB_NODE_TYPE_MPLS_TUNNEL, &mpls_vft); + + return 0; +} +VLIB_INIT_FUNCTION(mpls_tunnel_init); diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h new file mode 100644 index 00000000..285817c3 --- /dev/null +++ b/src/vnet/mpls/mpls_tunnel.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MPLS_TUNNEL_H__ +#define __MPLS_TUNNEL_H__ + +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_path_ext.h> + +typedef enum mpls_tunnel_attribute_t_ +{ + MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel is L2 only + */ + MPLS_TUNNEL_ATTRIBUTE_L2 = MPLS_TUNNEL_ATTRIBUTE_FIRST, + /** + * @brief The tunnel has an underlying multicast LSP + */ + MPLS_TUNNEL_ATTRIBUTE_MCAST, + MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, +} mpls_tunnel_attribute_t; + +#define MPLS_TUNNEL_ATTRIBUTES { \ + [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ + [MPLS_TUNNEL_ATTRIBUTE_L2] = "L2", \ +} +#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ + for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ + _item <= MPLS_TUNNEL_ATTRIBUTE_LAST; \ + _item++) + +typedef enum mpls_tunnel_flag_t_ { + MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_L2 = (1 << MPLS_TUNNEL_ATTRIBUTE_L2), + MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), +} __attribute__ ((packed)) mpls_tunnel_flags_t; + + +/** + * @brief A uni-directional MPLS tunnel + */ +typedef struct mpls_tunnel_t_ +{ + /** + * @brief The tunnel hooks into the FIB control plane graph. + */ + fib_node_t mt_node; + + /** + * @brief Tunnel flags + */ + mpls_tunnel_flags_t mt_flags; + + /** + * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET + * load-balance + */ + dpo_id_t mt_l2_lb; + + /** + * @brief The HW interface index of the tunnel interfaces + */ + u32 mt_hw_if_index; + + /** + * @brief The SW interface index of the tunnel interfaces + */ + u32 mt_sw_if_index; + + /** + * @brief The path-list over which the tunnel's destination is reachable + */ + fib_node_index_t mt_path_list; + + /** + * @brief sibling index on the path-list so notifications are received. 
+ */ + u32 mt_sibling_index; + + /** + * A vector of path extensions to hold the label stack for each path + */ + fib_path_ext_list_t mt_path_exts; +} mpls_tunnel_t; + +/** + * @brief Create a new MPLS tunnel + * @return the SW Interface index of the newly created tunnel + */ +extern u32 vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast); + +/** + * @brief Add a path to an MPLS tunnel + */ +extern void vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief remove a path from a tunnel. + * @return the number of remaining paths. 0 implies the tunnel can be deleted + */ +extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief Delete an MPLS tunnel + */ +extern void vnet_mpls_tunnel_del (u32 sw_if_index); + +/** + * @brief Get the tunnel at the given index in the tunnel pool + */ +extern const mpls_tunnel_t *mpls_tunnel_get(u32 index); + +/** + * @brief Callback function invoked while walking MPLS tunnels + */ +typedef void (*mpls_tunnel_walk_cb_t)(u32 index, void *ctx); + +/** + * @brief Walk all the MPLS tunnels + */ +extern void mpls_tunnel_walk(mpls_tunnel_walk_cb_t cb, + void *ctx); + +#endif diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h new file mode 100644 index 00000000..f1c3191e --- /dev/null +++ b/src/vnet/mpls/mpls_types.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef __MPLS_TYPES_H__ +#define __MPLS_TYPES_H__ + +#define MPLS_IETF_MIN_LABEL 0x00000 +#define MPLS_IETF_MAX_LABEL 0xfffff + +#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000 +#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f + +#define MPLS_IETF_MIN_UNRES_LABEL 0x00010 +#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000 +#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001 +#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002 +#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003 +#define MPLS_IETF_ELI_LABEL 0x00007 +#define MPLS_IETF_GAL_LABEL 0x0000D +#define MPLS_IETF_ENTROPY_LABEL 0x0000E + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null" +#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul" +#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null" +#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul" +#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert" +#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl" +#define MPLS_IETF_ELI_STRING "entropy-label-indicator" +#define MPLS_IETF_ELI_BRIEF_STRING "eli" +#define MPLS_IETF_GAL_STRING "gal" +#define MPLS_IETF_GAL_BRIEF_STRING "gal" + +#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1) + +/** + * True when the label lies in the unreserved range; both + * MPLS_IETF_MIN_UNRES_LABEL and MPLS_IETF_MAX_UNRES_LABEL are included. 
+ */ +#define MPLS_LABEL_IS_REAL(_lbl) \ + (((_lbl) >= MPLS_IETF_MIN_UNRES_LABEL) && \ + ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) + +/** + * The top bit of the index, which is the result of the MPLS lookup + * is used to determine if the DPO is a load-balance or a replicate + */ +#define MPLS_IS_REPLICATE 0x80000000 + +#endif diff --git a/src/vnet/mpls/packet.h b/src/vnet/mpls/packet.h new file mode 100644 index 00000000..bc67445b --- /dev/null +++ b/src/vnet/mpls/packet.h @@ -0,0 +1,125 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A label value only, i.e. 20bits. + */ +typedef u32 mpls_label_t; + +typedef struct { + /* Label: top 20 bits [in network byte order] */ + /* Experimental: 3 bits ... */ + /* S (bottom of label stack): 1 bit */ + /* TTL: 8 bits */ + mpls_label_t label_exp_s_ttl; +} mpls_unicast_header_t; + +typedef enum mpls_eos_bit_t_ +{ + MPLS_NON_EOS = 0, + MPLS_EOS = 1, +} mpls_eos_bit_t; + +#define MPLS_EOS_BITS { \ + [MPLS_NON_EOS] = "neos", \ + [MPLS_EOS] = "eos", \ +} + +#define FOR_EACH_MPLS_EOS_BIT(_eos) \ + for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++) + +#define MPLS_ENTRY_LABEL_OFFSET 0 +#define MPLS_ENTRY_LABEL_SHIFT 12 +#define MPLS_ENTRY_LABEL_MASK 0x000fffff +#define MPLS_ENTRY_LABEL_BITS \ + (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT) + +#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */ +#define MPLS_ENTRY_EXP_SHIFT 9 +#define MPLS_ENTRY_EXP_MASK 0x07 +#define MPLS_ENTRY_EXP(mpls) \ + (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK) +#define MPLS_ENTRY_EXP_BITS \ + (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT) + +#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */ +#define MPLS_ENTRY_EOS_SHIFT 8 +#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */ +#define MPLS_ENTRY_EOS(mpls) \ + (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK) +#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT) + +#define MPLS_ENTRY_TTL_OFFSET 3 /* byte 
offset to ttl field */ +#define MPLS_ENTRY_TTL_SHIFT 0 +#define MPLS_ENTRY_TTL_MASK 0xff +#define MPLS_ENTRY_TTL(mpls) \ + (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK) +#define MPLS_ENTRY_TTL_BITS \ + (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT) + +static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl) +{ + return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT); +} + +static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EXP(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EOS(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_TTL(label_exp_s_ttl)); +} + +static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl, + u32 value) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) | + ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT)); +} + +static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl, + u32 exp) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) | + ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT)); +} + +static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl, + u32 eos) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) | + ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT)); +} + +static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl, + u32 ttl) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) | + ((ttl & MPLS_ENTRY_TTL_MASK))); +} + +#endif /* included_vnet_mpls_packet_h */ diff --git a/src/vnet/mpls/pg.c b/src/vnet/mpls/pg.c new file mode 100644 index 00000000..6ff86e32 --- /dev/null +++ b/src/vnet/mpls/pg.c @@ -0,0 +1,71 @@ +/* + * pg.c: packet generator mpls interface + * + * Copyright (c) 2012 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + pg_edit_t label; +} pg_mpls_header_t; + +static inline void +pg_mpls_header_init (pg_mpls_header_t * e) +{ + pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl); +} + +uword +unformat_pg_mpls_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_mpls_header_t * h; + vlib_main_t * vm = vlib_get_main(); + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t), + &group_index); + pg_mpls_header_init (h); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_mpls_label_net_byte_order, &h->label)) + goto done; + + { + pg_node_t * pg_node = 0; + vlib_node_t * ip_lookup_node; + + ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input"); + ASSERT (ip_lookup_node); + + pg_node = pg_get_node (ip_lookup_node->index); + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + |