diff options
author | Damjan Marion <damarion@cisco.com> | 2016-12-19 23:05:39 +0100 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2016-12-28 12:25:14 +0100 |
commit | 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 (patch) | |
tree | 5de62f8dbd3a752f5a676ca600e43d2652d1ff1a /src/vnet/mpls | |
parent | 696f1adec0df3b8f161862566dd9c86174302658 (diff) |
Reorganize source tree to use single autotools instance
Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vnet/mpls')
-rw-r--r-- | src/vnet/mpls/error.def | 31 | ||||
-rw-r--r-- | src/vnet/mpls/interface.c | 121 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.c | 511 | ||||
-rw-r--r-- | src/vnet/mpls/mpls.h | 172 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_features.c | 156 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_lookup.c | 531 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_output.c | 479 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_tunnel.c | 787 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_tunnel.h | 98 | ||||
-rw-r--r-- | src/vnet/mpls/mpls_types.h | 39 | ||||
-rw-r--r-- | src/vnet/mpls/node.c | 303 | ||||
-rw-r--r-- | src/vnet/mpls/packet.h | 125 | ||||
-rw-r--r-- | src/vnet/mpls/pg.c | 71 |
13 files changed, 3424 insertions, 0 deletions
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def new file mode 100644 index 00000000..de8b9665 --- /dev/null +++ b/src/vnet/mpls/error.def @@ -0,0 +1,31 @@ +/* + * mpls_error.def: mpls errors + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +mpls_error (NONE, "no error") +mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") +mpls_error (UNSUPPORTED_VERSION, "unsupported version") +mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated") +mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated") +mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst") +mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired") +mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") +mpls_error (BAD_LABEL, "invalid FIB id in label") +mpls_error (NOT_IP4, "non-ip4 packets dropped") +mpls_error (DISALLOWED_FIB, "disallowed FIB id") +mpls_error (NOT_ENABLED, "MPLS not enabled") +mpls_error (DROP, "MPLS DROP DPO") +mpls_error (PUNT, "MPLS PUNT DPO") diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c new file mode 100644 index 00000000..692a2d1e --- /dev/null +++ b/src/vnet/mpls/interface.c @@ -0,0 +1,121 @@ +/* + * interface.c: mpls interfaces + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/dpo/classify_dpo.h> + + +u8 +mpls_sw_interface_is_enabled (u32 sw_if_index) +{ + mpls_main_t * mm = &mpls_main; + + if (vec_len(mm->mpls_enabled_by_sw_if_index) < sw_if_index) + return (0); + + return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); +} + +void +mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable) +{ + fib_node_index_t lfib_index; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; + + lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + vec_validate(mm->fib_index_by_sw_if_index, 0); + mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; + } + else + { + ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; + + fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_MPLS); + } + + vnet_feature_enable_disable ("mpls-input", "mpls-lookup", sw_if_index, + is_enable, 0, 0); + +} + +static clib_error_t * +mpls_interface_enable_disable (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index, enable; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "enable")) + enable = 1; + else if (unformat (input, "disable")) + enable = 0; + else + { + error = clib_error_return (0, "expected 'enable' or 'disable'", + format_unformat_error, input); + goto done; + } + + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + + done: + return error; +} + +/*? + * This command enables an interface to accpet MPLS packets + * + * @cliexpar + * @cliexstart{set interface mpls} + * set interface mpls GigEthernet0/8/0 enable + * @cliexend + ?*/ +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { + .path = "set interface mpls", + .function = mpls_interface_enable_disable, + .short_help = "Enable/Disable an interface for MPLS forwarding", +}; diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c new file mode 100644 index 00000000..0e610e17 --- /dev/null +++ b/src/vnet/mpls/mpls.c @@ -0,0 +1,511 @@ +/* + * mpls.c: mpls + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/mpls_fib.h> + +const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS; + +mpls_main_t mpls_main; + +u8 * format_mpls_unicast_label (u8 * s, va_list * args) +{ + mpls_label_t label = va_arg (*args, mpls_label_t); + + switch (label) { + case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_ROUTER_ALERT_LABEL: + s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING); + break; + case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IPV6_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_IMPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING); + break; + case MPLS_IETF_ELI_LABEL: + s = format (s, "%s", MPLS_IETF_ELI_STRING); + break; + case MPLS_IETF_GAL_LABEL: + s = format (s, "%s", MPLS_IETF_GAL_STRING); + break; + default: + s = format (s, "%d", label); + break; + } + return s; +} + +uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args) +{ + mpls_label_t *label = va_arg (*args, mpls_label_t*); + + if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING)) + *label = MPLS_IETF_ROUTER_ALERT_LABEL; + else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING)) + *label = MPLS_IETF_IMPLICIT_NULL_LABEL; + else if (unformat (input, "%d", label)) + ; + + return (1); +} + +u8 * format_mpls_eos_bit (u8 * s, va_list * args) +{ + mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t); + + ASSERT(eb <= MPLS_EOS); + + s = format(s, "%s", mpls_eos_bit_names[eb]); + + return (s); +} + +u8 * format_mpls_header (u8 * s, va_list * args) +{ + mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t); + + return (format(s, "[%U:%d:%d:%U]", + format_mpls_unicast_label, + vnet_mpls_uc_get_label(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl), + format_mpls_eos_bit, + vnet_mpls_uc_get_s(hdr.label_exp_s_ttl))); +} + +uword +unformat_mpls_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + mpls_unicast_header_t _h, * h = &_h; + u32 label, label_exp_s_ttl; + + if (! unformat (input, "MPLS %d", &label)) + return 0; + + label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF; + h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl); + + /* Add gre, mpls headers to result. */ + { + void * p; + u32 h_n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, h_n_bytes); + clib_memcpy (p, h, h_n_bytes); + } + + return 1; +} + +uword +unformat_mpls_label_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 label; + + if (!unformat (input, "MPLS: label %d", &label)) + return 0; + + label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */; + + *result = clib_host_to_net_u32 (label); + return 1; +} + +u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + u32 label = h->label_exp_s_ttl; + + s = format (s, "label %d exp %d, s %d, ttl %d", + vnet_mpls_uc_get_label (label), + vnet_mpls_uc_get_exp (label), + vnet_mpls_uc_get_s (label), + vnet_mpls_uc_get_ttl (label)); + return s; +} + +u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + mpls_unicast_header_t h_host; + + h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl); + + return format (s, "%U", format_mpls_unicast_header_host_byte_order, + &h_host); +} + +int +mpls_dest_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + show_mpls_fib_t * r2 = a2; + + return clib_net_to_host_u32(r1->dest) - clib_net_to_host_u32(r2->dest); +} + +int +mpls_fib_index_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + show_mpls_fib_t * r2 = a2; + + return r1->fib_index - r2->fib_index; +} + +int +mpls_label_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + show_mpls_fib_t * r2 = a2; + + return r1->label - r2->label; +} + +static clib_error_t * +vnet_mpls_local_label (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + u32 table_id, is_del, is_ip; + mpls_label_t local_label; + mpls_label_t out_label; + clib_error_t * error; + mpls_eos_bit_t eos; + vnet_main_t * vnm; + fib_prefix_t pfx; + + vnm = vnet_get_main(); + error = NULL; + is_ip = 0; + table_id = 0; + eos = MPLS_EOS; + is_del = 0; + local_label = MPLS_LABEL_INVALID; + memset(&pfx, 0, sizeof(pfx)); + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + memset(&rpath, 0, sizeof(rpath)); + + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "eos")) + pfx.fp_eos = MPLS_EOS; + else if (unformat (line_input, "non-eos")) + pfx.fp_eos = MPLS_NON_EOS; + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, + &pfx.fp_addr.ip4, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + is_ip = 1; + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, + &pfx.fp_addr.ip6, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + is_ip = 1; + } + else if (unformat (line_input, "via %U %U weight %u", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + + else if (unformat (line_input, "via %U %U weight %u", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip4_address, + &rpath.frp_addr.ip4, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip6_address, + &rpath.frp_addr.ip6, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + /* + * the recursive next-hops are by default in the same table + * as the prefix + */ + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "%d", &local_label)) + ; + else if (unformat (line_input, + "ip4-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + pfx.fp_payload_proto = DPO_PROTO_IP4; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "ip6-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + pfx.fp_payload_proto = DPO_PROTO_IP6; + } + else if (unformat (line_input, + "mpls-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_proto = FIB_PROTOCOL_MPLS; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + pfx.fp_payload_proto = DPO_PROTO_MPLS; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, + &out_label)) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then labels"); + goto done; + } + vec_add1(rpaths[vec_len(rpaths)-1].frp_label_stack, out_label); + } + else + { + error = clib_error_return (0, "unkown input: %U", + format_unformat_error, line_input); + goto done; + } + + } + + if (MPLS_LABEL_INVALID == local_label) + { + error = clib_error_return (0, "local-label required: %U", + format_unformat_error, input); + goto done; + } + + + if (is_ip) + { + u32 fib_index = fib_table_find(pfx.fp_proto, table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, "%U table-id %d does not exist", + format_fib_protocol, pfx.fp_proto, table_id); + goto done; + } + + if (is_del) + { + fib_table_entry_local_label_remove(fib_index, &pfx, local_label); + } + else + { + fib_table_entry_local_label_add(fib_index, &pfx, local_label); + } + } + else + { + fib_node_index_t lfe, fib_index; + u32 fi; + + if (NULL == rpaths) + { + error = clib_error_return(0 , "no paths"); + goto done; + } + + pfx.fp_proto = FIB_PROTOCOL_MPLS; + pfx.fp_len = 21; + pfx.fp_label = local_label; + pfx.fp_payload_proto = fib_proto_to_dpo(rpaths[0].frp_proto); + + /* + * the CLI parsing stored table Ids, swap to FIB indicies + */ + if (FIB_NODE_INDEX_INVALID == rpath.frp_sw_if_index) + { + fi = fib_table_id_find_fib_index(dpo_proto_to_fib(pfx.fp_payload_proto), + rpaths[0].frp_fib_index); + + if (~0 == fi) + { + error = clib_error_return(0 , "%U Via table %d does not exist", + format_dpo_proto, pfx.fp_payload_proto, + rpaths[0].frp_fib_index); + goto done; + } + rpaths[0].frp_fib_index = fi; + } + + fib_index = mpls_fib_index_from_table_id(table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, "MPLS table-id %d does not exist", + table_id); + goto done; + } + + lfe = fib_table_entry_path_add2(fib_index, + &pfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + rpaths); + + if (FIB_NODE_INDEX_INVALID == lfe) + { + error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d", + format_mpls_unicast_label, local_label, + format_mpls_eos_bit, eos, + table_id); + goto done; + } + } + +done: + return error; +} + +VLIB_CLI_COMMAND (mpls_local_label_command, static) = { + .path = "mpls local-label", + .function = vnet_mpls_local_label, + .short_help = "Create/Delete MPL local labels", +}; + +int +mpls_fib_reset_labels (u32 fib_id) +{ + // FIXME + return 0; +} + +static clib_error_t * +mpls_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + mm->vlib_main = vm; + mm->vnet_main = vnet_get_main(); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + return vlib_call_init_function (vm, mpls_input_init); +} + +VLIB_INIT_FUNCTION (mpls_init); + +mpls_main_t * mpls_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, mpls_init); + return &mpls_main; +} + diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h new file mode 100644 index 00000000..b6fdbce7 --- /dev/null +++ b/src/vnet/mpls/mpls.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_mpls_h +#define included_vnet_mpls_h + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/mpls/mpls_types.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/adj/adj.h> + +typedef enum { +#define mpls_error(n,s) MPLS_ERROR_##n, +#include <vnet/mpls/error.def> +#undef mpls_error + MPLS_N_ERROR, +} mpls_error_t; + +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indeices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performace. + */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + +/** + * @brief Definition of a callback for receiving MPLS interface state change + * notifications + */ +typedef void (*mpls_interface_state_change_callback_t)(u32 sw_if_index, + u32 is_enable); + +typedef struct { + /* MPLS FIB index for each software interface */ + u32 *fib_index_by_sw_if_index; + + /** A pool of all the MPLS FIBs */ + struct fib_table_t_ *fibs; + + /** A hash table to lookup the mpls_fib by table ID */ + uword *fib_index_by_table_id; + + /* Feature arc indices */ + u8 input_feature_arc_index; + u8 output_feature_arc_index; + + /* IP4 enabled count by software interface */ + u8 * mpls_enabled_by_sw_if_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mpls_main_t; + +extern mpls_main_t mpls_main; + +extern clib_error_t * mpls_feature_init(vlib_main_t * vm); + +format_function_t format_mpls_protocol; +format_function_t format_mpls_encap_index; + +format_function_t format_mpls_eos_bit; +format_function_t format_mpls_unicast_header_net_byte_order; +format_function_t format_mpls_unicast_label; +format_function_t format_mpls_header; + +extern vlib_node_registration_t mpls_input_node; +extern vlib_node_registration_t mpls_policy_encap_node; +extern vlib_node_registration_t mpls_output_node; +extern vlib_node_registration_t mpls_midchain_node; + +/* Parse mpls protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_mpls_protocol_host_byte_order; +unformat_function_t unformat_mpls_protocol_net_byte_order; +unformat_function_t unformat_mpls_label_net_byte_order; +unformat_function_t unformat_mpls_unicast_label; + +/* Parse mpls header. */ +unformat_function_t unformat_mpls_header; +unformat_function_t unformat_pg_mpls_header; + +void mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable); + +u8 mpls_sw_interface_is_enabled (u32 sw_if_index); + +int mpls_fib_reset_labels (u32 fib_id); + +#define foreach_mpls_input_next \ +_(DROP, "error-drop") \ +_(LOOKUP, "mpls-lookup") + +typedef enum { +#define _(s,n) MPLS_INPUT_NEXT_##s, + foreach_mpls_input_next +#undef _ + MPLS_INPUT_N_NEXT, +} mpls_input_next_t; + +#define foreach_mpls_lookup_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(L2_OUTPUT, "l2-output") + +// FIXME remove. +typedef enum { +#define _(s,n) MPLS_LOOKUP_NEXT_##s, + foreach_mpls_lookup_next +#undef _ + MPLS_LOOKUP_N_NEXT, +} mpls_lookup_next_t; + +#define foreach_mpls_output_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_OUTPUT_NEXT_##s, + foreach_mpls_output_next +#undef _ + MPLS_OUTPUT_N_NEXT, +} mpls_output_next_t; + +typedef struct { + u32 fib_index; + u32 entry_index; + u32 dest; + u32 s_bit; + u32 label; +} show_mpls_fib_t; + +int +mpls_dest_cmp(void * a1, void * a2); + +int +mpls_fib_index_cmp(void * a1, void * a2); + +int +mpls_label_cmp(void * a1, void * a2); + +#endif /* included_vnet_mpls_h */ diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c new file mode 100644 index 00000000..a7593c55 --- /dev/null +++ b/src/vnet/mpls/mpls_features.c @@ -0,0 +1,156 @@ +/* + * mpls_features.c: MPLS input and output features + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/mpls/mpls.h> + +always_inline uword +mpls_terminate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int error_code) +{ + u32 * buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, + buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + mpls_input_node.index, + error_code); + + return n_packets; +} + +static uword +mpls_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT)); +} + +VLIB_REGISTER_NODE (mpls_punt_node) = { + .function = mpls_punt, + .name = "mpls-punt", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt) + +static uword +mpls_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP)); +} + +VLIB_REGISTER_NODE (mpls_drop_node) = { + .function = mpls_drop, + .name = "mpls-drop", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop) + +static uword +mpls_not_enabled (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED)); +} + +VLIB_REGISTER_NODE (mpls_not_enabled_node) = { + .function = mpls_not_enabled, + .name = "mpls-not-enabled", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled) + +VNET_FEATURE_ARC_INIT (mpls_input, static) = +{ + .arc_name = "mpls-input", + .start_nodes = VNET_FEATURES ("mpls-input"), + .arc_index_ptr = &mpls_main.input_feature_arc_index, +}; + +VNET_FEATURE_INIT (mpls_lookup, static) = { + .arc_name = "mpls-input", + .node_name = "mpls-lookup", + .runs_before = VNET_FEATURES ("mpls-not-enabled"), +}; + +VNET_FEATURE_INIT (mpls_not_enabled, static) = { + .arc_name = "mpls-input", + .node_name = "mpls-not-enabled", + .runs_before = VNET_FEATURES (0), /* not before any other features */ +}; + +VNET_FEATURE_ARC_INIT (mpls_output, static) = +{ + .arc_name = "mpls-output", + .start_nodes = VNET_FEATURES ("mpls-output", "mpls-midchain"), + .arc_index_ptr = &mpls_main.output_feature_arc_index, +}; + +/* Built-in ip4 tx feature path definition */ +VNET_FEATURE_INIT (mpls_interface_output, static) = { + .arc_name = "mpls-output", + .node_name = "interface-output", + .runs_before = 0, /* not before any other features */ +}; + +static clib_error_t * +mpls_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + mpls_main_t * mm = &mpls_main; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0); + + vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled", sw_if_index, + is_add, 0, 0); + vnet_feature_enable_disable ("mpls-output", "interface-output", sw_if_index, + is_add, 0, 0); + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del); + + diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c new file mode 100644 index 00000000..2d34cbde --- /dev/null +++ b/src/vnet/mpls/mpls_lookup.c @@ -0,0 +1,531 @@ +/* + * mpls_lookup.c: MPLS lookup + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/dpo/load_balance.h> + +vlib_node_registration_t mpls_lookup_node; + +typedef struct { + u32 next_index; + u32 lb_index; + u32 lfib_index; + u32 label_net_byte_order; + u32 hash; +} mpls_lookup_trace_t; + +static u8 * +format_mpls_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); + + s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %d" + "label %d eos %d", + t->next_index, t->lfib_index, t->lb_index, t->hash, + vnet_mpls_uc_get_label( + clib_net_to_host_u32(t->label_net_byte_order)), + vnet_mpls_uc_get_s(t->label_net_byte_order)); + return s; +} + +/* + * Compute flow hash. + * We'll use it to select which adjacency to use for this flow. And other things. + */ +always_inline u32 +mpls_compute_flow_hash (const mpls_unicast_header_t * hdr, + flow_hash_config_t flow_hash_config) +{ + // FIXME + return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl)); +} + +static inline uword +mpls_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + u32 lbi1, next1, lfib_index1, bi1, hash_c1; + const mpls_unicast_header_t * h1; + const load_balance_t *lb1; + const dpo_id_t *dpo1; + vlib_buffer_t * b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE); + } + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b1)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); + lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1); + lb0 = load_balance_get(lbi0); + lb1 = load_balance_get(lbi1); + + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + { + hash_c1 = vnet_buffer (b1)->ip.flow_hash = + mpls_compute_flow_hash(h1, lb1->lb_hash_config); + } + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + vlib_increment_combined_counter + (cm, cpu_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, b1)); + + /* + * before we pop the label copy th values we need to maintain. + * The label header is in network byte order. + * last byte is the TTL. + * bits 2 to 4 inclusive are the EXP bits + */ + vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3]; + vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1; + vnet_buffer (b0)->mpls.first = 1; + vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3]; + vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1; + vnet_buffer (b1)->mpls.first = 1; + + /* + * pop the label that was just used in the lookup + */ + vlib_buffer_advance(b0, sizeof(*h0)); + vlib_buffer_advance(b1, sizeof(*h1)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->hash = hash_c0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->next_index = next1; + tr->lb_index = lbi1; + tr->lfib_index = lfib_index1; + tr->hash = hash_c1; + tr->label_net_byte_order = h1->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0); + lb0 = load_balance_get(lbi0); + + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + /* + * before we pop the label copy th values we need to maintain. + * The label header is in network byte order. + * last byte is the TTL. + * bits 2 to 4 inclusive are the EXP bits + */ + vnet_buffer (b0)->mpls.ttl = ((char*)h0)[3]; + vnet_buffer (b0)->mpls.exp = (((char*)h0)[2] & 0xe) >> 1; + vnet_buffer (b0)->mpls.first = 1; + + /* + * pop the label that was just used in the lookup + */ + vlib_buffer_advance(b0, sizeof(*h0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->hash = hash_c0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_lookup_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_lookup_node) = { + .function = mpls_lookup, + .name = "mpls-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .sibling_of = "ip4-lookup", + + .format_buffer = format_mpls_header, + .format_trace = format_mpls_lookup_trace, + .unformat_buffer = unformat_mpls_header, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup) + +typedef struct { + u32 next_index; + u32 lb_index; + u32 hash; +} mpls_load_balance_trace_t; + +static u8 * +format_mpls_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *); + + s = format (s, "MPLS: next [%d], LB index %d hash %d", + t->next_index, t->lb_index, t->hash); + return s; +} + +always_inline uword +mpls_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + u32 next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + mpls_lookup_next_t next0, next1; + const load_balance_t *lb0, *lb1; + vlib_buffer_t * p0, *p1; + u32 pi0, lbi0, hc0, pi1, lbi1, hc1; + const mpls_unicast_header_t *mpls0, *mpls1; + const dpo_id_t *dpo0, *dpo1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (mpls0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (mpls0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + mpls0 = vlib_buffer_get_current (p0); + mpls1 = vlib_buffer_get_current (p1); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + lb1 = load_balance_get(lbi1); + + /* + * this node is for via FIBs we can re-use the hash value from the + * to node if present. + * We don't want to use the same hash value at each level in the recursion + * graph as that would lead to polarisation + */ + hc0 = vnet_buffer (p0)->ip.flow_hash = 0; + hc1 = vnet_buffer (p1)->ip.flow_hash = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) + { + hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); + } + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash)) + { + hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1; + } + else + { + hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1); + } + } + + dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1)); + dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, cpu_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, p1)); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->hash = hc0; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const mpls_unicast_header_t *mpls0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + mpls0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + + hc0 = vnet_buffer (p0)->ip.flow_hash = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash)) + { + hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0); + } + } + + dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_load_balance_node) = { + .function = mpls_load_balance, + .name = "mpls-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + + .format_trace = format_mpls_load_balance_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c new file mode 100644 index 00000000..8292a0cb --- /dev/null +++ b/src/vnet/mpls/mpls_output.c @@ -0,0 +1,479 @@ +/* + * mpls_output.c: MPLS Adj rewrite + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[64 - 1*sizeof(u32)]; +} mpls_output_trace_t; + +static u8 * +format_mpls_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + uword indent = format_get_indent (s); + + s = format (s, "adj-idx %d : %U flow hash: 0x%08x", + t->adj_index, + format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, t->adj_index, + t->packet_data, sizeof (t->packet_data)); + return s; +} + +static inline uword +mpls_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int is_midchain) +{ + u32 n_left_from, next_index, * from, * to_next, cpu_index; + vlib_node_runtime_t * error_node; + u32 n_left_to_next; + + cpu_index = os_get_cpu_number(); + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + ip_adjacency_t * adj1; + mpls_unicast_header_t *hdr1; + vlib_buffer_t * p1; + u32 pi1, rw_len1, adj_index1, next1, error1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + ASSERT(adj_index1); + + adj0 = adj_get(adj_index0); + adj1 = adj_get(adj_index1); + hdr0 = vlib_buffer_get_current (p0); + hdr1 = vlib_buffer_get_current (p1); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index1, + /* packet increment */ 0, + /* byte increment */ rw_len1-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= + adj0[0].rewrite_header.max_l3_packet_bytes)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + error0 = IP4_ERROR_NONE; + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + } + else + { + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_DROP; + } + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <= + adj1[0].rewrite_header.max_l3_packet_bytes)) + { + p1->current_data -= rw_len1; + p1->current_length += rw_len1; + + vnet_buffer (p1)->sw_if_index[VLIB_TX] = + adj1[0].rewrite_header.sw_if_index; + next1 = adj1[0].rewrite_header.next_index; + error1 = IP4_ERROR_NONE; + + if (is_midchain) + { + adj1->sub_type.midchain.fixup_func(vm, adj1, p1); + } + } + else + { + error1 = IP4_ERROR_MTU_EXCEEDED; + next1 = MPLS_OUTPUT_NEXT_DROP; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + } + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p1, sizeof (*tr)); + tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p1)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + hdr0 = vlib_buffer_get_current (p0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], hdr0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <= + adj0[0].rewrite_header.max_l3_packet_bytes)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + error0 = IP4_ERROR_NONE; + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func(vm, adj0, p0); + } + } + else + { + error0 = IP4_ERROR_MTU_EXCEEDED; + next0 = MPLS_OUTPUT_NEXT_DROP; + } + p0->error = error_node->errors[error0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +static inline uword +mpls_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 0)); +} + +VLIB_REGISTER_NODE (mpls_output_node) = { + .function = mpls_output, + .name = "mpls-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, + foreach_mpls_output_next +#undef _ + }, + + .format_trace = format_mpls_output_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output) + +static inline uword +mpls_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame, /* is_midchain */ 1)); +} + +VLIB_REGISTER_NODE (mpls_midchain_node) = { + .function = mpls_midchain, + .name = "mpls-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_output_trace, + + .sibling_of = "mpls-output", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain) + +/** + * @brief Next index values from the MPLS incomplete adj node + */ +#define foreach_mpls_adj_incomplete_next \ +_(DROP, "error-drop") \ +_(IP4, "ip4-arp") \ +_(IP6, "ip6-discover-neighbor") + +typedef enum { +#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s, + foreach_mpls_adj_incomplete_next +#undef _ + MPLS_ADJ_INCOMPLETE_N_NEXT, +} mpls_adj_incomplete_next_t; + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_adj_incomplete_trace_t_ +{ + u32 next; +} mpls_adj_incomplete_trace_t; + + +/** + * @brief Graph node for incomplete MPLS adjacency. + * This node will push traffic to either the v4-arp or v6-nd node + * based on the next-hop proto of the adj. + * We pay a cost for this 'routing' node, but an incomplete adj is the + * exception case. + */ +static inline uword +mpls_adj_incomplete (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, next0, adj_index0; + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + + if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto)) + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4; + } + else + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_adj_incomplete_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static u8 * +format_mpls_adj_incomplete_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_adj_incomplete_trace_t * t; + uword indent; + + t = va_arg (*args, mpls_adj_incomplete_trace_t *); + indent = format_get_indent (s); + + s = format (s, "%Unext:%d", + format_white_space, indent, + t->next); + return (s); +} + +VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { + .function = mpls_adj_incomplete, + .name = "mpls-adj-incomplete", + .format_trace = format_mpls_adj_incomplete_trace, + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n, + foreach_mpls_adj_incomplete_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node, + mpls_adj_incomplete) diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c new file mode 100644 index 00000000..8d1e30a3 --- /dev/null +++ b/src/vnet/mpls/mpls_tunnel.c @@ -0,0 +1,787 @@ +/* + * mpls_tunnel.c: MPLS tunnel interfaces (i.e. for RSVP-TE) + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls_tunnel.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib_path_list.h> +#include <vnet/adj/adj_midchain.h> + +/** + * @brief pool of tunnel instances + */ +static mpls_tunnel_t *mpls_tunnel_pool; + +/** + * @brief Pool of free tunnel SW indices - i.e. recycled indices + */ +static u32 * mpls_tunnel_free_hw_if_indices; + +/** + * @brief DB of SW index to tunnel index + */ +static u32 *mpls_tunnel_db; + +/** + * @brief Get a tunnel object from a SW interface index + */ +static mpls_tunnel_t* +mpls_tunnel_get_from_sw_if_index (u32 sw_if_index) +{ + if ((vec_len(mpls_tunnel_db) < sw_if_index) || + (~0 == mpls_tunnel_db[sw_if_index])) + return (NULL); + + return (pool_elt_at_index(mpls_tunnel_pool, + mpls_tunnel_db[sw_if_index])); +} + +/** + * @brief Return true if the label stack is imp-null only + */ +static fib_forward_chain_type_t +mpls_tunnel_get_fwd_chain_type (const mpls_tunnel_t *mt) +{ + if ((1 == vec_len(mt->mt_label_stack)) && + (mt->mt_label_stack[0] == MPLS_IETF_IMPLICIT_NULL_LABEL)) + { + /* + * the only label in the label stack is implicit null + * we need to build an IP chain. + */ + if (FIB_PROTOCOL_IP4 == fib_path_list_get_proto(mt->mt_path_list)) + { + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + } + else + { + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + } + } + else + { + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } +} + +/** + * @brief Build a rewrite string for the MPLS tunnel. + * + * We have choices here; + * 1 - have an Adjacency with a zero length string and stack it on + * MPLS label objects + * 2 - put the label header rewrites in the adjacency string. + * + * We choose 2 since it results in fewer graph nodes in the egress path + */ +static u8* +mpls_tunnel_build_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) +{ + mpls_unicast_header_t *muh; + mpls_tunnel_t *mt; + u8 *rewrite; + u32 mti, ii; + + rewrite = NULL; + mti = mpls_tunnel_db[sw_if_index]; + mt = pool_elt_at_index(mpls_tunnel_pool, mti); + + /* + * The vector must be allocated as u8 so the length is correct + */ + ASSERT(0 < vec_len(mt->mt_label_stack)); + vec_validate(rewrite, (sizeof(*muh) * vec_len(mt->mt_label_stack)) - 1); + ASSERT(rewrite); + muh = (mpls_unicast_header_t *)rewrite; + + /* + * The last (inner most) label in the stack may be EOS, all the rest Non-EOS + */ + for (ii = 0; ii < vec_len(mt->mt_label_stack)-1; ii++) + { + vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); + vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); + vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); + vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); + muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); + } + + vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); + vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); + vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); + + if ((VNET_LINK_MPLS == link_type) && + (mt->mt_label_stack[ii] != MPLS_IETF_IMPLICIT_NULL_LABEL)) + { + vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); + } + else + { + vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_EOS); + } + + muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); + + return (rewrite); +} + +/** + * mpls_tunnel_stack + * + * 'stack' (resolve the recursion for) the tunnel's midchain adjacency + */ +static void +mpls_tunnel_stack (adj_index_t ai) +{ + ip_adjacency_t *adj; + mpls_tunnel_t *mt; + u32 sw_if_index; + + adj = adj_get(ai); + sw_if_index = adj->rewrite_header.sw_if_index; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + /* + * find the adjacency that is contributed by the FIB path-list + * that this tunnel resovles via, and use it as the next adj + * in the midchain + */ + if (vnet_hw_interface_get_flags(vnet_get_main(), + mt->mt_hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + dpo_id_t dpo = DPO_INVALID; + + fib_path_list_contribute_forwarding(mt->mt_path_list, + mpls_tunnel_get_fwd_chain_type(mt), + &dpo); + + if (DPO_LOAD_BALANCE == dpo.dpoi_type) + { + /* + * we don't support multiple paths, so no need to load-balance. + * pull the first and only choice and stack directly on that. + */ + load_balance_t *lb; + + lb = load_balance_get (dpo.dpoi_index); + + ASSERT(1 == lb->lb_n_buckets); + + dpo_copy(&dpo, load_balance_get_bucket_i (lb, 0)); + } + + adj_nbr_midchain_stack(ai, &dpo); + dpo_reset(&dpo); + } + else + { + adj_nbr_midchain_unstack(ai); + } +} + +/** + * @brief Call back when restacking all adjacencies on a MPLS interface + */ +static adj_walk_rc_t +mpls_adj_walk_cb (adj_index_t ai, + void *ctx) +{ + mpls_tunnel_stack(ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +static void +mpls_tunnel_restack (mpls_tunnel_t *mt) +{ + fib_protocol_t proto; + + /* + * walk all the adjacencies on the MPLS interface and restack them + */ + FOR_EACH_FIB_PROTOCOL(proto) + { + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); + } +} + +static clib_error_t * +mpls_tunnel_admin_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + vnet_hw_interface_t * hi; + mpls_tunnel_t *mt; + + hi = vnet_get_hw_interface (vnm, hw_if_index); + + mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index); + + if (NULL == mt) + return (NULL); + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + vnet_hw_interface_set_flags (vnm, hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); + else + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + + mpls_tunnel_restack(mt); + + return (NULL); +} + +/** + * @brief Fixup the adj rewrite post encap. This is a no-op since the + * rewrite is a stack of labels. + */ +static void +mpls_tunnel_fixup (vlib_main_t *vm, + ip_adjacency_t *adj, + vlib_buffer_t *b0) +{ +} + +static void +mpls_tunnel_update_adj (vnet_main_t * vnm, + u32 sw_if_index, + adj_index_t ai) +{ + adj_nbr_midchain_update_rewrite( + ai, mpls_tunnel_fixup, + ADJ_MIDCHAIN_FLAG_NONE, + mpls_tunnel_build_rewrite(vnm, sw_if_index, + adj_get_link_type(ai), + NULL)); + + mpls_tunnel_stack(ai); +} + +static u8 * +format_mpls_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-tunnel%d", dev_instance); +} + +static u8 * +format_mpls_tunnel_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + return (format (s, "MPLS-tunnel: id %d\n", dev_instance)); +} + +/** + * @brief Packet trace structure + */ +typedef struct mpls_tunnel_trace_t_ +{ + /** + * Tunnel-id / index in tunnel vector + */ + u32 tunnel_id; +} mpls_tunnel_trace_t; + +static u8 * +format_mpls_tunnel_tx_trace (u8 * s, + va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_tunnel_trace_t * t = va_arg (*args, mpls_tunnel_trace_t *); + + s = format (s, "MPLS: tunnel %d", t->tunnel_id); + return s; +} + +/** + * @brief TX function. Only called L2. L3 traffic uses the adj-midchains + */ +static uword +mpls_tunnel_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + vnet_interface_output_runtime_t * rd = (void *) node->runtime_data; + const mpls_tunnel_t *mt; + + mt = pool_elt_at_index(mpls_tunnel_pool, rd->dev_instance); + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * FIXME DUAL LOOP + */ + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = rd->dev_instance; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, mt->mt_l2_tx_arc); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (mpls_tunnel_class) = { + .name = "MPLS tunnel device", + .format_device_name = format_mpls_tunnel_name, + .format_device = format_mpls_tunnel_device, + .format_tx_trace = format_mpls_tunnel_tx_trace, + .tx_function = mpls_tunnel_tx, + .admin_up_down_function = mpls_tunnel_admin_up_down, +}; + +VNET_HW_INTERFACE_CLASS (mpls_tunnel_hw_interface_class) = { + .name = "MPLS-Tunnel", +// .format_header = format_mpls_eth_header_with_length, +// .unformat_header = unformat_mpls_eth_header, + .update_adjacency = mpls_tunnel_update_adj, + .build_rewrite = mpls_tunnel_build_rewrite, + .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, +}; + +const mpls_tunnel_t * +mpls_tunnel_get (u32 mti) +{ + return (pool_elt_at_index(mpls_tunnel_pool, mti)); +} + +/** + * @brief Walk all the MPLS tunnels + */ +void +mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb, + void *ctx) +{ + u32 mti; + + pool_foreach_index(mti, mpls_tunnel_pool, + ({ + cb(mti, ctx); + })); +} + +void +vnet_mpls_tunnel_del (u32 sw_if_index) +{ + mpls_tunnel_t *mt; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + if (ADJ_INDEX_INVALID != mt->mt_l2_adj) + adj_unlock(mt->mt_l2_adj); + + vec_free(mt->mt_label_stack); + + vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); + pool_put(mpls_tunnel_pool, mt); + mpls_tunnel_db[sw_if_index] = ~0; +} + +void +vnet_mpls_tunnel_add (fib_route_path_t *rpaths, + mpls_label_t *label_stack, + u8 l2_only, + u32 *sw_if_index) +{ + vnet_hw_interface_t * hi; + mpls_tunnel_t *mt; + vnet_main_t * vnm; + u32 mti; + + vnm = vnet_get_main(); + pool_get(mpls_tunnel_pool, mt); + memset (mt, 0, sizeof (*mt)); + mti = mt - mpls_tunnel_pool; + fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); + mt->mt_l2_adj = ADJ_INDEX_INVALID; + + /* + * Create a new, or re=use and old, tunnel HW interface + */ + if (vec_len (mpls_tunnel_free_hw_if_indices) > 0) + { + mt->mt_hw_if_index = + mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; + _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + hi->hw_instance = mti; + hi->dev_instance = mti; + } + else + { + mt->mt_hw_if_index = vnet_register_interface( + vnm, + mpls_tunnel_class.index, + mti, + mpls_tunnel_hw_interface_class.index, + mti); + hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + } + + /* + * Add the new tunnel to the tunnel DB - key:SW if index + */ + mt->mt_sw_if_index = hi->sw_if_index; + vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); + mpls_tunnel_db[mt->mt_sw_if_index] = mti; + + /* + * construct a path-list from the path provided + */ + mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + + mt->mt_label_stack = vec_dup(label_stack); + + if (l2_only) + { + mt->mt_l2_adj = + adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), + VNET_LINK_ETHERNET, + &zero_addr, + mt->mt_sw_if_index); + + mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); + } + + *sw_if_index = mt->mt_sw_if_index; +} + +static clib_error_t * +vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + u8 is_del = 0; + u8 l2_only = 0; + fib_route_path_t rpath, *rpaths = NULL; + mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; + u32 sw_if_index; + + memset(&rpath, 0, sizeof(rpath)); + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "del %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + vec_add1(labels, out_label); + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (is_del) + { + vnet_mpls_tunnel_del(sw_if_index); + } + else + { + if (0 == vec_len(labels)) + return clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + + vec_add1(rpaths, rpath); + vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); + } + + vec_free(labels); + vec_free(rpaths); + + return (NULL); +} + +/*? + * This command create a uni-directional MPLS tunnel + * + * @cliexpar + * @cliexstart{create mpls tunnel} + * create mpls tunnel via 10.0.0.1 GigEthernet0/8/0 out-label 33 out-label 34 + * @cliexend + ?*/ +VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { + .path = "mpls tunnel", + .short_help = + "mpls tunnel via [addr] [interface] [out-labels]", + .function = vnet_create_mpls_tunnel_command_fn, +}; + +static u8 * +format_mpls_tunnel (u8 * s, va_list * args) +{ + mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *); + int ii; + + s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", + mt - mpls_tunnel_pool, + mt->mt_sw_if_index, + mt->mt_hw_if_index); + s = format(s, "\n label-stack:\n "); + for (ii = 0; ii < vec_len(mt->mt_label_stack); ii++) + { + s = format(s, "%d, ", mt->mt_label_stack[ii]); + } + s = format(s, "\n via:\n"); + s = fib_path_list_format(mt->mt_path_list, s); + s = format(s, "\n"); + + return (s); +} + +static clib_error_t * +show_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_tunnel_t * mt; + u32 mti = ~0; + + if (pool_elts (mpls_tunnel_pool) == 0) + vlib_cli_output (vm, "No MPLS tunnels configured..."); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &mti)) + ; + else + break; + } + + if (~0 == mti) + { + pool_foreach (mt, mpls_tunnel_pool, + ({ + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + })); + } + else + { + if (pool_is_free_index(mpls_tunnel_pool, mti)) + return clib_error_return (0, "Not atunnel index %d", mti); + + mt = pool_elt_at_index(mpls_tunnel_pool, mti); + + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + } + + return 0; +} + +/*? + * This command to show MPLS tunnels + * + * @cliexpar + * @cliexstart{sh mpls tunnel 2} + * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5 + * label-stack: + * 3, + * via: + * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ] + * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved, + * 10.0.0.2 loop0 + * [@0]: ipv4 via 10.0.0.2 loop0: IP4: de:ad:00:00:00:00 -> 00:00:11:aa:bb:cc + * @cliexend + ?*/ +VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { + .path = "show mpls tunnel", + .function = show_mpls_tunnel_command_fn, +}; + +static mpls_tunnel_t * +mpls_tunnel_from_fib_node (fib_node_t *node) +{ +#if (CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_MPLS_TUNNEL == node->fn_type); +#endif + return ((mpls_tunnel_t*) (((char*)node) - + STRUCT_OFFSET_OF(mpls_tunnel_t, mt_node))); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +mpls_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + mpls_tunnel_restack(mpls_tunnel_from_fib_node(node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t* +mpls_tunnel_fib_node_get (fib_node_index_t index) +{ + mpls_tunnel_t * mt; + + mt = pool_elt_at_index(mpls_tunnel_pool, index); + + return (&mt->mt_node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +mpls_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The MPLS MPLS tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by MPLS MPLS tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t mpls_vft = { + .fnv_get = mpls_tunnel_fib_node_get, + .fnv_last_lock = mpls_tunnel_last_lock_gone, + .fnv_back_walk = mpls_tunnel_back_walk, +}; + +static clib_error_t * +mpls_tunnel_init (vlib_main_t *vm) +{ + fib_node_register_type(FIB_NODE_TYPE_MPLS_TUNNEL, &mpls_vft); + + return 0; +} +VLIB_INIT_FUNCTION(mpls_tunnel_init); diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h new file mode 100644 index 00000000..ee56c0fc --- /dev/null +++ b/src/vnet/mpls/mpls_tunnel.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MPLS_TUNNEL_H__ +#define __MPLS_TUNNEL_H__ + +#include <vnet/mpls/mpls.h> + +/** + * @brief A uni-directional MPLS tunnel + */ +typedef struct mpls_tunnel_t_ +{ + /** + * @brief The tunnel hooks into the FIB control plane graph. + */ + fib_node_t mt_node; + + /** + * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET + * adjacency + */ + adj_index_t mt_l2_adj; + + /** + * @brief on a L2 tunnel this is the VLIB arc from the L2-tx to the l2-midchain + */ + u32 mt_l2_tx_arc; + + /** + * @brief The path-list over which the tunnel's destination is reachable + */ + fib_node_index_t mt_path_list; + + /** + * @brief sibling index on the path-list so notifications are received. + */ + u32 mt_sibling_index; + + /** + * @brief The Label stack to apply to egress packets + */ + mpls_label_t *mt_label_stack; + + /** + * @brief Flag to indicate the tunnel is only for L2 traffic, that is + * this tunnel belongs in a bridge domain. + */ + u8 mt_l2_only; + + /** + * @brief The HW interface index of the tunnel interfaces + */ + u32 mt_hw_if_index; + + /** + * @brief The SW interface index of the tunnel interfaces + */ + u32 mt_sw_if_index; + +} mpls_tunnel_t; + +/** + * @brief Create a new MPLS tunnel + */ +extern void vnet_mpls_tunnel_add (fib_route_path_t *rpath, + mpls_label_t *label_stack, + u8 l2_only, + u32 *sw_if_index); + +extern void vnet_mpls_tunnel_del (u32 sw_if_index); + +extern const mpls_tunnel_t *mpls_tunnel_get(u32 index); + +/** + * @brief Callback function invoked while walking MPLS tunnels + */ +typedef void (*mpls_tunnel_walk_cb_t)(u32 index, void *ctx); + +/** + * @brief Walk all the MPLS tunnels + */ +extern void mpls_tunnel_walk(mpls_tunnel_walk_cb_t cb, + void *ctx); + +#endif diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h new file mode 100644 index 00000000..d7c629df --- /dev/null +++ b/src/vnet/mpls/mpls_types.h @@ -0,0 +1,39 @@ +#ifndef __MPLS_TYPES_H__ +#define __MPLS_TYPES_H__ + +#define MPLS_IETF_MIN_LABEL 0x00000 +#define MPLS_IETF_MAX_LABEL 0xfffff + +#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000 +#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f + +#define MPLS_IETF_MIN_UNRES_LABEL 0x00010 +#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000 +#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001 +#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002 +#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003 +#define MPLS_IETF_ELI_LABEL 0x00007 +#define MPLS_IETF_GAL_LABEL 0x0000D + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null" +#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul" +#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null" +#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul" +#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert" +#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl" +#define MPLS_IETF_ELI_STRING "entropy-label-indicator" +#define MPLS_IETF_ELI_BRIEF_STRING "eli" +#define MPLS_IETF_GAL_STRING "gal" +#define MPLS_IETF_GAL_BRIEF_STRING "gal" + +#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1) + +#define MPLS_LABEL_IS_REAL(_lbl) \ + (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ + ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) + +#endif diff --git a/src/vnet/mpls/node.c b/src/vnet/mpls/node.c new file mode 100644 index 00000000..18100912 --- /dev/null +++ b/src/vnet/mpls/node.c @@ -0,0 +1,303 @@ +/* + * node.c: MPLS input + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> +#include <vnet/feature/feature.h> + +typedef struct { + u32 next_index; + u32 label_host_byte_order; +} mpls_input_trace_t; + +static u8 * +format_mpls_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *); + char * next_name; + + next_name = "BUG!"; + +#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; + foreach_mpls_input_next; +#undef _ + + s = format (s, "MPLS: next %s[%d] label %d ttl %d", + next_name, t->next_index, + vnet_mpls_uc_get_label(t->label_host_byte_order), + vnet_mpls_uc_get_ttl(t->label_host_byte_order)); + + return s; +} + +vlib_node_registration_t mpls_input_node; + +typedef struct { + u32 last_label; + u32 last_inner_fib_index; + u32 last_outer_fib_index; + mpls_main_t * mpls_main; +} mpls_input_runtime_t; + +static inline uword +mpls_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + mpls_input_runtime_t * rt; + mpls_main_t * mm; + u32 cpu_index = os_get_cpu_number(); + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + mm = rt->mpls_main; + /* + * Force an initial lookup every time, in case the control-plane + * changed the label->FIB mapping. + */ + rt->last_label = ~0; + + next_index = node->cached_next_index; + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_MPLS); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 label0, bi0, next0, sw_if_index0; + u32 label1, bi1, next1, sw_if_index1; + mpls_unicast_header_t *h0, *h1; + vlib_buffer_t *b0, *b1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE); + } + + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + h0 = vlib_buffer_get_current (b0); + h1 = vlib_buffer_get_current (b1); + + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX]; + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl); + + /* TTL expired? */ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + } + else + { + next0 = MPLS_INPUT_NEXT_LOOKUP; + vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + } + + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0)) + { + next1 = MPLS_INPUT_NEXT_DROP; + b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + } + else + { + next1 = MPLS_INPUT_NEXT_LOOKUP; + vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1); + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_input_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->label_host_byte_order = label0; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_input_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->next_index = next1; + tr->label_host_byte_order = label1; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + mpls_unicast_header_t * h0; + u32 label0; + u32 next0 = 0; + u32 sw_if_index0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + /* TTL expired? */ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + } + else + { + vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_input_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->label_host_byte_order = label0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static uword +mpls_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame); +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_input_node) = { + .function = mpls_input, + .name = "mpls-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_unicast_header_net_byte_order, + .format_trace = format_mpls_input_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input) + +static void +mpls_setup_nodes (vlib_main_t * vm) +{ + mpls_input_runtime_t * rt; + pg_node_t * pn; + + pn = pg_get_node (mpls_input_node.index); + pn->unformat_edit = unformat_pg_mpls_header; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + mpls_input_node.index); +} + +static clib_error_t * mpls_input_init (vlib_main_t * vm) +{ + clib_error_t * error; + + error = vlib_call_init_function (vm, mpls_init); + if (error) + clib_error_report (error); + + mpls_setup_nodes (vm); + + return 0; +} + +VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/src/vnet/mpls/packet.h b/src/vnet/mpls/packet.h new file mode 100644 index 00000000..bc67445b --- /dev/null +++ b/src/vnet/mpls/packet.h @@ -0,0 +1,125 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A label value only, i.e. 20bits. + */ +typedef u32 mpls_label_t; + +typedef struct { + /* Label: top 20 bits [in network byte order] */ + /* Experimental: 3 bits ... */ + /* S (bottom of label stack): 1 bit */ + /* TTL: 8 bits */ + mpls_label_t label_exp_s_ttl; +} mpls_unicast_header_t; + +typedef enum mpls_eos_bit_t_ +{ + MPLS_NON_EOS = 0, + MPLS_EOS = 1, +} mpls_eos_bit_t; + +#define MPLS_EOS_BITS { \ + [MPLS_NON_EOS] = "neos", \ + [MPLS_EOS] = "eos", \ +} + +#define FOR_EACH_MPLS_EOS_BIT(_eos) \ + for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++) + +#define MPLS_ENTRY_LABEL_OFFSET 0 +#define MPLS_ENTRY_LABEL_SHIFT 12 +#define MPLS_ENTRY_LABEL_MASK 0x000fffff +#define MPLS_ENTRY_LABEL_BITS \ + (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT) + +#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */ +#define MPLS_ENTRY_EXP_SHIFT 9 +#define MPLS_ENTRY_EXP_MASK 0x07 +#define MPLS_ENTRY_EXP(mpls) \ + (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK) +#define MPLS_ENTRY_EXP_BITS \ + (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT) + +#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */ +#define MPLS_ENTRY_EOS_SHIFT 8 +#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */ +#define MPLS_ENTRY_EOS(mpls) \ + (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK) +#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT) + +#define MPLS_ENTRY_TTL_OFFSET 3 /* byte offset to ttl field */ +#define MPLS_ENTRY_TTL_SHIFT 0 +#define MPLS_ENTRY_TTL_MASK 0xff +#define MPLS_ENTRY_TTL(mpls) \ + (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK) +#define MPLS_ENTRY_TTL_BITS \ + (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT) + +static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl) +{ + return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT); +} + +static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EXP(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EOS(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_TTL(label_exp_s_ttl)); +} + +static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl, + u32 value) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) | + ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT)); +} + +static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl, + u32 exp) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) | + ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT)); +} + +static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl, + u32 eos) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) | + ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT)); +} + +static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl, + u32 ttl) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) | + ((ttl & MPLS_ENTRY_TTL_MASK))); +} + +#endif /* included_vnet_mpls_packet_h */ diff --git a/src/vnet/mpls/pg.c b/src/vnet/mpls/pg.c new file mode 100644 index 00000000..6ff86e32 --- /dev/null +++ b/src/vnet/mpls/pg.c @@ -0,0 +1,71 @@ +/* + * pg.c: packet generator mpls interface + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + pg_edit_t label; +} pg_mpls_header_t; + +static inline void +pg_mpls_header_init (pg_mpls_header_t * e) +{ + pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl); +} + +uword +unformat_pg_mpls_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_mpls_header_t * h; + vlib_main_t * vm = vlib_get_main(); + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t), + &group_index); + pg_mpls_header_init (h); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_mpls_label_net_byte_order, &h->label)) + goto done; + + { + pg_node_t * pg_node = 0; + vlib_node_t * ip_lookup_node; + + ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input"); + ASSERT (ip_lookup_node); + + pg_node = pg_get_node (ip_lookup_node->index); + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + |