diff options
Diffstat (limited to 'vnet/vnet/mpls')
-rw-r--r-- | vnet/vnet/mpls/error.def | 31 | ||||
-rw-r--r-- | vnet/vnet/mpls/interface.c | 2276 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls.c | 968 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls.h | 362 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls_features.c | 254 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls_lookup.c | 278 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls_output.c | 343 | ||||
-rw-r--r-- | vnet/vnet/mpls/mpls_types.h | 39 | ||||
-rw-r--r-- | vnet/vnet/mpls/node.c | 223 | ||||
-rw-r--r-- | vnet/vnet/mpls/packet.h | 125 | ||||
-rw-r--r-- | vnet/vnet/mpls/pg.c | 71 | ||||
-rw-r--r-- | vnet/vnet/mpls/policy_encap.c | 180 |
12 files changed, 5150 insertions, 0 deletions
diff --git a/vnet/vnet/mpls/error.def b/vnet/vnet/mpls/error.def new file mode 100644 index 00000000..de8b9665 --- /dev/null +++ b/vnet/vnet/mpls/error.def @@ -0,0 +1,31 @@ +/* + * mpls_error.def: mpls errors + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +mpls_error (NONE, "no error") +mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") +mpls_error (UNSUPPORTED_VERSION, "unsupported version") +mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated") +mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated") +mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst") +mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired") +mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") +mpls_error (BAD_LABEL, "invalid FIB id in label") +mpls_error (NOT_IP4, "non-ip4 packets dropped") +mpls_error (DISALLOWED_FIB, "disallowed FIB id") +mpls_error (NOT_ENABLED, "MPLS not enabled") +mpls_error (DROP, "MPLS DROP DPO") +mpls_error (PUNT, "MPLS PUNT DPO") diff --git a/vnet/vnet/mpls/interface.c b/vnet/vnet/mpls/interface.c new file mode 100644 index 00000000..9ef4c293 --- /dev/null +++ b/vnet/vnet/mpls/interface.c @@ -0,0 +1,2276 @@ +/* + * interface.c: mpls interfaces + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/dpo/classify_dpo.h> + +static uword mpls_gre_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE 1 + +static uword +mpls_gre_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... 
+ */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_gre_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->gre_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + clib_memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + clib_memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + 
vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->outer_fib_index; + + /* mpls-post-rewrite takes it from here... */ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + next1 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_id = t1 - gm->gre_tunnels; + tr->length = b1->current_length; + tr->src.as_u32 = t1->tunnel_src.as_u32; + tr->dst.as_u32 = t1->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t1->encap_index; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_gre_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + clib_memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX fib index */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_mpls_gre_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-gre%d", dev_instance); +} + +static u8 * format_mpls_gre_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-GRE tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_gre_device_class) = { + .name = "MPLS-GRE tunnel device", + .format_device_name = format_mpls_gre_tunnel_name, + .format_device = format_mpls_gre_device, + .format_tx_trace = format_mpls_gre_tx_trace, + .tx_function = mpls_gre_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (mpls_gre_device_class, + mpls_gre_interface_tx) + +VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = { + .name = "MPLS-GRE", + .format_header = format_mpls_gre_header_with_length, +#if 0 + .unformat_header = unformat_mpls_gre_header, +#endif + .set_rewrite = mpls_gre_set_rewrite, +}; + + +static uword mpls_eth_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + 
* Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_ETH_OUTPUT_NEXT_OUTPUT 1 + +static uword +mpls_eth_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... + */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_eth_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. 
Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->eth_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + clib_memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + clib_memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->tx_sw_if_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + next1 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + clib_memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t1 - gm->eth_tunnels; + tr->tx_sw_if_index = t1->tx_sw_if_index; + tr->mpls_encap_index = t1->encap_index; + tr->length = b1->current_length; + hi1 = vnet_get_sup_hw_interface (vnm, t1->tx_sw_if_index); + clib_memcpy (tr->dst, hi1->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_eth_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + clib_memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX interface */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + + /* Send the packet */ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if 
(PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + clib_memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} +/* + * Format callback: append the interface name "mpls-eth<dev_instance>" to s. + */ +static u8 * format_mpls_eth_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-eth%d", dev_instance); +} +/* + * Format callback: append a one-line device description to s. + * The verbose argument is consumed from the va_list but not used. + */ +static u8 * format_mpls_eth_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-ETH tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_eth_device_class) = { + .name = "MPLS-ETH tunnel device", + .format_device_name = format_mpls_eth_tunnel_name, + .format_device = format_mpls_eth_device, + .format_tx_trace = format_mpls_eth_tx_trace, + .tx_function = mpls_eth_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + +VLIB_DEVICE_TX_FUNCTION_MULTIARCH (mpls_eth_device_class, + mpls_eth_interface_tx) + +VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = { + .name = "MPLS-ETH", + .format_header = format_mpls_eth_header_with_length, +#if 0 + .unformat_header = unformat_mpls_eth_header, +#endif + .set_rewrite = mpls_eth_set_rewrite, +}; + +/** + * A conversion of DPO next object types to VLIB graph next nodes from + * the mpls_post_rewrite node + */ +static const int
dpo_next_2_mpls_post_rewrite[DPO_LAST] = { + [DPO_LOAD_BALANCE] = IP_LOOKUP_NEXT_LOAD_BALANCE, +}; + +static uword +mpls_post_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 n_left_from, next_index, * from, * to_next; + u16 old_l0 = 0; //, old_l1 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* u32 bi0, bi1; */ + /* vlib_buffer_t * b0, * b1; */ + /* ip4_header_t * ip0, * ip1; */ + /* u32 next0; */ + /* u32 next1; */ + /* u16 new_l0, new_l1, adj_index0, adj_index1; */ + /* ip_csum_t sum0, sum1; */ + /* ip_adjacency_t *adj0, *adj1; */ + + /* /\* Prefetch next iteration. *\/ */ + /* { */ + /* vlib_buffer_t * p2, * p3; */ + + /* p2 = vlib_get_buffer (vm, from[2]); */ + /* p3 = vlib_get_buffer (vm, from[3]); */ + + /* vlib_prefetch_buffer_header (p2, LOAD); */ + /* vlib_prefetch_buffer_header (p3, LOAD); */ + + /* CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ + /* CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ + /* } */ + + /* bi0 = from[0]; */ + /* bi1 = from[1]; */ + /* to_next[0] = bi0; */ + /* to_next[1] = bi1; */ + /* from += 2; */ + /* to_next += 2; */ + /* n_left_to_next -= 2; */ + /* n_left_from -= 2; */ + + + /* b0 = vlib_get_buffer (vm, bi0); */ + /* b1 = vlib_get_buffer (vm, bi1); */ + /* ip0 = vlib_buffer_get_current (b0); */ + /* ip1 = vlib_buffer_get_current (b1); */ + + /* /\* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] *\/ */ + + /* /\* set the GRE (outer) ip packet length, fix the bloody checksum *\/ */ + /* sum0 = ip0->checksum; */ + /* sum1 = ip1->checksum; */ + + /* /\* old_l0, old_l1 always 0, see the rewrite 
setup *\/ */ + /* new_l0 = */ + /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); */ + /* new_l1 = */ + /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); */ + + /* sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, */ + /* length /\* changed member *\/); */ + /* sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, */ + /* length /\* changed member *\/); */ + /* ip0->checksum = ip_csum_fold (sum0); */ + /* ip1->checksum = ip_csum_fold (sum1); */ + /* ip0->length = new_l0; */ + /* ip1->length = new_l1; */ + + /* /\* replace the TX adj in the packet with the next in the chain *\/ */ + /* adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; */ + /* adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; */ + + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + + /* ASSERT(adj0->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ + /* ASSERT(adj1->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ + + /* adj_index0 = adj0->sub_type.midchain.adj_index; */ + /* adj_index1 = adj1->sub_type.midchain.adj_index; */ + + /* vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; */ + /* vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1; */ + + /* /\* get the next adj in the chain to determine the next graph node *\/ */ + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + + /* next0 = adj0->lookup_next_index; */ + /* next1 = adj1->lookup_next_index; */ + + /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ + /* to_next, n_left_to_next, */ + /* bi0, bi1, next0, next1); */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + u32 bi0; + vlib_buffer_t * b0; + ip4_header_t * ip0; + u32 next0; + u16 new_l0, adj_index0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = 
vlib_buffer_get_current (b0); + + /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + + /* set the GRE (outer) ip packet length, fix the bloody checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + /* replace the TX adj in the packet with the next in the chain */ + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + + ASSERT(adj_index0); + + adj0 = ip_get_adjacency (lm, adj_index0); + ASSERT(adj0->sub_type.midchain.next_dpo.dpoi_index != ADJ_INDEX_INVALID); + adj_index0 = adj0->sub_type.midchain.next_dpo.dpoi_index; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; + + /* get the next adj in the chain to determine the next graph node */ + ASSERT(0); + next0 = 0; //adj0->sub_type.midchain.next_dpo.dpoi_next; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_post_rewrite_node) = { + .function = mpls_post_rewrite, + .name = "mpls-post-rewrite", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = 0, + + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_post_rewrite_node, mpls_post_rewrite) + +/** + * Build the rewrite string (IPv4 header, GRE header and MPLS label stack) + * for an MPLS-GRE tunnel. Returns the new vector, or 0 when no encap label + * stack is found for the tunnel's inner FIB index and destination. + */ +static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) +{ + ip4_header_t * ip0; + ip4_gre_and_mpls_header_t * h0; + u8 * rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB */ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, t->tunnel_dst.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->tunnel_dst); + return 0; + } + + vec_validate (rewrite_data, sizeof (*h0) + + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + memset (rewrite_data, 0, sizeof (*h0)); + + h0 = (ip4_gre_and_mpls_header_t *) rewrite_data; + /* Copy the encap label stack */ + lp0 = h0->labels; + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + ip0 = &h0->ip4; + h0->gre.protocol = clib_host_to_net_u16(GRE_PROTOCOL_mpls_unicast); + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + /* $$$ fixup ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->tunnel_src.as_u32; + ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + return (rewrite_data); +} + +/** + * Return the MPLS enable count recorded for sw_if_index (non-zero means + * enabled), or 0 when the index lies beyond the per-interface vector. + */ +u8 +mpls_sw_interface_is_enabled (u32 sw_if_index) +{ + mpls_main_t * mm = &mpls_main; + + /* valid indices are 0 .. vec_len - 1, so an index equal to vec_len is out of range too */ + if (vec_len(mm->mpls_enabled_by_sw_if_index) <= sw_if_index) + return (0); + + return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); +} + +/** + * Enable or disable MPLS on an interface. Calls are reference counted per + * sw_if_index; the FIB binding, the RX lookup feature and the registered + * state-change callbacks are only touched on the 0 <-> 1 transitions. + */ +void +mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable) +{ + mpls_interface_state_change_callback_t *callback; + vlib_main_t * vm = vlib_get_main(); + ip_config_main_t * cm = &mm->rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + u32
lookup_feature_index; + fib_node_index_t lfib_index; + u32 ci; + + /* make sure the per-interface enable count exists (new slots start at 0) */ + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; + + lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + /* size the vector for this interface before indexing it */ + vec_validate(mm->fib_index_by_sw_if_index, sw_if_index); + mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; + } + else + { + ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; + + /* last user gone: drop the lock taken on the MPLS FIB at enable time */ + fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_MPLS); + } + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + lookup_feature_index = mm->mpls_rx_feature_lookup; + + /* add or remove the MPLS lookup feature in this interface's RX config */ + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + + /* + * notify all interested clients of the change of state.
+ */ + vec_foreach(callback, mm->mpls_interface_state_change_callbacks) + { + (*callback)(sw_if_index, is_enable); + } +} +/* + * Recover the MPLS GRE tunnel from its embedded FIB node. + * NOTE(review): the plain cast assumes mgt_node sits at the start of + * mpls_gre_tunnel_t -- confirm against the struct definition. + */ +static mpls_gre_tunnel_t * +mpls_gre_tunnel_from_fib_node (fib_node_t *node) +{ +#if (CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_MPLS_GRE_TUNNEL == node->fn_type); +#endif + return ((mpls_gre_tunnel_t*)node); +} + +/* + * mpls_gre_tunnel_stack + * + * 'stack' (resolve the recursion for) the tunnel's midchain adjacency + */ +static void +mpls_gre_tunnel_stack (mpls_gre_tunnel_t *mgt) +{ + /* + * find the adjacency that is contributed by the FIB entry + * that this tunnel resolves via, and use it as the next adj + * in the midchain + */ + adj_nbr_midchain_stack(mgt->adj_index, + fib_entry_contribute_ip_forwarding(mgt->fei)); +} + +/** + * Function definition to backwalk a FIB node: re-stack the tunnel's + * midchain adjacency, then let the walk continue. + */ +static fib_node_back_walk_rc_t +mpls_gre_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + mpls_gre_tunnel_stack(mpls_gre_tunnel_from_fib_node(node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + * (the index is the tunnel's position in the gre_tunnels pool) + */ +static fib_node_t* +mpls_gre_tunnel_fib_node_get (fib_node_index_t index) +{ + mpls_gre_tunnel_t * mgt; + mpls_main_t * mm; + + mm = &mpls_main; + mgt = pool_elt_at_index(mm->gre_tunnels, index); + + return (&mgt->mgt_node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +mpls_gre_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The MPLS GRE tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph.
+ */ +const static fib_node_vft_t mpls_gre_vft = { + .fnv_get = mpls_gre_tunnel_fib_node_get, + .fnv_last_lock = mpls_gre_tunnel_last_lock_gone, + .fnv_back_walk = mpls_gre_tunnel_back_walk, +}; + +static mpls_gre_tunnel_t * +mpls_gre_tunnel_find (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 inner_fib_index) +{ + mpls_main_t * mm = &mpls_main; + mpls_gre_tunnel_t *tp; + int found_tunnel = 0; + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->gre_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_src, src, sizeof (*src)) + && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + found_tunnel = 1; + goto found; + } + })); + +found: + if (found_tunnel) + { + return (tp); + } + return (NULL); +} + +int mpls_gre_tunnel_add (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_index, + u32 outer_fib_index, + u32 * tunnel_sw_if_index, + u8 l2_only) +{ + mpls_main_t * mm = &mpls_main; + gre_main_t * gm = &gre_main; + vnet_main_t * vnm = vnet_get_main(); + mpls_gre_tunnel_t *tp; + ip_adjacency_t adj; + u8 * rewrite_data; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + const ip46_address_t zero_nh = { + .ip4.as_u32 = 0, + }; + + tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index); + + /* Add, duplicate */ + if (NULL != tp) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->gre_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + fib_node_init(&tp->mgt_node, + FIB_NODE_TYPE_MPLS_GRE_TUNNEL); + + if (vec_len (mm->free_gre_sw_if_indices) > 0) + { + hw_if_index = + 
mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1]; + _vec_len (mm->free_gre_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->gre_tunnels; + hi->hw_instance = tp - mm->gre_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels, + mpls_gre_hw_interface_class.index, + tp - mm->gre_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + + ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vec_validate(ip4_main.fib_index_by_sw_if_index, *tunnel_sw_if_index); + ip4_main.fib_index_by_sw_if_index[*tunnel_sw_if_index] = outer_fib_index; + + tp->hw_if_index = hw_if_index; + + /* bind the MPLS and IPv4 FIBs to the interface and enable */ + vec_validate(mm->fib_index_by_sw_if_index, hi->sw_if_index); + mm->fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index; + mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 1); + ip4_main.fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index; + ip4_sw_interface_enable_disable(hi->sw_if_index, 1); + + tp->tunnel_src.as_u32 = src->as_u32; + tp->tunnel_dst.as_u32 = dst->as_u32; + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->outer_fib_index = outer_fib_index; + tp->encap_index = e - mm->encaps; + tp->l2_only = l2_only; + + /* Add the tunnel to the hash table of all GRE tunnels */ + u64 key = (u64)src->as_u32 << 32 | (u64)dst->as_u32; + + ASSERT(NULL == hash_get (gm->tunnel_by_key, key)); + hash_set (gm->tunnel_by_key, key, tp - mm->gre_tunnels); + + /* Create the adjacency and add 
to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_gre_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + } + pool_put (mm->gre_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + /* Save a copy of the rewrite data for L2 x-connect */ + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + if (!l2_only) + { + /* + * source the FIB entry for the tunnel's destination + * and become a child thereof. The tunnel will then get poked + * when the forwarding for the entry updates, and the tunnel can + * re-stack accordingly + */ + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = *dst, + } + }; + + tp->fei = fib_table_entry_special_add(outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + tp->sibling_index = fib_entry_child_add(tp->fei, + FIB_NODE_TYPE_MPLS_GRE_TUNNEL, + tp - mm->gre_tunnels); + + /* + * create and update the midchain adj this tunnel sources. + * This is the adj the route we add below will resolve to. 
+ */ + tp->adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &zero_nh, + hi->sw_if_index); + + adj_nbr_midchain_update_rewrite(tp->adj_index, + mpls_post_rewrite_node.index, + rewrite_data); + mpls_gre_tunnel_stack(tp); + + /* + * Update the route for the tunnel's subnet to point through the tunnel + */ + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + }, + }; + + fib_table_entry_update_one_path(inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + &zero_nh, + hi->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } + + return 0; +} + +static int +mpls_gre_tunnel_del (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_index, + u32 outer_fib_index, + u32 * tunnel_sw_if_index, + u8 l2_only) +{ + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + gre_main_t * gm = &gre_main; + mpls_gre_tunnel_t *tp; + vnet_hw_interface_t * hi; + + tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index); + + /* Delete, and we can't find the tunnel */ + if (NULL == tp) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + + if (!l2_only) + { + /* + * unsource the FIB entry for the tunnel's destination + */ + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = *dst, + } + }; + + fib_entry_child_remove(tp->fei, + tp->sibling_index); + fib_table_entry_special_remove(outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR); + tp->fei = FIB_NODE_INDEX_INVALID; + adj_unlock(tp->adj_index); + + /* + * unsource the route for the tunnel's subnet + */ + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + 
}, + }; + + fib_table_entry_delete(inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE); + } + + /* + * Remove the tunnel from the GRE tunnel hash. The key has to be built the + * same way mpls_gre_tunnel_add builds it when it calls hash_set: source + * address in the upper 32 bits, destination address in the lower 32 bits. + */ + u64 key = ((u64)tp->tunnel_src.as_u32 << 32 | + (u64)tp->tunnel_dst.as_u32); + + hash_unset (gm->tunnel_by_key, key); + mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 0); + ip4_sw_interface_enable_disable(hi->sw_if_index, 0); + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + fib_node_deinit(&tp->mgt_node); + pool_put (mm->gre_tunnels, tp); + + return 0; +} + +int +vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + u32 inner_fib_index = 0; + u32 outer_fib_index = 0; + u32 dummy; + ip4_main_t * im = &ip4_main; + + /* No questions, no answers */ + if (NULL == tunnel_sw_if_index) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + inner_fib_index = p[0]; + } + + if (outer_fib_id != 0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, outer_fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_FIB; + outer_fib_index = p[0]; + } + + if (is_add) + { + return (mpls_gre_tunnel_add(src,dst,intfc, mask_width, + inner_fib_index, + outer_fib_index, + tunnel_sw_if_index, + l2_only)); + } + else + { + return (mpls_gre_tunnel_del(src,dst,intfc, mask_width, + inner_fib_index, + outer_fib_index, + tunnel_sw_if_index, + l2_only)); + } +} + +/* + * Remove all mpls tunnels in the specified fib. + * + * Every tunnel whose inner FIB index matches is torn down the way + * mpls_gre_tunnel_del tears down a single tunnel: its FIB entries are + * unsourced, its key is removed from the GRE tunnel hash, MPLS and IPv4 + * are disabled on its interface, and the interface is set admin down and + * recycled before the pool element is freed. + * + * Returns 0 on success, VNET_API_ERROR_NO_SUCH_INNER_FIB when fib_id + * does not map to an IPv4 FIB index. + */ +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id) +{ + mpls_main_t * mm = &mpls_main; + gre_main_t * gm = &gre_main; + vnet_main_t * vnm = mm->vnet_main; + mpls_gre_tunnel_t *tp; + u32 fib_index = 0; + u32 * tunnels_to_delete = 0; + vnet_hw_interface_t * hi; + u64 key; + int i; + + fib_index = ip4_fib_index_from_table_id(fib_id); + if (~0 == fib_index) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + + pool_foreach (tp, mm->gre_tunnels, + ({ + if (tp->inner_fib_index == fib_index) + vec_add1 (tunnels_to_delete, tp - mm->gre_tunnels); + })); + + for (i = 0; i < vec_len(tunnels_to_delete); i++) { + tp = pool_elt_at_index (mm->gre_tunnels, tunnels_to_delete[i]); + + /* Delete, the route if not already gone */ + if (FIB_NODE_INDEX_INVALID != tp->fei && !tp->l2_only) + { + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->tunnel_dst, + } + }; + + fib_entry_child_remove(tp->fei, + tp->sibling_index); + fib_table_entry_special_remove(tp->outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR); + tp->fei = FIB_NODE_INDEX_INVALID; + adj_unlock(tp->adj_index); + + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + }, + }; + + fib_table_entry_delete(tp->inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE); + } + + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + + /* + * Drop the entry mpls_gre_tunnel_add put in the GRE tunnel hash + * (key: source in the upper 32 bits, destination in the lower 32), + * otherwise it keeps pointing at a freed pool index. + */ + key = ((u64)tp->tunnel_src.as_u32 << 32 | + (u64)tp->tunnel_dst.as_u32); + hash_unset (gm->tunnel_by_key, key); + mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 0); + ip4_sw_interface_enable_disable(hi->sw_if_index, 0); + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + fib_node_deinit(&tp->mgt_node); + pool_put 
(mm->gre_tunnels, tp); + } + + vec_free(tunnels_to_delete); + + return (0); +} + +static clib_error_t * +create_mpls_gre_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst, intfc; + int src_set = 0, dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 outer_fib_id = 0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tunnel_intfc_sw_if_index = ~0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "intfc %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + intfc_set = 1; + else if (unformat (line_input, "inner-fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!src_set) + return clib_error_return (0, "missing: src <ip-address>"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ip-address>"); + + if (!intfc_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_gre_add_del_tunnel (&src, &dst, &intfc, mask_width, + inner_fib_id, outer_fib_id, + &tunnel_intfc_sw_if_index, + l2_only, !is_del); + + switch (rv) + { + case 0: + if (!is_del) + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), tunnel_intfc_sw_if_index); + break; + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "inner fib ID %d 
doesn't exist\n", + inner_fib_id); + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + break; + + default: + return clib_error_return (0, "vnet_mpls_gre_add_del_tunnel returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { + .path = "create mpls gre tunnel", + .short_help = + "create mpls gre tunnel [del] src <addr> dst <addr> intfc <addr>/<mw>", + .function = create_mpls_gre_tunnel_command_fn, +}; + +u8 * format_mpls_encap_index (u8 * s, va_list * args) +{ + mpls_main_t * mm = va_arg (*args, mpls_main_t *); + u32 entry_index = va_arg (*args, u32); + mpls_encap_t * e; + int i; + + e = pool_elt_at_index (mm->encaps, entry_index); + + for (i = 0; i < vec_len (e->labels); i++) + s = format + (s, "%d ", vnet_mpls_uc_get_label(clib_net_to_host_u32 + (e->labels[i].label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_tunnel (u8 * s, va_list * args) +{ + mpls_gre_tunnel_t * t = va_arg (*args, mpls_gre_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + if (t->l2_only == 0) + { + s = format (s, "[%d]: src %U, dst %U, adj %U/%d, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + s = format (s, " inner fib index %d, outer fib index %d", + t->inner_fib_index, t->outer_fib_index); + } + else + { + s = format (s, "[%d]: src %U, dst %U, key %U, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + format_mpls_encap_index, mm, t->encap_index); + + s = 
format (s, " l2 interface %d, outer fib index %d", + t->hw_if_index, t->outer_fib_index); + } + + return s; +} + +u8 * format_mpls_ethernet_tunnel (u8 * s, va_list * args) +{ + mpls_eth_tunnel_t * t = va_arg (*args, mpls_eth_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + s = format (s, "[%d]: dst %U, adj %U/%d, labels %U\n", + t - mm->eth_tunnels, + format_ethernet_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + + s = format (s, " tx on %U, rx fib index %d", + format_vnet_sw_if_index_name, mm->vnet_main, t->tx_sw_if_index, + t->inner_fib_index); + + return s; +} + +static clib_error_t * +show_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_main_t * mm = &mpls_main; + mpls_gre_tunnel_t * gt; + mpls_eth_tunnel_t * et; + + if (pool_elts (mm->gre_tunnels)) + { + vlib_cli_output (vm, "MPLS-GRE tunnels"); + pool_foreach (gt, mm->gre_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_gre_tunnel, gt); + })); + } + else + vlib_cli_output (vm, "No MPLS-GRE tunnels"); + + if (pool_elts (mm->eth_tunnels)) + { + vlib_cli_output (vm, "MPLS-Ethernet tunnels"); + pool_foreach (et, mm->eth_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_ethernet_tunnel, et); + })); + } + else + vlib_cli_output (vm, "No MPLS-Ethernet tunnels"); + + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { + .path = "show mpls tunnel", + .short_help = "show mpls tunnel", + .function = show_mpls_tunnel_command_fn, +}; + + +/* force inclusion from application's main.c */ +clib_error_t *mpls_interface_init (vlib_main_t *vm) +{ + clib_error_t * error; + + fib_node_register_type(FIB_NODE_TYPE_MPLS_GRE_TUNNEL, + &mpls_gre_vft); + + if ((error = vlib_call_init_function (vm, mpls_policy_encap_init))) + return error; + + return 0; +} +VLIB_INIT_FUNCTION(mpls_interface_init); + + +static u8 * mpls_ethernet_rewrite (mpls_main_t *mm, 
mpls_eth_tunnel_t * t) +{ + u8 * rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB and adjacency address*/ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, + t->intfc_address.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->intfc_address); + return 0; + } + + vec_validate (rewrite_data, + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + + /* Copy the encap label stack */ + lp0 = (mpls_unicast_header_t *) rewrite_data; + + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + + return (rewrite_data); +} + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + mpls_eth_tunnel_t *tp; + u32 inner_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + u8 * rewrite_data; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. 
*/ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index + && FIB_NODE_INDEX_INVALID != tp->fei) + { + found_tunnel = 1; + + if (is_add) + { + if (l2_only) + return 1; + else + { + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, + intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + goto reinstall_it; + } + } + else + { + /* Delete */ + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + clib_memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + + /* Create the adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_ethernet_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + } + + pool_put (mm->eth_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + tx_sw_if_index, + ip4_rewrite_node.index, + tp->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + /* + * Prepend the (0,1,2) VLAN tag ethernet header + * we just built to the mpls header stack + */ + vec_insert (rewrite_data, adj.rewrite_header.data_bytes, 0); + clib_memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + vnet_rewrite_set_data_internal (&adj.rewrite_header, + sizeof(adj.rewrite_data), + rewrite_data, + vec_len(rewrite_data)); + + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + 
&adj_index); + + add_del_route: + + if (!l2_only) + { + const fib_prefix_t pfx = { + .fp_addr = { + .ip4 = tp->intfc_address, + }, + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + }; + if (is_add) + tp->fei = fib_table_entry_special_add(tp->inner_fib_index, + &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + adj_index); + else + { + fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API); + tp->fei = FIB_NODE_INDEX_INVALID; + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + u32 sw_if_index = ~0; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + if (!adj_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + &sw_if_index, + l2_only, !is_del); + + switch (rv) + { + case 0: + if (!is_del) + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index); + break; + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. 
+ */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + return clib_error_return (0, "vnet_mpls_ethernet_add_del_tunnel returned %d", rv); + break; + } + return 0; +} + + +VLIB_CLI_COMMAND (create_mpls_ethernet_tunnel_command, static) = { + .path = "create mpls ethernet tunnel", + .short_help = + "create mpls ethernet tunnel [del] dst <mac-addr> intfc <addr>/<mw>", + .function = create_mpls_ethernet_tunnel_command_fn, +}; + + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index) +{ + mpls_eth_tunnel_t * t; + ip_adjacency_t adj; + u8 * rewrite_data = 0; + u8 * label_start; + mpls_unicast_header_t *lp; + int i; + + if (pool_is_free_index (mm->eth_tunnels, policy_tunnel_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (mm->eth_tunnels, policy_tunnel_index); + + memset (&adj, 0, sizeof (adj)); + + /* Build L2 encap */ + vnet_rewrite_for_sw_interface + (mm->vnet_main, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + t->tx_sw_if_index, + mpls_policy_encap_node.index, + t->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + vec_validate (rewrite_data, adj.rewrite_header.data_bytes -1); + + clib_memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + /* Append the label stack */ + + vec_add2 (rewrite_data, label_start, vec_len(e->labels) * sizeof (u32)); + + lp = (mpls_unicast_header_t *) label_start; + + for (i = 0; i < vec_len(e->labels); i++) + lp[i] = e->labels[i]; + + /* Remember the rewrite data */ + e->rewrite = rewrite_data; + e->output_next_index = adj.rewrite_header.next_index; + + return 0; +} + +int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u32 classify_table_index, + u32 * new_tunnel_index, + u8 
l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + mpls_eth_tunnel_t *tp; + u32 inner_fib_index = 0; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index + && FIB_NODE_INDEX_INVALID != tp->fei) + { + found_tunnel = 1; + + if (is_add) + { + if (l2_only) + return 1; + else + { + goto reinstall_it; + } + } + else + { + /* Delete */ + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + clib_memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + tp->fei = FIB_NODE_INDEX_INVALID; + + if (new_tunnel_index) + *new_tunnel_index = tp - mm->eth_tunnels; + + add_del_route: + + if (!l2_only) + { + const fib_prefix_t pfx = { + .fp_addr = { + .ip4 = tp->intfc_address, + }, + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + }; + dpo_id_t dpo = DPO_NULL; + + if (is_add) + { + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP4, + classify_table_index)); + + tp->fei = fib_table_entry_special_dpo_add(tp->inner_fib_index, + &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); + } + else + { + fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API); + tp->fei = FIB_NODE_INDEX_INVALID; + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_policy_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 
0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 classify_table_index = (u32)~0; + u32 new_tunnel_index; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + /* sw_if_index of the tunnel interface, filled in when one is created */ + u32 tunnel_sw_if_index = ~0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "classify-table-index %d", + &classify_table_index)) + ; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (classify_table_index == ~0) + return clib_error_return (0, "missing classify_table_index"); + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + /* the parser keyword for the adjacency prefix is "adj", not "intfc" */ + if (!adj_set) + return clib_error_return (0, "missing: adj <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_policy_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + &tunnel_sw_if_index, + classify_table_index, + &new_tunnel_index, + l2_only, !is_del); + switch (rv) + { + case 0: + /* + * new_tunnel_index is an index into the tunnel pool, not a + * sw_if_index, so print the interface the call reported instead. + * Only a newly created tunnel reports one; when an existing tunnel + * is re-installed tunnel_sw_if_index stays ~0 and nothing is printed. + */ + if (!is_del && tunnel_sw_if_index != ~0) + vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), tunnel_sw_if_index); + break; + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not 
found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + return clib_error_return (0, "vnet_mpls_ethernet_add_del_policy_tunnel returned %d", rv); + break; + } + + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_ethernet_policy_tunnel_command, static) = { + .path = "create mpls ethernet policy tunnel", + .short_help = + "create mpls ethernet policy tunnel [del] dst <mac-addr> intfc <addr>/<mw>\n" + " classify-table-index <nn>", + .function = create_mpls_ethernet_policy_tunnel_command_fn, +}; + +static clib_error_t * +mpls_interface_enable_disable (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index, enable; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "enable")) + enable = 1; + else if (unformat (input, "disable")) + enable = 0; + else + { + error = clib_error_return (0, "expected 'enable' or 'disable'", + format_unformat_error, input); + goto done; + } + + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { + .path = "set interface mpls", + .function = mpls_interface_enable_disable, + .short_help = "Enable/Disable an interface for MPLS forwarding", +}; diff --git a/vnet/vnet/mpls/mpls.c b/vnet/vnet/mpls/mpls.c new file mode 100644 index 00000000..be5e882f --- /dev/null +++ b/vnet/vnet/mpls/mpls.c @@ -0,0 +1,968 @@ +/* + * mpls.c: mpls + * + * Copyright (c) 2012 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/mpls_fib.h> + +const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS; + +mpls_main_t mpls_main; + +u8 * format_mpls_unicast_label (u8 * s, va_list * args) +{ + mpls_label_t label = va_arg (*args, mpls_label_t); + + switch (label) { + case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_ROUTER_ALERT_LABEL: + s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING); + break; + case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IPV6_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_IMPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING); + break; + case MPLS_IETF_ELI_LABEL: + s = format (s, "%s", MPLS_IETF_ELI_STRING); + break; + case MPLS_IETF_GAL_LABEL: + s = format (s, "%s", MPLS_IETF_GAL_STRING); + break; + default: + s = format (s, "%d", label); + break; + } + return s; +} + +uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args) +{ + mpls_label_t *label = va_arg (*args, mpls_label_t*); + + if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING)) + 
*label = MPLS_IETF_ROUTER_ALERT_LABEL; + else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING)) + *label = MPLS_IETF_IMPLICIT_NULL_LABEL; + else if (unformat (input, "%d", label)) + ; + + return (1); +} + +u8 * format_mpls_eos_bit (u8 * s, va_list * args) +{ + mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t); + + ASSERT(eb <= MPLS_EOS); + + s = format(s, "%s", mpls_eos_bit_names[eb]); + + return (s); +} + +u8 * format_mpls_header (u8 * s, va_list * args) +{ + mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t); + + return (format(s, "[%U:%d:%d:%U]", + format_mpls_unicast_label, + vnet_mpls_uc_get_label(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl), + format_mpls_eos_bit, + vnet_mpls_uc_get_s(hdr.label_exp_s_ttl))); +} + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_gre_tx_trace_t * t = va_arg (*args, mpls_gre_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d src %U dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); + } + return s; +} + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_eth_tx_trace_t * t = va_arg (*args, mpls_eth_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d tx_sw_index %d dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + 
t->tx_sw_if_index, + format_ethernet_address, t->dst); + } + return s; +} + +u8 * format_mpls_eth_header_with_length (u8 * s, va_list * args) +{ + ethernet_header_t * h = va_arg (*args, ethernet_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ethernet header truncated"); + + s = format + (s, "ETHERNET-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header_with_length (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre header truncated"); + + s = format + (s, "GRE-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + return format (s, "%U", format_mpls_gre_header_with_length, h, 0); +} + +uword +unformat_mpls_gre_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + gre_header_t _g, * g = &_g; + mpls_unicast_header_t _h, * h = &_h; + u32 label, label_exp_s_ttl; + + if (! unformat (input, "MPLS %d", &label)) + return 0; + + g->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_mpls_unicast); + + label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF; + h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl); + + /* Add gre, mpls headers to result. 
*/ + { + void * p; + u32 g_n_bytes = sizeof (g[0]); + u32 h_n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, g_n_bytes); + clib_memcpy (p, g, g_n_bytes); + + vec_add2 (*result, p, h_n_bytes); + clib_memcpy (p, h, h_n_bytes); + } + + return 1; +} + +uword +unformat_mpls_label_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 label; + + if (!unformat (input, "MPLS: label %d", &label)) + return 0; + + label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */; + + *result = clib_host_to_net_u32 (label); + return 1; +} + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address) +{ + uword * p; + mpls_encap_t * e; + u64 key; + + key = ((u64)rx_fib<<32) | ((u64) dst_address); + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + + if (!p) + return 0; + + e = pool_elt_at_index (mm->encaps, p[0]); + return e; +} + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_encap_t * e; + u32 label_net_byte_order, label_host_byte_order; + u32 fib_index; + u64 key; + uword *p; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + key = ((u64)fib_index<<32) | ((u64) dest->as_u32); + + if (is_add) + { + pool_get (mm->encaps, e); + memset (e, 0, sizeof (*e)); + + for (i = 0; i < vec_len (labels_host_byte_order); i++) + { + mpls_unicast_header_t h; + label_host_byte_order = labels_host_byte_order[i]; + + /* Reformat label into mpls_unicast_header_t */ + label_host_byte_order <<= 12; + // FIXME NEOS AND EOS + //if (i == vec_len(labels_host_byte_order) - 1) + // label_host_byte_order |= 1<<8; /* S=1 */ + label_host_byte_order |= 0xff; /* TTL=FF */ + label_net_byte_order = clib_host_to_net_u32 (label_host_byte_order); + h.label_exp_s_ttl = label_net_byte_order; + vec_add1 (e->labels, h); + } + if (no_dst_hash == 0) + hash_set (mm->mpls_encap_by_fib_and_dest, key, e - mm->encaps); + if (indexp) + *indexp = e - mm->encaps; + if (policy_tunnel_index != ~0) + return vnet_mpls_policy_tunnel_add_rewrite (mm, e, policy_tunnel_index); + } + else + { + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + if (!p) + return VNET_API_ERROR_NO_SUCH_LABEL; + + e = pool_elt_at_index (mm->encaps, p[0]); + + vec_free (e->labels); + vec_free (e->rewrite); + pool_put(mm->encaps, e); + + if (no_dst_hash == 0) + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + } + return 0; +} + +static clib_error_t * +mpls_add_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + u32 *labels = 0; + u32 this_label; + ip4_address_t dest; + u32 policy_tunnel_index = ~0; + int no_dst_hash = 0; + int rv; + int fib_set = 0; + int dest_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &fib_id)) + fib_set = 1; + else if (unformat (input, "dest %U", unformat_ip4_address, &dest)) + dest_set = 1; + else if (unformat (input, "no-dst-hash")) + no_dst_hash = 1; + else if (unformat (input, "label %d", &this_label)) + vec_add1 (labels, this_label); + else if 
(unformat (input, "policy-tunnel %d", &policy_tunnel_index)) + ; + else + break; + } + + if (fib_set == 0) + return clib_error_return (0, "fib-id missing"); + if (dest_set == 0) + return clib_error_return (0, "destination IP address missing"); + if (vec_len (labels) == 0) + return clib_error_return (0, "label stack missing"); + + rv = vnet_mpls_add_del_encap (&dest, fib_id, labels, + policy_tunnel_index, + no_dst_hash, 0 /* indexp */, + 1 /* is_add */); + vec_free (labels); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_encap returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_encap_command, static) = { + .path = "mpls encap add", + .short_help = + "mpls encap add label <label> ... fib <id> dest <ip4-address>", + .function = mpls_add_encap_command_fn, +}; + +u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + u32 label = h->label_exp_s_ttl; + + s = format (s, "label %d exp %d, s %d, ttl %d", + vnet_mpls_uc_get_label (label), + vnet_mpls_uc_get_exp (label), + vnet_mpls_uc_get_s (label), + vnet_mpls_uc_get_ttl (label)); + return s; +} + +u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + mpls_unicast_header_t h_host; + + h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl); + + return format (s, "%U", format_mpls_unicast_header_host_byte_order, + &h_host); +} + +static clib_error_t * +mpls_del_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + ip4_address_t dest; + int rv; + + if (unformat (input, "fib %d dest %U", &fib_id, + unformat_ip4_address, &dest)) + { + rv = vnet_mpls_add_del_encap (&dest, fib_id, 0 /* labels */, + ~0 /* 
policy_tunnel_index */, + 0 /* no_dst_hash */, + 0 /* indexp */, + 0 /* is_add */); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "dest %U not in fib %d", + format_ip4_address, &dest, fib_id); + default: + break; + } + return 0; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (mpls_del_encap_command, static) = { + .path = "mpls encap delete", + .short_help = "mpls encap delete fib <id> dest <ip4-address>", + .function = mpls_del_encap_command_fn, +}; + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_decap_t * d; + u32 rx_fib_index, tx_fib_index_or_output_swif_index; + uword *p; + u64 key; + + p = hash_get (im->fib_index_by_table_id, rx_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + rx_fib_index = p[0]; + + /* L3 decap => transform fib ID to fib index */ + if (next_index == MPLS_LOOKUP_NEXT_IP4_INPUT) + { + p = hash_get (im->fib_index_by_table_id, tx_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + + tx_fib_index_or_output_swif_index = p[0]; + } + else + { + /* L2 decap, tx_fib_id is actually the output sw_if_index */ + tx_fib_index_or_output_swif_index = tx_fib_id; + } + + key = ((u64) rx_fib_index<<32) | ((u64) label_host_byte_order<<12) + | ((u64) s_bit<<8); + + p = hash_get (mm->mpls_decap_by_rx_fib_and_label, key); + + /* If deleting, or replacing an old entry */ + if (is_add == 0 || p) + { + if (is_add == 0 && p == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + d = pool_elt_at_index (mm->decaps, p[0]); + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put (mm->decaps, d); + /* Deleting, we're done... 
*/ + if (is_add == 0) + return 0; + } + + /* add decap entry... */ + pool_get (mm->decaps, d); + memset (d, 0, sizeof (*d)); + d->tx_fib_index = tx_fib_index_or_output_swif_index; + d->next_index = next_index; + + hash_set (mm->mpls_decap_by_rx_fib_and_label, key, d - mm->decaps); + + return 0; +} + +uword +unformat_mpls_gre_input_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + int rv = 0; + + if (unformat (input, "lookup")) + { + *result = MPLS_LOOKUP_NEXT_IP4_INPUT; + rv = 1; + } + else if (unformat (input, "output")) + { + *result = MPLS_LOOKUP_NEXT_L2_OUTPUT; + rv = 1; + } + return rv; +} + +static clib_error_t * +mpls_add_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 rx_fib_id = 0; + u32 tx_fib_or_sw_if_index; + u32 label; + int s_bit = 1; + u32 next_index = 1; /* ip4_lookup, see node.c */ + int tx_fib_id_set = 0; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "sw_if_index %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + else if (unformat (input, "next %U", unformat_mpls_gre_input_next, + &next_index)) + ; + else + break; + } + + if (tx_fib_id_set == 0) + return clib_error_return (0, "lookup FIB ID not set"); + if (label_set == 0) + return clib_error_return (0, "missing label"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, tx_fib_or_sw_if_index, + label, s_bit, next_index, 1 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return 
clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such tx fib / swif %d", + tx_fib_or_sw_if_index); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_decap_command, static) = { + .path = "mpls decap add", + .short_help = + "mpls decap add fib <id> label <nn> [s-bit-clear] [next-index <nn>]", + .function = mpls_add_decap_command_fn, +}; + +static clib_error_t * +mpls_del_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 rx_fib_id = 0; + u32 tx_fib_id = 0; + u32 label; + int s_bit = 1; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + } + + if (!label_set) + return clib_error_return (0, "label not set"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, + tx_fib_id /* not interesting */, + label, s_bit, + 0 /* next_index not interesting */, + 0 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such lookup fib id %d", tx_fib_id); + + case VNET_API_ERROR_NO_SUCH_LABEL: + return clib_error_return (0, "no such label %d rx fib id %d", + label, rx_fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + + +VLIB_CLI_COMMAND (mpls_del_decap_command, static) = { + .path = "mpls decap delete", + .short_help = "mpls decap delete label <label> rx-fib <id> [s-bit-clear]", + .function = mpls_del_decap_command_fn, +}; + +int +mpls_dest_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + 
show_mpls_fib_t * r2 = a2; + + return clib_net_to_host_u32(r1->dest) - clib_net_to_host_u32(r2->dest); +} + +int +mpls_fib_index_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + show_mpls_fib_t * r2 = a2; + + return r1->fib_index - r2->fib_index; +} + +int +mpls_label_cmp(void * a1, void * a2) +{ + show_mpls_fib_t * r1 = a1; + show_mpls_fib_t * r2 = a2; + + return r1->label - r2->label; +} + +static clib_error_t * +show_mpls_fib_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_fib_t * rx_fib; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + vec_add2 (records, s, 1); + s->fib_index = (u32)(key>>32); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + })); + + if (!vec_len(records)) + { + vlib_cli_output (vm, "MPLS encap table empty"); + } + /* sort output by dst address within fib */ + vec_sort_with_function (records, mpls_dest_cmp); + vec_sort_with_function (records, mpls_fib_index_cmp); + vlib_cli_output (vm, "MPLS encap table"); + vlib_cli_output (vm, "%=6s%=16s%=16s", "Table", "Dest address", "Labels"); + vec_foreach (s, records) + { + rx_fib = ip4_fib_get (s->fib_index); + vlib_cli_output (vm, "%=6d%=16U%=16U", rx_fib->table_id, + format_ip4_address, &s->dest, + format_mpls_encap_index, mm, s->entry_index); + } + + vec_free(records); + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_fib_command, static) = { + .path = "show mpls encap", + .short_help = "show mpls encap", + .function = show_mpls_fib_command_fn, +}; + +static clib_error_t * +vnet_mpls_local_label (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + clib_error_t * error = 0; + u32 table_id, is_del, is_ip; + fib_prefix_t pfx; + mpls_label_t local_label; + mpls_eos_bit_t 
eos; + + is_ip = 0; + table_id = 0; + eos = MPLS_EOS; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + memset(&rpath, 0, sizeof(rpath)); + memset(&pfx, 0, sizeof(pfx)); + + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "eos")) + eos = MPLS_EOS; + else if (unformat (line_input, "non-eos")) + eos = MPLS_NON_EOS; + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, + &pfx.fp_addr.ip4, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + is_ip = 1; + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, + &pfx.fp_addr.ip6, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + is_ip = 1; + } + else if (unformat (line_input, "%d", &local_label)) + ; + else if (unformat (line_input, + "ip4-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "ip6-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "mpls-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } + else + { + error = clib_error_return (0, "unkown input: %U", + format_unformat_error, input); + goto done; + } + + } + + if (is_ip) + { + u32 fib_index = fib_table_find(pfx.fp_proto, table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, 
"%U table-id %d does not exist", + format_fib_protocol, pfx.fp_proto, table_id); + goto done; + } + + if (is_del) + { + fib_table_entry_local_label_remove(fib_index, &pfx, local_label); + } + else + { + fib_table_entry_local_label_add(fib_index, &pfx, local_label); + } + } + else + { + fib_node_index_t lfe, fib_index; + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = local_label, + .fp_eos = eos, + }; + + fib_index = mpls_fib_index_from_table_id(table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, "MPLS table-id %d does not exist", + table_id); + goto done; + } + + lfe = fib_table_entry_path_add2(fib_index, + &prefix, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + rpaths); + + if (FIB_NODE_INDEX_INVALID == lfe) + { + error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d", + format_mpls_unicast_label, local_label, + format_mpls_eos_bit, eos, + table_id); + goto done; + } + } + +done: + return error; +} + +VLIB_CLI_COMMAND (mpls_local_label_command, static) = { + .path = "mpls local-label", + .function = vnet_mpls_local_label, + .short_help = "Create/Delete MPL local labels", +}; + +int mpls_fib_reset_labels (u32 fib_id) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + u32 fib_index; + uword *p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + if (fib_index == (u32)(key>>32)) { + vec_add2 (records, s, 1); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + } + })); + + vec_foreach (s, records) + { + key = ((u64)fib_index<<32) | ((u64) s->dest); + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + pool_put_index (mm->encaps, s->entry_index); + } + + vec_reset_length(records); + + hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label, + ({ + if (fib_index == (u32) (key>>32)) { + vec_add2 (records, s, 1); + s->entry_index = value; + s->fib_index = fib_index; + s->s_bit = key & (1<<8); + s->dest = (u32)((key & 0xFFFFFFFF)>>12); + } + })); + + vec_foreach (s, records) + { + key = ((u64) fib_index <<32) | ((u64) s->dest<<12) | + ((u64) s->s_bit); + + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put_index (mm->decaps, s->entry_index); + } + + vec_free(records); + return 0; +} + +static clib_error_t * mpls_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + mm->vlib_main = vm; + mm->vnet_main = vnet_get_main(); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + mm->mpls_encap_by_fib_and_dest = hash_create (0, sizeof (uword)); + mm->mpls_decap_by_rx_fib_and_label = hash_create (0, sizeof (uword)); + + return vlib_call_init_function (vm, mpls_input_init); +} + +VLIB_INIT_FUNCTION (mpls_init); + +mpls_main_t * mpls_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, mpls_init); + return &mpls_main; +} + diff --git a/vnet/vnet/mpls/mpls.h b/vnet/vnet/mpls/mpls.h new file mode 100644 index 00000000..2aeae49d --- /dev/null +++ b/vnet/vnet/mpls/mpls.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_mpls_gre_h +#define included_vnet_mpls_gre_h + +#include <vnet/vnet.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls/packet.h> +#include <vnet/mpls/mpls_types.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/adj/adj.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + gre_header_t gre; /* 4 bytes */ + mpls_unicast_header_t labels[0]; /* 4 bytes each */ +}) ip4_gre_and_mpls_header_t; + +extern vnet_hw_interface_class_t mpls_gre_hw_interface_class; + +typedef enum { +#define mpls_error(n,s) MPLS_ERROR_##n, +#include <vnet/mpls/error.def> +#undef mpls_error + MPLS_N_ERROR, +} mpls_gre_error_t; + +/* + * No protocol info, MPLS labels don't have a next-header field + * presumably the label field tells all... 
+ */ + +typedef struct { + fib_node_t mgt_node; + ip4_address_t tunnel_src; + ip4_address_t tunnel_dst; + ip4_address_t intfc_address; + u32 mask_width; + u32 inner_fib_index; + u32 outer_fib_index; + u32 encap_index; + u32 hw_if_index; /* L2 x-connect capable tunnel intfc */ + u8 * rewrite_data; + u8 l2_only; + fib_node_index_t fei; /* FIB Entry index for the tunnel's destination */ + adj_index_t adj_index; /* The midchain adj this tunnel creates */ + u32 sibling_index; +} mpls_gre_tunnel_t; + +typedef struct { + u8 tunnel_dst[6]; + ip4_address_t intfc_address; + u32 tx_sw_if_index; + u32 inner_fib_index; + u32 mask_width; + u32 encap_index; + u32 hw_if_index; + u8 * rewrite_data; + u8 l2_only; + fib_node_index_t fei; +} mpls_eth_tunnel_t; + +typedef struct { + mpls_unicast_header_t *labels; + /* only for policy tunnels */ + u8 * rewrite; + u32 output_next_index; +} mpls_encap_t; + +typedef struct { + u32 tx_fib_index; + u32 next_index; /* e.g. ip4/6-input, l2-input */ +} mpls_decap_t; + +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + * + * NOTE(review): MPLS_FIB_DB_SIZE evaluates to 1 << 20 while the comments below speak of a 21 bit key; confirm against the key construction that 2^20 slots are enough. + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performance. 
+ */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + +/** + * @brief Definition of a callback for receiving MPLS interface state change + * notifications + */ +typedef void (*mpls_interface_state_change_callback_t)(u32 sw_if_index, + u32 is_enable); + +typedef struct { + /* MPLS FIB index for each software interface */ + u32 *fib_index_by_sw_if_index; + + /** A pool of all the MPLS FIBs */ + struct fib_table_t_ *fibs; + + /** A hash table to lookup the mpls_fib by table ID */ + uword *fib_index_by_table_id; + + /* rx/tx interface/feature configuration. */ + ip_config_main_t rx_config_mains, tx_config_main; + + /* Built-in unicast feature path indices, see ip_feature_init_cast(...) */ + u32 mpls_rx_feature_lookup; + u32 mpls_rx_feature_not_enabled; + + /* pool of gre tunnel instances */ + mpls_gre_tunnel_t *gre_tunnels; + u32 * free_gre_sw_if_indices; + + /* pool of ethernet tunnel instances */ + mpls_eth_tunnel_t *eth_tunnels; + u32 * free_eth_sw_if_indices; + + /* Encap side: map (fib, dst_address) to mpls label stack */ + mpls_encap_t * encaps; + uword * mpls_encap_by_fib_and_dest; + + /* Decap side: map rx label to FIB */ + mpls_decap_t * decaps; + uword * mpls_decap_by_rx_fib_and_label; + + /* mpls-o-e policy tunnel next index for ip4/ip6-classify */ + u32 ip4_classify_mpls_policy_encap_next_index; + u32 ip6_classify_mpls_policy_encap_next_index; + + /* feature path configuration lists */ + vnet_ip_feature_registration_t * next_feature; + + /* Save feature results for show command */ + char **feature_nodes; + + /* MPLS enabled count by software interface */ + u8 * mpls_enabled_by_sw_if_index; + + /* Functions to call when MPLS state on an interface changes. */ + mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mpls_main_t; + +extern mpls_main_t mpls_main; + +#define VNET_MPLS_FEATURE_INIT(x,...) 
\ + __VA_ARGS__ vnet_ip_feature_registration_t uc_##x; \ +static void __vnet_add_feature_registration_uc_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_feature_registration_uc_##x (void) \ +{ \ + mpls_main_t * mm = &mpls_main; \ + uc_##x.next = mm->next_feature; \ + mm->next_feature = &uc_##x; \ +} \ +__VA_ARGS__ vnet_ip_feature_registration_t uc_##x + +extern clib_error_t * mpls_feature_init(vlib_main_t * vm); + +format_function_t format_mpls_protocol; +format_function_t format_mpls_gre_header_with_length; +format_function_t format_mpls_eth_header_with_length; +format_function_t format_mpls_encap_index; + +format_function_t format_mpls_eos_bit; +format_function_t format_mpls_unicast_header_net_byte_order; +format_function_t format_mpls_unicast_label; +format_function_t format_mpls_header; + +extern vlib_node_registration_t mpls_input_node; +extern vlib_node_registration_t mpls_policy_encap_node; +extern vlib_node_registration_t mpls_output_node; +extern vlib_node_registration_t mpls_midchain_node; + +extern vnet_device_class_t mpls_gre_device_class; + +/* Parse mpls protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_mpls_protocol_host_byte_order; +unformat_function_t unformat_mpls_protocol_net_byte_order; +unformat_function_t unformat_mpls_label_net_byte_order; +unformat_function_t unformat_mpls_gre_header; +unformat_function_t unformat_pg_mpls_gre_header; +unformat_function_t unformat_mpls_unicast_label; + +/* Parse mpls header. 
*/ +unformat_function_t unformat_mpls_header; +unformat_function_t unformat_pg_mpls_header; + +/* manually added to the interface output node in mpls.c */ +#define MPLS_GRE_OUTPUT_NEXT_LOOKUP 1 +#define MPLS_GRE_OUTPUT_NEXT_DROP VNET_INTERFACE_TX_NEXT_DROP + +void mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable); + +u8 mpls_sw_interface_is_enabled (u32 sw_if_index); + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address); + +int mpls_label_from_fib_id_and_dest (mpls_main_t *gm, u32 fib_id, + u32 dst_address, u32 *labelp); + +int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_intfc_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id); + +int mpls_fib_reset_labels (u32 fib_id); + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add); + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add); + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index); + +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + /* tunnel ip4 addresses */ + ip4_address_t src; + ip4_address_t dst; +} mpls_gre_tx_trace_t; + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args); +u8 * format_mpls_gre_header (u8 * s, va_list * args); + +#define foreach_mpls_input_next \ +_(DROP, "error-drop") \ +_(LOOKUP, 
"mpls-lookup") + +typedef enum { +#define _(s,n) MPLS_INPUT_NEXT_##s, + foreach_mpls_input_next +#undef _ + MPLS_INPUT_N_NEXT, +} mpls_input_next_t; + +#define foreach_mpls_lookup_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(L2_OUTPUT, "l2-output") + +// FIXME remove. +typedef enum { +#define _(s,n) MPLS_LOOKUP_NEXT_##s, + foreach_mpls_lookup_next +#undef _ + MPLS_LOOKUP_N_NEXT, +} mpls_lookup_next_t; + +#define foreach_mpls_output_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_OUTPUT_NEXT_##s, + foreach_mpls_output_next +#undef _ + MPLS_OUTPUT_N_NEXT, +} mpls_output_next_t; + +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* output interface */ + u32 tx_sw_if_index; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + u8 dst[6]; +} mpls_eth_tx_trace_t; + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args); + +typedef struct { + u32 fib_index; + u32 entry_index; + u32 dest; + u32 s_bit; + u32 label; +} show_mpls_fib_t; + +int +mpls_dest_cmp(void * a1, void * a2); + +int +mpls_fib_index_cmp(void * a1, void * a2); + +int +mpls_label_cmp(void * a1, void * a2); + +#endif /* included_vnet_mpls_gre_h */ diff --git a/vnet/vnet/mpls/mpls_features.c b/vnet/vnet/mpls/mpls_features.c new file mode 100644 index 00000000..d3a726af --- /dev/null +++ b/vnet/vnet/mpls/mpls_features.c @@ -0,0 +1,254 @@ +/* + * mpls_features.c: MPLS input and output features + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/mpls/mpls.h> + +always_inline uword +mpls_terminate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int error_code) +{ + u32 * buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, + buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + mpls_input_node.index, + error_code); + + return n_packets; +} + +static uword +mpls_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT)); +} + +VLIB_REGISTER_NODE (mpls_punt_node) = { + .function = mpls_punt, + .name = "mpls-punt", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt) + +static uword +mpls_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP)); +} + +VLIB_REGISTER_NODE (mpls_drop_node) = { + .function = mpls_drop, + .name = "mpls-drop", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop) + +static uword +mpls_not_enabled (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED)); +} + +VLIB_REGISTER_NODE (mpls_not_enabled_node) = { + .function = mpls_not_enabled, + .name = "mpls-not-enabled", + .vector_size = 
sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled) + +VNET_MPLS_FEATURE_INIT (mpls_lookup, static) = { + .node_name = "mpls-lookup", + .runs_before = ORDER_CONSTRAINTS {"mpls-not-enabled", 0}, + .feature_index = &mpls_main.mpls_rx_feature_lookup, +}; + +VNET_MPLS_FEATURE_INIT (mpls_not_enabled, static) = { + .node_name = "mpls-not-enabled", + .runs_before = ORDER_CONSTRAINTS {0}, /* not before any other features */ + .feature_index = &mpls_main.mpls_rx_feature_not_enabled, +}; + +static char * feature_start_nodes[] = +{ + "mpls-input", +}; + +clib_error_t * +mpls_feature_init (vlib_main_t * vm) +{ + ip_config_main_t * cm = &mpls_main.rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + + return (ip_feature_init_cast (vm, cm, vcm, + feature_start_nodes, + ARRAY_LEN(feature_start_nodes), + VNET_IP_RX_UNICAST_FEAT, + VNET_L3_PACKET_TYPE_MPLS_UNICAST)); +} + +static clib_error_t * +mpls_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + vlib_main_t * vm = vnm->vlib_main; + mpls_main_t * mm = &mpls_main; + ip_config_main_t * cm = &mm->rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + u32 drop_feature_index; + u32 ci; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + drop_feature_index = mm->mpls_rx_feature_not_enabled; + + if (is_add) + ci = vnet_config_add_feature (vm, vcm, ci, + drop_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + { + ci = vnet_config_del_feature (vm, vcm, ci, + drop_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + mm->mpls_enabled_by_sw_if_index[sw_if_index] = 0;; + } + + 
cm->config_index_by_sw_if_index[sw_if_index] = ci; + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del); + +static clib_error_t * +show_mpls_features_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_main_t * mm = &mpls_main; + int i; + char ** features; + + vlib_cli_output (vm, "Available MPLS feature nodes"); + + do { + features = mm->feature_nodes; + for (i = 0; i < vec_len(features); i++) + vlib_cli_output (vm, " %s\n", features[i]); + } while(0); + + return 0; +} + +VLIB_CLI_COMMAND (show_ip_features_command, static) = { + .path = "show mpls features", + .short_help = "show mpls features", + .function = show_mpls_features_command_fn, +}; + +static clib_error_t * +show_mpls_interface_features_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + mpls_main_t * mm = &mpls_main; + + ip_config_main_t * cm; + vnet_config_main_t * vcm; + vnet_config_t * cfg; + u32 cfg_index; + vnet_config_feature_t * feat; + vlib_node_t * n; + u32 sw_if_index; + u32 node_index; + u32 current_config_index; + int i; + + if (! 
unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + return clib_error_return (0, "Interface not specified..."); + + vlib_cli_output (vm, "MPLS feature paths configured on %U...", + format_vnet_sw_if_index_name, vnm, sw_if_index); + + cm = &mm->rx_config_mains; + vcm = &cm->config_main; + + current_config_index = vec_elt (cm->config_index_by_sw_if_index, + sw_if_index); + + ASSERT(current_config_index + < vec_len (vcm->config_pool_index_by_user_index)); + + cfg_index = + vcm->config_pool_index_by_user_index[current_config_index]; + cfg = pool_elt_at_index (vcm->config_pool, cfg_index); + + for (i = 0; i < vec_len(cfg->features); i++) + { + feat = cfg->features + i; + node_index = feat->node_index; + n = vlib_get_node (vm, node_index); + vlib_cli_output (vm, " %v", n->name); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_interface_features_command, static) = { + .path = "show mpls interface features", + .short_help = "show mpls interface features <intfc>", + .function = show_mpls_interface_features_command_fn, +}; + diff --git a/vnet/vnet/mpls/mpls_lookup.c b/vnet/vnet/mpls/mpls_lookup.c new file mode 100644 index 00000000..31ad68c4 --- /dev/null +++ b/vnet/vnet/mpls/mpls_lookup.c @@ -0,0 +1,278 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/dpo/load_balance.h> + +vlib_node_registration_t mpls_lookup_node; + +typedef struct { + u32 next_index; + u32 lb_index; + u32 lfib_index; + u32 label_net_byte_order; +} mpls_lookup_trace_t; + +/* + * Format one mpls-lookup trace record. + * The traced header word is kept in network byte order, so it is converted + * to host order once and both the label and the S (end-of-stack) bit are + * extracted from the converted value. + */ +static u8 * +format_mpls_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); + u32 hdr = clib_net_to_host_u32 (t->label_net_byte_order); + + s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d " + "label %d eos %d", + t->next_index, t->lfib_index, t->lb_index, + vnet_mpls_uc_get_label (hdr), + vnet_mpls_uc_get_s (hdr)); + return s; +} + +/* + * Compute flow hash. + * We'll use it to select which adjacency to use for this flow. And other things. + */ +always_inline u32 +mpls_compute_flow_hash (const mpls_unicast_header_t * hdr, + flow_hash_config_t flow_hash_config) +{ + // FIXME + return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl)); +} + +static inline uword +mpls_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + 
from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); + lb0 = load_balance_get(lbi0); + + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + /* + * pop the label that was just used in the lookup + */ + vlib_buffer_advance(b0, sizeof(*h0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_lookup_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_lookup_node) = { + .function = mpls_lookup, + .name = "mpls-lookup", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .sibling_of = "ip4-lookup", + + .format_buffer = format_mpls_gre_header_with_length, + .format_trace = format_mpls_lookup_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup) + +typedef struct { + u32 next_index; + u32 lb_index; +} mpls_load_balance_trace_t; + +static u8 * +format_mpls_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *); + + s = format (s, "MPLS: next [%d], LB index %d ", + t->next_index, t->lb_index); + return s; +} + +always_inline uword +mpls_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + const mpls_unicast_header_t *hdr0; + const load_balance_t *lb0; + u32 pi0, lbi0, hc0, next0; + const dpo_id_t *dpo0; + vlib_buffer_t * p0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + hdr0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(hdr0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer 
(p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_load_balance_node) = { + .function = mpls_load_balance, + .name = "mpls-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + + .format_trace = format_mpls_load_balance_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c new file mode 100644 index 00000000..932fcb8d --- /dev/null +++ b/vnet/vnet/mpls/mpls_output.c @@ -0,0 +1,343 @@ +/* + * mpls_output.c: MPLS Adj rewrite + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + + /* Packet data, possibly *after* rewrite. 
*/ + u8 packet_data[64 - 1*sizeof(u32)]; +} mpls_output_trace_t; + +static u8 * +format_mpls_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + uword indent = format_get_indent (s); + + s = format (s, "adj-idx %d : %U flow hash: 0x%08x", + t->adj_index, + format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, t->adj_index, + t->packet_data, sizeof (t->packet_data)); + return s; +} + +static inline uword +mpls_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next, cpu_index; + vlib_node_runtime_t * error_node; + + cpu_index = os_get_cpu_number(); + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + hdr0 = vlib_buffer_get_current (p0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], hdr0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. 
*/ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) + > adj0[0].rewrite_header.max_l3_packet_bytes + ? IP4_ERROR_MTU_EXCEEDED + : IP4_ERROR_NONE); + + p0->error = error_node->errors[error0]; + + /* Only expose the rewrite bytes and pick the adjacency's next node when + * the MTU check passed; otherwise the buffer offsets are left untouched + * and the packet is sent to the drop next. + * NOTE(review): error0 holds an IP4_ERROR_* value but is used to index + * error_node->errors - confirm that node's error table uses the same + * numbering. */ + if (PREDICT_TRUE(error0 == IP4_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + } + else + { + next0 = MPLS_OUTPUT_NEXT_DROP; + } + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +/* Error strings generated from error.def, one entry per mpls_error(). */ +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +/* Node function for "mpls-output": forwards to mpls_output_inline. */ +static inline uword +mpls_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (mpls_output_node) = { + .function = mpls_output, + .name = "mpls-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, + foreach_mpls_output_next +#undef _ + }, + + .format_trace = format_mpls_output_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output) + +static inline uword +mpls_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (mpls_midchain_node) = { + .function = mpls_output, + .name = "mpls-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_output_trace, + + .sibling_of = "mpls-output", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain) + +/** + * @brief Next index values from the MPLS incomplete adj node + */ +#define foreach_mpls_adj_incomplete_next \ +_(DROP, "error-drop") \ +_(IP4, "ip4-arp") \ +_(IP6, "ip6-discover-neighbor") + +typedef enum { +#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s, + foreach_mpls_adj_incomplete_next +#undef _ + MPLS_ADJ_INCOMPLETE_N_NEXT, +} mpls_adj_incomplete_next_t; + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_adj_incomplete_trace_t_ +{ + u32 next; +} mpls_adj_incomplete_trace_t; + + +/** + * @brief Graph node for incomplete MPLS adjacency. + * This node will push traffic to either the v4-arp or v6-nd node + * based on the next-hop proto of the adj. + * We pay a cost for this 'routing' node, but an incomplete adj is the + * exception case. 
+ */ +static inline uword +mpls_adj_incomplete (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, next0, adj_index0; + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + + if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto)) + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4; + } + else + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_adj_incomplete_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static u8 * +format_mpls_adj_incomplete_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_adj_incomplete_trace_t * t; + uword indent; + + t = va_arg (*args, mpls_adj_incomplete_trace_t *); + indent = format_get_indent (s); + + s = format (s, "%Unext:%d", + format_white_space, indent, + t->next); + return (s); +} + +VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { + .function = mpls_adj_incomplete, + .name = "mpls-adj-incomplete", + .format_trace = format_mpls_adj_incomplete_trace, + /* Takes a vector of 
packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n, + foreach_mpls_adj_incomplete_next +#undef _ + }, + /* .format_trace is set once, earlier in this initializer, to + * format_mpls_adj_incomplete_trace; repeating the designator here would + * override it with a formatter for a different trace struct. */ +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node, + mpls_adj_incomplete) diff --git a/vnet/vnet/mpls/mpls_types.h b/vnet/vnet/mpls/mpls_types.h new file mode 100644 index 00000000..d7c629df --- /dev/null +++ b/vnet/vnet/mpls/mpls_types.h @@ -0,0 +1,39 @@ +#ifndef __MPLS_TYPES_H__ +#define __MPLS_TYPES_H__ + +#define MPLS_IETF_MIN_LABEL 0x00000 +#define MPLS_IETF_MAX_LABEL 0xfffff + +#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000 +#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f + +#define MPLS_IETF_MIN_UNRES_LABEL 0x00010 +#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000 +#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001 +#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002 +#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003 +#define MPLS_IETF_ELI_LABEL 0x00007 +#define MPLS_IETF_GAL_LABEL 0x0000D + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null" +#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul" +#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null" +#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul" +#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert" +#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl" +#define MPLS_IETF_ELI_STRING "entropy-label-indicator" +#define MPLS_IETF_ELI_BRIEF_STRING "eli" +#define MPLS_IETF_GAL_STRING "gal" +#define MPLS_IETF_GAL_BRIEF_STRING "gal" + +#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1) + +/* + * True for labels strictly above MPLS_IETF_MIN_UNRES_LABEL and up to + * MPLS_IETF_MAX_UNRES_LABEL inclusive. + * NOTE(review): the lower bound is exclusive, so MPLS_IETF_MIN_UNRES_LABEL + * itself is rejected - confirm whether ">=" was intended. + */ +#define MPLS_LABEL_IS_REAL(_lbl) \ + (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ + ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) + 
+#endif diff --git a/vnet/vnet/mpls/node.c b/vnet/vnet/mpls/node.c new file mode 100644 index 00000000..6801cc7b --- /dev/null +++ b/vnet/vnet/mpls/node.c @@ -0,0 +1,223 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + u32 next_index; + u32 label_host_byte_order; +} mpls_input_trace_t; + +static u8 * +format_mpls_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *); + char * next_name; + + next_name = "BUG!"; + +#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; + foreach_mpls_input_next; +#undef _ + + s = format (s, "MPLS: next %s[%d] label %d ttl %d", + next_name, t->next_index, + vnet_mpls_uc_get_label(t->label_host_byte_order), + vnet_mpls_uc_get_ttl(t->label_host_byte_order)); + + return s; +} + +vlib_node_registration_t mpls_input_node; + +typedef struct { + u32 last_label; + u32 last_inner_fib_index; + u32 last_outer_fib_index; + mpls_main_t * mpls_main; +} mpls_input_runtime_t; + +static inline uword +mpls_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * 
to_next; + mpls_input_runtime_t * rt; + mpls_main_t * mm; + u32 cpu_index = os_get_cpu_number(); + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + mm = rt->mpls_main; + /* + * Force an initial lookup every time, in case the control-plane + * changed the label->FIB mapping. + */ + rt->last_label = ~0; + + next_index = node->cached_next_index; + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_MPLS); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + mpls_unicast_header_t * h0; + u32 label0; + u32 next0; + ip_config_main_t * cm0; + u32 sw_if_index0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + cm0 = &mm->rx_config_mains; + b0->current_config_index = vec_elt (cm0->config_index_by_sw_if_index, + sw_if_index0); + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + /* TTL expired? 
*/ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + } + else + { + vnet_get_config_data (&cm0->config_main, + &b0->current_config_index, + &next0, + /* # bytes of config data */ 0); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_input_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->label_host_byte_order = label0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static uword +mpls_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame); +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_input_node) = { + .function = mpls_input, + .name = "mpls-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_unicast_header_net_byte_order, + .format_trace = format_mpls_input_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input) + +static void +mpls_setup_nodes (vlib_main_t * vm) +{ + mpls_input_runtime_t * rt; + pg_node_t * pn; + + pn = pg_get_node (mpls_input_node.index); + pn->unformat_edit = unformat_pg_mpls_header; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + mpls_input_node.index); +} + +static clib_error_t * mpls_input_init (vlib_main_t * vm) +{ + clib_error_t * error; + + error = vlib_call_init_function (vm, mpls_init); + if (error) + clib_error_report (error); + + mpls_setup_nodes (vm); + + return (mpls_feature_init(vm)); +} + +VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/vnet/vnet/mpls/packet.h b/vnet/vnet/mpls/packet.h new file mode 100644 index 00000000..bc67445b --- /dev/null +++ b/vnet/vnet/mpls/packet.h @@ -0,0 +1,125 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A label value only, i.e. 20bits.
+ */
+typedef u32 mpls_label_t;
+
+/**
+ * One MPLS label stack entry, stored as a single 32-bit word.
+ */
+typedef struct {
+  /* Label: top 20 bits [in network byte order] */
+  /* Experimental: 3 bits ... */
+  /* S (bottom of label stack): 1 bit */
+  /* TTL: 8 bits */
+  mpls_label_t label_exp_s_ttl;
+} mpls_unicast_header_t;
+
+/**
+ * Value of the S (end-of-stack) bit.
+ */
+typedef enum mpls_eos_bit_t_
+{
+  MPLS_NON_EOS = 0,
+  MPLS_EOS = 1,
+} mpls_eos_bit_t;
+
+/* Printable names, indexed by mpls_eos_bit_t. */
+#define MPLS_EOS_BITS {                         \
+    [MPLS_NON_EOS] = "neos",                    \
+    [MPLS_EOS] = "eos",                         \
+}
+
+/* Iterate _eos over both mpls_eos_bit_t values. */
+#define FOR_EACH_MPLS_EOS_BIT(_eos)             \
+    for ((_eos) = MPLS_NON_EOS; (_eos) <= MPLS_EOS; (_eos)++)
+
+/*
+ * Field layout of the 32-bit entry as used by the shifts and masks below:
+ *   bits 31..12  label
+ *   bits 11..9   EXP
+ *   bit  8       S (end of stack)
+ *   bits  7..0   TTL
+ *
+ * NOTE(review): the shifts below only match this layout once the word is
+ * in host byte order - confirm that callers convert from network order
+ * before using these macros and accessors.
+ */
+#define MPLS_ENTRY_LABEL_OFFSET      0
+#define MPLS_ENTRY_LABEL_SHIFT       12
+/*
+ * Unsigned on purpose: as a plain int constant, 0x000fffff << 12 does not
+ * fit in int, so MPLS_ENTRY_LABEL_BITS was a signed left-shift overflow
+ * (undefined behaviour in C).
+ */
+#define MPLS_ENTRY_LABEL_MASK        0x000fffffu
+#define MPLS_ENTRY_LABEL_BITS        \
+    (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT)
+
+#define MPLS_ENTRY_EXP_OFFSET        2  /* byte offset to EXP bits */
+#define MPLS_ENTRY_EXP_SHIFT         9
+#define MPLS_ENTRY_EXP_MASK          0x07
+#define MPLS_ENTRY_EXP(mpls)         \
+    (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK)
+#define MPLS_ENTRY_EXP_BITS          \
+    (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT)
+
+#define MPLS_ENTRY_EOS_OFFSET        2  /* byte offset to EOS bit */
+#define MPLS_ENTRY_EOS_SHIFT         8
+#define MPLS_ENTRY_EOS_MASK          0x01 /* EOS bit in its byte */
+#define MPLS_ENTRY_EOS(mpls)         \
+    (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK)
+#define MPLS_ENTRY_EOS_BIT           (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT)
+
+#define MPLS_ENTRY_TTL_OFFSET        3  /* byte offset to ttl field */
+#define MPLS_ENTRY_TTL_SHIFT         0
+#define MPLS_ENTRY_TTL_MASK          0xff
+#define MPLS_ENTRY_TTL(mpls)         \
+    (((mpls) >> MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK)
+#define MPLS_ENTRY_TTL_BITS          \
+    (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT)
+
+/* Return the 20-bit label held in the top bits of the entry. */
+static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl)
+{
+    return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT);
+}
+
+/* Return the 3-bit EXP field of the entry. */
+static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl)
+{
+    return (MPLS_ENTRY_EXP(label_exp_s_ttl));
+}
+
+/* Return the S (end-of-stack) bit of the entry: 0 or 1. */
+static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl)
+{
+    return (MPLS_ENTRY_EOS(label_exp_s_ttl));
+}
+
+/* Return the 8-bit TTL field of the entry. */
+static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl)
+{
+    return (MPLS_ENTRY_TTL(label_exp_s_ttl));
+}
+
+/*
+ * Replace the label field of *label_exp_s_ttl with the low 20 bits of
+ * value; the EXP, S and TTL bits are left untouched.
+ */
+static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl,
+                                           u32 value)
+{
+    *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) |
+                        ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT));
+}
+
+/*
+ * Replace the EXP field of *label_exp_s_ttl with the low 3 bits of exp;
+ * all other bits are left untouched.
+ */
+static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl,
+                                         u32 exp)
+{
+    *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) |
+                        ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT));
+}
+
+/*
+ * Set or clear the S bit of *label_exp_s_ttl from the low bit of eos;
+ * all other bits are left untouched.
+ */
+static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl,
+                                       u32 eos)
+{
+    *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) |
+                        ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT));
+}
+
+/*
+ * Replace the TTL field of *label_exp_s_ttl with the low 8 bits of ttl;
+ * all other bits are left untouched.
+ */
+static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl,
+                                         u32 ttl)
+{
+    *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) |
+                        ((ttl & MPLS_ENTRY_TTL_MASK) << MPLS_ENTRY_TTL_SHIFT));
+}
+
+#endif /* included_vnet_mpls_packet_h */
diff --git a/vnet/vnet/mpls/pg.c b/vnet/vnet/mpls/pg.c
new file mode 100644
index 00000000..f04b5307
--- /dev/null
+++ b/vnet/vnet/mpls/pg.c
@@ -0,0 +1,71 @@
+/*
+ * pg.c: packet generator mpls/gre interface
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/gre/gre.h>
+#include <vnet/mpls/mpls.h>
+
+/* Packet-generator edit group for one MPLS header: a single edit that
+ * covers the whole label_exp_s_ttl word. */
+typedef struct {
+  pg_edit_t label;
+} pg_mpls_header_t;
+
+/* Bind the edit in e to the label_exp_s_ttl field of
+ * mpls_unicast_header_t. */
+static inline void
+pg_mpls_header_init (pg_mpls_header_t * e)
+{
+  pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl);
+}
+
+/*
+ * Packet-generator unformat function for an MPLS header.
+ *
+ * Expects a pg_stream_t * as the single va_list argument.  Creates an
+ * edit group for the header on that stream and parses the label with
+ * unformat_mpls_label_net_byte_order.  If the label parses, the rest of
+ * the input is offered to the edit function of the "ip4-input" node, if
+ * it has one; the result of that attempt is ignored.
+ *
+ * Returns 1 when the label was parsed, 0 otherwise (the edit group is
+ * freed again in that case).
+ */
+uword
+unformat_pg_mpls_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t * s = va_arg (*args, pg_stream_t *);
+  pg_mpls_header_t * h;
+  vlib_main_t * vm = vlib_get_main();
+  vlib_node_t * ip4_input_node;
+  pg_node_t * pg_node = 0;
+  u32 group_index;
+
+  h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t),
+                            &group_index);
+  pg_mpls_header_init (h);
+
+  if (! unformat (input, "%U",
+                  unformat_pg_edit,
+                  unformat_mpls_label_net_byte_order, &h->label))
+    {
+      /* No label: undo the edit group created above. */
+      pg_free_edit_group (s);
+      return 0;
+    }
+
+  ip4_input_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input");
+  ASSERT (ip4_input_node);
+
+  /* Explicit NULL check so a missing node is never dereferenced, even
+   * if the ASSERT above does not stop execution. */
+  if (ip4_input_node)
+    pg_node = pg_get_node (ip4_input_node->index);
+
+  /* The payload is optional: a failed parse here is not an error. */
+  if (pg_node && pg_node->unformat_edit)
+    (void) unformat_user (input, pg_node->unformat_edit, s);
+
+  return 1;
+}
+
diff --git a/vnet/vnet/mpls/policy_encap.c b/vnet/vnet/mpls/policy_encap.c
new file mode 100644
index 00000000..278e8e6d
--- /dev/null
+++ b/vnet/vnet/mpls/policy_encap.c
@@ -0,0 +1,180 @@
+/*
+ * policy_encap.c: mpls-o-e policy encap
+ *
+ * Copyright (c) 2012-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/mpls/mpls.h>
+
+/* Per-packet trace record written by mpls_policy_encap. */
+typedef struct {
+  u32 next_index;   /* next node index chosen for the packet */
+  u32 encap_index;  /* index into mpls_main.encaps used for the rewrite */
+} mpls_policy_encap_trace_t;
+
+/*
+ * Format one mpls_policy_encap_trace_t.
+ *
+ * va_list: vlib_main_t *, vlib_node_t * (both unused) and the trace
+ * record.  Both fields are u32, so they are printed unsigned.
+ */
+u8 * format_mpls_policy_encap_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  mpls_policy_encap_trace_t * t = va_arg (*args, mpls_policy_encap_trace_t *);
+
+  s = format (s, "MPLS-POLICY-ENCAP: next-index %u encap-index %u",
+              t->next_index, t->encap_index);
+
+  return s;
+}
+
+vlib_node_registration_t mpls_policy_encap_node;
+
+/* Next nodes registered statically with the node. */
+#define foreach_mpls_policy_encap_next          \
+_(DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) MPLS_POLICY_ENCAP_NEXT_##s,
+  foreach_mpls_policy_encap_next
+#undef _
+  MPLS_POLICY_ENCAP_N_NEXT,
+} mpls_policy_encap_next_t;
+
+/* Counters kept by the node. */
+#define foreach_mpls_policy_error                   \
+_(PKTS_ENCAP, "mpls policy tunnel packets encapsulated")
+
+typedef enum {
+#define _(n,s) MPLS_POLICY_ENCAP_ERROR_##n,
+  foreach_mpls_policy_error
+#undef _
+  MPLS_POLICY_ENCAP_N_ERROR,
+} mpls_policy_encap_error_t;
+
+static char * mpls_policy_encap_error_strings[] =
+  {
+#define _(n,s) s,
+    foreach_mpls_policy_error
+#undef _
+};
+
+/*
+ * Node function of "mpls-policy-encap".
+ *
+ * For every buffer in from_frame:
+ *  - read the encap index from vnet_buffer(b)->l2_classify.opaque_index,
+ *  - look up the matching mpls_encap_t in mpls_main.encaps,
+ *  - prepend that encap's rewrite string to the packet,
+ *  - send the packet to the encap's output_next_index.
+ *
+ * The PKTS_ENCAP counter is bumped by the size of the frame.
+ * Returns the number of buffers in from_frame.
+ */
+static uword
+mpls_policy_encap (vlib_main_t * vm,
+                   vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, * from, * to_next;
+  mpls_main_t * mm = &mpls_main;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index,
+                           to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t * b0;
+          u8 * h0;
+          u32 encap_index0;
+          u32 next0;
+          mpls_encap_t * e0;
+
+          /* Speculatively enqueue the buffer to the current next frame. */
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* NOTE(review): presumably stored by the classifier session
+           * that steered the packet here - confirm against the code
+           * that sets opaque_index. */
+          encap_index0 = vnet_buffer(b0)->l2_classify.opaque_index;
+
+          e0 = pool_elt_at_index (mm->encaps, encap_index0);
+
+          /* Make room in front of the current data and copy the
+           * rewrite string into it. */
+          vlib_buffer_advance (b0, -(word)vec_len(e0->rewrite));
+          h0 = vlib_buffer_get_current (b0);
+          clib_memcpy (h0, e0->rewrite, vec_len(e0->rewrite));
+
+          next0 = e0->output_next_index;
+
+          if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              mpls_policy_encap_trace_t *tr =
+                  vlib_add_trace (vm, node, b0, sizeof (*tr));
+              tr->next_index = next0;
+              tr->encap_index = encap_index0;
+            }
+          /* Fix up the enqueue if next0 differs from next_index. */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vlib_node_increment_counter (vm, mpls_policy_encap_node.index,
+                               MPLS_POLICY_ENCAP_ERROR_PKTS_ENCAP,
+                               from_frame->n_vectors);
+  return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (mpls_policy_encap_node) = {
+  .function = mpls_policy_encap,
+  .name = "mpls-policy-encap",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+
+  .runtime_data_bytes = 0,
+
+  .n_errors = MPLS_POLICY_ENCAP_N_ERROR,
+  .error_strings = mpls_policy_encap_error_strings,
+
+  .format_trace = format_mpls_policy_encap_trace,
+
+  .n_next_nodes = MPLS_POLICY_ENCAP_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [MPLS_POLICY_ENCAP_NEXT_##s] = n,
+    foreach_mpls_policy_encap_next
+#undef _
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (mpls_policy_encap_node, mpls_policy_encap)
+
+/*
+ * Init function: after mpls_init has run, add mpls-policy-encap as a
+ * next node of both the ip4 and the ip6 classify nodes and remember the
+ * resulting next indices in mpls_main.
+ *
+ * Returns 0 on success or the error returned by mpls_init.
+ */
+static clib_error_t *
+mpls_policy_encap_init (vlib_main_t * vm)
+{
+  mpls_main_t * mm = &mpls_main;
+  clib_error_t * error;
+
+  if ((error = vlib_call_init_function (vm, mpls_init)))
+    return error;
+
+  /* Use the vm handed to this function rather than mm->vlib_main, which
+   * nothing in this file sets. */
+  mm->ip4_classify_mpls_policy_encap_next_index =
+    vlib_node_add_next (vm,
+                        ip4_classify_node.index,
+                        mpls_policy_encap_node.index);
+
+  mm->ip6_classify_mpls_policy_encap_next_index =
+    vlib_node_add_next (vm,
+                        ip6_classify_node.index,
+                        mpls_policy_encap_node.index);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (mpls_policy_encap_init);
|