Diffstat (limited to 'vnet/vnet/mpls-gre')
-rw-r--r-- | vnet/vnet/mpls-gre/error.def        |   28
-rw-r--r-- | vnet/vnet/mpls-gre/interface.c      | 1930
-rw-r--r-- | vnet/vnet/mpls-gre/mpls.c           |  769
-rw-r--r-- | vnet/vnet/mpls-gre/mpls.h           |  231
-rw-r--r-- | vnet/vnet/mpls-gre/node.c           |  359
-rw-r--r-- | vnet/vnet/mpls-gre/packet.h         |   49
-rw-r--r-- | vnet/vnet/mpls-gre/pg.c             |   71
-rw-r--r-- | vnet/vnet/mpls-gre/policy_encap.c   |  172
8 files changed, 3609 insertions, 0 deletions
diff --git a/vnet/vnet/mpls-gre/error.def b/vnet/vnet/mpls-gre/error.def new file mode 100644 index 00000000000..424ab50a030 --- /dev/null +++ b/vnet/vnet/mpls-gre/error.def @@ -0,0 +1,28 @@ +/* + * mpls_error.def: mpls errors + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +mpls_error (NONE, "no error") +mpls_error (UNKNOWN_PROTOCOL, "unknown protocol") +mpls_error (UNSUPPORTED_VERSION, "unsupported version") +mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated") +mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated") +mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst") +mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired") +mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") +mpls_error (BAD_LABEL, "invalid FIB id in label") +mpls_error (NOT_IP4, "non-ip4 packets dropped") +mpls_error (DISALLOWED_FIB, "disallowed FIB id") diff --git a/vnet/vnet/mpls-gre/interface.c b/vnet/vnet/mpls-gre/interface.c new file mode 100644 index 00000000000..c345054bdec --- /dev/null +++ b/vnet/vnet/mpls-gre/interface.c @@ -0,0 +1,1930 @@ +/* + * interface.c: mpls interfaces + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/mpls.h> + +static uword mpls_gre_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. 
+ */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_GRE_OUTPUT_NEXT_POST_REWRITE 1 + +static uword +mpls_gre_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... + */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_gre_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->gre_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->outer_fib_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + next1 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_id = t1 - gm->gre_tunnels; + tr->length = b1->current_length; + tr->src.as_u32 = t1->tunnel_src.as_u32; + tr->dst.as_u32 = t1->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t1->encap_index; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_gre_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->gre_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX fib index */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->outer_fib_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_GRE_OUTPUT_NEXT_POST_REWRITE; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_gre_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = t0 - gm->gre_tunnels; + tr->length = b0->current_length; + tr->src.as_u32 = t0->tunnel_src.as_u32; + tr->dst.as_u32 = t0->tunnel_dst.as_u32; + tr->lookup_miss = 0; + tr->mpls_encap_index = t0->encap_index; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, gre_input_node.index, + GRE_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_mpls_gre_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-gre%d", dev_instance); +} + +static u8 * format_mpls_gre_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-GRE tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_gre_device_class) = { + .name = "MPLS-GRE tunnel device", + .format_device_name = format_mpls_gre_tunnel_name, + .format_device = format_mpls_gre_device, + .format_tx_trace = format_mpls_gre_tx_trace, + .tx_function = mpls_gre_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + +VNET_HW_INTERFACE_CLASS (mpls_gre_hw_interface_class) = { + .name = "MPLS-GRE", + .format_header = format_mpls_gre_header_with_length, +#if 0 + .unformat_header = unformat_mpls_gre_header, +#endif + .set_rewrite = mpls_gre_set_rewrite, +}; + + +static uword mpls_eth_set_rewrite (vnet_main_t * vnm, + u32 sw_if_index, + u32 l3_type, + void * dst_address, + void * rewrite, + uword max_rewrite_bytes) +{ + /* + * Conundrum: packets from tun/tap destined for the tunnel + * actually have this rewrite applied. Transit packets do not. + * To make the two cases equivalent, don't generate a + * rewrite here, build the entire header in the fast path. + */ + return 0; +} + +/* manually added to the interface output node */ +#define MPLS_ETH_OUTPUT_NEXT_OUTPUT 1 + +static uword +mpls_eth_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + mpls_main_t * gm = &mpls_main; + vnet_main_t * vnm = gm->vnet_main; + u32 next_index; + u32 * from, * to_next, n_left_from, n_left_to_next; + + /* Vector of buffer / pkt indices we're supposed to process */ + from = vlib_frame_vector_args (frame); + + /* Number of buffers / pkts */ + n_left_from = frame->n_vectors; + + /* Speculatively send the first buffer to the last disposition we used */ + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + /* set up to enqueue to our disposition with index = next_index */ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* + * As long as we have enough pkts left to process two pkts + * and prefetch two pkts... 
+ */ + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * b0, * b1; + u32 bi0, next0, bi1, next1; + mpls_eth_tunnel_t * t0, * t1; + u32 sw_if_index0, sw_if_index1; + vnet_hw_interface_t * hi0, * hi1; + u8 * dst0, * dst1; + + /* Prefetch the next iteration */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + /* + * Prefetch packet data. We expect to overwrite + * the inbound L2 header with an ip header and a + * gre header. Might want to prefetch the last line + * of rewrite space as well; need profile data + */ + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* Pick up the next two buffer indices */ + bi0 = from[0]; + bi1 = from[1]; + + /* Speculatively enqueue them where we sent the last buffer */ + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + sw_if_index1 = vnet_buffer(b1)->sw_if_index [VLIB_TX]; + + /* get h/w intfcs */ + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + hi1 = vnet_get_sup_hw_interface (vnm, sw_if_index1); + + /* hw_instance = tunnel pool index */ + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + t1 = pool_elt_at_index (gm->eth_tunnels, hi1->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + vlib_buffer_advance (b1, -(word)vec_len(t1->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + dst1 = vlib_buffer_get_current (b1); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + memcpy (dst1, t1->rewrite_data, vec_len(t1->rewrite_data)); + + /* Fix TX fib indices */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + vnet_buffer(b1)->sw_if_index [VLIB_TX] = t1->tx_sw_if_index; + + /* mpls-post-rewrite takes it from here... 
*/ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + next1 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t1 - gm->eth_tunnels; + tr->tx_sw_if_index = t1->tx_sw_if_index; + tr->mpls_encap_index = t1->encap_index; + tr->length = b0->current_length; + hi1 = vnet_get_sup_hw_interface (vnm, t1->tx_sw_if_index); + memcpy (tr->dst, hi1->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * b0; + u32 bi0, next0; + mpls_eth_tunnel_t * t0; + u32 sw_if_index0; + vnet_hw_interface_t * hi0; + u8 * dst0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sw_if_index0 = vnet_buffer(b0)->sw_if_index [VLIB_TX]; + + hi0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + t0 = pool_elt_at_index (gm->eth_tunnels, hi0->hw_instance); + + /* Apply rewrite - $$$$$ fixme don't use memcpy */ + vlib_buffer_advance (b0, -(word)vec_len(t0->rewrite_data)); + + dst0 = vlib_buffer_get_current (b0); + + memcpy (dst0, t0->rewrite_data, vec_len(t0->rewrite_data)); + + /* Fix the TX interface */ + vnet_buffer(b0)->sw_if_index [VLIB_TX] = t0->tx_sw_if_index; + + /* Send the packet */ + next0 = MPLS_ETH_OUTPUT_NEXT_OUTPUT; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_eth_tx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->lookup_miss = 0; + tr->tunnel_id = t0 - gm->eth_tunnels; + tr->tx_sw_if_index = t0->tx_sw_if_index; + tr->mpls_encap_index = t0->encap_index; + tr->length = b0->current_length; + hi0 = vnet_get_sup_hw_interface (vnm, t0->tx_sw_if_index); + memcpy (tr->dst, hi0->hw_address, sizeof (tr->dst)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, frame->n_vectors); + + return frame->n_vectors; +} + +static u8 * format_mpls_eth_tunnel_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "mpls-eth%d", dev_instance); +} + +static u8 * format_mpls_eth_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + CLIB_UNUSED (int verbose) = va_arg (*args, int); + + s = format (s, "MPLS-ETH tunnel: id %d\n", dev_instance); + return s; +} + +VNET_DEVICE_CLASS (mpls_eth_device_class) = { + .name = "MPLS-ETH tunnel device", + .format_device_name = format_mpls_eth_tunnel_name, + .format_device = format_mpls_eth_device, + .format_tx_trace = format_mpls_eth_tx_trace, + .tx_function = mpls_eth_interface_tx, + .no_flatten_output_chains = 1, +#ifdef SOON + .clear counter = 0; + .admin_up_down_function = 0; +#endif +}; + + +VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = 
{ + .name = "MPLS-ETH", + .format_header = format_mpls_eth_header_with_length, +#if 0 + .unformat_header = unformat_mpls_eth_header, +#endif + .set_rewrite = mpls_eth_set_rewrite, +}; + +#define foreach_mpls_post_rewrite_next \ + _ (IP4_LOOKUP, "ip4-lookup") + +typedef enum { +#define _(s,n) MPLS_POST_REWRITE_NEXT_##s, + foreach_mpls_post_rewrite_next +#undef _ + MPLS_POST_REWRITE_N_NEXT, +} mpls_post_rewrite_next_t; + + +static uword +mpls_post_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + u16 old_l0 = 0, old_l1 = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + ip4_header_t * ip0, * ip1; + u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u32 next1 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u16 new_l0, new_l1; + ip_csum_t sum0, sum1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + + /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + + /* set the GRE (outer) ip packet length, fix the bloody checksum */ + sum0 = ip0->checksum; + sum1 = ip1->checksum; + + /* old_l0, old_l1 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + new_l1 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip1->checksum = ip_csum_fold (sum1); + ip0->length = new_l0; + ip1->length = new_l1; + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + ip4_header_t * ip0; + u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; + u16 new_l0; + ip_csum_t sum0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + + /* set the GRE (outer) ip packet length, fix the bloody checksum */ + sum0 = ip0->checksum; + /* old_l0 always 0, see the rewrite setup */ + new_l0 = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); + + sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, + length /* changed member */); + ip0->checksum = ip_csum_fold (sum0); + ip0->length = new_l0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + 
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_ENCAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_post_rewrite_node) = { + .function = mpls_post_rewrite, + .name = "mpls-post-rewrite", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = 0, + + .n_next_nodes = MPLS_POST_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_POST_REWRITE_NEXT_##s] = n, + foreach_mpls_post_rewrite_next +#undef _ + }, +}; + +static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) +{ + ip4_header_t * ip0; + ip4_gre_and_mpls_header_t * h0; + u8 * rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB */ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, t->tunnel_dst.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->tunnel_dst); + return 0; + } + + vec_validate (rewrite_data, sizeof (*h0) + + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + memset (rewrite_data, 0, sizeof (*h0)); + + h0 = (ip4_gre_and_mpls_header_t *) rewrite_data; + /* Copy the encap label stack */ + lp0 = h0->labels; + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + ip0 = &h0->ip4; + h0->gre.protocol = clib_host_to_net_u16(GRE_PROTOCOL_mpls_unicast); + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_GRE; + /* $$$ fixup ip4 header length and checksum after-the-fact */ + ip0->src_address.as_u32 = t->tunnel_src.as_u32; + ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; + ip0->checksum = ip4_header_checksum (ip0); + + return (rewrite_data); +} + +int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_gre_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + u32 outer_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + u8 * rewrite_data; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + /* No questions, no answers */ + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + inner_fib_index = p[0]; + } + + if (outer_fib_id != 0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, outer_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + outer_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. 
*/ + pool_foreach (tp, mm->gre_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_src, src, sizeof (*src)) + && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + /* A dup, and the route is in the fib. Done */ + if (p || l2_only) + return 1; + else + { + /* Reinstall the route (and other stuff) */ + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, + dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->gre_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_gre_sw_if_indices) > 0) + { + hw_if_index = + mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1]; + _vec_len (mm->free_gre_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->gre_tunnels; + hi->hw_instance = tp - mm->gre_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels, + mpls_gre_hw_interface_class.index, + tp - mm->gre_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + + ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + tp->tunnel_src.as_u32 = src->as_u32; + tp->tunnel_dst.as_u32 = dst->as_u32; + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->outer_fib_index = outer_fib_index; + tp->encap_index = e - mm->encaps; + tp->l2_only = l2_only; + + /* Create the adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_gre_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + } + pool_put (mm->gre_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + /* Save a copy of the rewrite data for L2 x-connect */ + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + vnet_rewrite_for_tunnel + (vnm, + outer_fib_index /* tx_sw_if_index, aka outer fib ID */, + ip4_rewrite_node.index, + mpls_post_rewrite_node.index, + &adj.rewrite_header, + rewrite_data, vec_len(rewrite_data)); + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->gre_tunnels, tp); + } + + return 0; +} + +/* + * Remove all mpls tunnels in the specified fib + */ +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id) +{ + ip4_main_t * im = &ip4_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = mm->vnet_main; + mpls_gre_tunnel_t *tp; + u32 fib_index = 0; + uword * p; + u32 * tunnels_to_delete = 0; + vnet_hw_interface_t * hi; + ip4_fib_t * fib; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + fib_index = p[0]; + + pool_foreach (tp, mm->gre_tunnels, + ({ + if (tp->inner_fib_index == fib_index) + vec_add1 (tunnels_to_delete, tp - mm->gre_tunnels); + })); + + fib = vec_elt_at_index (im->fibs, fib_index); + + for (i = 0; i < vec_len(tunnels_to_delete); i++) { + tp = pool_elt_at_index (mm->gre_tunnels, tunnels_to_delete[i]); + uword * hash = fib->adj_index_by_dst_address[tp->mask_width]; + uword key = tp->intfc_address.as_u32 & im->fib_masks[tp->mask_width]; + uword *p = hash_get (hash, key); + ip4_add_del_route_args_t a; + + /* Delete, the route if not already gone */ + if (p && !tp->l2_only) + { + memset (&a, 0, sizeof (a)); + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->gre_tunnels, tp); + } + + vec_free(tunnels_to_delete); + + return (0); +} + +static clib_error_t * +create_mpls_gre_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + ip4_address_t src, dst, intfc; + int src_set = 0, dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 outer_fib_id = 0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tunnel_intfc_sw_if_index = ~0; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "src %U", + unformat_ip4_address, &src)) + src_set = 1; + else if (unformat (line_input, "dst %U", + unformat_ip4_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "intfc %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + intfc_set = 1; + else if (unformat (line_input, "inner-fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!src_set) + return clib_error_return (0, "missing: src <ip-address>"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ip-address>"); + + if (!intfc_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_gre_add_del_tunnel (&src, &dst, &intfc, mask_width, + inner_fib_id, outer_fib_id, + &tunnel_intfc_sw_if_index, + l2_only, !is_del); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "inner fib ID %d doesn't exist\n", + inner_fib_id); + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "outer fib ID %d doesn't exist\n", + outer_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. 
+ */ + break; + + default: + return clib_error_return (0, "vnet_mpls_gre_add_del_tunnel returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { + .path = "create mpls gre tunnel", + .short_help = + "create mpls gre tunnel [del] src <addr> dst <addr> intfc <addr>/<mw>", + .function = create_mpls_gre_tunnel_command_fn, +}; + +u8 * format_mpls_encap_index (u8 * s, va_list * args) +{ + mpls_main_t * mm = va_arg (*args, mpls_main_t *); + u32 entry_index = va_arg (*args, u32); + mpls_encap_t * e; + int i; + + e = pool_elt_at_index (mm->encaps, entry_index); + + for (i = 0; i < vec_len (e->labels); i++) + s = format + (s, "%d ", vnet_mpls_uc_get_label(clib_net_to_host_u32 + (e->labels[i].label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_tunnel (u8 * s, va_list * args) +{ + mpls_gre_tunnel_t * t = va_arg (*args, mpls_gre_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + if (t->l2_only == 0) + { + s = format (s, "[%d]: src %U, dst %U, adj %U/%d, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + s = format (s, " inner fib index %d, outer fib index %d", + t->inner_fib_index, t->outer_fib_index); + } + else + { + s = format (s, "[%d]: src %U, dst %U, key %U, labels %U\n", + t - mm->gre_tunnels, + format_ip4_address, &t->tunnel_src, + format_ip4_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + format_mpls_encap_index, mm, t->encap_index); + + s = format (s, " l2 interface %d, outer fib index %d", + t->hw_if_index, t->outer_fib_index); + } + + return s; +} + +u8 * format_mpls_ethernet_tunnel (u8 * s, va_list * args) +{ + mpls_eth_tunnel_t * t = va_arg (*args, mpls_eth_tunnel_t *); + mpls_main_t * mm = &mpls_main; + + s = format (s, "[%d]: dst %U, adj %U/%d, labels %U\n", + t - mm->eth_tunnels, + format_ethernet_address, &t->tunnel_dst, + format_ip4_address, &t->intfc_address, + t->mask_width, + format_mpls_encap_index, mm, t->encap_index); + + + s = format (s, " tx on %U, rx fib index %d", + format_vnet_sw_if_index_name, mm->vnet_main, t->tx_sw_if_index, + t->inner_fib_index); + + return s; +} + +static clib_error_t * +show_mpls_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_main_t * mm = &mpls_main; + mpls_gre_tunnel_t * gt; + mpls_eth_tunnel_t * et; + + if (pool_elts (mm->gre_tunnels)) + { + vlib_cli_output (vm, "MPLS-GRE tunnels"); + pool_foreach (gt, mm->gre_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_gre_tunnel, gt); + })); + } + else + vlib_cli_output (vm, "No MPLS-GRE tunnels"); + + if (pool_elts (mm->eth_tunnels)) + { + vlib_cli_output (vm, "MPLS-Ethernet tunnels"); + pool_foreach (et, mm->eth_tunnels, + ({ + vlib_cli_output (vm, "%U", format_mpls_ethernet_tunnel, et); + })); + } + else + vlib_cli_output (vm, "No MPLS-Ethernet tunnels"); + + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { + .path = "show mpls tunnel", + .short_help = "show mpls tunnel", + .function = show_mpls_tunnel_command_fn, +}; + +/* force inclusion from application's main.c */ +clib_error_t *mpls_interface_init (vlib_main_t *vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_policy_encap_init))) + return error; + + return 0; +} +VLIB_INIT_FUNCTION(mpls_interface_init); + + +static u8 * mpls_ethernet_rewrite (mpls_main_t *mm, mpls_eth_tunnel_t * t) +{ + u8 * 
rewrite_data = 0; + mpls_encap_t * e; + mpls_unicast_header_t *lp0; + int i; + + /* look up the encap label stack using the RX FIB and adjacency address*/ + e = mpls_encap_by_fib_and_dest (mm, t->inner_fib_index, + t->intfc_address.as_u32); + + if (e == 0) + { + clib_warning ("no label for inner fib index %d, dst %U", + t->inner_fib_index, format_ip4_address, + &t->intfc_address); + return 0; + } + + vec_validate (rewrite_data, + sizeof (mpls_unicast_header_t) * vec_len(e->labels) -1); + + /* Copy the encap label stack */ + lp0 = (mpls_unicast_header_t *) rewrite_data; + + for (i = 0; i < vec_len(e->labels); i++) + lp0[i] = e->labels[i]; + + return (rewrite_data); +} + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_eth_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + u8 * rewrite_data; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + if (p || l2_only) + return 1; + else + { + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, + intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, intfc->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + + /* Create the adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_ethernet_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + } + + pool_put (mm->eth_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } + + vnet_rewrite_for_sw_interface + (vnm, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + tx_sw_if_index, + ip4_rewrite_node.index, + tp->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + /* + * Prepend the (0,1,2) VLAN tag ethernet header + * we just built to the mpls header stack + */ + vec_insert (rewrite_data, adj.rewrite_header.data_bytes, 0); + memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + vnet_rewrite_set_data_internal (&adj.rewrite_header, + sizeof(adj.rewrite_data), + rewrite_data, + vec_len(rewrite_data)); + + vec_free (tp->rewrite_data); + + tp->rewrite_data = rewrite_data; + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + if (!adj_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + 0 /* tunnel sw_if_index */, + l2_only, !is_del); + + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + break; + } + return 0; +} + + +VLIB_CLI_COMMAND (create_mpls_ethernet_tunnel_command, static) = { + .path = "create mpls ethernet tunnel", + .short_help = + "create mpls ethernet tunnel [del] dst <mac-addr> intfc <addr>/<mw>", + .function = create_mpls_ethernet_tunnel_command_fn, +}; + + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index) +{ + mpls_eth_tunnel_t * t; + ip_adjacency_t adj; + u8 * rewrite_data = 0; + u8 * label_start; + mpls_unicast_header_t *lp; + int i; + + if (pool_is_free_index (mm->eth_tunnels, policy_tunnel_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + t = pool_elt_at_index (mm->eth_tunnels, policy_tunnel_index); + + memset (&adj, 0, sizeof (adj)); + + /* Build L2 encap */ + vnet_rewrite_for_sw_interface + (mm->vnet_main, + VNET_L3_PACKET_TYPE_MPLS_UNICAST, + t->tx_sw_if_index, + mpls_policy_encap_node.index, + t->tunnel_dst, + &adj.rewrite_header, + sizeof (adj.rewrite_data)); + + vec_validate (rewrite_data, adj.rewrite_header.data_bytes -1); + + memcpy(rewrite_data, + vnet_rewrite_get_data_internal(&adj.rewrite_header, + sizeof (adj.rewrite_data)), + adj.rewrite_header.data_bytes); + + /* Append the label stack */ + + vec_add2 (rewrite_data, label_start, vec_len(e->labels) * sizeof (u32)); + + lp = (mpls_unicast_header_t *) label_start; + + for (i = 0; i < vec_len(e->labels); i++) + lp[i] = e->labels[i]; + + /* Remember the rewrite data */ + e->rewrite = rewrite_data; + e->output_next_index = adj.rewrite_header.next_index; + + return 0; +} + +int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u32 classify_table_index, + u32 * new_tunnel_index, + u8 l2_only, + u8 is_add) +{ + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + mpls_main_t * mm = 
&mpls_main; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t zero; + mpls_eth_tunnel_t *tp; + int need_route_add_del = 1; + u32 inner_fib_index = 0; + ip_adjacency_t adj; + u32 adj_index; + int found_tunnel = 0; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + u32 dummy; + + zero.as_u32 = 0; + + if (tunnel_sw_if_index == 0) + tunnel_sw_if_index = &dummy; + + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) + { + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + inner_fib_index = p[0]; + } + + /* suppress duplicate mpls interface generation. */ + pool_foreach (tp, mm->eth_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); + uword * hash = fib->adj_index_by_dst_address[mask_width]; + uword key = intfc->as_u32 & im->fib_masks[mask_width]; + uword *p = hash_get (hash, key); + + found_tunnel = 1; + + if (is_add) + { + if (p || l2_only) + return 1; + else + { + goto reinstall_it; + } + } + else + { + /* Delete, the route is already gone? */ + if (!p) + need_route_add_del = 0; + goto add_del_route; + } + + } + })); + + /* Delete, and we can't find the tunnel */ + if (is_add == 0 && found_tunnel == 0) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + pool_get(mm->eth_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + + if (vec_len (mm->free_eth_sw_if_indices) > 0) + { + hw_if_index = + mm->free_eth_sw_if_indices[vec_len(mm->free_eth_sw_if_indices)-1]; + _vec_len (mm->free_eth_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->eth_tunnels; + hi->hw_instance = tp - mm->eth_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_eth_device_class.index, tp - mm->eth_tunnels, + mpls_eth_hw_interface_class.index, + tp - mm->eth_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... 
to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "interface-output", MPLS_ETH_OUTPUT_NEXT_OUTPUT); + + ASSERT (slot == MPLS_ETH_OUTPUT_NEXT_OUTPUT); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + tp->hw_if_index = hw_if_index; + + reinstall_it: + memcpy(tp->tunnel_dst, dst, sizeof (tp->tunnel_dst)); + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->encap_index = e - mm->encaps; + tp->tx_sw_if_index = tx_sw_if_index; + tp->l2_only = l2_only; + + if (new_tunnel_index) + *new_tunnel_index = tp - mm->eth_tunnels; + + /* Create the classify adjacency and add to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; + adj.classify_table_index = classify_table_index; + + if (!l2_only) + ip_add_adjacency (lm, &adj, 1 /* one adj */, + &adj_index); + + add_del_route: + + if (need_route_add_del && !l2_only) + { + if (is_add) + ip4_add_del_route_next_hop (im, + IP4_ROUTE_FLAG_ADD, + &tp->intfc_address, + tp->mask_width, + &zero /* no next hop */, + (u32)~0 /* next_hop_sw_if_index */, + 1 /* weight */, + adj_index, + tp->inner_fib_index); + else + { + ip4_add_del_route_args_t a; + memset (&a, 0, sizeof (a)); + + a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; + a.table_index_or_table_id = tp->inner_fib_index; + a.dst_address = tp->intfc_address; + a.dst_address_length = tp->mask_width; + a.adj_index = ~0; + + ip4_add_del_route (im, &a); + ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, + IP4_ROUTE_FLAG_FIB_INDEX); + } + } + if (is_add == 0 && found_tunnel) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_eth_sw_if_indices, tp->hw_if_index); + pool_put (mm->eth_tunnels, tp); + } + + return 0; +} + +static clib_error_t * +create_mpls_ethernet_policy_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + vnet_main_t * vnm = vnet_get_main(); + ip4_address_t intfc; + int adj_set = 0; + u8 dst[6]; + int dst_set = 0, intfc_set = 0; + u32 mask_width; + u32 inner_fib_id = (u32)~0; + u32 classify_table_index = (u32)~0; + u32 new_tunnel_index; + int rv; + u8 is_del = 0; + u8 l2_only = 0; + u32 tx_sw_if_index; + + /* Get a line of input. */ + if (! 
unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "dst %U", + unformat_ethernet_address, &dst)) + dst_set = 1; + else if (unformat (line_input, "adj %U/%d", + unformat_ip4_address, &intfc, &mask_width)) + adj_set = 1; + else if (unformat (line_input, "tx-intfc %U", + unformat_vnet_sw_interface, vnm, &tx_sw_if_index)) + intfc_set = 1; + else if (unformat (line_input, "classify-table-index %d", + &classify_table_index)) + ; + else if (unformat (line_input, "fib-id %d", &inner_fib_id)) + ; + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "del")) + is_del = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + } + + if (classify_table_index == ~0) + return clib_error_return (0, "missing classify_table_index"); + + if (!intfc_set) + return clib_error_return (0, "missing tx-intfc"); + + if (!dst_set) + return clib_error_return (0, "missing: dst <ethernet-address>"); + + if (!adj_set) + return clib_error_return (0, "missing: intfc <ip-address>/<mask-width>"); + + + rv = vnet_mpls_ethernet_add_del_policy_tunnel (dst, &intfc, mask_width, + inner_fib_id, tx_sw_if_index, + 0 /* tunnel sw_if_index */, + classify_table_index, + &new_tunnel_index, + l2_only, !is_del); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "rx fib ID %d doesn't exist\n", + inner_fib_id); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "tunnel not found\n"); + + case VNET_API_ERROR_NO_SUCH_LABEL: + /* + * This happens when there's no MPLS label for the dst address + * no need for two error messages. + */ + return clib_error_return (0, "no label for %U in fib %d", + format_ip4_address, &intfc, inner_fib_id); + break; + + default: + break; + } + + if (!is_del) + vlib_cli_output (vm, "tunnel index %d", new_tunnel_index); + + return 0; +} + +VLIB_CLI_COMMAND (create_mpls_ethernet_policy_tunnel_command, static) = { + .path = "create mpls ethernet policy tunnel", + .short_help = + "create mpls ethernet policy tunnel [del] dst <mac-addr> intfc <addr>/<mw>\n" + " classify-table-index <nn>", + .function = create_mpls_ethernet_policy_tunnel_command_fn, +}; diff --git a/vnet/vnet/mpls-gre/mpls.c b/vnet/vnet/mpls-gre/mpls.c new file mode 100644 index 00000000000..431a69b4ab0 --- /dev/null +++ b/vnet/vnet/mpls-gre/mpls.c @@ -0,0 +1,769 @@ +/* + * mpls.c: mpls + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/vnet.h> +#include <vnet/mpls-gre/mpls.h> + +mpls_main_t mpls_main; + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_gre_tx_trace_t * t = va_arg (*args, mpls_gre_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d src %U dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + format_ip4_address, &t->src.as_u8, + format_ip4_address, &t->dst.as_u8); + } + return s; +} + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_eth_tx_trace_t * t = va_arg (*args, mpls_eth_tx_trace_t *); + mpls_main_t * mm = &mpls_main; + + if (t->lookup_miss) + s = format (s, "MPLS: lookup miss"); + else + { + s = format (s, "MPLS: tunnel %d labels %U len %d tx_sw_index %d dst %U", + t->tunnel_id, + format_mpls_encap_index, mm, t->mpls_encap_index, + clib_net_to_host_u16 (t->length), + t->tx_sw_if_index, + format_ethernet_address, t->dst); + } + return s; +} + +u8 * format_mpls_eth_header_with_length (u8 * s, va_list * args) +{ + ethernet_header_t * h = va_arg (*args, ethernet_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ethernet header truncated"); + + s = format + (s, "ETHERNET-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header_with_length (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + mpls_unicast_header_t * m = (mpls_unicast_header_t *)(h+1); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "gre header truncated"); + + s = format + (s, "GRE-MPLS label %d", + vnet_mpls_uc_get_label (clib_net_to_host_u32 (m->label_exp_s_ttl))); + + return s; +} + +u8 * format_mpls_gre_header (u8 * s, va_list * args) +{ + gre_header_t * h = va_arg (*args, gre_header_t *); + return format (s, "%U", format_mpls_gre_header_with_length, h, 0); +} + +uword +unformat_mpls_gre_header (unformat_input_t * input, va_list * args) +{ + u8 ** result = va_arg (*args, u8 **); + gre_header_t _g, * g = &_g; + mpls_unicast_header_t _h, * h = &_h; + u32 label, label_exp_s_ttl; + + if (! unformat (input, "MPLS %d", &label)) + return 0; + + g->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_mpls_unicast); + + label_exp_s_ttl = (label<<12) | (1<<8) /* s-bit */ | 0xFF; + h->label_exp_s_ttl = clib_host_to_net_u32 (label_exp_s_ttl); + + /* Add gre, mpls headers to result. 
*/ + { + void * p; + u32 g_n_bytes = sizeof (g[0]); + u32 h_n_bytes = sizeof (h[0]); + + vec_add2 (*result, p, g_n_bytes); + memcpy (p, g, g_n_bytes); + + vec_add2 (*result, p, h_n_bytes); + memcpy (p, h, h_n_bytes); + } + + return 1; +} + +uword +unformat_mpls_label_net_byte_order (unformat_input_t * input, + va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + u32 label; + + if (!unformat (input, "MPLS: label %d", &label)) + return 0; + + label = (label<<12) | (1<<8) /* s-bit set */ | 0xFF /* ttl */; + + *result = clib_host_to_net_u32 (label); + return 1; +} + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address) +{ + uword * p; + mpls_encap_t * e; + u64 key; + + key = ((u64)rx_fib<<32) | ((u64) dst_address); + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + + if (!p) + return 0; + + e = pool_elt_at_index (mm->encaps, p[0]); + return e; +} + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_encap_t * e; + u32 label_net_byte_order, label_host_byte_order; + u32 fib_index; + u64 key; + uword *p; + int i; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + key = ((u64)fib_index<<32) | ((u64) dest->as_u32); + + if (is_add) + { + pool_get (mm->encaps, e); + memset (e, 0, sizeof (*e)); + + for (i = 0; i < vec_len (labels_host_byte_order); i++) + { + mpls_unicast_header_t h; + label_host_byte_order = labels_host_byte_order[i]; + + /* Reformat label into mpls_unicast_header_t */ + label_host_byte_order <<= 12; + if (i == vec_len(labels_host_byte_order) - 1) + label_host_byte_order |= 1<<8; /* S=1 */ + label_host_byte_order |= 0xff; /* TTL=FF */ + label_net_byte_order = clib_host_to_net_u32 (label_host_byte_order); + h.label_exp_s_ttl = label_net_byte_order; + vec_add1 (e->labels, h); + } + if (no_dst_hash == 0) + hash_set (mm->mpls_encap_by_fib_and_dest, key, e - mm->encaps); + if (indexp) + *indexp = e - mm->encaps; + if (policy_tunnel_index != ~0) + return vnet_mpls_policy_tunnel_add_rewrite (mm, e, policy_tunnel_index); + } + else + { + p = hash_get (mm->mpls_encap_by_fib_and_dest, key); + if (!p) + return VNET_API_ERROR_NO_SUCH_LABEL; + + e = pool_elt_at_index (mm->encaps, p[0]); + + vec_free (e->labels); + vec_free (e->rewrite); + pool_put(mm->encaps, e); + + if (no_dst_hash == 0) + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + } + return 0; +} + +static clib_error_t * +mpls_add_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + u32 *labels = 0; + u32 this_label; + ip4_address_t dest; + u32 policy_tunnel_index = ~0; + int no_dst_hash = 0; + int rv; + int fib_set = 0; + int dest_set = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &fib_id)) + fib_set = 1; + else if (unformat (input, "dest %U", unformat_ip4_address, &dest)) + dest_set = 1; + else if (unformat (input, "no-dst-hash")) + no_dst_hash = 1; + else if (unformat (input, "label %d", &this_label)) + vec_add1 (labels, this_label); + else if (unformat (input, "policy-tunnel %d", &policy_tunnel_index)) + ; + else + break; + } + + if (fib_set == 0) + return clib_error_return (0, "fib-id missing"); + if (dest_set == 0) + return clib_error_return (0, "destination IP address missing"); + if (vec_len (labels) 
== 0) + return clib_error_return (0, "label stack missing"); + + rv = vnet_mpls_add_del_encap (&dest, fib_id, labels, + policy_tunnel_index, + no_dst_hash, 0 /* indexp */, + 1 /* is_add */); + vec_free (labels); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_encap returned %d", + rv); + } + + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_encap_command, static) = { + .path = "mpls encap add", + .short_help = + "mpls encap add label <label> ... fib <id> dest <ip4-address>", + .function = mpls_add_encap_command_fn, +}; + +u8 * format_mpls_unicast_header_host_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + u32 label = h->label_exp_s_ttl; + + s = format (s, "label %d exp %d, s %d, ttl %d", + vnet_mpls_uc_get_label (label), + vnet_mpls_uc_get_exp (label), + vnet_mpls_uc_get_s (label), + vnet_mpls_uc_get_ttl (label)); + return s; +} + +u8 * format_mpls_unicast_header_net_byte_order (u8 * s, va_list * args) +{ + mpls_unicast_header_t *h = va_arg(*args, mpls_unicast_header_t *); + mpls_unicast_header_t h_host; + + h_host.label_exp_s_ttl = clib_net_to_host_u32 (h->label_exp_s_ttl); + + return format (s, "%U", format_mpls_unicast_header_host_byte_order, + &h_host); +} + +static clib_error_t * +mpls_del_encap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 fib_id; + ip4_address_t dest; + int rv; + + if (unformat (input, "fib %d dest %U", &fib_id, + unformat_ip4_address, &dest)) + { + rv = vnet_mpls_add_del_encap (&dest, fib_id, 0 /* labels */, + ~0 /* policy_tunnel_index */, + 0 /* no_dst_hash */, + 0 /* indexp */, + 0 /* is_add */); + switch (rv) + { + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "fib id %d unknown", fib_id); + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "dest %U not in fib %d", + format_ip4_address, &dest, fib_id); + default: + break; + } + return 0; + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); +} + +VLIB_CLI_COMMAND (mpls_del_encap_command, static) = { + .path = "mpls encap delete", + .short_help = "mpls encap delete fib <id> dest <ip4-address>", + .function = mpls_del_encap_command_fn, +}; + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add) +{ + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + mpls_decap_t * d; + u32 rx_fib_index, tx_fib_index_or_output_swif_index; + uword *p; + u64 key; + + p = hash_get (im->fib_index_by_table_id, rx_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + rx_fib_index = p[0]; + + /* L3 decap => transform fib ID to fib index */ + if (next_index == MPLS_INPUT_NEXT_IP4_INPUT) + { + p = hash_get (im->fib_index_by_table_id, tx_fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + + tx_fib_index_or_output_swif_index = p[0]; + } + else + { + /* L2 decap, tx_fib_id is actually the output sw_if_index */ + tx_fib_index_or_output_swif_index = tx_fib_id; + } + + key = ((u64)rx_fib_index<<32) | ((u64) (label_host_byte_order<<12)) + | ((u64) s_bit<<8); + + p = hash_get (mm->mpls_decap_by_rx_fib_and_label, key); + + /* If deleting, or replacing an old entry */ + if (is_add == 0 || p) + { + if (is_add == 0 && p == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + d = pool_elt_at_index (mm->decaps, p[0]); + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put (mm->decaps, d); + /* Deleting, we're done... */ + if (is_add == 0) + return 0; + } + + /* add decap entry... */ + pool_get (mm->decaps, d); + memset (d, 0, sizeof (*d)); + d->tx_fib_index = tx_fib_index_or_output_swif_index; + d->next_index = next_index; + + hash_set (mm->mpls_decap_by_rx_fib_and_label, key, d - mm->decaps); + + return 0; +} + +uword +unformat_mpls_gre_input_next (unformat_input_t * input, va_list * args) +{ + u32 * result = va_arg (*args, u32 *); + int rv = 0; + + if (unformat (input, "lookup")) + { + *result = MPLS_INPUT_NEXT_IP4_INPUT; + rv = 1; + } + else if (unformat (input, "output")) + { + *result = MPLS_INPUT_NEXT_L2_OUTPUT; + rv = 1; + } + return rv; +} + +static clib_error_t * +mpls_add_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + u32 rx_fib_id = 0; + u32 tx_fib_or_sw_if_index; + u32 label; + int s_bit = 1; + u32 next_index = 1; /* ip4_lookup, see node.c */ + int tx_fib_id_set = 0; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "fib %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "sw_if_index %d", &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &tx_fib_or_sw_if_index)) + tx_fib_id_set = 1; + else if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + else if (unformat (input, "next %U", unformat_mpls_gre_input_next, + &next_index)) + ; + else + break; + } + + if (tx_fib_id_set == 0) + return clib_error_return (0, "lookup FIB ID not set"); + if (label_set == 0) + return clib_error_return (0, "missing label"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, tx_fib_or_sw_if_index, + label, s_bit, next_index, 1 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such tx fib / swif %d", + tx_fib_or_sw_if_index); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + +VLIB_CLI_COMMAND (mpls_add_decap_command, static) = { + .path = "mpls decap add", + .short_help = + "mpls decap add fib <id> label <nn> [s-bit-clear] [next-index <nn>]", + .function = mpls_add_decap_command_fn, +}; + +static clib_error_t * +mpls_del_decap_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 rx_fib_id = 0; + u32 tx_fib_id = 0; + u32 label; + int s_bit = 1; + int label_set = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "rx-fib %d", &rx_fib_id)) + ; + else 
if (unformat (input, "label %d", &label)) + label_set = 1; + else if (unformat (input, "s-bit-clear")) + s_bit = 0; + } + + if (!label_set) + return clib_error_return (0, "label not set"); + + rv = vnet_mpls_add_del_decap (rx_fib_id, + tx_fib_id /* not interesting */, + label, s_bit, + 0 /* next_index not interesting */, + 0 /* is_add */); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such rx fib id %d", rx_fib_id); + + case VNET_API_ERROR_NO_SUCH_INNER_FIB: + return clib_error_return (0, "no such lookup fib id %d", tx_fib_id); + + case VNET_API_ERROR_NO_SUCH_LABEL: + return clib_error_return (0, "no such label %d rx fib id %d", + label, rx_fib_id); + + default: + return clib_error_return (0, "vnet_mpls_add_del_decap returned %d", + rv); + } + return 0; +} + + +VLIB_CLI_COMMAND (mpls_del_decap_command, static) = { + .path = "mpls decap delete", + .short_help = "mpls decap delete label <label> rx-fib <id> [s-bit-clear]", + .function = mpls_del_decap_command_fn, +}; + +typedef struct { + u32 fib_index; + u32 entry_index; + u32 dest; + u32 s_bit; + u32 label; +} show_mpls_fib_t; + +static clib_error_t * +show_mpls_fib_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + ip4_fib_t * rx_fib, * tx_fib; + u32 tx_table_id; + char *swif_tag; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + vec_add2 (records, s, 1); + s->fib_index = (u32)(key>>32); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + })); + + if (!vec_len(records)) + { + vlib_cli_output (vm, "MPLS encap table empty"); + goto decap_table; + } + /* sort output by dst address within fib */ + vec_sort (records, r0, r1, clib_net_to_host_u32(r0->dest) - + clib_net_to_host_u32(r1->dest)); + vec_sort (records, r0, r1, r0->fib_index - r1->fib_index); + vlib_cli_output (vm, "MPLS encap table"); + vlib_cli_output (vm, "%=6s%=16s%=16s", "Table", "Dest address", "Labels"); + vec_foreach (s, records) + { + rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + vlib_cli_output (vm, "%=6d%=16U%=16U", rx_fib->table_id, + format_ip4_address, &s->dest, + format_mpls_encap_index, mm, s->entry_index); + } + + decap_table: + vec_reset_length(records); + + hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label, + ({ + vec_add2 (records, s, 1); + s->fib_index = (u32)(key>>32); + s->entry_index = (u32) value; + s->label = ((u32) key)>>12; + s->s_bit = (key & (1<<8)) != 0; + })); + + if (!vec_len(records)) + { + vlib_cli_output (vm, "MPLS decap table empty"); + goto out; + } + + vec_sort (records, r0, r1, r0->label - r1->label); + + vlib_cli_output (vm, "MPLS decap table"); + vlib_cli_output (vm, "%=10s%=15s%=6s%=6s", "RX Table", "TX Table/Intfc", + "Label", "S-bit"); + vec_foreach (s, records) + { + mpls_decap_t * d; + d = pool_elt_at_index (mm->decaps, s->entry_index); + if (d->next_index == MPLS_INPUT_NEXT_IP4_INPUT) + { + tx_fib = vec_elt_at_index (im->fibs, d->tx_fib_index); + tx_table_id = tx_fib->table_id; + swif_tag = " "; + } + else + { + tx_table_id = d->tx_fib_index; + swif_tag = "(i) "; + } + rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + + vlib_cli_output (vm, "%=10d%=10d%=5s%=6d%=6d", rx_fib->table_id, + tx_table_id, swif_tag, s->label, s->s_bit); + } + + out: + vec_free(records); + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_fib_command, static) = { + 
.path = "show mpls fib", + .short_help = "show mpls fib", + .function = show_mpls_fib_command_fn, +}; + +int mpls_fib_reset_labels (u32 fib_id) +{ + u64 key; + u32 value; + show_mpls_fib_t *records = 0; + show_mpls_fib_t *s; + mpls_main_t * mm = &mpls_main; + ip4_main_t * im = &ip4_main; + u32 fib_index; + uword *p; + + p = hash_get (im->fib_index_by_table_id, fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_index = p[0]; + + hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, + ({ + if (fib_index == (u32)(key>>32)) { + vec_add2 (records, s, 1); + s->dest = (u32)(key & 0xFFFFFFFF); + s->entry_index = (u32) value; + } + })); + + vec_foreach (s, records) + { + key = ((u64)fib_index<<32) | ((u64) s->dest); + hash_unset (mm->mpls_encap_by_fib_and_dest, key); + pool_put_index (mm->encaps, s->entry_index); + } + + vec_reset_length(records); + + hash_foreach (key, value, mm->mpls_decap_by_rx_fib_and_label, + ({ + if (fib_index == (u32) (key>>32)) { + vec_add2 (records, s, 1); + s->entry_index = value; + s->fib_index = fib_index; + s->s_bit = key & (1<<8); + s->dest = (u32)((key & 0xFFFFFFFF)>>12); + } + })); + + vec_foreach (s, records) + { + key = ((u64)fib_index <<32) | ((u64)(s->dest<<12)) | + ((u64)s->s_bit); + + hash_unset (mm->mpls_decap_by_rx_fib_and_label, key); + pool_put_index (mm->decaps, s->entry_index); + } + + vec_free(records); + return 0; +} + +static clib_error_t * mpls_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + memset (mm, 0, sizeof (mm[0])); + mm->vlib_main = vm; + mm->vnet_main = vnet_get_main(); + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + mm->mpls_encap_by_fib_and_dest = hash_create (0, sizeof (uword)); + mm->mpls_decap_by_rx_fib_and_label = hash_create (0, sizeof (uword)); + + return vlib_call_init_function (vm, mpls_input_init); +} + +VLIB_INIT_FUNCTION (mpls_init); + +mpls_main_t * mpls_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, mpls_init); + return &mpls_main; +} + diff --git a/vnet/vnet/mpls-gre/mpls.h b/vnet/vnet/mpls-gre/mpls.h new file mode 100644 index 00000000000..5d7f9c5e219 --- /dev/null +++ b/vnet/vnet/mpls-gre/mpls.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_mpls_gre_h +#define included_vnet_mpls_gre_h + +#include <vnet/vnet.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/packet.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ethernet/ethernet.h> + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; /* 20 bytes */ + gre_header_t gre; /* 4 bytes */ + mpls_unicast_header_t labels[0]; /* 4 bytes each */ +}) ip4_gre_and_mpls_header_t; + +vnet_hw_interface_class_t mpls_gre_hw_interface_class; + +typedef enum { +#define mpls_error(n,s) MPLS_ERROR_##n, +#include <vnet/mpls-gre/error.def> +#undef mpls_error + MPLS_N_ERROR, +} mpls_gre_error_t; + +/* + * No protocol info, MPLS labels don't have a next-header field + * presumably the label field tells all... + */ + +typedef struct { + ip4_address_t tunnel_src; + ip4_address_t tunnel_dst; + ip4_address_t intfc_address; + u32 mask_width; + u32 inner_fib_index; + u32 outer_fib_index; + u32 encap_index; + u32 hw_if_index; /* L2 x-connect capable tunnel intfc */ + u8 * rewrite_data; + u8 l2_only; +} mpls_gre_tunnel_t; + +typedef struct { + u8 tunnel_dst[6]; + ip4_address_t intfc_address; + u32 tx_sw_if_index; + u32 inner_fib_index; + u32 mask_width; + u32 encap_index; + u32 hw_if_index; + u8 * rewrite_data; + u8 l2_only; +} mpls_eth_tunnel_t; + +typedef struct { + mpls_unicast_header_t *labels; + /* only for policy tunnels */ + u8 * rewrite; + u32 output_next_index; +} mpls_encap_t; + +typedef struct { + u32 tx_fib_index; + u32 next_index; /* e.g. ip4/6-input, l2-input */ +} mpls_decap_t; + +typedef struct { + /* pool of gre tunnel instances */ + mpls_gre_tunnel_t *gre_tunnels; + u32 * free_gre_sw_if_indices; + + /* pool of ethernet tunnel instances */ + mpls_eth_tunnel_t *eth_tunnels; + u32 * free_eth_sw_if_indices; + + /* Encap side: map (fib, dst_address) to mpls label stack */ + mpls_encap_t * encaps; + uword * mpls_encap_by_fib_and_dest; + + /* Decap side: map rx label to FIB */ + mpls_decap_t * decaps; + uword * mpls_decap_by_rx_fib_and_label; + + /* mpls-o-e policy tunnel next index for ip4-classify */ + u32 ip_classify_mpls_policy_encap_next_index; + + /* convenience */ + vlib_main_t * vlib_main; + vnet_main_t * vnet_main; +} mpls_main_t; + +mpls_main_t mpls_main; + +format_function_t format_mpls_protocol; +format_function_t format_mpls_header; +format_function_t format_mpls_header_with_length; +format_function_t format_mpls_gre_header_with_length; +format_function_t format_mpls_eth_header_with_length; +format_function_t format_mpls_unicast_label; +format_function_t format_mpls_encap_index; + +vlib_node_registration_t mpls_input_node; +vlib_node_registration_t mpls_policy_encap_node; + +vnet_device_class_t mpls_gre_device_class; + +/* Parse mpls protocol as 0xXXXX or protocol name. + In either host or network byte order. */ +unformat_function_t unformat_mpls_protocol_host_byte_order; +unformat_function_t unformat_mpls_protocol_net_byte_order; +unformat_function_t unformat_mpls_label_net_byte_order; +unformat_function_t unformat_mpls_gre_header; +unformat_function_t unformat_pg_mpls_gre_header; + +/* Parse mpls header. 
*/ +unformat_function_t unformat_mpls_header; +unformat_function_t unformat_pg_mpls_header; + +/* manually added to the interface output node in mpls.c */ +#define MPLS_GRE_OUTPUT_NEXT_LOOKUP 1 +#define MPLS_GRE_OUTPUT_NEXT_DROP VNET_INTERFACE_TX_NEXT_DROP + +mpls_encap_t * +mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address); + +int mpls_label_from_fib_id_and_dest (mpls_main_t *gm, u32 fib_id, + u32 dst_address, u32 *labelp); + +int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_intfc_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, + u32 tx_sw_if_index, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add); + +int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id); + +int mpls_fib_reset_labels (u32 fib_id); + +int vnet_mpls_add_del_decap (u32 rx_fib_id, + u32 tx_fib_id, + u32 label_host_byte_order, + int s_bit, int next_index, int is_add); + +int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, + u32 *labels_host_byte_order, + u32 policy_tunnel_index, + int no_dst_hash, u32 * indexp, int is_add); + +int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, + mpls_encap_t * e, + u32 policy_tunnel_index); +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + /* tunnel ip4 addresses */ + ip4_address_t src; + ip4_address_t dst; +} mpls_gre_tx_trace_t; + +u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args); +u8 * format_mpls_gre_header (u8 * s, va_list * args); + +#define foreach_mpls_input_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(L2_OUTPUT, "l2-output") + +typedef enum { +#define _(s,n) MPLS_INPUT_NEXT_##s, + foreach_mpls_input_next +#undef _ + MPLS_INPUT_N_NEXT, +} mpls_input_next_t; + + +typedef struct { + u32 lookup_miss; + + /* Tunnel-id / index in tunnel vector */ + u32 tunnel_id; + + /* output interface */ + u32 tx_sw_if_index; + + /* mpls encap index */ + u32 mpls_encap_index; + + /* pkt length */ + u32 length; + + u8 dst[6]; +} mpls_eth_tx_trace_t; + +u8 * format_mpls_eth_tx_trace (u8 * s, va_list * args); + +#endif /* included_vnet_mpls_gre_h */ diff --git a/vnet/vnet/mpls-gre/node.c b/vnet/vnet/mpls-gre/node.c new file mode 100644 index 00000000000..6bf5f814aec --- /dev/null +++ b/vnet/vnet/mpls-gre/node.c @@ -0,0 +1,359 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + u32 next_index; + u32 decap_index; + u32 tx_fib_index; + u32 label_host_byte_order; +} mpls_rx_trace_t; + +u8 * format_mpls_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_rx_trace_t * t = va_arg (*args, mpls_rx_trace_t *); + char * next_name; + + next_name = "BUG!"; + +#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; + foreach_mpls_input_next; +#undef _ + + s = format (s, "MPLS: next %s, lookup fib index %d, decap index %d\n", + next_name, t->next_index, t->tx_fib_index, t->decap_index); + if (t->decap_index != ~0) + { + s = format (s, " label %d", + vnet_mpls_uc_get_label(t->label_host_byte_order)); + } + return s; +} + +vlib_node_registration_t mpls_input_node; + +typedef struct { + u32 last_label; + u32 last_inner_fib_index; + u32 last_outer_fib_index; + mpls_main_t * mpls_main; +} mpls_input_runtime_t; + +static inline uword +mpls_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_mpls_o_gre) +{ + u32 n_left_from, next_index, * from, * to_next; + ip4_main_t * im = &ip4_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + mpls_input_runtime_t * rt; + mpls_main_t * mm; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + mm = rt->mpls_main; + /* + * Force an initial lookup every time, in case the control-plane + * changed the label->FIB mapping. + */ + rt->last_label = ~0; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + +#if 0 + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t * b0, * b1; + mpls_unicast_header_t * h0, * h1; + int li0, li1; + u64 key0, key1; + u32 label0, label1; + u32 next0, next1; + uword * p0, * p1; + u32 fib_index0, fib_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ dual loop me */ + + vlib_buffer_advance (b0, sizeof (*h0)); + vlib_buffer_advance (b1, sizeof (*h1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + mpls_unicast_header_t * h0; + u32 label0; + u32 next0; + u64 key0; + uword * p0; + u32 rx_fib_index0; + mpls_decap_t *d0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + if (is_mpls_o_gre) + { + rx_fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { +#if 0 + /* If separate RX numbering spaces are required... 
*/ + rx_fib_index0 = vec_elt (mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); +#endif + rx_fib_index0 = 0; + } + + next0 = ~0; + d0 = 0; + + /* + * Expect the control-plane team to squeal like pigs. + * If they don't program a decap label entry for each + * and every label in the stack, packets go into the trash... + */ + + do + { + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + /* TTL expired? */ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + break; + } + + key0 = ((u64)rx_fib_index0<<32) + | ((u64)vnet_mpls_uc_get_label (label0)<<12) + | ((u64)vnet_mpls_uc_get_s (label0)<<8); + + /* + * The architecture crew claims that we won't need + * separate ip4, ip6, mpls-o-ethernet label numbering + * spaces. Use the low 8 key bits as a discriminator. + */ + + p0 = hash_get (mm->mpls_decap_by_rx_fib_and_label, key0); + if (p0 == 0) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_BAD_LABEL]; + break; + } + d0 = pool_elt_at_index (mm->decaps, p0[0]); + next0 = d0->next_index; + vnet_buffer(b0)->sw_if_index[VLIB_TX] = d0->tx_fib_index; + vlib_buffer_advance (b0, sizeof (*h0)); + h0 = vlib_buffer_get_current (b0); + } while (!vnet_mpls_uc_get_s(label0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_rx_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->decap_index = d0 ? d0 - mm->decaps : ~0; + tr->tx_fib_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; + tr->label_host_byte_order = label0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static uword +mpls_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame, 1 /* is mpls-o-gre */); +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_input_node) = { + .function = mpls_input, + .name = "mpls-gre-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_gre_header_with_length, + .format_trace = format_mpls_rx_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +static uword +mpls_ethernet_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame, 0 /* is mpls-o-gre */); +} + + +VLIB_REGISTER_NODE (mpls_ethernet_input_node) = { + .function = mpls_ethernet_input, + .name = "mpls-ethernet-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_eth_header_with_length, + .format_trace = format_mpls_rx_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +static void +mpls_setup_nodes (vlib_main_t * vm) +{ + vlib_node_t * n = vlib_get_node (vm, mpls_input_node.index); + pg_node_t * pn = pg_get_node (mpls_input_node.index); + mpls_input_runtime_t * rt; + + n->format_buffer = format_mpls_gre_header_with_length; + n->unformat_buffer = unformat_mpls_gre_header; + pn->unformat_edit = unformat_pg_mpls_header; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + n = vlib_get_node (vm, mpls_ethernet_input_node.index); + + n->format_buffer = format_mpls_eth_header_with_length; + + n->unformat_buffer = 0; /* unformat_mpls_ethernet_header; */ + + rt = vlib_node_get_runtime_data (vm, mpls_ethernet_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + mpls_ethernet_input_node.index); +} + +static clib_error_t * mpls_input_init (vlib_main_t * vm) +{ + clib_error_t * error; + + error = vlib_call_init_function (vm, mpls_init); + if (error) + clib_error_report (error); + + mpls_setup_nodes (vm); + + return 0; +} + +VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/vnet/vnet/mpls-gre/packet.h b/vnet/vnet/mpls-gre/packet.h new file mode 100644 index 00000000000..baa01818f09 --- /dev/null +++ b/vnet/vnet/mpls-gre/packet.h @@ -0,0 +1,49 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +typedef struct { + /* Label: top 20 bits [in network byte order] */ + /* Experimental: 3 bits ... 
*/ + /* S (bottom of label stack): 1 bit */ + /* TTL: 8 bits */ + u32 label_exp_s_ttl; +} mpls_unicast_header_t; + +static inline u32 vnet_mpls_uc_get_label (u32 label_exp_s_ttl) +{ + return (label_exp_s_ttl>>12); +} + +static inline u32 vnet_mpls_uc_get_exp (u32 label_exp_s_ttl) +{ + return ((label_exp_s_ttl>>9) & 0x7); +} + +static inline u32 vnet_mpls_uc_get_s (u32 label_exp_s_ttl) +{ + return ((label_exp_s_ttl>>8) & 0x1); +} + +static inline u32 vnet_mpls_uc_get_ttl (u32 label_exp_s_ttl) +{ + return (label_exp_s_ttl & 0xff); +} + +#endif /* included_vnet_mpls_packet_h */ diff --git a/vnet/vnet/mpls-gre/pg.c b/vnet/vnet/mpls-gre/pg.c new file mode 100644 index 00000000000..6b6a1017c58 --- /dev/null +++ b/vnet/vnet/mpls-gre/pg.c @@ -0,0 +1,71 @@ +/* + * pg.c: packet generator mpls/gre interface + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/gre/gre.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + pg_edit_t label; +} pg_mpls_header_t; + +static inline void +pg_mpls_header_init (pg_mpls_header_t * e) +{ + pg_edit_init (&e->label, mpls_unicast_header_t, label_exp_s_ttl); +} + +uword +unformat_pg_mpls_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t * s = va_arg (*args, pg_stream_t *); + pg_mpls_header_t * h; + vlib_main_t * vm = vlib_get_main(); + u32 group_index, error; + + h = pg_create_edit_group (s, sizeof (h[0]), sizeof (mpls_unicast_header_t), + &group_index); + pg_mpls_header_init (h); + + error = 1; + if (! unformat (input, "%U", + unformat_pg_edit, + unformat_mpls_label_net_byte_order, &h->label)) + goto done; + + { + pg_node_t * pg_node = 0; + vlib_node_t * ip_lookup_node; + + ip_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip4-input"); + ASSERT (ip_lookup_node); + + pg_node = pg_get_node (ip_lookup_node->index); + + if (pg_node && pg_node->unformat_edit + && unformat_user (input, pg_node->unformat_edit, s)) + ; + } + + error = 0; + done: + if (error) + pg_free_edit_group (s); + return error == 0; +} + diff --git a/vnet/vnet/mpls-gre/policy_encap.c b/vnet/vnet/mpls-gre/policy_encap.c new file mode 100644 index 00000000000..53411515e69 --- /dev/null +++ b/vnet/vnet/mpls-gre/policy_encap.c @@ -0,0 +1,172 @@ +/* + * policy_encap.c: mpls-o-e policy encap + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls-gre/mpls.h> + +typedef struct { + u32 next_index; + u32 encap_index; +} mpls_policy_encap_trace_t; + +u8 * format_mpls_policy_encap_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_policy_encap_trace_t * t = va_arg (*args, mpls_policy_encap_trace_t *); + + s = format (s, "MPLS-POLICY-ENCAP: next-index %d encap-index %d", + t->next_index, t->encap_index); + + return s; +} + +vlib_node_registration_t mpls_policy_encap_node; + +#define foreach_mpls_policy_encap_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_POLICY_ENCAP_NEXT_##s, + foreach_mpls_policy_encap_next +#undef _ + MPLS_POLICY_ENCAP_N_NEXT, +} mpls_policy_encap_next_t; + +#define foreach_mpls_policy_error \ +_(PKTS_ENCAP, "mpls policy tunnel packets encapsulated") + +typedef enum { +#define _(n,s) MPLS_POLICY_ENCAP_ERROR_##n, + foreach_mpls_policy_error + MPLS_POLICY_ENCAP_N_ERROR, +#undef _ +} mpls_policy_encap_error_t; + +static char * mpls_policy_encap_error_strings[] = + { +#define _(n,s) s, + foreach_mpls_policy_error +#undef _ +}; + +static uword +mpls_policy_encap (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + u8 * h0; + u32 encap_index0; + u32 next0; + mpls_encap_t * e0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + encap_index0 = vnet_buffer(b0)->l2_classify.opaque_index; + + e0 = pool_elt_at_index (mm->encaps, encap_index0); + + vlib_buffer_advance (b0, -(word)vec_len(e0->rewrite)); + h0 = vlib_buffer_get_current (b0); + memcpy (h0, e0->rewrite, vec_len(e0->rewrite)); + + next0 = e0->output_next_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_policy_encap_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->next_index = next0; + tr->encap_index = encap_index0; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_policy_encap_node.index, + MPLS_POLICY_ENCAP_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + return from_frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_policy_encap_node) = { + .function = mpls_policy_encap, + .name = "mpls-policy-encap", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = 0, + + .n_errors = MPLS_POLICY_ENCAP_N_ERROR, + .error_strings = mpls_policy_encap_error_strings, + + .format_trace = format_mpls_policy_encap_trace, + + .n_next_nodes = MPLS_POLICY_ENCAP_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_POLICY_ENCAP_NEXT_##s] = n, + foreach_mpls_policy_encap_next +#undef _ + }, +}; + +static clib_error_t * +mpls_policy_encap_init (vlib_main_t * vm) +{ + mpls_main_t * mm = &mpls_main; + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mm->ip_classify_mpls_policy_encap_next_index = + vlib_node_add_next (mm->vlib_main, + ip4_classify_node.index, + mpls_policy_encap_node.index); + return 0; +} + +VLIB_INIT_FUNCTION (mpls_policy_encap_init); |
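
Editor's note (appended after the diff, not part of the patch): the accessors added in packet.h and the decap hash keys built in mpls.c and node.c share one bit layout — label in bits 31..12, EXP in bits 11..9, bottom-of-stack S in bit 8, TTL in bits 7..0 — with the rx FIB index occupying the upper 32 bits of the 64-bit decap key. The standalone sketch below only illustrates that layout: the get_* helpers mirror vnet_mpls_uc_get_* from packet.h, while uc_pack(), decap_key() and main() are illustrative additions that do not exist in the source tree.

/*
 * Minimal, standalone sketch of the MPLS label word and decap-key layout
 * used by the mpls-gre code above. Illustrative only; compile with any C99
 * compiler (e.g. "cc -std=c99 sketch.c && ./a.out").
 */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;
typedef uint64_t u64;

/* Mirrors vnet_mpls_uc_get_* in packet.h:
 * label[31:12]  exp[11:9]  s[8]  ttl[7:0] (host byte order) */
static u32 uc_get_label (u32 v) { return v >> 12; }
static u32 uc_get_exp   (u32 v) { return (v >> 9) & 0x7; }
static u32 uc_get_s     (u32 v) { return (v >> 8) & 0x1; }
static u32 uc_get_ttl   (u32 v) { return v & 0xff; }

/* Illustrative inverse of the accessors (not present in the patch). */
static u32 uc_pack (u32 label, u32 exp, u32 s, u32 ttl)
{
  return (label << 12) | ((exp & 0x7) << 9) | ((s & 0x1) << 8) | (ttl & 0xff);
}

/* Decap hash key as built in vnet_mpls_add_del_decap() and looked up in
 * mpls_input_inline(): rx FIB index in bits 63..32, label in bits 31..12,
 * s-bit in bit 8. */
static u64 decap_key (u32 rx_fib_index, u32 label, int s_bit)
{
  return ((u64) rx_fib_index << 32) | ((u64) label << 12) | ((u64) s_bit << 8);
}

int main (void)
{
  /* label 100, bottom-of-stack, TTL 255 -- same defaults the CLI code uses */
  u32 h = uc_pack (100, 0, 1, 0xff);

  printf ("word 0x%08x  label %u exp %u s %u ttl %u\n",
          h, uc_get_label (h), uc_get_exp (h), uc_get_s (h), uc_get_ttl (h));
  printf ("decap key for rx-fib 5, label 100, s=1: 0x%016llx\n",
          (unsigned long long) decap_key (5, uc_get_label (h), uc_get_s (h)));
  return 0;
}

Running the sketch prints the decoded fields for label 100 with S set and TTL 255, and the corresponding 64-bit decap key for rx FIB index 5; that key has the same shape as the one hashed in mm->mpls_decap_by_rx_fib_and_label in the patch.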