From 0f26c5a0138ac86d7ebd197c31a09d8d624c35fe Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Wed, 1 Mar 2017 15:12:11 -0800 Subject: MPLS Mcast 1 - interface-DPO Used in the Data-plane to change a packet's input interface 2 - MPLS multicast FIB entry Same as a unicast entry but it links to a replicate not a load-balance DPO 3 - Multicast MPLS tunnel Update MPLS tunnels to use a FIB path-list to describe the endpoint[s]. Use the path-list to generate the forwarding chain (DPOs) to link to . 4 - Resolve a path via a local label (of an mLDP LSP) For IP multicast entries to use an LSP in the replication list, we need to decribe the 'resolve-via-label' where the label is that of a multicast LSP. 5 - MPLS disposition path sets RPF-ID For a interface-less LSP (i.e. mLDP not RSVP-TE) at the tail of the LSP we still need to perform an RPF check. An MPLS disposition DPO performs the MPLS pop validation checks and sets the RPF-ID in the packet. 6 - RPF check with per-entry RPF-ID An RPF-ID is used instead of a real interface SW if index in the case the IP traffic arrives from an LSP that does not have an associated interface. Change-Id: Ib92e177be919147bafeb599729abf3d1abc2f4b3 Signed-off-by: Neale Ranns --- src/vnet/mpls/mpls.api | 87 +++-- src/vnet/mpls/mpls.c | 17 +- src/vnet/mpls/mpls_api.c | 97 +++-- src/vnet/mpls/mpls_input.c | 2 +- src/vnet/mpls/mpls_lookup.c | 236 ++++++++---- src/vnet/mpls/mpls_tunnel.c | 883 ++++++++++++++++++++++++++++++-------------- src/vnet/mpls/mpls_tunnel.h | 57 ++- src/vnet/mpls/mpls_types.h | 20 + 8 files changed, 941 insertions(+), 458 deletions(-) (limited to 'src/vnet/mpls') diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api index 2e3bfaf53f5..a1e1270a853 100644 --- a/src/vnet/mpls/mpls.api +++ b/src/vnet/mpls/mpls.api @@ -55,6 +55,7 @@ define mpls_ip_bind_unbind_reply @param context - sender context, to match reply w/ request @param mt_is_add - Is this a route add or delete @param mt_sw_if_index - The SW interface index of the tunnel to delete + @param mt_is_multicast - Is the tunnel's underlying LSP multicast @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mt_next_hop_weight - The weight, for UCMP @param mt_next_hop[16] - the nextop address @@ -70,6 +71,7 @@ define mpls_tunnel_add_del u32 mt_sw_if_index; u8 mt_is_add; u8 mt_l2_only; + u8 mt_is_multicast; u8 mt_next_hop_proto_is_ip4; u8 mt_next_hop_weight; u8 mt_next_hop[16]; @@ -102,30 +104,43 @@ define mpls_tunnel_dump i32 tunnel_index; }; -/** \brief mpls eth tunnel operational state response - @param tunnel_index - eth tunnel identifier - @param intfc_address - interface ipv4 addr - @param mask_width - interface ipv4 addr mask - @param hw_if_index - interface id - @param l2_only - - @param tunnel_dst_mac - - @param tx_sw_if_index - - @param encap_index - reference to mpls label table - @param nlabels - number of resolved labels - @param labels - resolved labels +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion + +*/ +typeonly manual_print manual_endian define fib_path2 +{ + u32 sw_if_index; + u32 weight; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; + u32 labels[16]; +}; + +/** \brief mpls tunnel details */ -define mpls_tunnel_details +manual_endian manual_print define mpls_tunnel_details { u32 context; - u32 tunnel_index; - u8 mt_l2_only; u8 mt_sw_if_index; - u8 mt_next_hop_proto_is_ip4; - u8 mt_next_hop[16]; - u32 mt_next_hop_sw_if_index; - u32 mt_next_hop_table_id; - u32 mt_next_hop_n_labels; - u32 mt_next_hop_out_labels[mt_next_hop_n_labels]; + u8 mt_tunnel_index; + u8 mt_l2_only; + u8 mt_is_multicast; + u32 mt_count; + vl_api_fib_path2_t mt_paths[mt_count]; }; /** \brief MPLS Route Add / del route @@ -140,10 +155,14 @@ define mpls_tunnel_details create them @param mr_is_add - Is this a route add or delete @param mr_is_classify - Is this route result a classify + @param mr_is_multicast - Is this a multicast route @param mr_is_multipath - Is this route update a multipath - i.e. is this a path addition to an existing route @param mr_is_resolve_host - Recurse resolution constraint via a host prefix @param mr_is_resolve_attached - Recurse resolution constraint via attached prefix + @param mr_is_interface_rx - Interface Receive path + @param mr_is_interface_rx - RPF-ID Receive path. The next-hop interface + is used as the RPF-ID @param mr_next_hop_proto_is_ip4 - The next-hop is IPV4 @param mr_next_hop_weight - The weight, for UCMP @param mr_next_hop[16] - the nextop address @@ -164,9 +183,12 @@ define mpls_route_add_del u8 mr_create_table_if_needed; u8 mr_is_add; u8 mr_is_classify; + u8 mr_is_multicast; u8 mr_is_multipath; u8 mr_is_resolve_host; u8 mr_is_resolve_attached; + u8 mr_is_interface_rx; + u8 mr_is_rpf_id; u8 mr_next_hop_proto_is_ip4; u8 mr_next_hop_weight; u8 mr_next_hop[16]; @@ -187,31 +209,6 @@ define mpls_route_add_del_reply i32 retval; }; -/** \brief FIB path - @param sw_if_index - index of the interface - @param weight - The weight, for UCMP - @param is_local - local if non-zero, else remote - @param is_drop - Drop the packet - @param is_unreach - Drop the packet and rate limit send ICMP unreachable - @param is_prohibit - Drop the packet and rate limit send ICMP prohibited - @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 - @param next_hop[16] - the next hop address - - WARNING: this type is replicated, pending cleanup completion - -*/ -typeonly manual_print manual_endian define fib_path2 -{ - u32 sw_if_index; - u32 weight; - u8 is_local; - u8 is_drop; - u8 is_unreach; - u8 is_prohibit; - u8 afi; - u8 next_hop[16]; -}; - /** \brief Dump MPLS fib table @param client_index - opaque cookie to identify the sender */ diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c index 482577b17ef..451b15cf03c 100644 --- a/src/vnet/mpls/mpls.c +++ b/src/vnet/mpls/mpls.c @@ -286,7 +286,15 @@ vnet_mpls_local_label (vlib_main_t * vm, rpath.frp_proto = FIB_PROTOCOL_IP4; vec_add1(rpaths, rpath); } - + else if (unformat (line_input, "rx-ip4 %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_flags = FIB_ROUTE_PATH_INTF_RX; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", unformat_ip6_address, &rpath.frp_addr.ip6, @@ -512,10 +520,3 @@ mpls_init (vlib_main_t * vm) } VLIB_INIT_FUNCTION (mpls_init); - -mpls_main_t * mpls_get_main (vlib_main_t * vm) -{ - vlib_call_init_function (vm, mpls_init); - return &mpls_main; -} - diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c index f1aef6c92c6..6bfc491d344 100644 --- a/src/vnet/mpls/mpls_api.c +++ b/src/vnet/mpls/mpls_api.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -163,6 +164,7 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, dpo_proto_to_fib (pfx.fp_payload_proto), mp->mr_next_hop_table_id, mp->mr_create_table_if_needed, + mp->mr_is_rpf_id, &fib_index, &next_hop_fib_index); if (0 != rv) @@ -192,10 +194,13 @@ mpls_route_add_del_t_handler (vnet_main_t * vnm, 0, // mp->is_unreach, 0, // mp->is_prohibit, 0, // mp->is_local, + mp->mr_is_multicast, mp->mr_is_classify, mp->mr_classify_table_index, mp->mr_is_resolve_host, mp->mr_is_resolve_attached, + mp->mr_is_interface_rx, + mp->mr_is_rpf_id, fib_index, &pfx, mp->mr_next_hop_proto_is_ip4, &nh, ntohl (mp->mr_next_hop_sw_if_index), @@ -229,46 +234,54 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp) int rv = 0; u32 tunnel_sw_if_index; int ii; + fib_route_path_t rpath, *rpaths = NULL; + + memset (&rpath, 0, sizeof (rpath)); stats_dslock_with_hint (1 /* release hint */ , 5 /* tag */ ); - if (mp->mt_is_add) + if (mp->mt_next_hop_proto_is_ip4) { - fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t *label_stack = NULL; - - memset (&rpath, 0, sizeof (rpath)); - - if (mp->mt_next_hop_proto_is_ip4) - { - rpath.frp_proto = FIB_PROTOCOL_IP4; - clib_memcpy (&rpath.frp_addr.ip4, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); - } - else - { - rpath.frp_proto = FIB_PROTOCOL_IP6; - clib_memcpy (&rpath.frp_addr.ip6, - mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); - } - rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_proto = FIB_PROTOCOL_IP4; + clib_memcpy (&rpath.frp_addr.ip4, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip4)); + } + else + { + rpath.frp_proto = FIB_PROTOCOL_IP6; + clib_memcpy (&rpath.frp_addr.ip6, + mp->mt_next_hop, sizeof (rpath.frp_addr.ip6)); + } + rpath.frp_sw_if_index = ntohl (mp->mt_next_hop_sw_if_index); + rpath.frp_weight = 1; + if (mp->mt_is_add) + { for (ii = 0; ii < mp->mt_next_hop_n_out_labels; ii++) - vec_add1 (label_stack, ntohl (mp->mt_next_hop_out_label_stack[ii])); + vec_add1 (rpath.frp_label_stack, + ntohl (mp->mt_next_hop_out_label_stack[ii])); + } - vec_add1 (rpaths, rpath); + vec_add1 (rpaths, rpath); - vnet_mpls_tunnel_add (rpaths, label_stack, - mp->mt_l2_only, &tunnel_sw_if_index); - vec_free (rpaths); - vec_free (label_stack); + tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); + + if (mp->mt_is_add) + { + if (~0 == tunnel_sw_if_index) + tunnel_sw_if_index = vnet_mpls_tunnel_create (mp->mt_l2_only, + mp->mt_is_multicast); + vnet_mpls_tunnel_path_add (tunnel_sw_if_index, rpaths); } else { tunnel_sw_if_index = ntohl (mp->mt_sw_if_index); - vnet_mpls_tunnel_del (tunnel_sw_if_index); + if (!vnet_mpls_tunnel_path_remove (tunnel_sw_if_index, rpaths)) + vnet_mpls_tunnel_del (tunnel_sw_if_index); } + vec_free (rpaths); + stats_dsunlock (); /* *INDENT-OFF* */ @@ -289,10 +302,12 @@ typedef struct mpls_tunnel_send_walk_ctx_t_ static void send_mpls_tunnel_entry (u32 mti, void *arg) { + fib_route_path_encode_t *api_rpaths, *api_rpath; mpls_tunnel_send_walk_ctx_t *ctx; vl_api_mpls_tunnel_details_t *mp; const mpls_tunnel_t *mt; - u32 nlabels; + vl_api_fib_path2_t *fp; + u32 n; ctx = arg; @@ -300,18 +315,34 @@ send_mpls_tunnel_entry (u32 mti, void *arg) return; mt = mpls_tunnel_get (mti); - nlabels = vec_len (mt->mt_label_stack); + n = fib_path_list_get_n_paths (mt->mt_path_list); + + mp = vl_msg_api_alloc (sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); + memset (mp, 0, sizeof (*mp) + n * sizeof (vl_api_fib_path2_t)); - mp = vl_msg_api_alloc (sizeof (*mp) + nlabels * sizeof (u32)); - memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = ntohs (VL_API_MPLS_TUNNEL_DETAILS); mp->context = ctx->context; - mp->tunnel_index = ntohl (mti); - memcpy (mp->mt_next_hop_out_labels, - mt->mt_label_stack, nlabels * sizeof (u32)); + mp->mt_tunnel_index = ntohl (mti); + mp->mt_count = ntohl (n); + + fib_path_list_walk (mt->mt_path_list, fib_path_encode, &api_rpaths); + + fp = mp->mt_paths; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = htonl (api_rpath->rpath.frp_weight); + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } // FIXME + // memcpy (mp->mt_next_hop_out_labels, + // mt->mt_label_stack, nlabels * sizeof (u32)); + vl_msg_api_send_shmem (ctx->q, (u8 *) & mp); } diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c index 1b9bdd05eed..86ad8bba270 100644 --- a/src/vnet/mpls/mpls_input.c +++ b/src/vnet/mpls/mpls_input.c @@ -291,7 +291,7 @@ mpls_setup_nodes (vlib_main_t * vm) rt->last_outer_fib_index = 0; rt->mpls_main = &mpls_main; - ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS, mpls_input_node.index); } diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c index ace6a70fe80..3c6be7e85ec 100644 --- a/src/vnet/mpls/mpls_lookup.c +++ b/src/vnet/mpls/mpls_lookup.c @@ -20,8 +20,17 @@ #include #include #include +#include -vlib_node_registration_t mpls_lookup_node; +/** + * Static MPLS VLIB forwarding node + */ +static vlib_node_registration_t mpls_lookup_node; + +/** + * The arc/edge from the MPLS lookup node to the MPLS replicate node + */ +static u32 mpls_lookup_to_replicate_edge; typedef struct { u32 next_index; @@ -156,81 +165,123 @@ mpls_lookup (vlib_main_t * vm, lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2); lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3); - lb0 = load_balance_get(lbi0); - lb1 = load_balance_get(lbi1); - lb2 = load_balance_get(lbi2); - lb3 = load_balance_get(lbi3); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0; hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0; hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0; - if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + else { - hash_c1 = vnet_buffer (b1)->ip.flow_hash = - mpls_compute_flow_hash(h1, lb1->lb_hash_config); + lb0 = load_balance_get(lbi0); + + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + next0 = dpo0->dpoi_next_node; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); } - if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + if (MPLS_IS_REPLICATE & lbi1) { - hash_c2 = vnet_buffer (b2)->ip.flow_hash = - mpls_compute_flow_hash(h2, lb2->lb_hash_config); + next1 = mpls_lookup_to_replicate_edge; + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + (lbi1 & ~MPLS_IS_REPLICATE); } - if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + else { - hash_c3 = vnet_buffer (b3)->ip.flow_hash = - mpls_compute_flow_hash(h3, lb3->lb_hash_config); - } - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - dpo1 = load_balance_get_bucket_i(lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - dpo2 = load_balance_get_bucket_i(lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - dpo3 = load_balance_get_bucket_i(lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); + lb1 = load_balance_get(lbi1); - next0 = dpo0->dpoi_next_node; - next1 = dpo1->dpoi_next_node; - next2 = dpo2->dpoi_next_node; - next3 = dpo3->dpoi_next_node; + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) + { + hash_c1 = vnet_buffer (b1)->ip.flow_hash = + mpls_compute_flow_hash(h1, lb1->lb_hash_config); + } + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + next1 = dpo1->dpoi_next_node; + + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, + vlib_buffer_length_in_chain (vm, b1)); + } + if (MPLS_IS_REPLICATE & lbi2) + { + next2 = mpls_lookup_to_replicate_edge; + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + (lbi2 & ~MPLS_IS_REPLICATE); + } + else + { + lb2 = load_balance_get(lbi2); - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + if (PREDICT_FALSE(lb2->lb_n_buckets > 1)) + { + hash_c2 = vnet_buffer (b2)->ip.flow_hash = + mpls_compute_flow_hash(h2, lb2->lb_hash_config); + } + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + dpo2 = load_balance_get_bucket_i(lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + next2 = dpo2->dpoi_next_node; + + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi2, 1, + vlib_buffer_length_in_chain (vm, b2)); + } + if (MPLS_IS_REPLICATE & lbi3) + { + next3 = mpls_lookup_to_replicate_edge; + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + (lbi3 & ~MPLS_IS_REPLICATE); + } + else + { + lb3 = load_balance_get(lbi3); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); - vlib_increment_combined_counter - (cm, thread_index, lbi1, 1, - vlib_buffer_length_in_chain (vm, b1)); - vlib_increment_combined_counter - (cm, thread_index, lbi2, 1, - vlib_buffer_length_in_chain (vm, b2)); - vlib_increment_combined_counter - (cm, thread_index, lbi3, 1, - vlib_buffer_length_in_chain (vm, b3)); + if (PREDICT_FALSE(lb3->lb_n_buckets > 1)) + { + hash_c3 = vnet_buffer (b3)->ip.flow_hash = + mpls_compute_flow_hash(h3, lb3->lb_hash_config); + } + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + dpo3 = load_balance_get_bucket_i(lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); + next3 = dpo3->dpoi_next_node; + + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi3, 1, + vlib_buffer_length_in_chain (vm, b3)); + } /* * before we pop the label copy th values we need to maintain. @@ -331,31 +382,41 @@ mpls_lookup (vlib_main_t * vm, vnet_buffer(b0)->sw_if_index[VLIB_RX]); lbi0 = mpls_fib_table_forwarding_lookup(lfib_index0, h0); - lb0 = load_balance_get(lbi0); - hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; - if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + + if (MPLS_IS_REPLICATE & lbi0) { - hash_c0 = vnet_buffer (b0)->ip.flow_hash = - mpls_compute_flow_hash(h0, lb0->lb_hash_config); + next0 = mpls_lookup_to_replicate_edge; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + (lbi0 & ~MPLS_IS_REPLICATE); } + else + { + lb0 = load_balance_get(lbi0); - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } - dpo0 = load_balance_get_bucket_i(lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - next0 = dpo0->dpoi_next_node; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); - vlib_increment_combined_counter - (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, b0)); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + } /* - * before we pop the label copy th values we need to maintain. + * before we pop the label copy, values we need to maintain. * The label header is in network byte order. * last byte is the TTL. * bits 2 to 4 inclusive are the EXP bits @@ -398,7 +459,7 @@ static char * mpls_error_strings[] = { #undef mpls_error }; -VLIB_REGISTER_NODE (mpls_lookup_node) = { +VLIB_REGISTER_NODE (mpls_lookup_node, static) = { .function = mpls_lookup, .name = "mpls-lookup", /* Takes a vector of packets. */ @@ -621,3 +682,22 @@ VLIB_REGISTER_NODE (mpls_load_balance_node) = { }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) + + +static clib_error_t * +mpls_lookup_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + + mpls_lookup_to_replicate_edge = + vlib_node_add_named_next(vm, + mpls_lookup_node.index, + "mpls-replicate"); + + return (NULL); +} + +VLIB_INIT_FUNCTION (mpls_lookup_init); diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c index ac6fdcdf1d7..1254dd9ddfb 100644 --- a/src/vnet/mpls/mpls_tunnel.c +++ b/src/vnet/mpls/mpls_tunnel.c @@ -18,9 +18,12 @@ #include #include #include +#include #include #include #include +#include +#include /** * @brief pool of tunnel instances @@ -37,6 +40,11 @@ static u32 * mpls_tunnel_free_hw_if_indices; */ static u32 *mpls_tunnel_db; +/** + * @brief MPLS tunnel flags strings + */ +static const char *mpls_tunnel_attribute_names[] = MPLS_TUNNEL_ATTRIBUTES; + /** * @brief Get a tunnel object from a SW interface index */ @@ -44,103 +52,178 @@ static mpls_tunnel_t* mpls_tunnel_get_from_sw_if_index (u32 sw_if_index) { if ((vec_len(mpls_tunnel_db) < sw_if_index) || - (~0 == mpls_tunnel_db[sw_if_index])) - return (NULL); + (~0 == mpls_tunnel_db[sw_if_index])) + return (NULL); return (pool_elt_at_index(mpls_tunnel_pool, - mpls_tunnel_db[sw_if_index])); + mpls_tunnel_db[sw_if_index])); } /** - * @brief Return true if the label stack is imp-null only + * @brief Build a rewrite string for the MPLS tunnel. */ -static fib_forward_chain_type_t -mpls_tunnel_get_fwd_chain_type (const mpls_tunnel_t *mt) +static u8* +mpls_tunnel_build_rewrite_i (void) { - if ((1 == vec_len(mt->mt_label_stack)) && - (mt->mt_label_stack[0] == MPLS_IETF_IMPLICIT_NULL_LABEL)) - { - /* - * the only label in the label stack is implicit null - * we need to build an IP chain. - */ - if (FIB_PROTOCOL_IP4 == fib_path_list_get_proto(mt->mt_path_list)) - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); - } - else - { - return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); - } - } - else - { - return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); - } + /* + * passing the adj code a NULL rewirte means 'i don't have one cos + * t'other end is unresolved'. That's not the case here. For the mpls + * tunnel there are just no bytes of encap to apply in the adj. We'll impose + * the label stack once we choose a path. So return a zero length rewrite. + */ + u8 *rewrite = NULL; + + vec_validate(rewrite, 0); + vec_reset_length(rewrite); + + return (rewrite); } /** * @brief Build a rewrite string for the MPLS tunnel. - * - * We have choices here; - * 1 - have an Adjacency with a zero length string and stack it on - * MPLS label objects - * 2 - put the label header rewrites in the adjacency string. - * - * We choose 2 since it results in fewer graph nodes in the egress path */ static u8* mpls_tunnel_build_rewrite (vnet_main_t * vnm, - u32 sw_if_index, - vnet_link_t link_type, - const void *dst_address) + u32 sw_if_index, + vnet_link_t link_type, + const void *dst_address) { - mpls_unicast_header_t *muh; - mpls_tunnel_t *mt; - u8 *rewrite; - u32 mti, ii; + return (mpls_tunnel_build_rewrite_i()); +} - rewrite = NULL; - mti = mpls_tunnel_db[sw_if_index]; - mt = pool_elt_at_index(mpls_tunnel_pool, mti); +typedef struct mpls_tunnel_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const mpls_tunnel_t *mt; + fib_forward_chain_type_t fct; +} mpls_tunnel_collect_forwarding_ctx_t; + +static int +mpls_tunnel_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + mpls_tunnel_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + int have_path_ext; + + ctx = arg; /* - * The vector must be allocated as u8 so the length is correct + * if the path is not resolved, don't include it. */ - ASSERT(0 < vec_len(mt->mt_label_stack)); - vec_validate(rewrite, (sizeof(*muh) * vec_len(mt->mt_label_stack)) - 1); - ASSERT(rewrite); - muh = (mpls_unicast_header_t *)rewrite; + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } /* - * The last (inner most) label in the stack may be EOS, all the rest Non-EOS + * get the matching path-extension for the path being visited. */ - for (ii = 0; ii < vec_len(mt->mt_label_stack)-1; ii++) + have_path_ext = 0; + vec_foreach(path_ext, ctx->mt->mt_path_exts) { - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); + if (path_ext->fpe_path_index == path_index) + { + have_path_ext = 1; + break; + } } - vnet_mpls_uc_set_label(&muh[ii].label_exp_s_ttl, mt->mt_label_stack[ii]); - vnet_mpls_uc_set_ttl(&muh[ii].label_exp_s_ttl, 255); - vnet_mpls_uc_set_exp(&muh[ii].label_exp_s_ttl, 0); - - if ((VNET_LINK_MPLS == link_type) && - (mt->mt_label_stack[ii] != MPLS_IETF_IMPLICIT_NULL_LABEL)) + if (have_path_ext) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_NON_EOS); + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, + ctx->fct, + ctx->fct, + ctx->next_hops); } else + ASSERT(0); + /* + * else + * There should be a path-extenios associated with each path + */ + + return (!0); +} + +static void +mpls_tunnel_mk_lb (mpls_tunnel_t *mt, + vnet_link_t linkt, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. + * Otherwise we use the load-balance of the path-list + */ + mpls_tunnel_collect_forwarding_ctx_t ctx = { + .mt = mt, + .next_hops = NULL, + .fct = fct, + }; + + /* + * As an optimisation we allocate the vector of next-hops to be sized + * equal to the maximum nuber of paths we will need, which is also the + * most likely number we will need, since in most cases the paths are 'up'. + */ + vec_validate(ctx.next_hops, fib_path_list_get_n_paths(mt->mt_path_list)); + vec_reset_length(ctx.next_hops); + + lb_proto = vnet_link_to_dpo_proto(linkt); + + fib_path_list_walk(mt->mt_path_list, + mpls_tunnel_collect_forwarding, + &ctx); + + if (!dpo_id_is_valid(dpo_lb)) { - vnet_mpls_uc_set_s(&muh[ii].label_exp_s_ttl, MPLS_EOS); + /* + * first time create + */ + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + dpo_set(dpo_lb, + DPO_REPLICATE, + lb_proto, + replicate_create(0, lb_proto)); + } + else + { + flow_hash_config_t fhc; + + fhc = 0; // FIXME + /* fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, */ + /* dpo_proto_to_fib(lb_proto)); */ + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } } - muh[ii].label_exp_s_ttl = clib_host_to_net_u32(muh[ii].label_exp_s_ttl); - - return (rewrite); + if (mt->mt_flags & MPLS_TUNNEL_FLAG_MCAST) + { + /* + * MPLS multicast + */ + replicate_multipath_update(dpo_lb, ctx.next_hops); + } + else + { + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + LOAD_BALANCE_FLAG_NONE); + vec_free(ctx.next_hops); + } } /** @@ -161,45 +244,47 @@ mpls_tunnel_stack (adj_index_t ai) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; + return; /* - * find the adjacency that is contributed by the FIB path-list - * that this tunnel resovles via, and use it as the next adj - * in the midchain + * while we're stacking the adj, remove the tunnel from the child list + * of the path list. this breaks a circular dependency of walk updates + * where the create of adjacencies in the children can lead to walks + * that get back here. */ - if (vnet_hw_interface_get_flags(vnet_get_main(), - mt->mt_hw_if_index) & - VNET_HW_INTERFACE_FLAG_LINK_UP) - { - dpo_id_t dpo = DPO_INVALID; + fib_path_list_lock(mt->mt_path_list); - fib_path_list_contribute_forwarding(mt->mt_path_list, - mpls_tunnel_get_fwd_chain_type(mt), - &dpo); - - if (DPO_LOAD_BALANCE == dpo.dpoi_type) - { - /* - * we don't support multiple paths, so no need to load-balance. - * pull the first and only choice and stack directly on that. - */ - load_balance_t *lb; - - lb = load_balance_get (dpo.dpoi_index); + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); - ASSERT(1 == lb->lb_n_buckets); + /* + * Construct the DPO (load-balance or replicate) that we can stack + * the tunnel's midchain on + */ + if (vnet_hw_interface_get_flags(vnet_get_main(), + mt->mt_hw_if_index) & + VNET_HW_INTERFACE_FLAG_LINK_UP) + { + dpo_id_t dpo = DPO_INVALID; - dpo_copy(&dpo, load_balance_get_bucket_i (lb, 0)); - } + mpls_tunnel_mk_lb(mt, + adj->ia_link, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); - adj_nbr_midchain_stack(ai, &dpo); - dpo_reset(&dpo); + adj_nbr_midchain_stack(ai, &dpo); + dpo_reset(&dpo); } else { - adj_nbr_midchain_unstack(ai); + adj_nbr_midchain_unstack(ai); } + + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mt - mpls_tunnel_pool); + + fib_path_list_lock(mt->mt_path_list); } /** @@ -207,7 +292,7 @@ mpls_tunnel_stack (adj_index_t ai) */ static adj_walk_rc_t mpls_adj_walk_cb (adj_index_t ai, - void *ctx) + void *ctx) { mpls_tunnel_stack(ai); @@ -224,17 +309,17 @@ mpls_tunnel_restack (mpls_tunnel_t *mt) */ FOR_EACH_FIB_PROTOCOL(proto) { - adj_nbr_walk(mt->mt_sw_if_index, - proto, - mpls_adj_walk_cb, - NULL); + adj_nbr_walk(mt->mt_sw_if_index, + proto, + mpls_adj_walk_cb, + NULL); } } static clib_error_t * mpls_tunnel_admin_up_down (vnet_main_t * vnm, - u32 hw_if_index, - u32 flags) + u32 hw_if_index, + u32 flags) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -244,13 +329,13 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, mt = mpls_tunnel_get_from_sw_if_index(hi->sw_if_index); if (NULL == mt) - return (NULL); + return (NULL); if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) - vnet_hw_interface_set_flags (vnm, hw_if_index, - VNET_HW_INTERFACE_FLAG_LINK_UP); + vnet_hw_interface_set_flags (vnm, hw_if_index, + VNET_HW_INTERFACE_FLAG_LINK_UP); else - vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); + vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */); mpls_tunnel_restack(mt); @@ -263,22 +348,58 @@ mpls_tunnel_admin_up_down (vnet_main_t * vnm, */ static void mpls_tunnel_fixup (vlib_main_t *vm, - ip_adjacency_t *adj, - vlib_buffer_t *b0) + ip_adjacency_t *adj, + vlib_buffer_t *b0) { + /* + * A no-op w.r.t. the header. but reset the 'have we pushed any + * MPLS labels onto the packet' flag. That way when we enter the + * tunnel we'll get a TTL set to 255 + */ + vnet_buffer(b0)->mpls.first = 0; } static void mpls_tunnel_update_adj (vnet_main_t * vnm, - u32 sw_if_index, - adj_index_t ai) + u32 sw_if_index, + adj_index_t ai) { - adj_nbr_midchain_update_rewrite( - ai, mpls_tunnel_fixup, - ADJ_FLAG_NONE, - mpls_tunnel_build_rewrite(vnm, sw_if_index, - adj_get_link_type(ai), - NULL)); + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != ai); + + adj = adj_get(ai); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + adj_nbr_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i()); + break; + case IP_LOOKUP_NEXT_MCAST: + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + * There's no MAC fixup, so the last 2 parameters are 0 + */ + adj_mcast_midchain_update_rewrite(ai, mpls_tunnel_fixup, + ADJ_FLAG_NONE, + mpls_tunnel_build_rewrite_i(), + 0, 0); + break; + + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } mpls_tunnel_stack(ai); } @@ -312,7 +433,7 @@ typedef struct mpls_tunnel_trace_t_ static u8 * format_mpls_tunnel_tx_trace (u8 * s, - va_list * args) + va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); @@ -327,8 +448,8 @@ format_mpls_tunnel_tx_trace (u8 * s, */ static uword mpls_tunnel_tx (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) + vlib_node_runtime_t * node, + vlib_frame_t * frame) { u32 next_index; u32 * from, * to_next, n_left_from, n_left_to_next; @@ -355,32 +476,32 @@ mpls_tunnel_tx (vlib_main_t * vm, * FIXME DUAL LOOP */ while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t * b0; - u32 bi0; + { + vlib_buffer_t * b0; + u32 bi0; - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; - b0 = vlib_get_buffer(vm, bi0); + b0 = vlib_get_buffer(vm, bi0); - vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mt->mt_l2_adj; - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_id = rd->dev_instance; - } + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_tunnel_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_id = rd->dev_instance; + } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, mt->mt_l2_tx_arc); - } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, mt->mt_l2_tx_arc); + } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } @@ -417,13 +538,13 @@ mpls_tunnel_get (u32 mti) */ void mpls_tunnel_walk (mpls_tunnel_walk_cb_t cb, - void *ctx) + void *ctx) { u32 mti; pool_foreach_index(mti, mpls_tunnel_pool, ({ - cb(mti, ctx); + cb(mti, ctx); })); } @@ -435,25 +556,22 @@ vnet_mpls_tunnel_del (u32 sw_if_index) mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); if (NULL == mt) - return; - - fib_path_list_child_remove(mt->mt_path_list, - mt->mt_sibling_index); - if (ADJ_INDEX_INVALID != mt->mt_l2_adj) - adj_unlock(mt->mt_l2_adj); + return; - vec_free(mt->mt_label_stack); + if (FIB_NODE_INDEX_INVALID != mt->mt_path_list) + fib_path_list_child_remove(mt->mt_path_list, + mt->mt_sibling_index); + if (ADJ_INDEX_INVALID != mt->mt_l2_adj) + adj_unlock(mt->mt_l2_adj); vec_add1 (mpls_tunnel_free_hw_if_indices, mt->mt_hw_if_index); pool_put(mpls_tunnel_pool, mt); mpls_tunnel_db[sw_if_index] = ~0; } -void -vnet_mpls_tunnel_add (fib_route_path_t *rpaths, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index) +u32 +vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast) { vnet_hw_interface_t * hi; mpls_tunnel_t *mt; @@ -466,28 +584,33 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, mti = mt - mpls_tunnel_pool; fib_node_init(&mt->mt_node, FIB_NODE_TYPE_MPLS_TUNNEL); mt->mt_l2_adj = ADJ_INDEX_INVALID; + mt->mt_path_list = FIB_NODE_INDEX_INVALID; + mt->mt_sibling_index = FIB_NODE_INDEX_INVALID; + + if (is_multicast) + mt->mt_flags |= MPLS_TUNNEL_FLAG_MCAST; /* * Create a new, or re=use and old, tunnel HW interface */ if (vec_len (mpls_tunnel_free_hw_if_indices) > 0) { - mt->mt_hw_if_index = - mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; - _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; - hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); - hi->hw_instance = mti; - hi->dev_instance = mti; + mt->mt_hw_if_index = + mpls_tunnel_free_hw_if_indices[vec_len(mpls_tunnel_free_hw_if_indices)-1]; + _vec_len (mpls_tunnel_free_hw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, mt->mt_hw_if_index); + hi->hw_instance = mti; + hi->dev_instance = mti; } - else + else { - mt->mt_hw_if_index = vnet_register_interface( - vnm, - mpls_tunnel_class.index, - mti, - mpls_tunnel_hw_interface_class.index, - mti); - hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); + mt->mt_hw_if_index = vnet_register_interface( + vnm, + mpls_tunnel_class.index, + mti, + mpls_tunnel_hw_interface_class.index, + mti); + hi = vnet_get_hw_interface(vnm, mt->mt_hw_if_index); } /* @@ -497,43 +620,218 @@ vnet_mpls_tunnel_add (fib_route_path_t *rpaths, vec_validate_init_empty(mpls_tunnel_db, mt->mt_sw_if_index, ~0); mpls_tunnel_db[mt->mt_sw_if_index] = mti; + if (l2_only) + { + mt->mt_l2_adj = + adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), + VNET_LINK_ETHERNET, + &zero_addr, + mt->mt_sw_if_index); + + mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), + hi->tx_node_index, + "adj-l2-midchain"); + } + + return (mt->mt_sw_if_index); +} + +/* + * mpls_tunnel_path_ext_add + * + * append a path extension to the entry's list + */ +static void +mpls_tunnel_path_ext_append (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t *path_ext; + + vec_add2(mt->mt_path_exts, path_ext, 1); + + fib_path_ext_init(path_ext, mt->mt_path_list, rpath); + } +} + +/* + * mpls_tunnel_path_ext_insert + * + * insert, sorted, a path extension to the entry's list. + * It's not strictly necessary in sort the path extensions, since each + * extension has the path index to which it resolves. However, by being + * sorted the load-balance produced has a deterministic order, not an order + * based on the sequence of extension additions. this is a considerable benefit. + */ +static void +mpls_tunnel_path_ext_insert (mpls_tunnel_t *mt, + const fib_route_path_t *rpath) +{ + if (0 == vec_len(mt->mt_path_exts)) + return (mpls_tunnel_path_ext_append(mt, rpath)); + + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t path_ext; + int i = 0; + + fib_path_ext_init(&path_ext, mt->mt_path_list, rpath); + + while (i < vec_len(mt->mt_path_exts) && + (fib_path_ext_cmp(&mt->mt_path_exts[i], rpath) < 0)) + { + i++; + } + + vec_insert_elts(mt->mt_path_exts, &path_ext, 1, i); + } +} + +void +vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; + + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); + + if (NULL == mt) + return; + + mti = mt - mpls_tunnel_pool; + /* * construct a path-list from the path provided */ - mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); - mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, - FIB_NODE_TYPE_MPLS_TUNNEL, - mti); + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + mt->mt_path_list = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, rpaths); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_add(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + mt->mt_sibling_index = fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + } + mpls_tunnel_path_ext_insert(mt, rpaths); + mpls_tunnel_restack(mt); +} + +int +vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpaths) +{ + mpls_tunnel_t *mt; + u32 mti; - mt->mt_label_stack = vec_dup(label_stack); + mt = mpls_tunnel_get_from_sw_if_index(sw_if_index); - if (l2_only) + if (NULL == mt) + return (0); + + mti = mt - mpls_tunnel_pool; + + /* + * construct a path-list from the path provided + */ + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) { - mt->mt_l2_adj = - adj_nbr_add_or_lock(fib_path_list_get_proto(mt->mt_path_list), - VNET_LINK_ETHERNET, - &zero_addr, - mt->mt_sw_if_index); - - mt->mt_l2_tx_arc = vlib_node_add_named_next(vlib_get_main(), - hi->tx_node_index, - "adj-l2-midchain"); + /* can't remove a path if we have onoe */ + return (0); } - - *sw_if_index = mt->mt_sw_if_index; + else + { + fib_node_index_t old_pl_index; + fib_path_ext_t *path_ext; + + old_pl_index = mt->mt_path_list; + + mt->mt_path_list = + fib_path_list_copy_and_path_remove(old_pl_index, + FIB_PATH_LIST_FLAG_SHARED, + rpaths); + + fib_path_list_child_remove(old_pl_index, + mt->mt_sibling_index); + + if (FIB_NODE_INDEX_INVALID == mt->mt_path_list) + { + /* no paths left */ + return (0); + } + else + { + mt->mt_sibling_index = + fib_path_list_child_add(mt->mt_path_list, + FIB_NODE_TYPE_MPLS_TUNNEL, + mti); + } + /* + * find the matching path extension and remove it + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + if (!fib_path_ext_cmp(path_ext, rpaths)) + { + /* + * delete the element moving the remaining elements down 1 position. + * this preserves the sorted order. + */ + vec_free(path_ext->fpe_label_stack); + vec_delete(mt->mt_path_exts, 1, + (path_ext - mt->mt_path_exts)); + break; + } + } + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, mt->mt_path_exts) + { + fib_path_ext_resolve(path_ext, mt->mt_path_list); + } + + mpls_tunnel_restack(mt); + } + + return (fib_path_list_get_n_paths(mt->mt_path_list)); } + static clib_error_t * vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { unformat_input_t _line_input, * line_input = &_line_input; vnet_main_t * vnm = vnet_get_main(); - u8 is_del = 0; - u8 l2_only = 0; + u8 is_del = 0, l2_only = 0, is_multicast =0; fib_route_path_t rpath, *rpaths = NULL; - mpls_label_t out_label = MPLS_LABEL_INVALID, *labels = NULL; + mpls_label_t out_label = MPLS_LABEL_INVALID; u32 sw_if_index; clib_error_t *error = NULL; @@ -541,87 +839,89 @@ vnet_create_mpls_tunnel_command_fn (vlib_main_t * vm, /* Get a line of input. */ if (! unformat_user (input, unformat_line_input, line_input)) - return 0; + return 0; while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - if (unformat (line_input, "del %U", - unformat_vnet_sw_interface, vnm, - &sw_if_index)) - is_del = 1; - else if (unformat (line_input, "add")) - is_del = 0; - else if (unformat (line_input, "out-label %U", - unformat_mpls_unicast_label, &out_label)) - { - vec_add1(labels, out_label); - } - else if (unformat (line_input, "via %U %U", - unformat_ip4_address, - &rpath.frp_addr.ip4, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - - else if (unformat (line_input, "via %U %U", - unformat_ip6_address, - &rpath.frp_addr.ip6, - unformat_vnet_sw_interface, vnm, - &rpath.frp_sw_if_index)) - { - rpath.frp_weight = 1; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip6_address, - &rpath.frp_addr.ip6)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP6; - } - else if (unformat (line_input, "via %U", - unformat_ip4_address, - &rpath.frp_addr.ip4)) - { - rpath.frp_fib_index = 0; - rpath.frp_weight = 1; - rpath.frp_sw_if_index = ~0; - rpath.frp_proto = FIB_PROTOCOL_IP4; - } - else if (unformat (line_input, "l2-only")) - l2_only = 1; - else - { - error = clib_error_return (0, "unknown input '%U'", - format_unformat_error, line_input); - goto done; - } + if (unformat (line_input, "del %U", + unformat_vnet_sw_interface, vnm, + &sw_if_index)) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + vec_add1(rpath.frp_label_stack, out_label); + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + + else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + rpath.frp_fib_index = 0; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "l2-only")) + l2_only = 1; + else if (unformat (line_input, "multicast")) + is_multicast = 1; + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } } if (is_del) { - vnet_mpls_tunnel_del(sw_if_index); + vnet_mpls_tunnel_del(sw_if_index); } else { - if (0 == vec_len(labels)) - { - error = clib_error_return (0, "No Output Labels '%U'", - format_unformat_error, line_input); - goto done; - } - - vec_add1(rpaths, rpath); - vnet_mpls_tunnel_add(rpaths, labels, l2_only, &sw_if_index); + if (0 == vec_len(rpath.frp_label_stack)) + { + error = clib_error_return (0, "No Output Labels '%U'", + format_unformat_error, line_input); + goto done; + } + + vec_add1(rpaths, rpath); + sw_if_index = vnet_mpls_tunnel_create(l2_only, is_multicast); + vnet_mpls_tunnel_path_add(sw_if_index, rpaths); } done: - vec_free(labels); vec_free(rpaths); unformat_free (line_input); @@ -638,7 +938,7 @@ done: ?*/ VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = { .path = "mpls tunnel", - .short_help = + .short_help = "mpls tunnel via [addr] [interface] [out-labels]", .function = vnet_create_mpls_tunnel_command_fn, }; @@ -647,19 +947,28 @@ static u8 * format_mpls_tunnel (u8 * s, va_list * args) { mpls_tunnel_t *mt = va_arg (*args, mpls_tunnel_t *); - int ii; + mpls_tunnel_attribute_t attr; + fib_path_ext_t *path_ext; s = format(s, "mpls_tunnel%d: sw_if_index:%d hw_if_index:%d", - mt - mpls_tunnel_pool, - mt->mt_sw_if_index, - mt->mt_hw_if_index); - s = format(s, "\n label-stack:\n "); - for (ii = 0; ii < vec_len(mt->mt_label_stack); ii++) - { - s = format(s, "%d, ", mt->mt_label_stack[ii]); + mt - mpls_tunnel_pool, + mt->mt_sw_if_index, + mt->mt_hw_if_index); + if (MPLS_TUNNEL_FLAG_NONE != mt->mt_flags) { + s = format(s, " \n flags:"); + FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(attr) { + if ((1<mt_flags) { + s = format (s, "%s,", mpls_tunnel_attribute_names[attr]); + } + } } s = format(s, "\n via:\n"); s = fib_path_list_format(mt->mt_path_list, s); + s = format(s, " Extensions:"); + vec_foreach(path_ext, mt->mt_path_exts) + { + s = format(s, "\n %U", format_fib_path_ext, path_ext); + } s = format(s, "\n"); return (s); @@ -667,42 +976,42 @@ format_mpls_tunnel (u8 * s, va_list * args) static clib_error_t * show_mpls_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { mpls_tunnel_t * mt; u32 mti = ~0; if (pool_elts (mpls_tunnel_pool) == 0) - vlib_cli_output (vm, "No MPLS tunnels configured..."); + vlib_cli_output (vm, "No MPLS tunnels configured..."); while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%d", &mti)) - ; - else - break; + if (unformat (input, "%d", &mti)) + ; + else + break; } if (~0 == mti) { - pool_foreach (mt, mpls_tunnel_pool, - ({ - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); - })); + pool_foreach (mt, mpls_tunnel_pool, + ({ + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); + })); } else { - if (pool_is_free_index(mpls_tunnel_pool, mti)) - return clib_error_return (0, "Not atunnel index %d", mti); + if (pool_is_free_index(mpls_tunnel_pool, mti)) + return clib_error_return (0, "Not atunnel index %d", mti); - mt = pool_elt_at_index(mpls_tunnel_pool, mti); + mt = pool_elt_at_index(mpls_tunnel_pool, mti); - vlib_cli_output (vm, "[@%d] %U", - mt - mpls_tunnel_pool, - format_mpls_tunnel, mt); + vlib_cli_output (vm, "[@%d] %U", + mt - mpls_tunnel_pool, + format_mpls_tunnel, mt); } return 0; @@ -715,7 +1024,7 @@ show_mpls_tunnel_command_fn (vlib_main_t * vm, * @cliexstart{sh mpls tunnel 2} * [@2] mpls_tunnel2: sw_if_index:5 hw_if_index:5 * label-stack: - * 3, + * 3, * via: * index:26 locks:1 proto:ipv4 uPRF-list:26 len:1 itfs:[2, ] * index:26 pl-index:26 ipv4 weight=1 attached-nexthop: oper-flags:resolved, @@ -743,7 +1052,7 @@ mpls_tunnel_from_fib_node (fib_node_t *node) */ static fib_node_back_walk_rc_t mpls_tunnel_back_walk (fib_node_t *node, - fib_node_back_walk_ctx_t *ctx) + fib_node_back_walk_ctx_t *ctx) { mpls_tunnel_restack(mpls_tunnel_from_fib_node(node)); diff --git a/src/vnet/mpls/mpls_tunnel.h b/src/vnet/mpls/mpls_tunnel.h index ee56c0fc8e3..0b55d0dbe3d 100644 --- a/src/vnet/mpls/mpls_tunnel.h +++ b/src/vnet/mpls/mpls_tunnel.h @@ -17,6 +17,31 @@ #define __MPLS_TUNNEL_H__ #include +#include + +typedef enum mpls_tunnel_attribute_t_ +{ + MPLS_TUNNEL_ATTRIBUTE_FIRST = 0, + /** + * @brief The tunnel has an underlying multicast LSP + */ + MPLS_TUNNEL_ATTRIBUTE_MCAST = MPLS_TUNNEL_ATTRIBUTE_FIRST, + MPLS_TUNNEL_ATTRIBUTE_LAST = MPLS_TUNNEL_ATTRIBUTE_MCAST, +} mpls_tunnel_attribute_t; + +#define MPLS_TUNNEL_ATTRIBUTES { \ + [MPLS_TUNNEL_ATTRIBUTE_MCAST] = "multicast", \ +} +#define FOR_EACH_MPLS_TUNNEL_ATTRIBUTE(_item) \ + for (_item = MPLS_TUNNEL_ATTRIBUTE_FIRST; \ + _item < MPLS_TUNNEL_ATTRIBUTE_LAST; \ + _item++) + +typedef enum mpls_tunnel_flag_t_ { + MPLS_TUNNEL_FLAG_NONE = 0, + MPLS_TUNNEL_FLAG_MCAST = (1 << MPLS_TUNNEL_ATTRIBUTE_MCAST), +} __attribute__ ((packed)) mpls_tunnel_flags_t; + /** * @brief A uni-directional MPLS tunnel @@ -28,6 +53,11 @@ typedef struct mpls_tunnel_t_ */ fib_node_t mt_node; + /** + * @brief Tunnel flags + */ + mpls_tunnel_flags_t mt_flags; + /** * @brief If the tunnel is an L2 tunnel, this is the link type ETHERNET * adjacency @@ -50,9 +80,9 @@ typedef struct mpls_tunnel_t_ u32 mt_sibling_index; /** - * @brief The Label stack to apply to egress packets + * A vector of path extensions o hold the label stack for each path */ - mpls_label_t *mt_label_stack; + fib_path_ext_t *mt_path_exts; /** * @brief Flag to indicate the tunnel is only for L2 traffic, that is @@ -74,12 +104,27 @@ typedef struct mpls_tunnel_t_ /** * @brief Create a new MPLS tunnel + * @return the SW Interface index of the newly created tuneel */ -extern void vnet_mpls_tunnel_add (fib_route_path_t *rpath, - mpls_label_t *label_stack, - u8 l2_only, - u32 *sw_if_index); +extern u32 vnet_mpls_tunnel_create (u8 l2_only, + u8 is_multicast); +/** + * @brief Add a path to an MPLS tunnel + */ +extern void vnet_mpls_tunnel_path_add (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief remove a path from a tunnel. + * @return the number of remaining paths. 0 implies the tunnel can be deleted + */ +extern int vnet_mpls_tunnel_path_remove (u32 sw_if_index, + fib_route_path_t *rpath); + +/** + * @brief Delete an MPLS tunnel + */ extern void vnet_mpls_tunnel_del (u32 sw_if_index); extern const mpls_tunnel_t *mpls_tunnel_get(u32 index); diff --git a/src/vnet/mpls/mpls_types.h b/src/vnet/mpls/mpls_types.h index d7c629df832..b1075cdda57 100644 --- a/src/vnet/mpls/mpls_types.h +++ b/src/vnet/mpls/mpls_types.h @@ -1,3 +1,17 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef __MPLS_TYPES_H__ #define __MPLS_TYPES_H__ @@ -36,4 +50,10 @@ (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ ((_lbl) <= MPLS_IETF_MAX_UNRES_LABEL)) +/** + * The top bit of the index, which is the result of the MPLS lookup + * is used to determine if the DPO is a load-balance or a replicate + */ +#define MPLS_IS_REPLICATE 0x80000000 + #endif -- cgit 1.2.3-korg