From 32e1c010b0c34fd0984f7fc45fae648a182025c5 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 22 Nov 2016 17:07:28 +0000 Subject: IP Multicast FIB (mfib) - IPv[46] mfib tables with support for (*,G/m), (*,G) and (S,G) exact and longest prefix match - Replication represented via a new replicate DPO. - RPF configuration and data-plane checking - data-plane signals sent to listening control planes. The functions of multicast forwarding entries differ from their unicast counterparts, so we introduce new mfib_table_t and mfib_entry_t objects. However, we re-use the fib_path_list to resolve and build the entry's output list. The fib_path_list provides the service to construct a replicate DPO for multicast. 'make tests' is extended with two new suites: TEST=mfib, which invokes the CLI command 'test mfib' and covers many path add/remove and flag set/unset scenarios; and TEST=ip-mcast, data-plane forwarding tests. Updated applications to use the new MFIB functions; - IPv6 NS/RA. - DHCPv6. Unit tests for these are updated accordingly. 
Change-Id: I49ec37b01f1b170335a5697541c8fd30e6d3a961 Signed-off-by: Neale Ranns --- src/vnet/adj/adj.c | 11 +- src/vnet/adj/adj.h | 1 - src/vnet/adj/adj_internal.h | 2 + src/vnet/adj/adj_mcast.c | 346 ++++++++++++++++++++++++++++++++++++++++++++ src/vnet/adj/adj_mcast.h | 78 ++++++++++ src/vnet/adj/adj_nbr.c | 2 +- src/vnet/adj/adj_rewrite.c | 53 ------- src/vnet/adj/adj_rewrite.h | 49 ------- 8 files changed, 436 insertions(+), 106 deletions(-) create mode 100644 src/vnet/adj/adj_mcast.c create mode 100644 src/vnet/adj/adj_mcast.h delete mode 100644 src/vnet/adj/adj_rewrite.c delete mode 100644 src/vnet/adj/adj_rewrite.h (limited to 'src/vnet/adj') diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index d0be0f0eaff..a99f173f6d0 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -58,8 +59,6 @@ adj_alloc (fib_protocol_t proto) adj_get_index(adj)); adj->rewrite_header.sw_if_index = ~0; - adj->mcast_group_index = ~0; - adj->saved_lookup_next_index = 0; adj->n_adj = 1; adj->lookup_next_index = 0; @@ -116,6 +115,9 @@ format_ip_adjacency (u8 * s, va_list * args) case IP_LOOKUP_NEXT_MIDCHAIN: s = format (s, "%U", format_adj_midchain, adj_index, 2); break; + case IP_LOOKUP_NEXT_MCAST: + s = format (s, "%U", format_adj_mcast, adj_index, 0); + break; default: break; } @@ -179,6 +181,10 @@ adj_last_lock_gone (ip_adjacency_t *adj) adj_glean_remove(adj->ia_nh_proto, adj->rewrite_header.sw_if_index); break; + case IP_LOOKUP_NEXT_MCAST: + adj_mcast_remove(adj->ia_nh_proto, + adj->rewrite_header.sw_if_index); + break; default: /* * type not stored in any DB from which we need to remove it @@ -350,6 +356,7 @@ adj_module_init (vlib_main_t * vm) adj_nbr_module_init(); adj_glean_module_init(); adj_midchain_module_init(); + adj_mcast_module_init(); /* * one special adj to reserve index 0 diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index e85625db7ee..29bae6733b3 100644 --- a/src/vnet/adj/adj.h +++ 
b/src/vnet/adj/adj.h @@ -45,7 +45,6 @@ #include #include #include -#include #include /** diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index 833bc7c9e01..ece59121e9c 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -100,5 +100,7 @@ extern void adj_nbr_remove(adj_index_t ai, u32 sw_if_index); extern void adj_glean_remove(fib_protocol_t proto, u32 sw_if_index); +extern void adj_mcast_remove(fib_protocol_t proto, + u32 sw_if_index); #endif diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c new file mode 100644 index 00000000000..1345aedbad6 --- /dev/null +++ b/src/vnet/adj/adj_mcast.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +/* + * The 'DB' of all mcast adjs. + * There is only one mcast per-interface per-protocol, so this is a per-interface + * vector + */ +static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX]; + +static u32 +adj_get_mcast_node (fib_protocol_t proto) +{ + switch (proto) { + case FIB_PROTOCOL_IP4: + return (ip4_rewrite_mcast_node.index); + case FIB_PROTOCOL_IP6: + return (ip6_rewrite_mcast_node.index); + case FIB_PROTOCOL_MPLS: + break; + } + ASSERT(0); + return (0); +} + +/* + * adj_mcast_add_or_lock + * + * The next_hop address here is used for source address selection in the DP. 
+ * The mcast adj is added to an interface's connected prefix, the next-hop + * passed here is the local prefix on the same interface. + */ +adj_index_t +adj_mcast_add_or_lock (fib_protocol_t proto, + vnet_link_t link_type, + u32 sw_if_index) +{ + ip_adjacency_t * adj; + + vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID); + + if (ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + { + vnet_main_t *vnm; + + vnm = vnet_get_main(); + adj = adj_alloc(proto); + + adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST; + adj->ia_nh_proto = proto; + adj->ia_link = link_type; + adj_mcasts[proto][sw_if_index] = adj_get_index(adj); + adj_lock(adj_get_index(adj)); + + vnet_rewrite_init(vnm, sw_if_index, + adj_get_mcast_node(proto), + vnet_tx_node_index_for_sw_interface(vnm, sw_if_index), + &adj->rewrite_header); + + /* + * we need a rewrite where the destination IP address is converted + * to the appropriate link-layer address. This is interface specific. + * So ask the interface to do it. + */ + vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, + adj_get_index(adj)); + } + else + { + adj = adj_get(adj_mcasts[proto][sw_if_index]); + adj_lock(adj_get_index(adj)); + } + + return (adj_get_index(adj)); +} + +/** + * adj_mcast_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewirte is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. 
+ */ +void +adj_mcast_update_rewrite (adj_index_t adj_index, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * update the adj's rewrite string and build the arc + * from the rewrite node to the interface's TX node + */ + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST, + adj_get_mcast_node(adj->ia_nh_proto), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); +} + +void +adj_mcast_remove (fib_protocol_t proto, + u32 sw_if_index) +{ + ASSERT(sw_if_index < vec_len(adj_mcasts[proto])); + + adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID; +} + +static clib_error_t * +adj_mcast_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + /* + * for each mcast on the interface trigger a walk back to the children + */ + fib_protocol_t proto; + ip_adjacency_t *adj; + + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_mcasts[proto]) || + ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + continue; + + adj = adj_get(adj_mcasts[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? 
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP : + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change); + +/** + * @brief Invoked on each SW interface of a HW interface when the + * HW interface state changes + */ +static void +adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + void *arg) +{ + adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg); +} + +/** + * @brief Registered callback for HW interface state changes + */ +static clib_error_t * +adj_mcast_hw_interface_state_change (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + /* + * walk SW interfaces on the HW + */ + uword sw_flags; + + sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? + VNET_SW_INTERFACE_FLAG_ADMIN_UP : + 0); + + vnet_hw_interface_walk_sw(vnm, hw_if_index, + adj_nbr_hw_sw_interface_state_change, + (void*) sw_flags); + + return (NULL); +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION( + adj_mcast_hw_interface_state_change); + +static clib_error_t * +adj_mcast_interface_delete (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + /* + * for each mcast on the interface trigger a walk back to the children + */ + fib_protocol_t proto; + ip_adjacency_t *adj; + + if (is_add) + { + /* + * not interested in interface additions. we will not back walk + * to resolve paths through newly added interfaces. Why? The control + * plane should have the brains to add interfaces first, then routes. + * So the case where there are paths with a interface that matches + * one just created is the case where the path resolved through an + * interface that was deleted, and still has not been removed. The + * new interface added, is NO GUARANTEE that the interface being + * added now, even though it may have the same sw_if_index, is the + * same interface that the path needs. So tough! 
+ * If the control plane wants these routes to resolve it needs to + * remove and add them again. + */ + return (NULL); + } + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_mcasts[proto]) || + ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + continue; + + adj = adj_get(adj_mcasts[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete); + +u8* +format_adj_mcast (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format(s, "%U-mcast: ", + format_fib_protocol, adj->ia_nh_proto); + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), 0); + + return (s); +} + + +static void +adj_dpo_lock (dpo_id_t *dpo) +{ + adj_lock(dpo->dpoi_index); +} +static void +adj_dpo_unlock (dpo_id_t *dpo) +{ + adj_unlock(dpo->dpoi_index); +} + +const static dpo_vft_t adj_mcast_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_mcast, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a mcast + * object. + * + * this means that these graph nodes are ones from which a mcast is the + * parent object in the DPO-graph. + */ +const static char* const adj_mcast_ip4_nodes[] = +{ + "ip4-rewrite-mcast", + NULL, +}; +const static char* const adj_mcast_ip6_nodes[] = +{ + "ip6-rewrite-mcast", + NULL, +}; + +const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = adj_mcast_ip4_nodes, + [DPO_PROTO_IP6] = adj_mcast_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +/** + * @brief Return the size of the adj DB. 
+ * This is only for testing purposes so an efficient implementation is not needed + */ +u32 +adj_mcast_db_size (void) +{ + u32 n_adjs, sw_if_index; + fib_protocol_t proto; + + n_adjs = 0; + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + for (sw_if_index = 0; + sw_if_index < vec_len(adj_mcasts[proto]); + sw_if_index++) + { + if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) + { + n_adjs++; + } + } + } + + return (n_adjs); +} + +void +adj_mcast_module_init (void) +{ + dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes); +} diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h new file mode 100644 index 00000000000..21c5a1417dd --- /dev/null +++ b/src/vnet/adj/adj_mcast.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @brief Mcast Adjacency + * + * The multicast adjacency forwards IP traffic on an interface toward a multicast + * group address. This is a different type of adjacency to a unicast adjacency + * since the application of the MAC header is different, and so the VLIB node + * visited is also different. DPO types have different VLIB nodes. 
+ */ + +#ifndef __ADJ_MCAST_H__ +#define __ADJ_MCAST_H__ + +#include + +/** + * @brief + * Add (and lock) a new or lock an existing mcast adjacency + * + * @param proto + * The protocol for the neighbours that we wish to mcast + * + * @param link_type + * A description of the protocol of the packets that will forward + * through this adj. On an ethernet interface this is the MAC header's + * ether-type + * + * @param sw_if_index + * The interface on which to mcast + */ +extern adj_index_t adj_mcast_add_or_lock(fib_protocol_t proto, + vnet_link_t link_type, + u32 sw_if_index); + +/** + * @brief + * Update the rewrite string for an existing adjacecny. + * + * @param + * The index of the adj to update + * + * @param + * The new rewrite + */ +extern void adj_mcast_update_rewrite(adj_index_t adj_index, + u8 *rewrite); + +/** + * @brief Format/display a mcast adjacency. + */ +extern u8* format_adj_mcast(u8* s, va_list *ap); + +/** + * @brief Get the sze of the mcast adj DB. Test purposes only. + */ +extern u32 adj_mcast_db_size(void); + +/** + * @brief + * Module initialisation + */ +extern void adj_mcast_module_init(void); + +#endif diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index 1344bb67fcc..9e8073d3225 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -162,7 +162,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto, } /* - * adj_add_for_nbr + * adj_nbr_add_or_lock * * Add an adjacency for the neighbour requested. * diff --git a/src/vnet/adj/adj_rewrite.c b/src/vnet/adj/adj_rewrite.c deleted file mode 100644 index 7d792557724..00000000000 --- a/src/vnet/adj/adj_rewrite.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -/** - * adj_rewrite_add_and_lock - * - * A rewrite sub-type has the rewrite string provided, but no key - */ -adj_index_t -adj_rewrite_add_and_lock (fib_protocol_t nh_proto, - vnet_link_t link_type, - u32 sw_if_index, - u8 *rewrite) -{ - ip_adjacency_t *adj; - - adj = adj_alloc(nh_proto); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop)); - adj->ia_link = link_type; - adj->ia_nh_proto = nh_proto; - adj->rewrite_header.sw_if_index = sw_if_index; - - ASSERT(NULL != rewrite); - - vnet_rewrite_for_sw_interface(vnet_get_main(), - link_type, - adj->rewrite_header.sw_if_index, - adj_get_rewrite_node(link_type), - rewrite, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); - - adj_lock(adj_get_index(adj)); - - return (adj_get_index(adj)); -} diff --git a/src/vnet/adj/adj_rewrite.h b/src/vnet/adj/adj_rewrite.h deleted file mode 100644 index 25e6bba8868..00000000000 --- a/src/vnet/adj/adj_rewrite.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @brief - * A rewrite adjacency has no key, and thus cannot be 'found' from the - * FIB resolution code. the client therefore needs to maange these adjacencies - */ - -#ifndef __ADJ_REWRITE_H__ -#define __ADJ_REWRITE_H__ - -#include - -/** - * @brief - * Add (and lock) a new or lock an existing neighbour adjacency - * - * @param nh_proto - * The protocol for the next-hop address (v4 or v6) - * - * @param link_type - * A description of the protocol of the packets that will forward - * through this adj. On an ethernet interface this is the MAC header's - * ether-type - * - * @param sw_if_index - * The interface on which the peer resides - * - * @param rewrite - * The rewrite to prepend to packets - */ -extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto, - vnet_link_t link_type, - u32 sw_if_index, - u8 *rewrite); - -#endif -- cgit 1.2.3-korg