From 32e1c010b0c34fd0984f7fc45fae648a182025c5 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Tue, 22 Nov 2016 17:07:28 +0000 Subject: IP Multicast FIB (mfib) - IPv[46] mfib tables with support for (*,G/m), (*,G) and (S,G) exact and longest prefix match - Replication represented via a new replicate DPO. - RPF configuration and data-plane checking - data-plane signals sent to listening control planes. The functions of multicast forwarding entries differ from their unicast counterparts, so we introduce new mfib_table_t and mfib_entry_t objects. However, we re-use the fib_path_list to resolve and build the entry's output list. The fib_path_list provides the service to construct a replicate DPO for multicast. 'make tests' is extended with two new suites; TEST=mfib, this is an invocation of the CLI command 'test mfib' which deals with many path add/remove, flag set/unset scenarios, TEST=ip-mcast, data-plane forwarding tests. Updated applications to use the new MFIB functions; - IPv6 NS/RA. - DHCPv6 unit tests for these are updated accordingly. 
Change-Id: I49ec37b01f1b170335a5697541c8fd30e6d3a961 Signed-off-by: Neale Ranns --- src/vnet/adj/adj.c | 11 +- src/vnet/adj/adj.h | 1 - src/vnet/adj/adj_internal.h | 2 + src/vnet/adj/adj_mcast.c | 346 ++++++++++ src/vnet/adj/adj_mcast.h | 78 +++ src/vnet/adj/adj_nbr.c | 2 +- src/vnet/adj/adj_rewrite.c | 53 -- src/vnet/adj/adj_rewrite.h | 49 -- src/vnet/dhcpv6/proxy_node.c | 46 +- src/vnet/dpo/dpo.c | 2 + src/vnet/dpo/dpo.h | 8 +- src/vnet/dpo/load_balance.c | 13 +- src/vnet/dpo/load_balance.h | 8 + src/vnet/dpo/replicate_dpo.c | 759 ++++++++++++++++++++++ src/vnet/dpo/replicate_dpo.h | 143 +++++ src/vnet/ethernet/arp.c | 84 ++- src/vnet/ethernet/ethernet.h | 2 + src/vnet/ethernet/interface.c | 20 + src/vnet/fib/fib_attached_export.c | 4 +- src/vnet/fib/fib_entry.h | 2 +- src/vnet/fib/fib_entry_delegate.c | 3 + src/vnet/fib/fib_entry_src.c | 4 + src/vnet/fib/fib_node.h | 2 + src/vnet/fib/fib_path.c | 91 ++- src/vnet/fib/fib_path_list.c | 9 +- src/vnet/fib/fib_path_list.h | 6 + src/vnet/fib/fib_table.c | 69 +- src/vnet/fib/fib_table.h | 16 + src/vnet/fib/fib_test.c | 207 +++--- src/vnet/fib/fib_types.c | 4 + src/vnet/fib/fib_types.h | 14 + src/vnet/fib/fib_urpf_list.c | 20 +- src/vnet/fib/ip4_fib.c | 50 +- src/vnet/fib/ip4_fib.h | 9 + src/vnet/fib/ip6_fib.c | 117 ++-- src/vnet/fib/ip6_fib.h | 11 +- src/vnet/fib/mpls_fib.c | 17 +- src/vnet/fib/mpls_fib.h | 9 + src/vnet/ip/ip.api | 53 ++ src/vnet/ip/ip4.h | 24 + src/vnet/ip/ip4_forward.c | 498 +++++---------- src/vnet/ip/ip4_input.c | 4 +- src/vnet/ip/ip6.h | 27 + src/vnet/ip/ip6_forward.c | 83 ++- src/vnet/ip/ip6_input.c | 43 +- src/vnet/ip/ip6_neighbor.c | 134 ++-- src/vnet/ip/ip_api.c | 210 +++++++ src/vnet/ip/lookup.c | 171 +++++ src/vnet/ip/lookup.h | 82 +-- src/vnet/mcast/mcast.c | 565 ----------------- src/vnet/mcast/mcast.h | 50 -- src/vnet/mcast/mcast_test.c | 149 ----- src/vnet/mfib/ip4_mfib.c | 465 ++++++++++++++ src/vnet/mfib/ip4_mfib.h | 95 +++ src/vnet/mfib/ip6_mfib.c | 663 +++++++++++++++++++ 
src/vnet/mfib/ip6_mfib.h | 109 ++++ src/vnet/mfib/mfib_entry.c | 1096 ++++++++++++++++++++++++++++++++ src/vnet/mfib/mfib_entry.h | 172 +++++ src/vnet/mfib/mfib_forward.c | 512 +++++++++++++++ src/vnet/mfib/mfib_itf.c | 119 ++++ src/vnet/mfib/mfib_itf.h | 63 ++ src/vnet/mfib/mfib_signal.c | 201 ++++++ src/vnet/mfib/mfib_signal.h | 59 ++ src/vnet/mfib/mfib_table.c | 489 ++++++++++++++ src/vnet/mfib/mfib_table.h | 331 ++++++++++ src/vnet/mfib/mfib_test.c | 1225 ++++++++++++++++++++++++++++++++++++ src/vnet/mfib/mfib_types.c | 213 +++++++ src/vnet/mfib/mfib_types.h | 185 ++++++ src/vnet/misc.c | 3 + src/vnet/rewrite.h | 31 + src/vnet/sr/sr.c | 4 +- src/vnet/util/radix.c | 1104 ++++++++++++++++++++++++++++++++ src/vnet/util/radix.h | 147 +++++ src/vnet/vxlan/vxlan.c | 112 +++- 74 files changed, 10141 insertions(+), 1641 deletions(-) create mode 100644 src/vnet/adj/adj_mcast.c create mode 100644 src/vnet/adj/adj_mcast.h delete mode 100644 src/vnet/adj/adj_rewrite.c delete mode 100644 src/vnet/adj/adj_rewrite.h create mode 100644 src/vnet/dpo/replicate_dpo.c create mode 100644 src/vnet/dpo/replicate_dpo.h delete mode 100644 src/vnet/mcast/mcast.c delete mode 100644 src/vnet/mcast/mcast.h delete mode 100644 src/vnet/mcast/mcast_test.c create mode 100644 src/vnet/mfib/ip4_mfib.c create mode 100644 src/vnet/mfib/ip4_mfib.h create mode 100644 src/vnet/mfib/ip6_mfib.c create mode 100644 src/vnet/mfib/ip6_mfib.h create mode 100644 src/vnet/mfib/mfib_entry.c create mode 100644 src/vnet/mfib/mfib_entry.h create mode 100644 src/vnet/mfib/mfib_forward.c create mode 100644 src/vnet/mfib/mfib_itf.c create mode 100644 src/vnet/mfib/mfib_itf.h create mode 100644 src/vnet/mfib/mfib_signal.c create mode 100644 src/vnet/mfib/mfib_signal.h create mode 100644 src/vnet/mfib/mfib_table.c create mode 100644 src/vnet/mfib/mfib_table.h create mode 100644 src/vnet/mfib/mfib_test.c create mode 100644 src/vnet/mfib/mfib_types.c create mode 100644 src/vnet/mfib/mfib_types.h create mode 100644 
src/vnet/util/radix.c create mode 100644 src/vnet/util/radix.h (limited to 'src/vnet') diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c index d0be0f0eaff..a99f173f6d0 100644 --- a/src/vnet/adj/adj.c +++ b/src/vnet/adj/adj.c @@ -17,6 +17,7 @@ #include #include #include +#include #include /* @@ -58,8 +59,6 @@ adj_alloc (fib_protocol_t proto) adj_get_index(adj)); adj->rewrite_header.sw_if_index = ~0; - adj->mcast_group_index = ~0; - adj->saved_lookup_next_index = 0; adj->n_adj = 1; adj->lookup_next_index = 0; @@ -116,6 +115,9 @@ format_ip_adjacency (u8 * s, va_list * args) case IP_LOOKUP_NEXT_MIDCHAIN: s = format (s, "%U", format_adj_midchain, adj_index, 2); break; + case IP_LOOKUP_NEXT_MCAST: + s = format (s, "%U", format_adj_mcast, adj_index, 0); + break; default: break; } @@ -179,6 +181,10 @@ adj_last_lock_gone (ip_adjacency_t *adj) adj_glean_remove(adj->ia_nh_proto, adj->rewrite_header.sw_if_index); break; + case IP_LOOKUP_NEXT_MCAST: + adj_mcast_remove(adj->ia_nh_proto, + adj->rewrite_header.sw_if_index); + break; default: /* * type not stored in any DB from which we need to remove it @@ -350,6 +356,7 @@ adj_module_init (vlib_main_t * vm) adj_nbr_module_init(); adj_glean_module_init(); adj_midchain_module_init(); + adj_mcast_module_init(); /* * one special adj to reserve index 0 diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h index e85625db7ee..29bae6733b3 100644 --- a/src/vnet/adj/adj.h +++ b/src/vnet/adj/adj.h @@ -45,7 +45,6 @@ #include #include #include -#include #include /** diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h index 833bc7c9e01..ece59121e9c 100644 --- a/src/vnet/adj/adj_internal.h +++ b/src/vnet/adj/adj_internal.h @@ -100,5 +100,7 @@ extern void adj_nbr_remove(adj_index_t ai, u32 sw_if_index); extern void adj_glean_remove(fib_protocol_t proto, u32 sw_if_index); +extern void adj_mcast_remove(fib_protocol_t proto, + u32 sw_if_index); #endif diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c new file mode 
100644 index 00000000000..1345aedbad6 --- /dev/null +++ b/src/vnet/adj/adj_mcast.c @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +/* + * The 'DB' of all mcast adjs. + * There is only one mcast per-interface per-protocol, so this is a per-interface + * vector + */ +static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX]; + +static u32 +adj_get_mcast_node (fib_protocol_t proto) +{ + switch (proto) { + case FIB_PROTOCOL_IP4: + return (ip4_rewrite_mcast_node.index); + case FIB_PROTOCOL_IP6: + return (ip6_rewrite_mcast_node.index); + case FIB_PROTOCOL_MPLS: + break; + } + ASSERT(0); + return (0); +} + +/* + * adj_mcast_add_or_lock + * + * The next_hop address here is used for source address selection in the DP. + * The mcast adj is added to an interface's connected prefix, the next-hop + * passed here is the local prefix on the same interface. 
+ */ +adj_index_t +adj_mcast_add_or_lock (fib_protocol_t proto, + vnet_link_t link_type, + u32 sw_if_index) +{ + ip_adjacency_t * adj; + + vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID); + + if (ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + { + vnet_main_t *vnm; + + vnm = vnet_get_main(); + adj = adj_alloc(proto); + + adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST; + adj->ia_nh_proto = proto; + adj->ia_link = link_type; + adj_mcasts[proto][sw_if_index] = adj_get_index(adj); + adj_lock(adj_get_index(adj)); + + vnet_rewrite_init(vnm, sw_if_index, + adj_get_mcast_node(proto), + vnet_tx_node_index_for_sw_interface(vnm, sw_if_index), + &adj->rewrite_header); + + /* + * we need a rewrite where the destination IP address is converted + * to the appropriate link-layer address. This is interface specific. + * So ask the interface to do it. + */ + vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, + adj_get_index(adj)); + } + else + { + adj = adj_get(adj_mcasts[proto][sw_if_index]); + adj_lock(adj_get_index(adj)); + } + + return (adj_get_index(adj)); +} + +/** + * adj_mcast_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewirte is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. 
+ */ +void +adj_mcast_update_rewrite (adj_index_t adj_index, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + /* + * update the adj's rewrite string and build the arc + * from the rewrite node to the interface's TX node + */ + adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST, + adj_get_mcast_node(adj->ia_nh_proto), + vnet_tx_node_index_for_sw_interface( + vnet_get_main(), + adj->rewrite_header.sw_if_index), + rewrite); +} + +void +adj_mcast_remove (fib_protocol_t proto, + u32 sw_if_index) +{ + ASSERT(sw_if_index < vec_len(adj_mcasts[proto])); + + adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID; +} + +static clib_error_t * +adj_mcast_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + /* + * for each mcast on the interface trigger a walk back to the children + */ + fib_protocol_t proto; + ip_adjacency_t *adj; + + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_mcasts[proto]) || + ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + continue; + + adj = adj_get(adj_mcasts[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? 
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP : + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change); + +/** + * @brief Invoked on each SW interface of a HW interface when the + * HW interface state changes + */ +static void +adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + void *arg) +{ + adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg); +} + +/** + * @brief Registered callback for HW interface state changes + */ +static clib_error_t * +adj_mcast_hw_interface_state_change (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + /* + * walk SW interfaces on the HW + */ + uword sw_flags; + + sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? + VNET_SW_INTERFACE_FLAG_ADMIN_UP : + 0); + + vnet_hw_interface_walk_sw(vnm, hw_if_index, + adj_nbr_hw_sw_interface_state_change, + (void*) sw_flags); + + return (NULL); +} + +VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION( + adj_mcast_hw_interface_state_change); + +static clib_error_t * +adj_mcast_interface_delete (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + /* + * for each mcast on the interface trigger a walk back to the children + */ + fib_protocol_t proto; + ip_adjacency_t *adj; + + if (is_add) + { + /* + * not interested in interface additions. we will not back walk + * to resolve paths through newly added interfaces. Why? The control + * plane should have the brains to add interfaces first, then routes. + * So the case where there are paths with a interface that matches + * one just created is the case where the path resolved through an + * interface that was deleted, and still has not been removed. The + * new interface added, is NO GUARANTEE that the interface being + * added now, even though it may have the same sw_if_index, is the + * same interface that the path needs. So tough! 
+ * If the control plane wants these routes to resolve it needs to + * remove and add them again. + */ + return (NULL); + } + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_mcasts[proto]) || + ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) + continue; + + adj = adj_get(adj_mcasts[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete); + +u8* +format_adj_mcast (u8* s, va_list *ap) +{ + index_t index = va_arg(*ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format(s, "%U-mcast: ", + format_fib_protocol, adj->ia_nh_proto); + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), 0); + + return (s); +} + + +static void +adj_dpo_lock (dpo_id_t *dpo) +{ + adj_lock(dpo->dpoi_index); +} +static void +adj_dpo_unlock (dpo_id_t *dpo) +{ + adj_unlock(dpo->dpoi_index); +} + +const static dpo_vft_t adj_mcast_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_mcast, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a mcast + * object. + * + * this means that these graph nodes are ones from which a mcast is the + * parent object in the DPO-graph. + */ +const static char* const adj_mcast_ip4_nodes[] = +{ + "ip4-rewrite-mcast", + NULL, +}; +const static char* const adj_mcast_ip6_nodes[] = +{ + "ip6-rewrite-mcast", + NULL, +}; + +const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = adj_mcast_ip4_nodes, + [DPO_PROTO_IP6] = adj_mcast_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +/** + * @brief Return the size of the adj DB. 
+ * This is only for testing purposes so an efficient implementation is not needed + */ +u32 +adj_mcast_db_size (void) +{ + u32 n_adjs, sw_if_index; + fib_protocol_t proto; + + n_adjs = 0; + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + for (sw_if_index = 0; + sw_if_index < vec_len(adj_mcasts[proto]); + sw_if_index++) + { + if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) + { + n_adjs++; + } + } + } + + return (n_adjs); +} + +void +adj_mcast_module_init (void) +{ + dpo_register(DPO_ADJACENCY_MCAST, &adj_mcast_dpo_vft, adj_mcast_nodes); +} diff --git a/src/vnet/adj/adj_mcast.h b/src/vnet/adj/adj_mcast.h new file mode 100644 index 00000000000..21c5a1417dd --- /dev/null +++ b/src/vnet/adj/adj_mcast.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @brief Mcast Adjacency + * + * The multicast adjacency forwards IP traffic on an interface toward a multicast + * group address. This is a different type of adjacency to a unicast adjacency + * since the application of the MAC header is different, and so the VLIB node + * visited is also different. DPO types have different VLIB nodes. 
+ */ + +#ifndef __ADJ_MCAST_H__ +#define __ADJ_MCAST_H__ + +#include + +/** + * @brief + * Add (and lock) a new or lock an existing mcast adjacency + * + * @param proto + * The protocol for the neighbours that we wish to mcast + * + * @param link_type + * A description of the protocol of the packets that will forward + * through this adj. On an ethernet interface this is the MAC header's + * ether-type + * + * @param sw_if_index + * The interface on which to mcast + */ +extern adj_index_t adj_mcast_add_or_lock(fib_protocol_t proto, + vnet_link_t link_type, + u32 sw_if_index); + +/** + * @brief + * Update the rewrite string for an existing adjacecny. + * + * @param + * The index of the adj to update + * + * @param + * The new rewrite + */ +extern void adj_mcast_update_rewrite(adj_index_t adj_index, + u8 *rewrite); + +/** + * @brief Format/display a mcast adjacency. + */ +extern u8* format_adj_mcast(u8* s, va_list *ap); + +/** + * @brief Get the sze of the mcast adj DB. Test purposes only. + */ +extern u32 adj_mcast_db_size(void); + +/** + * @brief + * Module initialisation + */ +extern void adj_mcast_module_init(void); + +#endif diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c index 1344bb67fcc..9e8073d3225 100644 --- a/src/vnet/adj/adj_nbr.c +++ b/src/vnet/adj/adj_nbr.c @@ -162,7 +162,7 @@ adj_nbr_alloc (fib_protocol_t nh_proto, } /* - * adj_add_for_nbr + * adj_nbr_add_or_lock * * Add an adjacency for the neighbour requested. * diff --git a/src/vnet/adj/adj_rewrite.c b/src/vnet/adj/adj_rewrite.c deleted file mode 100644 index 7d792557724..00000000000 --- a/src/vnet/adj/adj_rewrite.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -/** - * adj_rewrite_add_and_lock - * - * A rewrite sub-type has the rewrite string provided, but no key - */ -adj_index_t -adj_rewrite_add_and_lock (fib_protocol_t nh_proto, - vnet_link_t link_type, - u32 sw_if_index, - u8 *rewrite) -{ - ip_adjacency_t *adj; - - adj = adj_alloc(nh_proto); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - memset(&adj->sub_type.nbr.next_hop, 0, sizeof(adj->sub_type.nbr.next_hop)); - adj->ia_link = link_type; - adj->ia_nh_proto = nh_proto; - adj->rewrite_header.sw_if_index = sw_if_index; - - ASSERT(NULL != rewrite); - - vnet_rewrite_for_sw_interface(vnet_get_main(), - link_type, - adj->rewrite_header.sw_if_index, - adj_get_rewrite_node(link_type), - rewrite, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); - - adj_lock(adj_get_index(adj)); - - return (adj_get_index(adj)); -} diff --git a/src/vnet/adj/adj_rewrite.h b/src/vnet/adj/adj_rewrite.h deleted file mode 100644 index 25e6bba8868..00000000000 --- a/src/vnet/adj/adj_rewrite.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * @brief - * A rewrite adjacency has no key, and thus cannot be 'found' from the - * FIB resolution code. the client therefore needs to maange these adjacencies - */ - -#ifndef __ADJ_REWRITE_H__ -#define __ADJ_REWRITE_H__ - -#include - -/** - * @brief - * Add (and lock) a new or lock an existing neighbour adjacency - * - * @param nh_proto - * The protocol for the next-hop address (v4 or v6) - * - * @param link_type - * A description of the protocol of the packets that will forward - * through this adj. On an ethernet interface this is the MAC header's - * ether-type - * - * @param sw_if_index - * The interface on which the peer resides - * - * @param rewrite - * The rewrite to prepend to packets - */ -extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto, - vnet_link_t link_type, - u32 sw_if_index, - u8 *rewrite); - -#endif diff --git a/src/vnet/dhcpv6/proxy_node.c b/src/vnet/dhcpv6/proxy_node.c index 4dd2239ffaa..77afef2aa7d 100644 --- a/src/vnet/dhcpv6/proxy_node.c +++ b/src/vnet/dhcpv6/proxy_node.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include static char * dhcpv6_proxy_error_strings[] = { #define dhcpv6_proxy_error(n,s) s, @@ -819,7 +821,7 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address, u32 server_fib_index = 0; u32 rx_fib_index = 0; - rx_fib_index = ip6_fib_table_find_or_create_and_lock(rx_fib_id); + rx_fib_index = ip6_mfib_table_find_or_create_and_lock(rx_fib_id); server_fib_index = ip6_fib_table_find_or_create_and_lock(server_fib_id); if (is_del) @@ -848,8 +850,10 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address, if (rx_fib_id == 0) { server = pool_elt_at_index (dm->dhcp6_servers, 0); - - goto initialize_it; + if (server->valid) + goto reconfigure_it; + else + goto initialize_it; } if (rx_fib_index < 
vec_len(dm->dhcp6_server_index_by_rx_fib_index)) @@ -866,6 +870,42 @@ int dhcpv6_proxy_set_server_2 (ip6_address_t *addr, ip6_address_t *src_address, pool_get (dm->dhcp6_servers, server); initialize_it: + { + const mfib_prefix_t all_dhcp_servers = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6 = dm->all_dhcpv6_server_relay_agent_address, + } + }; + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + mfib_table_entry_path_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + /* + * Each interface that is enabled in this table, needs to be added + * as an accepting interface, but this is not easily doable in VPP. + * So we cheat. Add a flag to the entry that indicates accept form + * any interface. + * We will still only accept on v6 enabled interfaces, since the input + * feature ensures this. 
+ */ + mfib_table_entry_update(rx_fib_index, + &all_dhcp_servers, + MFIB_SOURCE_DHCP, + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF); + } + +reconfigure_it: copy_ip6_address(&server->dhcp6_server, addr); copy_ip6_address(&server->dhcp6_src_address, src_address); diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c index 688d2892412..cc2fa0eb91c 100644 --- a/src/vnet/dpo/dpo.c +++ b/src/vnet/dpo/dpo.c @@ -36,6 +36,7 @@ #include #include #include +#include /** * Array of char* names for the DPO types and protos @@ -449,6 +450,7 @@ dpo_module_init (vlib_main_t * vm) classify_dpo_module_init(); lookup_dpo_module_init(); ip_null_dpo_module_init(); + replicate_module_init(); return (NULL); } diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h index 1efcbc8834b..aff4e1b82cc 100644 --- a/src/vnet/dpo/dpo.h +++ b/src/vnet/dpo/dpo.h @@ -100,15 +100,18 @@ typedef enum dpo_type_t_ { * @brief load-balancing over a choice of [un]equal cost paths */ DPO_LOAD_BALANCE, + DPO_REPLICATE, DPO_ADJACENCY, DPO_ADJACENCY_INCOMPLETE, DPO_ADJACENCY_MIDCHAIN, DPO_ADJACENCY_GLEAN, + DPO_ADJACENCY_MCAST, DPO_RECEIVE, DPO_LOOKUP, DPO_LISP_CP, DPO_CLASSIFY, DPO_MPLS_LABEL, + DPO_MFIB_ENTRY, DPO_LAST, } __attribute__((packed)) dpo_type_t; @@ -123,12 +126,15 @@ typedef enum dpo_type_t_ { [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \ [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ + [DPO_ADJACENCY_MCAST] = "dpo-adj-mcast", \ [DPO_RECEIVE] = "dpo-receive", \ [DPO_LOOKUP] = "dpo-lookup", \ [DPO_LOAD_BALANCE] = "dpo-load-balance", \ + [DPO_REPLICATE] = "dpo-replicate", \ [DPO_LISP_CP] = "dpo-lisp-cp", \ [DPO_CLASSIFY] = "dpo-classify", \ - [DPO_MPLS_LABEL] = "dpo-mpls-label" \ + [DPO_MPLS_LABEL] = "dpo-mpls-label", \ + [DPO_MFIB_ENTRY] = "dpo-mfib_entry" \ } /** diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index e70a7a306e1..f11b4e4de84 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -238,6 +238,17 
@@ load_balance_is_drop (const dpo_id_t *dpo) return (0); } +void +load_balance_set_fib_entry_flags (index_t lbi, + fib_entry_flag_t flags) +{ + load_balance_t *lb; + + lb = load_balance_get(lbi); + lb->lb_fib_entry_flags = flags; +} + + void load_balance_set_urpf (index_t lbi, index_t urpf) @@ -683,7 +694,7 @@ load_balance_multipath_update (const dpo_id_t *dpo, buckets, n_buckets); - for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++) + for (ii = n_buckets; ii < old_n_buckets; ii++) { dpo_reset(&buckets[ii]); } diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index 1799653628d..b901c5beb84 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -36,6 +36,7 @@ #include #include #include +#include /** * Load-balance main @@ -98,6 +99,11 @@ typedef struct load_balance_t_ { */ dpo_proto_t lb_proto; + /** + * Flags from the load-balance's associated fib_entry_t + */ + fib_entry_flag_t lb_fib_entry_flags; + /** * The number of locks, which is approximately the number of users, * of this load-balance. @@ -167,6 +173,8 @@ extern void load_balance_set_bucket(index_t lbi, const dpo_id_t *next); extern void load_balance_set_urpf(index_t lbi, index_t urpf); +extern void load_balance_set_fib_entry_flags(index_t lbi, + fib_entry_flag_t flags); extern index_t load_balance_get_urpf(index_t lbi); extern u8* format_load_balance(u8 * s, va_list * args); diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c new file mode 100644 index 00000000000..a2d5fdb68bd --- /dev/null +++ b/src/vnet/dpo/replicate_dpo.c @@ -0,0 +1,759 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#undef REP_DEBUG + +#ifdef REP_DEBUG +#define REP_DBG(_rep, _fmt, _args...) \ +{ \ + u8* _tmp =NULL; \ + clib_warning("rep:[%s]:" _fmt, \ + replicate_format(replicate_get_index((_rep)), \ + 0, _tmp), \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define REP_DBG(_p, _fmt, _args...) +#endif + + +/** + * Pool of all DPOs. It's not static so the DP can have fast access + */ +replicate_t *replicate_pool; + +/** + * The one instance of replicate main + */ +replicate_main_t replicate_main; + +static inline index_t +replicate_get_index (const replicate_t *rep) +{ + return (rep - replicate_pool); +} + +static inline dpo_id_t* +replicate_get_buckets (replicate_t *rep) +{ + if (REP_HAS_INLINE_BUCKETS(rep)) + { + return (rep->rep_buckets_inline); + } + else + { + return (rep->rep_buckets); + } +} + +static replicate_t * +replicate_alloc_i (void) +{ + replicate_t *rep; + + pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES); + memset(rep, 0, sizeof(*rep)); + + vlib_validate_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + vlib_zero_combined_counter(&(replicate_main.repm_counters), + replicate_get_index(rep)); + + return (rep); +} + +static u8* +replicate_format (index_t repi, + replicate_format_flags_t flags, + u32 indent, + u8 *s) +{ + vlib_counter_t to; + replicate_t *rep; + dpo_id_t *buckets; + u32 i; + + rep = replicate_get(repi); + vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to); + buckets = replicate_get_buckets(rep); + + s = format(s, "%U: 
", format_dpo_type, DPO_REPLICATE); + s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets); + s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + s = format(s, "\n%U", format_white_space, indent+2); + s = format(s, "[%d]", i); + s = format(s, " %U", format_dpo_id, &buckets[i], indent+6); + } + return (s); +} + +u8* +format_replicate (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t); + + return (replicate_format(repi, flags, 0, s)); +} +static u8* +format_replicate_dpo (u8 * s, va_list * args) +{ + index_t repi = va_arg(*args, index_t); + u32 indent = va_arg(*args, u32); + + return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s)); +} + + +static replicate_t * +replicate_create_i (u32 num_buckets, + dpo_proto_t rep_proto) +{ + replicate_t *rep; + + rep = replicate_alloc_i(); + rep->rep_n_buckets = num_buckets; + rep->rep_proto = rep_proto; + + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + } + + REP_DBG(rep, "create"); + + return (rep); +} + +index_t +replicate_create (u32 n_buckets, + dpo_proto_t rep_proto) +{ + return (replicate_get_index(replicate_create_i(n_buckets, rep_proto))); +} + +static inline void +replicate_set_bucket_i (replicate_t *rep, + u32 bucket, + dpo_id_t *buckets, + const dpo_id_t *next) +{ + dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next); +} + +void +replicate_set_bucket (index_t repi, + u32 bucket, + const dpo_id_t *next) +{ + replicate_t *rep; + dpo_id_t *buckets; + + rep = replicate_get(repi); + buckets = replicate_get_buckets(rep); + + ASSERT(bucket < rep->rep_n_buckets); + + replicate_set_bucket_i(rep, bucket, buckets, next); +} + +int +replicate_is_drop (const dpo_id_t *dpo) +{ + replicate_t *rep; + + if (DPO_REPLICATE != dpo->dpoi_type) + return (0); + + rep = 
replicate_get(dpo->dpoi_index); + + if (1 == rep->rep_n_buckets) + { + return (dpo_is_drop(replicate_get_bucket_i(rep, 0))); + } + return (0); +} + +const dpo_id_t * +replicate_get_bucket (index_t repi, + u32 bucket) +{ + replicate_t *rep; + + rep = replicate_get(repi); + + return (replicate_get_bucket_i(rep, bucket)); +} + + +static load_balance_path_t * +replicate_multipath_next_hop_fixup (load_balance_path_t *nhs, + dpo_proto_t drop_proto) +{ + if (0 == vec_len(nhs)) + { + load_balance_path_t *nh; + + /* + * we need something for the replicate. so use the drop + */ + vec_add2(nhs, nh, 1); + + nh->path_weight = 1; + dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto)); + } + + return (nhs); +} + +/* + * Fill in adjacencies in block based on corresponding + * next hop adjacencies. + */ +static void +replicate_fill_buckets (replicate_t *rep, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t * nh; + u16 ii, bucket; + + bucket = 0; + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo); + } + } +} + +static inline void +replicate_set_n_buckets (replicate_t *rep, + u32 n_buckets) +{ + rep->rep_n_buckets = n_buckets; +} + +void +replicate_multipath_update (const dpo_id_t *dpo, + load_balance_path_t * next_hops) +{ + load_balance_path_t * nh, * nhs; + dpo_id_t *tmp_dpo; + u32 ii, n_buckets; + replicate_t *rep; + + ASSERT(DPO_REPLICATE == dpo->dpoi_type); + rep = replicate_get(dpo->dpoi_index); + nhs = replicate_multipath_next_hop_fixup(next_hops, + rep->rep_proto); + n_buckets = vec_len(nhs); + + if (0 == rep->rep_n_buckets) + { + /* + * first time initialisation. no packets inflight, so we can write + * at leisure. 
+ */ + replicate_set_n_buckets(rep, n_buckets); + + if (!REP_HAS_INLINE_BUCKETS(rep)) + vec_validate_aligned(rep->rep_buckets, + rep->rep_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else + { + /* + * This is a modification of an existing replicate. + * We need to ensure that packets in flight see a consistent state, that + * is the number of reported buckets the REP has + * is not more than it actually has. So if the + * number of buckets is increasing, we must update the bucket array first, + * then the reported number. vice-versa if the number of buckets goes down. + */ + if (n_buckets == rep->rep_n_buckets) + { + /* + * no change in the number of buckets. we can simply fill what + * is new over what is old. + */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + } + else if (n_buckets > rep->rep_n_buckets) + { + /* + * we have more buckets. the old replicate map (if there is one) + * will remain valid, i.e. mapping to indices within range, so we + * update it last. + */ + if (n_buckets > REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * the new increased number of buckets is crossing the threshold + * from the inline storage to out-line. Alloc the outline buckets + * first, then fixup the number. then reset the inlines. + */ + ASSERT(NULL == rep->rep_buckets); + vec_validate_aligned(rep->rep_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, + rep->rep_buckets, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + CLIB_MEMORY_BARRIER(); + + for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++) + { + dpo_reset(&rep->rep_buckets_inline[ii]); + } + } + else + { + if (n_buckets <= REP_NUM_INLINE_BUCKETS) + { + /* + * we are not crossing the threshold and it's still inline buckets. + * we can write the new on the old.. 
+ */ + replicate_fill_buckets(rep, nhs, + replicate_get_buckets(rep), + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + } + else + { + /* + * we are not crossing the threshold. We need a new bucket array to + * hold the increased number of choices. + */ + dpo_id_t *new_buckets, *old_buckets, *tmp_dpo; + + new_buckets = NULL; + old_buckets = replicate_get_buckets(rep); + + vec_validate_aligned(new_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + replicate_fill_buckets(rep, nhs, new_buckets, n_buckets); + CLIB_MEMORY_BARRIER(); + rep->rep_buckets = new_buckets; + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + + vec_foreach(tmp_dpo, old_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(old_buckets); + } + } + } + else + { + /* + * bucket size shrinkage. + */ + if (n_buckets <= REP_NUM_INLINE_BUCKETS && + rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS) + { + /* + * the new decreased number of buckets is crossing the threshold + * from out-line storage to inline: + * 1 - Fill the inline buckets, + * 2 - fixup the number (and this point the inline buckets are + * used). + * 3 - free the outline buckets + */ + replicate_fill_buckets(rep, nhs, + rep->rep_buckets_inline, + n_buckets); + CLIB_MEMORY_BARRIER(); + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + vec_foreach(tmp_dpo, rep->rep_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(rep->rep_buckets); + } + else + { + /* + * not crossing the threshold. + * 1 - update the number to the smaller size + * 2 - write the new buckets + * 3 - reset those no longer used. 
+ */ + dpo_id_t *buckets; + u32 old_n_buckets; + + old_n_buckets = rep->rep_n_buckets; + buckets = replicate_get_buckets(rep); + + replicate_set_n_buckets(rep, n_buckets); + CLIB_MEMORY_BARRIER(); + + replicate_fill_buckets(rep, nhs, + buckets, + n_buckets); + + for (ii = n_buckets; ii < old_n_buckets; ii++) + { + dpo_reset(&buckets[ii]); + } + } + } + } + + vec_foreach (nh, nhs) + { + dpo_reset(&nh->path_dpo); + } + vec_free(nhs); +} + +static void +replicate_lock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks++; +} + +static void +replicate_destroy (replicate_t *rep) +{ + dpo_id_t *buckets; + int i; + + buckets = replicate_get_buckets(rep); + + for (i = 0; i < rep->rep_n_buckets; i++) + { + dpo_reset(&buckets[i]); + } + + REP_DBG(rep, "destroy"); + if (!REP_HAS_INLINE_BUCKETS(rep)) + { + vec_free(rep->rep_buckets); + } + + pool_put(replicate_pool, rep); +} + +static void +replicate_unlock (dpo_id_t *dpo) +{ + replicate_t *rep; + + rep = replicate_get(dpo->dpoi_index); + + rep->rep_locks--; + + if (0 == rep->rep_locks) + { + replicate_destroy(rep); + } +} + +static void +replicate_mem_show (void) +{ + fib_show_memory_usage("replicate", + pool_elts(replicate_pool), + pool_len(replicate_pool), + sizeof(replicate_t)); +} + +const static dpo_vft_t rep_vft = { + .dv_lock = replicate_lock, + .dv_unlock = replicate_unlock, + .dv_format = format_replicate_dpo, + .dv_mem_show = replicate_mem_show, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a replicate + * object. + * + * this means that these graph nodes are ones from which a replicate is the + * parent object in the DPO-graph. 
+ */ +const static char* const replicate_ip4_nodes[] = +{ + "ip4-replicate", + NULL, +}; +const static char* const replicate_ip6_nodes[] = +{ + "ip6-replicate", + NULL, +}; +const static char* const replicate_mpls_nodes[] = +{ + "mpls-replicate", + NULL, +}; + +const static char* const * const replicate_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = replicate_ip4_nodes, + [DPO_PROTO_IP6] = replicate_ip6_nodes, + [DPO_PROTO_MPLS] = replicate_mpls_nodes, +}; + +void +replicate_module_init (void) +{ + dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes); +} + +static clib_error_t * +replicate_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t repi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &repi)) + ; + else + break; + } + + if (INDEX_INVALID != repi) + { + vlib_cli_output (vm, "%U", format_replicate, repi, + REPLICATE_FORMAT_DETAIL); + } + else + { + replicate_t *rep; + + pool_foreach(rep, replicate_pool, + ({ + vlib_cli_output (vm, "%U", format_replicate, + replicate_get_index(rep), + REPLICATE_FORMAT_NONE); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (replicate_show_command, static) = { + .path = "show replicate", + .short_help = "show replicate []", + .function = replicate_show, +}; + +typedef struct replicate_trace_t_ +{ + index_t rep_index; + index_t dpo_index; + dpo_type_t dpo_type; +} replicate_trace_t; + +static uword +replicate_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &replicate_main.repm_counters; + u32 n_left_from, * from, * to_next, next_index; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && 
n_left_to_next > 0) + { + u32 next0, ci0, bi0, bucket, repi0; + const replicate_t *rep0; + vlib_buffer_t * b0, *c0; + const dpo_id_t *dpo0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + rep0 = replicate_get(repi0); + + vlib_increment_combined_counter( + cm, cpu_index, repi0, 1, + vlib_buffer_length_in_chain(vm, b0)); + + /* ship the original to the first bucket */ + dpo0 = replicate_get_bucket_i(rep0, 0); + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + + /* ship copies to the rest of the buckets */ + for (bucket = 1; bucket < rep0->rep_n_buckets; bucket++) + { + /* Make a copy */ + c0 = vlib_buffer_copy(vm, b0); + ci0 = vlib_get_buffer_index(vm, c0); + + to_next[0] = ci0; + to_next += 1; + n_left_to_next -= 1; + + dpo0 = replicate_get_bucket_i(rep0, bucket); + next0 = dpo0->dpoi_next_node; + vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + replicate_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); + t->rep_index = repi0; + t->dpo_index = dpo0->dpoi_index; + t->dpo_type = dpo0->dpoi_type; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + ci0, next0); + } + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static u8 * +format_replicate_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = 
va_arg (*args, vlib_node_t *); + replicate_trace_t *t = va_arg (*args, replicate_trace_t *); + + s = format (s, "replicate: %d via %U:%d", + t->rep_index, + format_dpo_type, t->dpo_type, + t->dpo_index); + return s; +} + +static uword +ip4_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip4_replicate_node) = { + .function = ip4_replicate, + .name = "ip4-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +static uword +ip6_replicate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (replicate_inline (vm, node, frame)); +} + +/** + * @brief + */ +VLIB_REGISTER_NODE (ip6_replicate_node) = { + .function = ip6_replicate, + .name = "ip6-replicate", + .vector_size = sizeof (u32), + + .format_trace = format_replicate_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h new file mode 100644 index 00000000000..a564739c9f2 --- /dev/null +++ b/src/vnet/dpo/replicate_dpo.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @brief + * + */ + +#ifndef __REPLICATE_DPO_H__ +#define __REPLICATE_DPO_H__ + +#include +#include +#include +#include +#include + +/** + * replicate main + */ +typedef struct replicate_main_t_ +{ + vlib_combined_counter_main_t repm_counters; +} replicate_main_t; + +extern replicate_main_t replicate_main; + +/** + * The number of buckets that a load-balance object can have and still + * fit in one cache-line + */ +#define REP_NUM_INLINE_BUCKETS 4 + +/** + * The FIB DPO provieds; + * - load-balancing over the next DPOs in the chain/graph + * - per-route counters + */ +typedef struct replicate_t_ { + /** + * number of buckets in the load-balance. always a power of 2. + */ + u16 rep_n_buckets; + + /** + * The protocol of packets that traverse this REP. + * need in combination with the flow hash config to determine how to hash. + * u8. + */ + dpo_proto_t rep_proto; + + /** + * The number of locks, which is approximately the number of users, + * of this load-balance. + * Load-balance objects of via-entries are heavily shared by recursives, + * so the lock count is a u32. + */ + u32 rep_locks; + + /** + * Vector of buckets containing the next DPOs, sized as repo_num + */ + dpo_id_t *rep_buckets; + + /** + * The rest of the cache line is used for buckets. In the common case + * where there there are less than 4 buckets, then the buckets are + * on the same cachlie and we save ourselves a pointer dereferance in + * the data-path. 
+ */ + dpo_id_t rep_buckets_inline[REP_NUM_INLINE_BUCKETS]; +} replicate_t; + +STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES, + "A replicate object size exceeds one cachline"); + +/** + * Flags controlling load-balance formatting/display + */ +typedef enum replicate_format_flags_t_ { + REPLICATE_FORMAT_NONE, + REPLICATE_FORMAT_DETAIL = (1 << 0), +} replicate_format_flags_t; + +extern index_t replicate_create(u32 num_buckets, + dpo_proto_t rep_proto); +extern void replicate_multipath_update( + const dpo_id_t *dpo, + load_balance_path_t *next_hops); + +extern void replicate_set_bucket(index_t repi, + u32 bucket, + const dpo_id_t *next); + +extern u8* format_replicate(u8 * s, va_list * args); + +extern const dpo_id_t *replicate_get_bucket(index_t repi, + u32 bucket); +extern int replicate_is_drop(const dpo_id_t *dpo); + +/** + * The encapsulation breakages are for fast DP access + */ +extern replicate_t *replicate_pool; +static inline replicate_t* +replicate_get (index_t repi) +{ + return (pool_elt_at_index(replicate_pool, repi)); +} + +#define REP_HAS_INLINE_BUCKETS(_rep) \ + ((_rep)->rep_n_buckets <= REP_NUM_INLINE_BUCKETS) + +static inline const dpo_id_t * +replicate_get_bucket_i (const replicate_t *rep, + u32 bucket) +{ + ASSERT(bucket < rep->rep_n_buckets); + + if (PREDICT_TRUE(REP_HAS_INLINE_BUCKETS(rep))) + { + return (&rep->rep_buckets_inline[bucket]); + } + else + { + return (&rep->rep_buckets[bucket]); + } +} + +extern void replicate_module_init(void); + +#endif diff --git a/src/vnet/ethernet/arp.c b/src/vnet/ethernet/arp.c index 0298541bb46..8f07fa55394 100644 --- a/src/vnet/ethernet/arp.c +++ b/src/vnet/ethernet/arp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include /** @@ -438,33 +439,74 @@ arp_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index]; e = arp_entry_find (arp_int, &adj->sub_type.nbr.next_hop.ip4); - if (NULL != e) - { - adj_nbr_walk_nh4 (sw_if_index, - 
&e->ip4_address, arp_mk_complete_walk, e); - } - else + switch (adj->lookup_next_index) { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + if (NULL != e) + { + adj_nbr_walk_nh4 (sw_if_index, + &e->ip4_address, arp_mk_complete_walk, e); + } + else + { + /* + * no matching ARP entry. + * construct the rewrite required to for an ARP packet, and stick + * that in the adj's pipe to smoke. + */ + adj_nbr_update_rewrite + (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite + (vnm, + sw_if_index, + VNET_LINK_ARP, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it + * may want to forward traffic sometime soon. Let's send a + * speculative ARP. just one. If we were to do periodically that + * wouldn't be bad either, but that's more code than i'm prepared to + * write at this time for relatively little reward. + */ + arp_nbr_probe (adj); + } + break; + case IP_LOOKUP_NEXT_MCAST: /* - * no matching ARP entry. - * construct the rewire required to for an ARP packet, and stick - * that in the adj's pipe to smoke. + * Construct a partial rewrite from the known ethernet mcast dest MAC */ - adj_nbr_update_rewrite (ai, - ADJ_NBR_REWRITE_FLAG_INCOMPLETE, - ethernet_build_rewrite (vnm, - sw_if_index, - VNET_LINK_ARP, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + adj_mcast_update_rewrite + (ai, + ethernet_build_rewrite (vnm, + sw_if_index, + adj->ia_link, + ethernet_ip4_mcast_dst_addr ())); /* - * since the FIB has added this adj for a route, it makes sense it may - * want to forward traffic sometime soon. Let's send a speculative ARP. - * just one. If we were to do periodically that wouldn't be bad either, - * but that's more code than i'm prepared to write at this time for - * relatively little reward. + * Complete the remaining fields of the adj's rewrite to direct the + * complete of the rewrite at switch time by copying in the IP + * dst address's bytes. 
+ * Ofset is 11 bytes from the end of the MAC header - which is three + * bytes into the desintation address. And we write 3 bytes. */ - arp_nbr_probe (adj); + adj->rewrite_header.dst_mcast_offset = 11; + adj->rewrite_header.dst_mcast_n_bytes = 3; + + break; + + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_LOAD_BALANCE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; } } diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h index f88b0cf3c87..3acde421f78 100644 --- a/src/vnet/ethernet/ethernet.h +++ b/src/vnet/ethernet/ethernet.h @@ -547,6 +547,8 @@ void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai); u8 *ethernet_build_rewrite (vnet_main_t * vnm, u32 sw_if_index, vnet_link_t link_type, const void *dst_address); +const u8 *ethernet_ip4_mcast_dst_addr (void); +const u8 *ethernet_ip6_mcast_dst_addr (void); extern vlib_node_registration_t ethernet_input_node; diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c index 1c1f4353983..95700309499 100644 --- a/src/vnet/ethernet/interface.c +++ b/src/vnet/ethernet/interface.c @@ -51,6 +51,26 @@ * This file contains code to manage loopback interfaces. 
*/ +const u8 * +ethernet_ip4_mcast_dst_addr (void) +{ + const static u8 ethernet_mcast_dst_mac[] = { + 0x1, 0x0, 0x5e, 0x0, 0x0, 0x0, + }; + + return (ethernet_mcast_dst_mac); +} + +const u8 * +ethernet_ip6_mcast_dst_addr (void) +{ + const static u8 ethernet_mcast_dst_mac[] = { + 0x33, 0x33, 0x00, 0x0, 0x0, 0x0, + }; + + return (ethernet_mcast_dst_mac); +} + /** * @brief build a rewrite string to use for sending packets of type 'link_type' * to 'dst_address' diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c index fd597e3e12c..574131de66a 100644 --- a/src/vnet/fib/fib_attached_export.c +++ b/src/vnet/fib/fib_attached_export.c @@ -303,8 +303,8 @@ fib_attached_export_import (fib_entry_t *fib_entry, * may have realloc'd. */ fib_entry = fib_entry_get(fei); - import->faei_export_sibling = - fib_entry_cover_track(fib_entry_get(import->faei_export_entry), fei); + import->faei_export_sibling = + fib_entry_cover_track(fib_entry_get(import->faei_export_entry), fei); fed = fib_entry_delegate_find_or_add(fib_entry, FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h index 44a5f2e6d7f..f258b755741 100644 --- a/src/vnet/fib/fib_entry.h +++ b/src/vnet/fib/fib_entry.h @@ -220,7 +220,7 @@ typedef enum fib_entry_flag_t_ { FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE), FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), -} fib_entry_flag_t; +} __attribute__((packed)) fib_entry_flag_t; /** * Flags for the source data diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c index a0d45f970b3..efe402d1482 100644 --- a/src/vnet/fib/fib_entry_delegate.c +++ b/src/vnet/fib/fib_entry_delegate.c @@ -119,6 +119,9 @@ fib_entry_chain_type_to_delegate_type (fib_forward_chain_type_t fct) return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS); case FIB_FORW_CHAIN_TYPE_ETHERNET: return 
(FIB_ENTRY_DELEGATE_CHAIN_ETHERNET); + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + break; } ASSERT(0); return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4); diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c index 1fb040608b4..d54787cd4d1 100644 --- a/src/vnet/fib/fib_entry_src.c +++ b/src/vnet/fib/fib_entry_src.c @@ -313,6 +313,8 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index, { case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: /* * EOS traffic with no label to stack, we need the IP Adj */ @@ -458,6 +460,8 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry, { load_balance_set_urpf(dpo_lb->dpoi_index, ui); } + load_balance_set_fib_entry_flags(dpo_lb->dpoi_index, + fib_entry_get_flags_i(fib_entry)); } void diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h index 3ad8ee95b64..457dfb7a6b5 100644 --- a/src/vnet/fib/fib_node.h +++ b/src/vnet/fib/fib_node.h @@ -31,6 +31,7 @@ typedef enum fib_node_type_t_ { */ FIB_NODE_TYPE_WALK, FIB_NODE_TYPE_ENTRY, + FIB_NODE_TYPE_MFIB_ENTRY, FIB_NODE_TYPE_PATH_LIST, FIB_NODE_TYPE_PATH, FIB_NODE_TYPE_ADJ, @@ -51,6 +52,7 @@ typedef enum fib_node_type_t_ { #define FIB_NODE_TYPES { \ [FIB_NODE_TYPE_ENTRY] = "entry", \ + [FIB_NODE_TYPE_MFIB_ENTRY] = "mfib-entry", \ [FIB_NODE_TYPE_WALK] = "walk", \ [FIB_NODE_TYPE_PATH_LIST] = "path-list", \ [FIB_NODE_TYPE_PATH] = "path", \ diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c index 809e3e166da..080057f37a9 100644 --- a/src/vnet/fib/fib_path.c +++ b/src/vnet/fib/fib_path.c @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -960,6 +961,8 @@ fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST; if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED) cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED; + if (rpath->frp_flags & 
FIB_ROUTE_PATH_LOCAL) + cfg_flags |= FIB_PATH_CFG_FLAG_LOCAL; return (cfg_flags); } @@ -1003,28 +1006,25 @@ fib_path_create (fib_node_index_t pl_index, /* * deduce the path's tpye from the parementers and save what is needed. */ - if (~0 != rpath->frp_sw_if_index) + if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_LOCAL) { - if (flags & FIB_PATH_CFG_FLAG_LOCAL) - { - path->fp_type = FIB_PATH_TYPE_RECEIVE; - path->receive.fp_interface = rpath->frp_sw_if_index; - path->receive.fp_addr = rpath->frp_addr; - } - else - { - if (ip46_address_is_zero(&rpath->frp_addr)) - { - path->fp_type = FIB_PATH_TYPE_ATTACHED; - path->attached.fp_interface = rpath->frp_sw_if_index; - } - else - { - path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP; - path->attached_next_hop.fp_interface = rpath->frp_sw_if_index; - path->attached_next_hop.fp_nh = rpath->frp_addr; - } - } + path->fp_type = FIB_PATH_TYPE_RECEIVE; + path->receive.fp_interface = rpath->frp_sw_if_index; + path->receive.fp_addr = rpath->frp_addr; + } + else if (~0 != rpath->frp_sw_if_index) + { + if (ip46_address_is_zero(&rpath->frp_addr)) + { + path->fp_type = FIB_PATH_TYPE_ATTACHED; + path->attached.fp_interface = rpath->frp_sw_if_index; + } + else + { + path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP; + path->attached_next_hop.fp_interface = rpath->frp_sw_if_index; + path->attached_next_hop.fp_nh = rpath->frp_addr; + } } else { @@ -1199,7 +1199,7 @@ fib_path_cmp_i (const fib_path_t *path1, { res = (path1->fp_type - path2->fp_type); } - if (path1->fp_nh_proto != path2->fp_nh_proto) + else if (path1->fp_nh_proto != path2->fp_nh_proto) { res = (path1->fp_nh_proto - path2->fp_nh_proto); } @@ -1770,8 +1770,11 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, break; } - } + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: break; + } + break; case FIB_PATH_TYPE_RECURSIVE: switch (fct) { @@ -1781,13 +1784,15 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case 
FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: fib_path_recursive_adj_update(path, fct, dpo); break; + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: case FIB_FORW_CHAIN_TYPE_ETHERNET: ASSERT(0); break; } break; case FIB_PATH_TYPE_DEAG: - switch (fct) + switch (fct) { case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID, @@ -1800,7 +1805,9 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: case FIB_FORW_CHAIN_TYPE_MPLS_EOS: dpo_copy(dpo, &path->fp_dpo); - break; + break; + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: case FIB_FORW_CHAIN_TYPE_ETHERNET: ASSERT(0); break; @@ -1810,12 +1817,38 @@ fib_path_contribute_forwarding (fib_node_index_t path_index, dpo_copy(dpo, &path->exclusive.fp_ex_dpo); break; case FIB_PATH_TYPE_ATTACHED: - case FIB_PATH_TYPE_RECEIVE: - case FIB_PATH_TYPE_SPECIAL: - ASSERT(0); + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: + break; + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + { + adj_index_t ai; + + /* + * Create the adj needed for sending IP multicast traffic + */ + ai = adj_mcast_add_or_lock(path->fp_nh_proto, + fib_forw_chain_type_to_link_type(fct), + path->attached.fp_interface); + dpo_set(dpo, DPO_ADJACENCY_MCAST, + fib_forw_chain_type_to_dpo_proto(fct), + ai); + adj_unlock(ai); + } + break; + } + break; + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_SPECIAL: + dpo_copy(dpo, &path->fp_dpo); break; } - } } diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c index db9d1af9e3f..ce11cf452cb 100644 --- a/src/vnet/fib/fib_path_list.c +++ b/src/vnet/fib/fib_path_list.c @@ -585,8 +585,11 @@ fib_path_list_resolve (fib_path_list_t *path_list) path_list = 
fib_path_list_get(path_list_index); FIB_PATH_LIST_DBG(path_list, "resovled"); - fib_path_list_mk_urpf(path_list); + if (!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_NO_URPF)) + { + fib_path_list_mk_urpf(path_list); + } return (path_list); } @@ -1025,14 +1028,14 @@ fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, */ void fib_path_list_contribute_forwarding (fib_node_index_t path_list_index, - fib_forward_chain_type_t type, + fib_forward_chain_type_t fct, dpo_id_t *dpo) { fib_path_list_t *path_list; path_list = fib_path_list_get(path_list_index); - fib_path_list_mk_lb(path_list, type, dpo); + fib_path_list_mk_lb(path_list, fct, dpo); } /* diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h index f4f94a1b04a..b4971addfba 100644 --- a/src/vnet/fib/fib_path_list.h +++ b/src/vnet/fib/fib_path_list.h @@ -60,6 +60,10 @@ typedef enum fib_path_list_attribute_t_ { * looped path-list. one path looped implies the whole list is */ FIB_PATH_LIST_ATTRIBUTE_LOOPED, + /** + * no uRPF - do not generate unicast RPF list for this path-list + */ + FIB_PATH_LIST_ATTRIBUTE_NO_URPF, /** * Marher. Add new flags before this one, and then update it. 
*/ @@ -74,6 +78,7 @@ typedef enum fib_path_list_flags_t_ { FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), FIB_PATH_LIST_FLAG_RESOLVED = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED), FIB_PATH_LIST_FLAG_LOOPED = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED), + FIB_PATH_LIST_FLAG_NO_URPF = (1 << FIB_PATH_LIST_ATTRIBUTE_NO_URPF), } fib_path_list_flags_t; #define FIB_PATH_LIST_ATTRIBUTES { \ @@ -83,6 +88,7 @@ typedef enum fib_path_list_flags_t_ { [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ + [FIB_PATH_LIST_ATTRIBUTE_NO_URPF] = "no-uRPF", \ } #define FOR_EACH_PATH_LIST_ATTRIBUTE(_item) \ diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c index 76db42d0ec7..57eb0ae8298 100644 --- a/src/vnet/fib/fib_table.c +++ b/src/vnet/fib/fib_table.c @@ -1043,6 +1043,26 @@ fib_table_destroy (fib_table_t *fib_table) break; } } +void +fib_table_walk (u32 fib_index, + fib_protocol_t proto, + fib_table_walk_fn_t fn, + void *ctx) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_walk(ip4_fib_get(fib_index), fn, ctx); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_walk(fib_index, fn, ctx); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_walk(mpls_fib_get(fib_index), fn, ctx); + break; + } +} + void fib_table_unlock (u32 fib_index, @@ -1094,11 +1114,56 @@ format_fib_table_name (u8* s, va_list ap) return (s); } +/** + * @brief Table flush context. Store the indicies of matching FIB entries + * that need to be removed. 
+ */ +typedef struct fib_table_flush_ctx_t_ +{ + /** + * The list of entries to flush + */ + fib_node_index_t *ftf_entries; + + /** + * The source we are flushing + */ + fib_source_t ftf_source; +} fib_table_flush_ctx_t; + +static int +fib_table_flush_cb (fib_node_index_t fib_entry_index, + void *arg) +{ + fib_table_flush_ctx_t *ctx = arg; + + if (fib_entry_is_sourced(fib_entry_index, ctx->ftf_source)) + { + vec_add1(ctx->ftf_entries, fib_entry_index); + } + return (1); +} + + void fib_table_flush (u32 fib_index, fib_protocol_t proto, fib_source_t source) { - // FIXME - ASSERT(0); + fib_node_index_t *fib_entry_index; + fib_table_flush_ctx_t ctx = { + .ftf_entries = NULL, + .ftf_source = source, + }; + + fib_table_walk(fib_index, proto, + fib_table_flush_cb, + &ctx); + + vec_foreach(fib_entry_index, ctx.ftf_entries) + { + fib_entry_delete(*fib_entry_index, source); + } + + vec_free(ctx.ftf_entries); } diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h index cfec516de1a..e7e66acb773 100644 --- a/src/vnet/fib/fib_table.h +++ b/src/vnet/fib/fib_table.h @@ -729,4 +729,20 @@ extern u32 fib_table_get_num_entries(u32 fib_index, extern fib_table_t *fib_table_get(fib_node_index_t index, fib_protocol_t proto); +/** + * @brief Call back function when walking entries in a FIB table + */ +typedef int (*fib_table_walk_fn_t)(fib_node_index_t fei, + void *ctx); + +/** + * @brief Walk all entries in a FIB table + * N.B: This is NOT safe to deletes. 
If you need to delete walk the whole + * table and store elements in a vector, then delete the elements + */ +extern void fib_table_walk(u32 fib_index, + fib_protocol_t proto, + fib_table_walk_fn_t fn, + void *ctx); + #endif diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c index 5083db26872..1c4a63a20d9 100644 --- a/src/vnet/fib/fib_test.c +++ b/src/vnet/fib/fib_test.c @@ -663,14 +663,15 @@ fib_test_v4 (void) /* * at this stage there are 5 entries in the test FIB (plus 5 in the default), * all of which are special sourced and so none of which share path-lists. - * There are also 6 entries, and 6 non-shared path-lists, in the v6 default - * table + * There are also 2 entries, and 2 non-shared path-lists, in the v6 default + * table, and 4 path-lists in the v6 MFIB table */ -#define NBR (5+5+6) +#define ENBR (5+5+2) +#define PNBR (5+5+6) FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -754,9 +755,9 @@ fib_test_v4 (void) * +2 interface routes +2 non-shared path-lists */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -808,9 +809,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+3 == fib_path_list_pool_size()), "path list pool size is%d", + 
FIB_TEST((PNBR+3 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -838,9 +839,9 @@ fib_test_v4 (void) * -1 shared-path-list */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1018,9 +1019,9 @@ fib_test_v4 (void) * +2 adj-fibs, and their non-shared path-lists */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1054,9 +1055,9 @@ fib_test_v4 (void) * +1 entry and a shared path-list */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* 1.1.2.0/24 */ @@ -1087,9 +1088,9 @@ fib_test_v4 (void) * +1 entry only */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + 
FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1127,9 +1128,9 @@ fib_test_v4 (void) * +1 shared-pathlist */ FIB_TEST((2 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1158,9 +1159,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB is %d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+6 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1203,9 +1204,9 @@ fib_test_v4 (void) */ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); fib_prefix_t bgp_101_pfx = { @@ -1239,9 +1240,9 @@ fib_test_v4 (void) */ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + 
FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1368,9 +1369,9 @@ fib_test_v4 (void) */ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+7 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -1983,9 +1984,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2030,9 +2031,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2080,9 +2081,9 @@ fib_test_v4 (void) */ FIB_TEST((5 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+9 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+9 
== fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+14 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+14 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2118,9 +2119,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+13 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2154,9 +2155,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2185,9 +2186,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+12 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2215,9 +2216,9 @@ fib_test_v4 (void) FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+7 == 
fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2245,9 +2246,9 @@ fib_test_v4 (void) */ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+9 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+9 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2355,9 +2356,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2380,9 +2381,9 @@ fib_test_v4 (void) */ FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+6 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+8 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); @@ -2428,9 +2429,9 @@ fib_test_v4 (void) */ FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list 
pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, @@ -2492,9 +2493,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); @@ -2562,9 +2563,9 @@ fib_test_v4 (void) */ FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+8 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+10 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2753,9 +2754,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -2830,9 +2831,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == 
fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3180,9 +3181,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3247,9 +3248,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3297,9 +3298,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3342,9 +3343,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == 
fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3438,9 +3439,9 @@ fib_test_v4 (void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3482,9 +3483,9 @@ fib_test_v4 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+5 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); fib_table_entry_delete(fib_index, @@ -3493,9 +3494,9 @@ fib_test_v4 (void) FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+4 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+4 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3577,9 +3578,9 @@ fib_test_v4 (void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - 
FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR+2 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3619,9 +3620,9 @@ fib_test_v4 (void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3644,11 +3645,11 @@ fib_test_v4 (void) FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NBR-5 == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNBR-5 == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NBR-5 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENBR-5 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); - FIB_TEST((NBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d", + FIB_TEST((ENBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d", pool_elts(fib_urpf_list_pool)); return 0; @@ -3720,13 +3721,15 @@ fib_test_v6 (void) /* * At this stage there is one v4 FIB with 5 routes and two v6 FIBs - * each with 6 entries. All entries are special so no path-list sharing. + * each with 2 entries and a v6 mfib with 4 path-lists. + * All entries are special so no path-list sharing. 
*/ -#define NPS (5+6+6) +#define ENPS (5+4) +#define PNPS (5+4+4) FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is %d", + FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is %d", fib_path_list_pool_size()); - FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3816,9 +3819,9 @@ fib_test_v6 (void) * +2 entries. +2 unshared path-lists */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); - FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3862,9 +3865,9 @@ fib_test_v6 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+3 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+3 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -3890,9 +3893,9 @@ fib_test_v6 (void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+2 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4006,9 +4009,9 @@ fib_test_v6 
(void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+4 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+4 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+4 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+4 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4073,9 +4076,9 @@ fib_test_v6 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4201,9 +4204,9 @@ fib_test_v6 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+5 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+6 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4275,9 +4278,9 @@ fib_test_v6 (void) */ FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); @@ -4401,9 +4404,9 @@ fib_test_v6 (void) */ FIB_TEST((1 == 
fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS+7 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS+8 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4501,9 +4504,9 @@ fib_test_v6 (void) */ FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); /* @@ -4513,9 +4516,9 @@ fib_test_v6 (void) FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", fib_path_list_db_size()); - FIB_TEST((NPS-6 == fib_path_list_pool_size()), "path list pool size is%d", + FIB_TEST((PNPS-2 == fib_path_list_pool_size()), "path list pool size is%d", fib_path_list_pool_size()); - FIB_TEST((NPS-6 == fib_entry_pool_size()), "entry pool size is %d", + FIB_TEST((ENPS-2 == fib_entry_pool_size()), "entry pool size is %d", fib_entry_pool_size()); adj_unlock(ai_02); diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c index b66e71940a5..3ecb38e8b6c 100644 --- a/src/vnet/fib/fib_types.c +++ b/src/vnet/fib/fib_types.c @@ -290,8 +290,10 @@ fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct) switch (fct) { case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: return (VNET_LINK_IP4); case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: return (VNET_LINK_IP6); case FIB_FORW_CHAIN_TYPE_ETHERNET: return (VNET_LINK_ETHERNET); @@ -313,8 +315,10 @@ 
fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct) switch (fct) { case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: return (DPO_PROTO_IP4); case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: return (DPO_PROTO_IP6); case FIB_FORW_CHAIN_TYPE_ETHERNET: return (DPO_PROTO_ETHERNET); diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h index 0a15fef1b28..c51bc9c0722 100644 --- a/src/vnet/fib/fib_types.h +++ b/src/vnet/fib/fib_types.h @@ -95,6 +95,14 @@ typedef enum fib_forward_chain_type_t_ { * option is converted into one of the other three internally. */ FIB_FORW_CHAIN_TYPE_MPLS_EOS, + /** + * Contribute an object that is to be used to forward IP4 packets + */ + FIB_FORW_CHAIN_TYPE_MCAST_IP4, + /** + * Contribute an object that is to be used to forward IP6 packets + */ + FIB_FORW_CHAIN_TYPE_MCAST_IP6, /** * Contribute an object that is to be used to forward Ethernet packets. * This is last in the list since it is not valid for many FIB objects, @@ -107,6 +115,8 @@ typedef enum fib_forward_chain_type_t_ { [FIB_FORW_CHAIN_TYPE_ETHERNET] = "ethernet", \ [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \ [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \ + [FIB_FORW_CHAIN_TYPE_MCAST_IP4] = "multicast-ip4", \ + [FIB_FORW_CHAIN_TYPE_MCAST_IP6] = "multicast-ip6", \ [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \ [FIB_FORW_CHAIN_TYPE_MPLS_EOS] = "mpls-eos", \ } @@ -263,6 +273,10 @@ typedef enum fib_route_path_flags_t_ * Recursion constraint of via an attahced prefix */ FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1), + /** + * A for-us/local path + */ + FIB_ROUTE_PATH_LOCAL = (1 << 2), } fib_route_path_flags_t; /** diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c index 263812ade40..b484442026a 100644 --- a/src/vnet/fib/fib_urpf_list.c +++ b/src/vnet/fib/fib_urpf_list.c @@ -29,16 +29,24 @@ format_fib_urpf_list (u8 *s, va_list args) u32 *swi; ui = va_arg(args, index_t); - 
urpf = fib_urpf_list_get(ui); - s = format(s, "uPRF-list:%d len:%d itfs:[", - ui, vec_len(urpf->furpf_itfs)); + if (INDEX_INVALID != ui) + { + urpf = fib_urpf_list_get(ui); + + s = format(s, "uPRF-list:%d len:%d itfs:[", + ui, vec_len(urpf->furpf_itfs)); - vec_foreach(swi, urpf->furpf_itfs) + vec_foreach(swi, urpf->furpf_itfs) + { + s = format(s, "%d, ", *swi); + } + s = format(s, "]"); + } + else { - s = format(s, "%d, ", *swi); + s = format(s, "uRPF-list: None"); } - s = format(s, "]"); return (s); } diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c index f6ebce00837..e8211c80d20 100644 --- a/src/vnet/fib/ip4_fib.c +++ b/src/vnet/fib/ip4_fib.c @@ -378,16 +378,13 @@ ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE } -static void -ip4_fib_table_show_all (ip4_fib_t *fib, - vlib_main_t * vm) +void +ip4_fib_table_walk (ip4_fib_t *fib, + fib_table_walk_fn_t fn, + void *ctx) { - fib_node_index_t *fib_entry_indicies; - fib_node_index_t *fib_entry_index; int i; - fib_entry_indicies = NULL; - for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) { uword * hash = fib->fib_entry_by_dst_address[i]; @@ -398,14 +395,45 @@ ip4_fib_table_show_all (ip4_fib_t *fib, hash_foreach_pair (p, hash, ({ - vec_add1(fib_entry_indicies, p->value[0]); + fn(p->value[0], ctx); })); } } +} + +/** + * Walk show context + */ +typedef struct ip4_fib_show_walk_ctx_t_ +{ + fib_node_index_t *ifsw_indicies; +} ip4_fib_show_walk_ctx_t; + +static int +ip4_fib_show_walk_cb (fib_node_index_t fib_entry_index, + void *arg) +{ + ip4_fib_show_walk_ctx_t *ctx = arg; + + vec_add1(ctx->ifsw_indicies, fib_entry_index); + + return (1); +} + +static void +ip4_fib_table_show_all (ip4_fib_t *fib, + vlib_main_t * vm) +{ + ip4_fib_show_walk_ctx_t ctx = { + .ifsw_indicies = NULL, + }; + fib_node_index_t *fib_entry_index; - vec_sort_with_function(fib_entry_indicies, fib_entry_cmp_for_sort); + ip4_fib_table_walk(fib, 
ip4_fib_show_walk_cb, &ctx); + vec_sort_with_function(ctx.ifsw_indicies, + fib_entry_cmp_for_sort); - vec_foreach(fib_entry_index, fib_entry_indicies) + vec_foreach(fib_entry_index, ctx.ifsw_indicies) { vlib_cli_output(vm, "%U", format_fib_entry, @@ -413,7 +441,7 @@ ip4_fib_table_show_all (ip4_fib_t *fib, FIB_ENTRY_FORMAT_BRIEF); } - vec_free(fib_entry_indicies); + vec_free(ctx.ifsw_indicies); } static void diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h index cf312cdc629..a8dc68b5d5f 100644 --- a/src/vnet/fib/ip4_fib.h +++ b/src/vnet/fib/ip4_fib.h @@ -64,6 +64,15 @@ extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, const ip4_address_t * dst); +/** + * @brief Walk all entries in a FIB table + * N.B: This is NOT safe to deletes. If you need to delete walk the whole + * table and store elements in a vector, then delete the elements + */ +extern void ip4_fib_table_walk(ip4_fib_t *fib, + fib_table_walk_fn_t fn, + void *ctx); + /** * @brief Get the FIB at the given index */ diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c index d5b9bdcbd52..343ff55e80a 100644 --- a/src/vnet/fib/ip6_fib.c +++ b/src/vnet/fib/ip6_fib.c @@ -38,57 +38,6 @@ vnet_ip6_fib_init (u32 fib_index) FIB_ENTRY_FLAG_DROP, ADJ_INDEX_INVALID); - /* - * Add ff02::1:ff00:0/104 via local route for all tables. - * This is required for neighbor discovery to work. 
- */ - ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0); - pfx.fp_len = 104; - fib_table_entry_special_add(fib_index, - &pfx, - FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - - /* - * Add all-routers multicast address via local route for all tables - */ - ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_routers); - pfx.fp_len = 128; - fib_table_entry_special_add(fib_index, - &pfx, - FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - - /* - * Add all-nodes multicast address via local route for all tables - */ - ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_hosts); - pfx.fp_len = 128; - fib_table_entry_special_add(fib_index, - &pfx, - FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - - /* - * Add all-mldv2 multicast address via local route for all tables - */ - ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_mldv2_routers); - pfx.fp_len = 128; - fib_table_entry_special_add(fib_index, - &pfx, - FIB_SOURCE_SPECIAL, - FIB_ENTRY_FLAG_LOCAL, - ADJ_INDEX_INVALID); - /* * all link local for us */ @@ -512,27 +461,68 @@ ip6_fib_table_fwding_dpo_remove (u32 fib_index, if (--table->dst_address_length_refcounts[len] == 0) { table->non_empty_dst_address_length_bitmap = - clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, 128 - len, 0); compute_prefix_lengths_in_search_order (table); } } +/** + * @brief Context when walking the IPv6 table. 
Since all VRFs are in the + * same hash table, we need to filter only those we need as we walk + */ +typedef struct ip6_fib_walk_ctx_t_ +{ + u32 i6w_fib_index; + fib_table_walk_fn_t i6w_fn; + void *i6w_ctx; +} ip6_fib_walk_ctx_t; + +static int +ip6_fib_walk_cb (clib_bihash_kv_24_8_t * kvp, + void *arg) +{ + ip6_fib_walk_ctx_t *ctx = arg; + + if ((kvp->key[2] >> 32) == ctx->i6w_fib_index) + { + ctx->i6w_fn(kvp->value, ctx->i6w_ctx); + } + + return (1); +} + +void +ip6_fib_table_walk (u32 fib_index, + fib_table_walk_fn_t fn, + void *arg) +{ + ip6_fib_walk_ctx_t ctx = { + .i6w_fib_index = fib_index, + .i6w_fn = fn, + .i6w_ctx = arg, + }; + ip6_main_t *im = &ip6_main; + + BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + ip6_fib_walk_cb, + &ctx); + +} + typedef struct ip6_fib_show_ctx_t_ { - u32 fib_index; fib_node_index_t *entries; } ip6_fib_show_ctx_t; -static void -ip6_fib_table_collect_entries (clib_bihash_kv_24_8_t * kvp, - void *arg) +static int +ip6_fib_table_show_walk (fib_node_index_t fib_entry_index, + void *arg) { ip6_fib_show_ctx_t *ctx = arg; - if ((kvp->key[2] >> 32) == ctx->fib_index) - { - vec_add1(ctx->entries, kvp->value); - } + vec_add1(ctx->entries, fib_entry_index); + + return (1); } static void @@ -541,15 +531,10 @@ ip6_fib_table_show_all (ip6_fib_t *fib, { fib_node_index_t *fib_entry_index; ip6_fib_show_ctx_t ctx = { - .fib_index = fib->index, .entries = NULL, }; - ip6_main_t *im = &ip6_main; - - BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, - ip6_fib_table_collect_entries, - &ctx); + ip6_fib_table_walk(fib->index, ip6_fib_table_show_walk, &ctx); vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort); vec_foreach(fib_entry_index, ctx.entries) diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h index f6af993a3c2..78da3746f17 100644 --- a/src/vnet/fib/ip6_fib.h +++ b/src/vnet/fib/ip6_fib.h @@ -54,9 +54,18 @@ u32 
ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im, u32 sw_if_index, const ip6_address_t * dst); u32 ip6_fib_table_fwding_lookup(ip6_main_t * im, - u32 fib_index, + u32 fib_index, const ip6_address_t * dst); +/** + * @brief Walk all entries in a FIB table + * N.B: This is NOT safe to deletes. If you need to delete walk the whole + * table and store elements in a vector, then delete the elements + */ +extern void ip6_fib_table_walk(u32 fib_index, + fib_table_walk_fn_t fn, + void *ctx); + /** * @biref return the DPO that the LB stacks on. */ diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c index 6a9b1ac2989..cc657975d0f 100644 --- a/src/vnet/fib/mpls_fib.c +++ b/src/vnet/fib/mpls_fib.c @@ -44,10 +44,11 @@ * Switching between schemes based on observed/measured action similarity is not * considered on the grounds of complexity and flip-flopping. * - * VPP mantra - favour performance over memory. We choose a 21 bit key. + * VPP mantra - favour performance over memory. We choose a 21 bit key. */ #include +#include #include #include #include @@ -342,6 +343,20 @@ mpls_fib_table_get_flow_hash_config (u32 fib_index) return (0); } +void +mpls_fib_table_walk (mpls_fib_t *mpls_fib, + fib_table_walk_fn_t fn, + void *ctx) +{ + fib_node_index_t lfei; + mpls_label_t key; + + hash_foreach(key, lfei, mpls_fib->mf_entries, + ({ + fn(lfei, ctx); + })); +} + static void mpls_fib_table_show_all (const mpls_fib_t *mpls_fib, vlib_main_t * vm) diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h index 93ae4623016..e2ef9253a9d 100644 --- a/src/vnet/fib/mpls_fib.h +++ b/src/vnet/fib/mpls_fib.h @@ -70,6 +70,15 @@ extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf, mpls_label_t label, mpls_eos_bit_t eos); +/** + * @brief Walk all entries in a FIB table + * N.B: This is NOT safe to deletes. 
If you need to delete walk the whole + * table and store elements in a vector, then delete the elements + */ +extern void mpls_fib_table_walk(mpls_fib_t *fib, + fib_table_walk_fn_t fn, + void *ctx); + /** * @brief * Lookup a label and EOS bit in the MPLS_FIB table to retrieve the diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api index 5371696c339..65f6e7a79ed 100644 --- a/src/vnet/ip/ip.api +++ b/src/vnet/ip/ip.api @@ -395,6 +395,41 @@ define ip_add_del_route_reply i32 retval; }; +/** \brief Add / del route request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the new vlan's parent interface + @param vrf_id - fib table /vrf associated with the route + + FIXME +*/ +define ip_mroute_add_del +{ + u32 client_index; + u32 context; + u32 next_hop_sw_if_index; + u32 table_id; + u32 entry_flags; + u32 itf_flags; + u16 grp_address_length; + u8 create_vrf_if_needed; + u8 is_add; + u8 is_ipv6; + u8 is_local; + u8 grp_address[16]; + u8 src_address[16]; +}; + +/** \brief Reply for add / del mroute request + @param context - returned sender context, to match reply w/ request + @param retval - return code +*/ +define ip_mroute_add_del_reply +{ + u32 context; + i32 retval; +}; + define ip_address_details { u32 client_index; @@ -424,6 +459,24 @@ define ip_dump u8 is_ipv6; }; +define mfib_signal_dump +{ + u32 client_index; + u32 context; +}; + +define mfib_signal_details +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 table_id; + u16 grp_address_len; + u8 grp_address[16]; + u8 src_address[16]; + u16 ip_packet_len; + u8 ip_packet_data[256]; +}; /* * Local Variables: diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h index cc33dff468f..b184fbaef42 100644 --- a/src/vnet/ip/ip4.h +++ b/src/vnet/ip/ip4.h @@ -68,6 +68,18 @@ typedef struct ip4_fib_t } ip4_fib_t; +typedef struct ip4_mfib_t +{ + /* Hash table for each prefix length mapping. 
*/ + uword *fib_entry_by_dst_address[65]; + + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; +} ip4_mfib_t; + struct ip4_main_t; typedef void (ip4_add_del_interface_address_function_t) @@ -99,11 +111,17 @@ typedef struct ip4_main_t /** Vector of FIBs. */ struct fib_table_t_ *fibs; + /** Vector of MFIBs. */ + struct mfib_table_t_ *mfibs; + u32 fib_masks[33]; /** Table index indexed by software interface. */ u32 *fib_index_by_sw_if_index; + /** Table index indexed by software interface. */ + u32 *mfib_index_by_sw_if_index; + /* IP4 enabled count by software interface */ u8 *ip_enabled_by_sw_if_index; @@ -111,6 +129,10 @@ typedef struct ip4_main_t ID space is not necessarily dense; index space is dense. */ uword *fib_index_by_table_id; + /** Hash table mapping table id to multicast fib index. + ID space is not necessarily dense; index space is dense. */ + uword *mfib_index_by_table_id; + /** Functions to call when interface address changes. */ ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; @@ -140,7 +162,9 @@ extern ip4_main_t ip4_main; /** Global ip4 input node. Errors get attached to ip4 input node. 
*/ extern vlib_node_registration_t ip4_input_node; extern vlib_node_registration_t ip4_lookup_node; +extern vlib_node_registration_t ip4_local_node; extern vlib_node_registration_t ip4_rewrite_node; +extern vlib_node_registration_t ip4_rewrite_mcast_node; extern vlib_node_registration_t ip4_rewrite_local_node; extern vlib_node_registration_t ip4_arp_node; extern vlib_node_registration_t ip4_glean_node; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index 87b345bd3f5..8081b34b3dc 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -50,6 +50,7 @@ #include #include #include +#include /* for mFIB table and entry creation */ /** * @file @@ -714,16 +715,17 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, ip_interface_address_t *ia = 0; ip4_address_t *result = 0; - foreach_ip_interface_address (lm, ia, sw_if_index, - 1 /* honor unnumbered */ , - ( - { - ip4_address_t * a = - ip_interface_address_get_address (lm, ia); - result = a; - break; - } - )); + /* *INDENT-OFF* */ + foreach_ip_interface_address + (lm, ia, sw_if_index, + 1 /* honor unnumbered */ , + ({ + ip4_address_t * a = + ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + /* *INDENT-OFF* */ if (result_ia) *result_ia = result ? 
ia : 0; return result; @@ -748,9 +750,19 @@ ip4_add_interface_routes (u32 sw_if_index, { fib_node_index_t fei; - fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */ - sw_if_index, ~0, // invalid FIB index - 1, NULL, // no out-label stack + fei = fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + /* No next-hop address */ + NULL, + sw_if_index, + // invalid FIB index + ~0, + 1, + // no out-label stack + NULL, FIB_ROUTE_PATH_FLAG_NONE); a->neighbor_probe_adj_index = fib_entry_get_adj (fei); } @@ -778,8 +790,16 @@ ip4_add_interface_routes (u32 sw_if_index, } } - fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index - 1, NULL, // no out-label stack + fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + &pfx.fp_addr, + sw_if_index, + // invalid FIB index + ~0, + 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } @@ -827,9 +847,10 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, !is_enable, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index, - !is_enable, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", + "ip4-mfib-forward-lookup", + sw_if_index, is_enable, 0, 0); } static clib_error_t * @@ -855,36 +876,37 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. */ + /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict with an existing address. 
*/ ip_interface_address_t *ia; - foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */ , - ( - { - ip4_address_t * x = - ip_interface_address_get_address - (&im->lookup_main, ia); - if (ip4_destination_matches_route - (im, address, x, ia->address_length) - || - ip4_destination_matches_route (im, - x, - address, - address_length)) - return - clib_error_create - ("failed to add %U which conflicts with %U for interface %U", - format_ip4_address_and_length, address, - address_length, - format_ip4_address_and_length, x, - ia->address_length, - format_vnet_sw_if_index_name, vnm, - sw_if_index);} - )); + foreach_ip_interface_address + (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip4_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + if (ip4_destination_matches_route + (im, address, x, ia->address_length) || + ip4_destination_matches_route (im, + x, + address, + address_length)) + return + clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, + address_length, + format_ip4_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + })); } + /* *INDENT-ON* */ elts_before = pool_elts (lm->if_address_pool); @@ -918,9 +940,10 @@ done: } clib_error_t * -ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, - ip4_address_t * address, u32 address_length, - u32 is_del) +ip4_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, u32 is_del) { return ip4_add_del_interface_address_internal (vm, sw_if_index, address, address_length, is_del); @@ -1027,13 +1050,13 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) = { .arc_name = "ip4-multicast", .node_name = "vpath-input-ip4", - .runs_before = VNET_FEATURES ("ip4-lookup-multicast"), + .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; VNET_FEATURE_INIT 
(ip4_lookup_mc, static) = { .arc_name = "ip4-multicast", - .node_name = "ip4-lookup-multicast", + .node_name = "ip4-mfib-forward-lookup", .runs_before = VNET_FEATURES ("ip4-drop"), }; @@ -1083,6 +1106,7 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) /* Fill in lookup tables with default table (0). */ vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, is_add, 0, 0); @@ -1123,6 +1147,7 @@ ip4_lookup_init (vlib_main_t * vm) /* Create FIB with index 0 and table id of 0. */ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); { pg_node_t *pn; @@ -1341,27 +1366,35 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_drop_node, static) = { - .function = ip4_drop,.name = "ip4-drop",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes = - 1,.next_nodes = - { - [0] = "error-drop",} -,}; + .function = ip4_drop,. + name = "ip4-drop", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop); VLIB_REGISTER_NODE (ip4_punt_node, static) = { - .function = ip4_punt,.name = "ip4-punt",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes = - 1,.next_nodes = - { - [0] = "error-punt",} -,}; + .function = ip4_punt, + .name = "ip4-punt", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt); +/* *INDENT-ON */ /* Compute TCP/UDP/ICMP4 checksum in software. 
*/ u16 @@ -1500,13 +1533,15 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? - fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); - fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? - fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; + fib_index1 = + (vnet_buffer (p1)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; mtrie0 = &ip4_fib_get (fib_index0)->mtrie; mtrie1 = &ip4_fib_get (fib_index1)->mtrie; @@ -1726,8 +1761,9 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? - fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? 
fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; mtrie0 = &ip4_fib_get (fib_index0)->mtrie; @@ -1838,10 +1874,11 @@ ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) return frame->n_vectors; } -VLIB_REGISTER_NODE (ip4_local_node, static) = +VLIB_REGISTER_NODE (ip4_local_node) = { .function = ip4_local,.name = "ip4-local",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes = + sizeof (u32),.format_trace = + format_ip4_forward_next_trace,.n_next_nodes = IP_LOCAL_N_NEXT,.next_nodes = { [IP_LOCAL_NEXT_DROP] = "error-drop", @@ -2022,8 +2059,8 @@ ip4_arp_inline (vlib_main_t * vm, * Can happen if the control-plane is programming tables * with traffic flowing; at least that's today's lame excuse. */ - if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) || - (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)) + if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) + || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)) { p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; } @@ -2196,15 +2233,17 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) { vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; return clib_error_return - (0, "no matching interface address for destination %U (interface %U)", - format_ip4_address, dst, - format_vnet_sw_if_index_name, vnm, sw_if_index); + (0, + "no matching interface address for destination %U (interface %U)", + format_ip4_address, dst, format_vnet_sw_if_index_name, vnm, + sw_if_index); } adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index); h = - vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, + vlib_packet_template_get_packet (vm, + &im->ip4_arp_request_packet_template, &bi); hi = vnet_get_sup_hw_interface (vnm, sw_if_index); @@ -2243,7 +2282,7 @@ typedef enum always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - 
vlib_frame_t * frame, int is_midchain) + vlib_frame_t * frame, int is_midchain, int is_mcast) { ip_lookup_main_t *lm = &ip4_main.lookup_main; u32 *from = vlib_frame_vector_args (frame); @@ -2457,6 +2496,14 @@ ip4_rewrite_inline (vlib_main_t * vm, adj0->sub_type.midchain.fixup_func (vm, adj0, p0); adj1->sub_type.midchain.fixup_func (vm, adj1, p1); } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1); + vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 1); + } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, @@ -2530,6 +2577,13 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Guess we are only writing on simple Ethernet header. */ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1); + } /* Update packet buffer attributes/set output interface. 
*/ rw_len0 = adj0[0].rewrite_header.data_bytes; @@ -2624,36 +2678,58 @@ static uword ip4_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_rewrite_inline (vm, node, frame, 0); + return ip4_rewrite_inline (vm, node, frame, 0, 0); } static uword ip4_midchain (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip4_rewrite_inline (vm, node, frame, 1); + return ip4_rewrite_inline (vm, node, frame, 1, 0); } - -VLIB_REGISTER_NODE (ip4_rewrite_node) = +static uword +ip4_rewrite_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) { - .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size = - sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes = - 2,.next_nodes = - { - [IP4_REWRITE_NEXT_DROP] = "error-drop", - [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",} -,}; + return ip4_rewrite_inline (vm, node, frame, 0, 1); +} -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite); +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_rewrite_node) = { + .function = ip4_rewrite, + .name = "ip4-rewrite", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) -VLIB_REGISTER_NODE (ip4_midchain_node) = -{ -.function = ip4_midchain,.name = "ip4-midchain",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of = - "ip4-rewrite",}; +VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { + .function = ip4_rewrite_mcast, + .name = "ip4-rewrite-mcast", + .vector_size = sizeof (u32), + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast) + +VLIB_REGISTER_NODE (ip4_midchain_node) = { + .function = ip4_midchain, + .name = "ip4-midchain", + 
.vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-rewrite", +}; VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); +/* *INDENT-ON */ static clib_error_t * add_del_interface_table (vlib_main_t * vm, @@ -2695,6 +2771,11 @@ add_del_interface_table (vlib_main_t * vm, // vec_validate (im->fib_index_by_sw_if_index, sw_if_index); im->fib_index_by_sw_if_index[sw_if_index] = fib_index; + + fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); + im->mfib_index_by_sw_if_index[sw_if_index] = fib_index; } done: @@ -2730,243 +2811,6 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = }; /* *INDENT-ON* */ - -static uword -ip4_lookup_multicast (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - ip4_main_t *im = &ip4_main; - vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; - u32 n_left_from, n_left_to_next, *from, *to_next; - ip_lookup_next_t next; - u32 cpu_index = os_get_cpu_number (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next = node->cached_next_index; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *p0, *p1; - u32 pi0, pi1, lb_index0, lb_index1, wrong_next; - ip_lookup_next_t next0, next1; - ip4_header_t *ip0, *ip1; - u32 fib_index0, fib_index1; - const dpo_id_t *dpo0, *dpo1; - const load_balance_t *lb0, *lb1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - - lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), - &ip0->dst_address); - lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1), - &ip1->dst_address); - - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - - vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash - (ip0, lb0->lb_hash_config); - - vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash - (ip1, lb1->lb_hash_config); - - dpo0 = load_balance_get_bucket_i (lb0, - (vnet_buffer (p0)->ip.flow_hash & - (lb0->lb_n_buckets_minus_1))); - dpo1 = load_balance_get_bucket_i (lb1, - (vnet_buffer (p1)->ip.flow_hash & - (lb1->lb_n_buckets_minus_1))); - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - next1 = dpo1->dpoi_next_node; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - - if (1) /* $$$$$$ HACK FIXME */ 
- vlib_increment_combined_counter - (cm, cpu_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0)); - if (1) /* $$$$$$ HACK FIXME */ - vlib_increment_combined_counter - (cm, cpu_index, lb_index1, 1, - vlib_buffer_length_in_chain (vm, p1)); - - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - wrong_next = (next0 != next) + 2 * (next1 != next); - if (PREDICT_FALSE (wrong_next != 0)) - { - switch (wrong_next) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - /* A B B */ - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next1; - vlib_get_next_frame (vm, node, next, to_next, - n_left_to_next); - } - } - } - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - u32 pi0, lb_index0; - ip_lookup_next_t next0; - u32 fib_index0; - const dpo_id_t *dpo0; - const load_balance_t *lb0; - - pi0 = from[0]; - to_next[0] = pi0; - - p0 = vlib_get_buffer (vm, pi0); - - ip0 = vlib_buffer_get_current (p0); - - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ? 
- fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), - &ip0->dst_address); - - lb0 = load_balance_get (lb_index0); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash - (ip0, lb0->lb_hash_config); - - dpo0 = load_balance_get_bucket_i (lb0, - (vnet_buffer (p0)->ip.flow_hash & - (lb0->lb_n_buckets_minus_1))); - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - if (1) /* $$$$$$ HACK FIXME */ - vlib_increment_combined_counter - (cm, cpu_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0)); - - from += 1; - to_next += 1; - n_left_to_next -= 1; - n_left_from -= 1; - - if (PREDICT_FALSE (next0 != next)) - { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next0; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; - } - } - - vlib_put_next_frame (vm, node, next, n_left_to_next); - } - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - return frame->n_vectors; -} - -VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) = -{ -.function = ip4_lookup_multicast,.name = - "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of = - "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, - ip4_lookup_multicast); - -VLIB_REGISTER_NODE (ip4_multicast_node, static) = -{ - .function = ip4_drop,.name = "ip4-multicast",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes = - 1,.next_nodes = - { - [0] = "error-drop",} -,}; - int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) { diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c index 1cf5e0b8517..ba200a9fe13 100644 --- 
a/src/vnet/ip/ip4_input.c +++ b/src/vnet/ip/ip4_input.c @@ -426,7 +426,7 @@ VLIB_REGISTER_NODE (ip4_input_node) = { [IP4_INPUT_NEXT_DROP] = "error-drop", [IP4_INPUT_NEXT_PUNT] = "error-punt", [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", - [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup", [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, @@ -448,7 +448,7 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { [IP4_INPUT_NEXT_DROP] = "error-drop", [IP4_INPUT_NEXT_PUNT] = "error-punt", [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", - [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup", [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index f493db017ec..6fecd42dc61 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -49,6 +49,7 @@ #include #include #include +#include /* * Default size of the ip6 fib hash table @@ -75,6 +76,21 @@ typedef struct flow_hash_config_t flow_hash_config; } ip6_fib_t; +typedef struct ip6_mfib_t +{ + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; + + /* + * Pointer to the top of a radix tree. + * This cannot be realloc'd, hence it cannot be inlined with this table + */ + struct radix_node_head *rhead; +} ip6_mfib_t; + struct ip6_main_t; typedef void (ip6_add_del_interface_address_function_t) @@ -137,12 +153,18 @@ typedef struct ip6_main_t /* Pool of FIBs. */ struct fib_table_t_ *fibs; + /** Vector of MFIBs. */ + struct mfib_table_t_ *mfibs; + /* Network byte orders subnet mask for each prefix length */ ip6_address_t fib_masks[129]; /* Table index indexed by software interface. */ u32 *fib_index_by_sw_if_index; + /** Table index indexed by software interface. 
*/ + u32 *mfib_index_by_sw_if_index; + /* IP6 enabled count by software interface */ u8 *ip_enabled_by_sw_if_index; @@ -150,6 +172,10 @@ typedef struct ip6_main_t ID space is not necessarily dense; index space is dense. */ uword *fib_index_by_table_id; + /** Hash table mapping table id to multicast fib index. + ID space is not necessarily dense; index space is dense. */ + uword *mfib_index_by_table_id; + /* Hash table mapping interface rewrite adjacency index by sw if index. */ uword *interface_route_adj_index_by_sw_if_index; @@ -185,6 +211,7 @@ extern ip6_main_t ip6_main; /* Global ip6 input node. Errors get attached to ip6 input node. */ extern vlib_node_registration_t ip6_input_node; extern vlib_node_registration_t ip6_rewrite_node; +extern vlib_node_registration_t ip6_rewrite_mcast_node; extern vlib_node_registration_t ip6_rewrite_local_node; extern vlib_node_registration_t ip6_discover_neighbor_node; extern vlib_node_registration_t ip6_glean_node; diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index 232f72833fd..ac47b3ad1fb 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -42,8 +42,8 @@ #include /* for ethernet_header_t */ #include /* for srp_hw_interface_class */ #include -#include #include +#include #include #include @@ -411,11 +411,14 @@ ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) return; } + if (sw_if_index != 0) + ip6_mfib_interface_enable_disable (sw_if_index, is_enable); + vnet_feature_enable_disable ("ip6-unicast", "ip6-lookup", sw_if_index, is_enable, 0, 0); - vnet_feature_enable_disable ("ip6-multicast", "ip6-lookup", sw_if_index, - is_enable, 0, 0); + vnet_feature_enable_disable ("ip6-multicast", "ip6-mfib-forward-lookup", + sw_if_index, is_enable, 0, 0); } @@ -457,6 +460,8 @@ ip6_add_del_interface_address (vlib_main_t * vm, ip6_address_fib_t ip6_af, *addr_fib = 0; vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); + 
ip6_addr_fib_init (&ip6_af, address, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip6_af); @@ -611,12 +616,12 @@ VNET_FEATURE_ARC_INIT (ip6_multicast, static) = VNET_FEATURE_INIT (ip6_vpath_mc, static) = { .arc_name = "ip6-multicast", .node_name = "vpath-input-ip6", - .runs_before = VNET_FEATURES ("ip6-lookup"), + .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), }; VNET_FEATURE_INIT (ip6_mc_lookup, static) = { .arc_name = "ip6-multicast", - .node_name = "ip6-lookup", + .node_name = "ip6-mfib-forward-lookup", .runs_before = VNET_FEATURES ("ip6-drop"), }; @@ -1122,22 +1127,6 @@ VLIB_REGISTER_NODE (ip6_punt_node, static) = VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt); -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip6_multicast_node, static) = -{ - .function = ip6_drop, - .name = "ip6-multicast", - .vector_size = sizeof (u32), - .format_trace = format_ip6_forward_next_trace, - .n_next_nodes = 1, - .next_nodes = - { - [0] = "error-drop", - }, -}; - -/* *INDENT-ON* */ - /* Compute TCP/UDP/ICMP6 checksum in software. 
*/ u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, @@ -1977,7 +1966,7 @@ typedef enum always_inline uword ip6_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_midchain) + vlib_frame_t * frame, int is_midchain, int is_mcast) { ip_lookup_main_t *lm = &ip6_main.lookup_main; u32 *from = vlib_frame_vector_args (frame); @@ -2165,6 +2154,14 @@ ip6_rewrite_inline (vlib_main_t * vm, adj0->sub_type.midchain.fixup_func (vm, adj0, p0); adj1->sub_type.midchain.fixup_func (vm, adj1, p1); } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0); + vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 0); + } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, @@ -2265,6 +2262,10 @@ ip6_rewrite_inline (vlib_main_t * vm, { adj0->sub_type.midchain.fixup_func (vm, adj0, p0); } + if (is_mcast) + { + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 0); + } p0->error = error_node->errors[error0]; @@ -2292,16 +2293,21 @@ static uword ip6_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_rewrite_inline (vm, node, frame, - /* midchain */ 0); + return ip6_rewrite_inline (vm, node, frame, 0, 0); +} + +static uword +ip6_rewrite_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, 0, 1); } static uword ip6_midchain (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_rewrite_inline (vm, node, frame, - /* midchain */ 1); + return ip6_rewrite_inline (vm, node, frame, 1, 0); } /* *INDENT-OFF* */ @@ -2335,10 +2341,22 @@ VLIB_REGISTER_NODE (ip6_rewrite_node) = VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite); +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = +{ + .function = ip6_rewrite_mcast, + .name = "ip6-rewrite-mcast", + 
.vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); + /* * Hop-by-Hop handling */ - ip6_hop_by_hop_main_t ip6_hop_by_hop_main; #define foreach_ip6_hop_by_hop_error \ @@ -2346,13 +2364,15 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \ _(FORMAT, "incorrectly formatted hop-by-hop options") \ _(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options") +/* *INDENT-OFF* */ typedef enum { #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, foreach_ip6_hop_by_hop_error #undef _ - IP6_HOP_BY_HOP_N_ERROR, + IP6_HOP_BY_HOP_N_ERROR, } ip6_hop_by_hop_error_t; +/* *INDENT-ON* */ /* * Primary h-b-h handler trace support @@ -2878,6 +2898,7 @@ ip6_lookup_init (vlib_main_t * vm) /* Create FIB with index 0 and table id of 0. */ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0); { pg_node_t *pn; @@ -2955,6 +2976,12 @@ add_del_ip6_interface_table (vlib_main_t * vm, vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + + fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, + table_id); + + vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); + ip6_main.mfib_index_by_sw_if_index[sw_if_index] = fib_index; } diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c index bbc2cebaa39..20306088611 100644 --- a/src/vnet/ip/ip6_input.c +++ b/src/vnet/ip/ip6_input.c @@ -64,6 +64,7 @@ typedef enum { IP6_INPUT_NEXT_DROP, IP6_INPUT_NEXT_LOOKUP, + IP6_INPUT_NEXT_LOOKUP_MULTICAST, IP6_INPUT_NEXT_ICMP_ERROR, IP6_INPUT_N_NEXT, } ip6_input_next_t; @@ -142,12 +143,27 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; - arc0 = - ip6_address_is_multicast 
(&ip0->dst_address) ? - lm->mcast_feature_arc_index : lm->ucast_feature_arc_index; - arc1 = - ip6_address_is_multicast (&ip1->dst_address) ? - lm->mcast_feature_arc_index : lm->ucast_feature_arc_index; + if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address))) + { + arc0 = lm->mcast_feature_arc_index; + next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc0 = lm->ucast_feature_arc_index; + next0 = IP6_INPUT_NEXT_LOOKUP; + } + + if (PREDICT_FALSE (ip6_address_is_multicast (&ip1->dst_address))) + { + arc1 = lm->mcast_feature_arc_index; + next1 = IP6_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc1 = lm->ucast_feature_arc_index; + next1 = IP6_INPUT_NEXT_LOOKUP; + } vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0; @@ -240,9 +256,17 @@ ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip0 = vlib_buffer_get_current (p0); sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - arc0 = - ip6_address_is_multicast (&ip0->dst_address) ? 
- lm->mcast_feature_arc_index : lm->ucast_feature_arc_index; + if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address))) + { + arc0 = lm->mcast_feature_arc_index; + next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc0 = lm->ucast_feature_arc_index; + next0 = IP6_INPUT_NEXT_LOOKUP; + } + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); @@ -313,6 +337,7 @@ VLIB_REGISTER_NODE (ip6_input_node) = { [IP6_INPUT_NEXT_DROP] = "error-drop", [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup", [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error", + [IP6_INPUT_NEXT_LOOKUP_MULTICAST] = "ip6-mfib-forward-lookup", }, .format_buffer = format_ip6_header, diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c index 46c0e3168c0..46d04769a96 100644 --- a/src/vnet/ip/ip6_neighbor.c +++ b/src/vnet/ip/ip6_neighbor.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -116,9 +117,7 @@ typedef struct u32 seed; u64 randomizer; int ref_count; - adj_index_t all_nodes_adj_index; - adj_index_t all_routers_adj_index; - adj_index_t all_mldv2_routers_adj_index; + adj_index_t mcast_adj_index; /* timing information */ #define DEF_MAX_RADV_INTERVAL 200 @@ -474,33 +473,72 @@ ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6); - if (NULL != nbr) - { - adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address, - ip6_nd_mk_complete_walk, nbr); - } - else + switch (adj->lookup_next_index) { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + if (NULL != nbr) + { + adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address, + ip6_nd_mk_complete_walk, nbr); + } + else + { + /* + * no matching ND entry. + * construct the rewrite required to for an ND packet, and stick + * that in the adj's pipe to smoke. 
+ */ + adj_nbr_update_rewrite (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnm, + sw_if_index, + VNET_LINK_IP6, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it may + * want to forward traffic sometime soon. Let's send a speculative ND. + * just one. If we were to do periodically that wouldn't be bad either, + * but that's more code than i'm prepared to write at this time for + * relatively little reward. + */ + ip6_nbr_probe (adj); + } + break; + case IP_LOOKUP_NEXT_MCAST: /* - * no matching ND entry. - * construct the rewrite required to for an ND packet, and stick - * that in the adj's pipe to smoke. + * Construct a partial rewrite from the known ethernet mcast dest MAC */ - adj_nbr_update_rewrite (ai, - ADJ_NBR_REWRITE_FLAG_INCOMPLETE, - ethernet_build_rewrite (vnm, - sw_if_index, - VNET_LINK_IP6, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + adj_mcast_update_rewrite + (ai, + ethernet_build_rewrite (vnm, + sw_if_index, + adj->ia_link, + ethernet_ip6_mcast_dst_addr ())); /* - * since the FIB has added this adj for a route, it makes sense it may - * want to forward traffic sometime soon. Let's send a speculative ND. - * just one. If we were to do periodically that wouldn't be bad either, - * but that's more code than i'm prepared to write at this time for - * relatively little reward. + * Complete the remaining fields of the adj's rewrite to direct the + * complete of the rewrite at switch time by copying in the IP + * dst address's bytes. + * Ofset is 12 bytes from the end of the MAC header - which is 2 + * bytes into the desintation address. And we write 4 bytes. 
*/ - ip6_nbr_probe (adj); + adj->rewrite_header.dst_mcast_offset = 12; + adj->rewrite_header.dst_mcast_n_bytes = 4; + + break; + + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_LOAD_BALANCE: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; } } @@ -1517,7 +1555,7 @@ icmp6_router_solicitation (vlib_main_t * vm, } else { - adj_index0 = radv_info->all_nodes_adj_index; + adj_index0 = radv_info->mcast_adj_index; if (adj_index0 == 0) error0 = ICMP6_ERROR_DST_LOOKUP_MISS; else @@ -1918,10 +1956,8 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, ip6_radv_prefix_t *p; ip6_mldp_group_t *m; - /* remove adjacencies */ - adj_unlock (a->all_nodes_adj_index); - adj_unlock (a->all_routers_adj_index); - adj_unlock (a->all_mldv2_routers_adj_index); + /* release the lock on the interface's mcast adj */ + adj_unlock (a->mcast_adj_index); /* clean up prefix_pool */ /* *INDENT-OFF* */ @@ -2017,36 +2053,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t)); - { - u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00, - IP6_MULTICAST_GROUP_ID_all_hosts - }; - - a->all_nodes_adj_index = - adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - sw_if_index, link_layer_address); - } - - { - u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00, - IP6_MULTICAST_GROUP_ID_all_routers - }; - - a->all_routers_adj_index = - adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, - sw_if_index, link_layer_address); - } - - { - u8 link_layer_address[6] = { 0x33, 0x33, 0x00, 0x00, 0x00, - IP6_MULTICAST_GROUP_ID_mldv2_routers - }; - - a->all_mldv2_routers_adj_index = - adj_rewrite_add_and_lock (FIB_PROTOCOL_IP6, - VNET_LINK_IP6, - sw_if_index, link_layer_address); - } + a->mcast_adj_index = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6, + 
VNET_LINK_IP6, + sw_if_index); /* add multicast groups we will always be reporting */ ip6_address_t addr; @@ -2273,11 +2282,10 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index) vnet_buffer (b0)->sw_if_index[VLIB_RX] = vnet_main.local_interface_sw_if_index; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - radv_info->all_mldv2_routers_adj_index; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index; b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; - vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast"); f = vlib_get_frame_to_node (vm, node->index); to_next = vlib_frame_vector_args (f); @@ -2301,7 +2309,7 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT, .next_nodes = { [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop", - [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-mcast", [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output", }, }; diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c index aafde464445..437d26749cd 100644 --- a/src/vnet/ip/ip_api.c +++ b/src/vnet/ip/ip_api.c @@ -33,6 +33,9 @@ #include #include #include +//#include +#include +#include #include @@ -58,6 +61,8 @@ _(IP_FIB_DETAILS, ip_fib_details) \ _(IP6_FIB_DUMP, ip6_fib_dump) \ _(IP6_FIB_DETAILS, ip6_fib_details) \ _(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \ +_(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \ +_(MFIB_SIGNAL_DUMP, mfib_signal_dump) \ _(IP_NEIGHBOR_DETAILS, ip_neighbor_details) \ _(IP_ADDRESS_DUMP, ip_address_dump) \ _(IP_DUMP, ip_dump) \ @@ -845,6 +850,144 @@ vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY); } +static int +add_del_mroute_check (fib_protocol_t table_proto, + u32 table_id, + u32 next_hop_sw_if_index, + u8 is_local, u8 create_missing_tables, u32 * fib_index) +{ + 
vnet_main_t *vnm = vnet_get_main (); + + *fib_index = mfib_table_find (table_proto, ntohl (table_id)); + if (~0 == *fib_index) + { + if (create_missing_tables) + { + *fib_index = mfib_table_find_or_create_and_lock (table_proto, + ntohl (table_id)); + } + else + { + /* No such VRF, and we weren't asked to create one */ + return VNET_API_ERROR_NO_SUCH_FIB; + } + } + + if (~0 != ntohl (next_hop_sw_if_index)) + { + if (pool_is_free_index (vnm->interface_main.sw_interfaces, + ntohl (next_hop_sw_if_index))) + { + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + } + } + + return (0); +} + +static int +mroute_add_del_handler (u8 is_add, + u8 is_local, + u32 fib_index, + const mfib_prefix_t * prefix, + u32 entry_flags, + u32 next_hop_sw_if_index, u32 itf_flags) +{ + stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); + + fib_route_path_t path = { + .frp_sw_if_index = next_hop_sw_if_index, + .frp_proto = prefix->fp_proto, + }; + + if (is_local) + path.frp_flags |= FIB_ROUTE_PATH_LOCAL; + + + if (!is_local && ~0 == next_hop_sw_if_index) + { + mfib_table_entry_update (fib_index, prefix, + MFIB_SOURCE_API, entry_flags); + } + else + { + if (is_add) + { + mfib_table_entry_path_update (fib_index, prefix, + MFIB_SOURCE_API, &path, itf_flags); + } + else + { + mfib_table_entry_path_remove (fib_index, prefix, + MFIB_SOURCE_API, &path); + } + } + + stats_dsunlock (); + return (0); +} + +static int +api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) +{ + fib_protocol_t fproto; + u32 fib_index; + int rv; + + fproto = (mp->is_ipv6 ? 
FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + rv = add_del_mroute_check (fproto, + mp->table_id, + mp->next_hop_sw_if_index, + mp->is_local, + mp->create_vrf_if_needed, &fib_index); + + if (0 != rv) + return (rv); + + mfib_prefix_t pfx = { + .fp_len = ntohs (mp->grp_address_length), + .fp_proto = fproto, + }; + + if (FIB_PROTOCOL_IP4 == fproto) + { + clib_memcpy (&pfx.fp_grp_addr.ip4, mp->grp_address, + sizeof (pfx.fp_grp_addr.ip4)); + clib_memcpy (&pfx.fp_src_addr.ip4, mp->src_address, + sizeof (pfx.fp_src_addr.ip4)); + } + else + { + clib_memcpy (&pfx.fp_grp_addr.ip6, mp->grp_address, + sizeof (pfx.fp_grp_addr.ip6)); + clib_memcpy (&pfx.fp_src_addr.ip6, mp->src_address, + sizeof (pfx.fp_src_addr.ip6)); + } + + return (mroute_add_del_handler (mp->is_add, + mp->is_local, + fib_index, &pfx, + ntohl (mp->entry_flags), + ntohl (mp->next_hop_sw_if_index), + ntohl (mp->itf_flags))); +} + +void +vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) +{ + vl_api_ip_mroute_add_del_reply_t *rmp; + int rv; + vnet_main_t *vnm = vnet_get_main (); + + vnm->api_errno = 0; + + rv = api_mroute_add_del_t_handler (mp); + + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_IP_MROUTE_ADD_DEL_REPLY); +} + static void send_ip_details (vpe_api_main_t * am, unix_shared_memory_queue_t * q, u32 sw_if_index, u32 context) @@ -1148,6 +1291,73 @@ static void REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY); } +void +vl_mfib_signal_send_one (unix_shared_memory_queue_t * q, + u32 context, const mfib_signal_t * mfs) +{ + vl_api_mfib_signal_details_t *mp; + mfib_prefix_t prefix; + mfib_table_t *mfib; + mfib_itf_t *mfi; + + mp = vl_msg_api_alloc (sizeof (*mp)); + + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MFIB_SIGNAL_DETAILS); + mp->context = context; + + mfi = mfib_itf_get (mfs->mfs_itf); + mfib_entry_get_prefix (mfs->mfs_entry, &prefix); + mfib = mfib_table_get (mfib_entry_get_fib_index (mfs->mfs_entry), + prefix.fp_proto); + mp->table_id = ntohl (mfib->mft_table_id); + mp->sw_if_index = ntohl (mfi->mfi_sw_if_index); + + if (FIB_PROTOCOL_IP4 == prefix.fp_proto) + { + mp->grp_address_len = ntohs (prefix.fp_len); + + memcpy (mp->grp_address, &prefix.fp_grp_addr.ip4, 4); + if (prefix.fp_len > 32) + { + memcpy (mp->src_address, &prefix.fp_src_addr.ip4, 4); + } + } + else + { + mp->grp_address_len = ntohs (prefix.fp_len); + + ASSERT (0); + } + + if (0 != mfs->mfs_buffer_len) + { + mp->ip_packet_len = ntohs (mfs->mfs_buffer_len); + + memcpy (mp->ip_packet_data, mfs->mfs_buffer, mfs->mfs_buffer_len); + } + else + { + mp->ip_packet_len = 0; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_mfib_signal_dump_t_handler (vl_api_mfib_signal_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + while (q->cursize < q->maxsize && mfib_signal_send_one (q, mp->context)) + ; +} #define vl_msg_name_crc_list #include diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c index 734a4cd7cfb..6c5611d3955 100644 --- a/src/vnet/ip/lookup.c +++ b/src/vnet/ip/lookup.c @@ 
-43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -258,6 +259,9 @@ format_ip_lookup_next (u8 * s, va_list * args) case IP_LOOKUP_NEXT_GLEAN: t = "glean"; break; + case IP_LOOKUP_NEXT_MCAST: + t = "mcast"; + break; case IP_LOOKUP_NEXT_REWRITE: break; } @@ -767,6 +771,173 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { }; /* *INDENT-ON* */ +clib_error_t * +vnet_ip_mroute_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + fib_route_path_t rpath; + u32 table_id, is_del; + vnet_main_t *vnm; + mfib_prefix_t pfx; + u32 fib_index; + mfib_itf_flags_t iflags = 0; + mfib_entry_flags_t eflags = 0; + + vnm = vnet_get_main (); + is_del = 0; + table_id = 0; + memset (&pfx, 0, sizeof (pfx)); + memset (&rpath, 0, sizeof (rpath)); + rpath.frp_sw_if_index = ~0; + + /* Get a line of input. */ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "%U %U", + unformat_ip4_address, + &pfx.fp_src_addr.ip4, + unformat_ip4_address, &pfx.fp_grp_addr.ip4)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = 64; + } + else if (unformat (line_input, "%U %U", + unformat_ip6_address, + &pfx.fp_src_addr.ip6, + unformat_ip6_address, &pfx.fp_grp_addr.ip6)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = 256; + } + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, + &pfx.fp_grp_addr.ip4, &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, + &pfx.fp_grp_addr.ip6, &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "%U", + 
unformat_ip4_address, &pfx.fp_grp_addr.ip4)) + { + memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4)); + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = 32; + } + else if (unformat (line_input, "%U", + unformat_ip6_address, &pfx.fp_grp_addr.ip6)) + { + memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6)); + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = 128; + } + else if (unformat (line_input, "via %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "%U", unformat_mfib_itf_flags, &iflags)) + ; + else if (unformat (line_input, "%U", + unformat_mfib_entry_flags, &eflags)) + ; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + unformat_free (line_input); + + if (~0 == table_id) + { + /* + * if no table_id is passed we will manipulate the default + */ + fib_index = 0; + } + else + { + fib_index = mfib_table_find (pfx.fp_proto, table_id); + + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", table_id); + goto done; + } + } + + if (is_del && 0 == rpath.frp_weight) + { + mfib_table_entry_delete (fib_index, &pfx, MFIB_SOURCE_CLI); + } + else if (eflags) + { + mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI, eflags); + } + else + { + if (is_del) + mfib_table_entry_path_remove (fib_index, + &pfx, MFIB_SOURCE_CLI, &rpath); + else + mfib_table_entry_path_update (fib_index, + &pfx, MFIB_SOURCE_CLI, &rpath, iflags); + } + +done: + return error; +} + +/*? + * This command is used to add or delete IPv4 or IPv6 multicastroutes. All + * IP Addresses ('/', + * '' and '') + * can be IPv4 or IPv6, but all must be of the same form in a single + * command. To display the current set of routes, use the commands + * 'show ip mfib' and 'show ip6 mfib'. 
+ * The full set of support flags for interfaces and route is shown via; + * 'show mfib route flags' and 'show mfib itf flags' + * respectively. + * @cliexpar + * Example of how to add a forwarding interface to a route (and create the + * route if it does not exist) + * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/0 Forward} + * Example of how to add an accepting interface to a route (and create the + * route if it does not exist) + * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/1 Accept} + * Example of changing the route's flags to send signals via the API + * @cliexcmd{ip mroute add 232.1.1.1 Signal} + + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_mroute_command, static) = +{ + .path = "ip mroute", + .short_help = "ip mroute [add|del] / [table ] [via [],", + .function = vnet_ip_mroute_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + /* * The next two routines address a longstanding script hemorrhoid. * Probing a v4 or v6 neighbor needs to appear to be synchronous, diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h index 3dbd7b3b8e8..27c70943991 100644 --- a/src/vnet/ip/lookup.h +++ b/src/vnet/ip/lookup.h @@ -91,6 +91,9 @@ typedef enum /** This packets needs to go to ICMP error */ IP_LOOKUP_NEXT_ICMP_ERROR, + /** Multicast Adjacency. 
*/ + IP_LOOKUP_NEXT_MCAST, + IP_LOOKUP_N_NEXT, } ip_lookup_next_t; @@ -115,6 +118,7 @@ typedef enum [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite", \ + [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ @@ -127,6 +131,7 @@ typedef enum [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \ [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ + [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast", \ [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ @@ -203,12 +208,6 @@ typedef struct ip_adjacency_t_ /** Interface address index for this local/arp adjacency. */ u32 if_address_index; - /** Force re-lookup in a different FIB. ~0 => normal behavior */ - u16 mcast_group_index; - - /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */ - u16 saved_lookup_next_index; - /* * link/ether-type */ @@ -236,28 +235,28 @@ typedef struct ip_adjacency_t_ */ struct { - /** - * The recursive next-hop - */ + /** + * The recursive next-hop + */ ip46_address_t next_hop; - /** - * The node index of the tunnel's post rewrite/TX function. - */ + /** + * The node index of the tunnel's post rewrite/TX function. 
+ */ u32 tx_function_node; - /** - * The next DPO to use - */ + /** + * The next DPO to use + */ dpo_id_t next_dpo; - /** - * A function to perform the post-rewrite fixup - */ - adj_midchain_fixup_t fixup_func; - } midchain; /** - * IP_LOOKUP_NEXT_GLEAN - * - * Glean the address to ARP for from the packet's destination + * A function to perform the post-rewrite fixup */ + adj_midchain_fixup_t fixup_func; + } midchain; + /** + * IP_LOOKUP_NEXT_GLEAN + * + * Glean the address to ARP for from the packet's destination + */ struct { ip46_address_t receive_addr; @@ -291,43 +290,6 @@ STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) == /* An all zeros address */ extern const ip46_address_t zero_addr; -/* IP multicast adjacency. */ -typedef struct -{ - /* Handle for this adjacency in adjacency heap. */ - u32 heap_handle; - - /* Number of adjecencies in block. */ - u32 n_adj; - - /* Rewrite string. */ - vnet_declare_rewrite (64 - 2 * sizeof (u32)); -} -ip_multicast_rewrite_t; - -typedef struct -{ - /* ip4-multicast-rewrite next index. */ - u32 next_index; - - u8 n_rewrite_bytes; - - u8 rewrite_string[64 - 1 * sizeof (u32) - 1 * sizeof (u8)]; -} -ip_multicast_rewrite_string_t; - -typedef struct -{ - ip_multicast_rewrite_t *rewrite_heap; - - ip_multicast_rewrite_string_t *rewrite_strings; - - /* Negative rewrite string index; >= 0 sw_if_index. - Sorted. Used to hash. */ - i32 **adjacency_id_vector; - - uword *adjacency_by_id_vector; -} ip_multicast_lookup_main_t; typedef struct { diff --git a/src/vnet/mcast/mcast.c b/src/vnet/mcast/mcast.c deleted file mode 100644 index 55be89ae907..00000000000 --- a/src/vnet/mcast/mcast.c +++ /dev/null @@ -1,565 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include -#include -#include -#include -#include -#include -#include - -typedef struct { - u32 sw_if_index; - u32 next_index; - u32 group_index; -} mcast_prep_trace_t; - -/* packet trace format function */ -static u8 * format_mcast_prep_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - mcast_prep_trace_t * t = va_arg (*args, mcast_prep_trace_t *); - - s = format (s, "MCAST_PREP: group %d, next index %d, tx_sw_if_index %d", - t->group_index, t->next_index, t->sw_if_index); - return s; -} - -mcast_main_t mcast_main; -vlib_node_registration_t mcast_prep_node; -vlib_node_registration_t mcast_recycle_node; - -#define foreach_mcast_prep_error \ -_(MCASTS, "Multicast Packets") - -typedef enum { -#define _(sym,str) MCAST_PREP_ERROR_##sym, - foreach_mcast_prep_error -#undef _ - MCAST_PREP_N_ERROR, -} mcast_prep_error_t; - -static char * mcast_prep_error_strings[] = { -#define _(sym,string) string, - foreach_mcast_prep_error -#undef _ -}; - -typedef enum { - MCAST_PREP_NEXT_DROP, - MCAST_PREP_N_NEXT, -} mcast_prep_next_t; - -static uword -mcast_prep_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, * from, * to_next; - mcast_prep_next_t next_index; - mcast_main_t * mcm = &mcast_main; - vlib_node_t *n = vlib_get_node (vm, mcast_prep_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t * em = &vm->error_main; - ip4_main_t * im = &ip4_main; 
- ip_lookup_main_t * lm = &im->lookup_main; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - next0 = 0; - sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; - next1 = 0; - - /* $$$$ your message in this space. 
Process 2 x pkts */ - - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - mcast_prep_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - if (b1->flags & VLIB_BUFFER_IS_TRACED) - { - mcast_prep_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - t->sw_if_index = sw_if_index1; - t->next_index = next1; - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - u32 next0, adj_index0; - mcast_group_t * g0; - ip_adjacency_t * adj0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - adj0 = ip_get_adjacency (lm, adj_index0); - vnet_buffer(b0)->mcast.mcast_group_index = adj0->mcast_group_index; - g0 = pool_elt_at_index (mcm->groups, adj0->mcast_group_index); - - /* - * Handle the degenerate single-copy case - * If we don't change the freelist, the packet will never - * make it to the recycle node... 
- */ - if (PREDICT_TRUE(vec_len (g0->members) > 1)) - { - /* Save the original free list index */ - vnet_buffer(b0)->mcast.original_free_list_index = - b0->free_list_index; - - /* Swap in the multicast recycle list */ - b0->free_list_index = mcm->mcast_recycle_list_index; - - /* - * Make sure that intermediate "frees" don't screw up - */ - b0->recycle_count = vec_len (g0->members); - b0->flags |= VLIB_BUFFER_RECYCLE; - - /* Set up for the recycle node */ - vnet_buffer(b0)->mcast.mcast_current_index = 1; - } - - /* Transmit the pkt on the first interface */ - next0 = g0->members[0].prep_and_recycle_node_next_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = - g0->members[0].tx_sw_if_index; - - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) { - mcast_prep_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->next_index = next0; - t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; - t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; - } - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - em->counters[node_counter_base_index + MCAST_PREP_ERROR_MCASTS] += - frame->n_vectors; - - return frame->n_vectors; -} - -VLIB_REGISTER_NODE (mcast_prep_node) = { - .function = mcast_prep_node_fn, - .name = "mcast_prep", - .vector_size = sizeof (u32), - .format_trace = format_mcast_prep_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(mcast_prep_error_strings), - .error_strings = mcast_prep_error_strings, - - .n_next_nodes = MCAST_PREP_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [MCAST_PREP_NEXT_DROP] = "error-drop", - }, -}; - -typedef struct { - u32 sw_if_index; - u32 next_index; - u32 current_member; - u32 group_index; -} mcast_recycle_trace_t; - -static u8 * 
format_mcast_recycle_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - mcast_recycle_trace_t * t = va_arg (*args, mcast_recycle_trace_t *); - - s = format (s, -"MCAST_R: group %d, current member %d next (node) index %d, tx_sw_if_index %d", - t->group_index, t->current_member, t->next_index, t->sw_if_index); - return s; -} - -#define foreach_mcast_recycle_error \ -_(RECYCLES, "Multicast Recycles") - -typedef enum { -#define _(sym,str) MCAST_RECYCLE_ERROR_##sym, - foreach_mcast_recycle_error -#undef _ - MCAST_RECYCLE_N_ERROR, -} mcast_recycle_error_t; - -static char * mcast_recycle_error_strings[] = { -#define _(sym,string) string, - foreach_mcast_recycle_error -#undef _ -}; - -typedef enum { - MCAST_RECYCLE_NEXT_DROP, - MCAST_RECYCLE_N_NEXT, -} mcast_recycle_next_t; - -static uword -mcast_recycle_node_fn (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - u32 n_left_from, * from, * to_next; - mcast_recycle_next_t next_index; - mcast_main_t * mcm = &mcast_main; - vlib_node_t *n = vlib_get_node (vm, mcast_recycle_node.index); - u32 node_counter_base_index = n->error_heap_index; - vlib_error_main_t * em = &vm->error_main; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - u32 next0, next1; - u32 sw_if_index0, sw_if_index1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - next0 = 0; - sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; - next1 = 0; - - /* $$$$ your message in this space. Process 2 x pkts */ - - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE))) - { - if (b0->flags & VLIB_BUFFER_IS_TRACED) - { - mcast_recycle_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->sw_if_index = sw_if_index0; - t->next_index = next0; - } - if (b1->flags & VLIB_BUFFER_IS_TRACED) - { - mcast_recycle_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - t->sw_if_index = sw_if_index1; - t->next_index = next1; - } - } - - /* verify speculative enqueues, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - u32 next0; - u32 current_member0; - mcast_group_t * g0; - - /* speculatively enqueue b0 to the current next frame */ - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - g0 = pool_elt_at_index (mcm->groups, - vnet_buffer(b0)->mcast.mcast_group_index); - - /* No more replicas? 
*/ - if (b0->recycle_count == 1) - { - /* Restore the original free list index */ - b0->free_list_index = - vnet_buffer(b0)->mcast.original_free_list_index; - b0->flags &= ~(VLIB_BUFFER_RECYCLE); - } - current_member0 = vnet_buffer(b0)->mcast.mcast_current_index; - - next0 = - g0->members[current_member0].prep_and_recycle_node_next_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = - g0->members[current_member0].tx_sw_if_index; - - vnet_buffer(b0)->mcast.mcast_current_index = - current_member0 + 1; - - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) { - mcast_recycle_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->next_index = next0; - t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; - t->group_index = vnet_buffer(b0)->mcast.mcast_group_index; - t->current_member = current_member0; - } - - /* verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - em->counters[node_counter_base_index + MCAST_RECYCLE_ERROR_RECYCLES] += - frame->n_vectors; - - return frame->n_vectors; -} - -VLIB_REGISTER_NODE (mcast_recycle_node) = { - .function = mcast_recycle_node_fn, - .name = "mcast-recycle", - .vector_size = sizeof (u32), - .format_trace = format_mcast_recycle_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_errors = ARRAY_LEN(mcast_recycle_error_strings), - .error_strings = mcast_recycle_error_strings, - - .n_next_nodes = MCAST_RECYCLE_N_NEXT, - - /* edit / add dispositions here */ - .next_nodes = { - [MCAST_RECYCLE_NEXT_DROP] = "error-drop", - }, -}; - -/* - * fish pkts back from the recycle queue/freelist - * un-flatten the context chains - */ -static void mcast_recycle_callback (vlib_main_t *vm, - vlib_buffer_free_list_t * fl) -{ - vlib_frame_t * f = 0; - u32 n_left_from; - u32 n_left_to_next = 0; - u32 n_this_frame = 0; - u32 * 
from; - u32 * to_next; - u32 bi0, pi0; - vlib_buffer_t *b0; - vlib_buffer_t *bnext0; - int i; - - /* aligned, unaligned buffers */ - for (i = 0; i < 2; i++) - { - if (i == 0) - { - from = fl->aligned_buffers; - n_left_from = vec_len (from); - } - else - { - from = fl->unaligned_buffers; - n_left_from = vec_len (from); - } - - while (n_left_from > 0) - { - if (PREDICT_FALSE(n_left_to_next == 0)) - { - if (f) - { - f->n_vectors = n_this_frame; - vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); - } - - f = vlib_get_frame_to_node (vm, mcast_recycle_node.index); - to_next = vlib_frame_vector_args (f); - n_left_to_next = VLIB_FRAME_SIZE; - n_this_frame = 0; - } - - bi0 = from[0]; - if (PREDICT_TRUE(n_left_from > 1)) - { - pi0 = from[1]; - vlib_prefetch_buffer_with_index(vm,pi0,LOAD); - } - - bnext0 = b0 = vlib_get_buffer (vm, bi0); - - while (bnext0->flags & VLIB_BUFFER_NEXT_PRESENT) - { - from += 1; - n_left_from -= 1; - bnext0 = vlib_get_buffer (vm, bnext0->next_buffer); - } - to_next[0] = bi0; - - if (CLIB_DEBUG > 0) - vlib_buffer_set_known_state (vm, bi0, VLIB_BUFFER_KNOWN_ALLOCATED); - - from++; - to_next++; - n_this_frame++; - n_left_to_next--; - n_left_from--; - } - } - - vec_reset_length (fl->aligned_buffers); - vec_reset_length (fl->unaligned_buffers); - - if (f) - { - ASSERT(n_this_frame); - f->n_vectors = n_this_frame; - vlib_put_frame_to_node (vm, mcast_recycle_node.index, f); - } -} - -clib_error_t *mcast_init (vlib_main_t *vm) -{ - mcast_main_t * mcm = &mcast_main; - vlib_buffer_main_t * bm = vm->buffer_main; - vlib_buffer_free_list_t * fl; - - mcm->vlib_main = vm; - mcm->vnet_main = vnet_get_main(); - mcm->mcast_recycle_list_index = - vlib_buffer_create_free_list (vm, 1024 /* fictional */, "mcast-recycle"); - - fl = pool_elt_at_index (bm->buffer_free_list_pool, - mcm->mcast_recycle_list_index); - - fl->buffers_added_to_freelist_function = mcast_recycle_callback; - - return 0; -} - -VLIB_INIT_FUNCTION (mcast_init); - - diff --git 
a/src/vnet/mcast/mcast.h b/src/vnet/mcast/mcast.h deleted file mode 100644 index 96e514427c6..00000000000 --- a/src/vnet/mcast/mcast.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __included_vnet_mcast_h__ -#define __included_vnet_mcast_h__ - -#include -#include -#include - -typedef struct { - /* Arrange for both prep and recycle nodes to have identical - next indices for a given output interface */ - u32 prep_and_recycle_node_next_index; - - /* Show command, etc. */ - u32 tx_sw_if_index; -} mcast_group_member_t; - -typedef struct { - /* vector of group members */ - mcast_group_member_t * members; -} mcast_group_t; - -typedef struct { - /* pool of multicast (interface) groups */ - mcast_group_t * groups; - - /* multicast "free" list, aka recycle list */ - u32 mcast_recycle_list_index; - - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; -} mcast_main_t; - -mcast_main_t mcast_main; - -#endif /* __included_vnet_mcast_h__ */ diff --git a/src/vnet/mcast/mcast_test.c b/src/vnet/mcast/mcast_test.c deleted file mode 100644 index be80c9fc982..00000000000 --- a/src/vnet/mcast/mcast_test.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef struct { - /* convenience */ - vlib_main_t * vlib_main; - vnet_main_t * vnet_main; - mcast_main_t * mcast_main; -} mcast_test_main_t; - -mcast_test_main_t mcast_test_main; -vlib_node_registration_t mcast_prep_node; -vlib_node_registration_t mcast_recycle_node; - -static clib_error_t * -mcast_test_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - /* u8 *rewrite_data; */ - /* mcast_test_main_t * mtm = &mcast_test_main; */ - /* mcast_main_t * mcm = mtm->mcast_main; */ - /* ip_adjacency_t adj; */ - /* u32 adj_index; */ - /* mcast_group_t * g; */ - /* mcast_group_member_t * member; */ - /* unformat_input_t _line_input, * line_input = &_line_input; */ - /* ip4_address_t dst_addr, zero; */ - /* ip4_main_t * im = &ip4_main; */ - /* ip_lookup_main_t * lm = &im->lookup_main; */ - - /* /\* Get a line of input. *\/ */ - /* if (! 
unformat_user (input, unformat_line_input, line_input)) */ - /* return 0; */ - - /* pool_get (mcm->groups, g); */ - /* memset (g, 0, sizeof (*g)); */ - - /* while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) */ - /* { */ - /* vnet_hw_interface_t *hw; */ - /* u32 next, sw_if_index; */ - - /* if (unformat (line_input, "%U", unformat_vnet_sw_interface, */ - /* mtm->vnet_main, &sw_if_index)) */ - /* { */ - /* vec_add2 (g->members, member, 1); */ - /* member->tx_sw_if_index = sw_if_index; */ - - /* hw = vnet_get_sup_hw_interface (mtm->vnet_main, */ - /* sw_if_index); */ - - /* next = vlib_node_add_next (mtm->vlib_main, */ - /* mcast_prep_node.index, */ - /* hw->output_node_index); */ - - /* /\* Required to be the same next index... *\/ */ - /* vlib_node_add_next_with_slot (mtm->vlib_main, */ - /* mcast_recycle_node.index, */ - /* hw->output_node_index, next); */ - /* member->prep_and_recycle_node_next_index = next; */ - /* } */ - /* else */ - /* { */ - /* return unformat_parse_error (line_input); */ - /* } */ - /* } */ - - /* if (vec_len (g->members) == 0) */ - /* { */ - /* pool_put (mcm->groups, g); */ - /* vlib_cli_output (vm, "no group members specified"); */ - /* return 0; */ - /* } */ - - - /* adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; */ - /* adj.mcast_group_index = g - mcm->groups; */ - /* rewrite_data = format (0, "abcdefg"); */ - - /* vnet_rewrite_for_tunnel */ - /* (mtm->vnet_main, */ - /* (u32)~0, /\* tx_sw_if_index, we dont know yet *\/ */ - /* ip4_rewrite_node.index, */ - /* mcast_prep_node.index, */ - /* &adj.rewrite_header, */ - /* rewrite_data, vec_len(rewrite_data)); */ - - /* ip_add_adjacency (lm, &adj, 1 /\* one adj *\/, */ - /* &adj_index); */ - - /* dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002); */ - /* zero.as_u32 = 0; */ - - /* ip4_add_del_route_next_hop (im, */ - /* IP4_ROUTE_FLAG_ADD, */ - /* &dst_addr, */ - /* 24 /\* mask width *\/, */ - /* &zero /\* no next hop *\/, */ - - /* 0, // next hop sw if index */ - /* 1, // 
weight */ - /* adj_index, */ - /* 0 /\* explicit fib 0 *\/); */ - - return 0; -} - -static VLIB_CLI_COMMAND (mcast_test_command) = { - .path = "test mc", - .short_help = "test mc", - .function = mcast_test_command_fn, -}; - -clib_error_t *mcast_test_init (vlib_main_t *vm) -{ - mcast_test_main_t * mtm = &mcast_test_main; - - mtm->vlib_main = vm; - mtm->vnet_main = vnet_get_main(); - mtm->mcast_main = &mcast_main; - - return 0; -} - -VLIB_INIT_FUNCTION (mcast_test_init); diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c new file mode 100644 index 00000000000..08001c3fa7a --- /dev/null +++ b/src/vnet/mfib/ip4_mfib.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +static const mfib_prefix_t ip4_specials[] = { + { + /* (*,*)/0 */ + .fp_src_addr = { + .ip4.data_u32 = 0, + }, + .fp_grp_addr = { + .ip4.data_u32 = 0, + }, + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + }, +}; + +static u32 +ip4_create_mfib_with_table_id (u32 table_id) +{ + mfib_table_t *mfib_table; + + pool_get_aligned(ip4_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES); + memset(mfib_table, 0, sizeof(*mfib_table)); + + mfib_table->mft_proto = FIB_PROTOCOL_IP4; + mfib_table->mft_index = + mfib_table->v4.index = + (mfib_table - ip4_main.mfibs); + + hash_set (ip4_main.mfib_index_by_table_id, + table_id, + mfib_table->mft_index); + + mfib_table->mft_table_id = + mfib_table->v4.table_id = + table_id; + + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP4); + + /* + * add the special entries into the new FIB + */ + int ii; + + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + mfib_prefix_t prefix = ip4_specials[ii]; + + prefix.fp_src_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32); + prefix.fp_grp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32); + + mfib_table_entry_update(mfib_table->mft_index, + &prefix, + MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_ENTRY_FLAG_DROP); + } + + return (mfib_table->mft_index); +} + +void +ip4_mfib_table_destroy (ip4_mfib_t *mfib) +{ + mfib_table_t *mfib_table = (mfib_table_t*)mfib; + int ii; + + /* + * remove all the specials we added when the table was created. + */ + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + fib_node_index_t mfei; + mfib_prefix_t prefix = ip4_specials[ii]; + + prefix.fp_src_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_src_addr.ip4.data_u32); + prefix.fp_grp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_grp_addr.ip4.data_u32); + + mfei = mfib_table_lookup(mfib_table->mft_index, &prefix); + mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE); + } + + /* + * validate no more routes. 
+ */ + ASSERT(0 == mfib_table->mft_total_route_counts); + ASSERT(~0 != mfib_table->mft_table_id); + + hash_unset (ip4_main.mfib_index_by_table_id, mfib_table->mft_table_id); + pool_put(ip4_main.mfibs, mfib_table); +} + +u32 +ip4_mfib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = ip4_mfib_index_from_table_id(table_id); + if (~0 == index) + return ip4_create_mfib_with_table_id(table_id); + mfib_table_lock(index, FIB_PROTOCOL_IP4); + + return (index); +} + +u32 +ip4_mfib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip4_main.mfib_index_by_sw_if_index)) + { + /* + * This is the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip4_main.mfib_index_by_sw_if_index[sw_if_index]); +} + +#define IPV4_MFIB_GRP_LEN(_len)\ + (_len > 32 ? 32 : _len) + +#define IP4_MFIB_MK_KEY(_grp, _src, _len, _key) \ +{ \ + _key = ((u64)(_grp->data_u32 & \ + ip4_main.fib_masks[IPV4_MFIB_GRP_LEN(_len)])) << 32; \ + _key |= _src->data_u32; \ +} +#define IP4_MFIB_MK_GRP_KEY(_grp, _len, _key) \ +{ \ + _key = ((u64)(_grp->data_u32 & \ + ip4_main.fib_masks[IPV4_MFIB_GRP_LEN(_len)])) << 32; \ +} + +/* + * ip4_fib_table_lookup_exact_match + * + * Exact match prefix lookup + */ +fib_node_index_t +ip4_mfib_table_lookup_exact_match (const ip4_mfib_t *mfib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len) +{ + uword * hash, * result; + u64 key; + + hash = mfib->fib_entry_by_dst_address[len]; + IP4_MFIB_MK_KEY(grp, src, len, key); + + result = hash_get(hash, key); + + if (NULL != result) { + return (result[0]); + } + return (FIB_NODE_INDEX_INVALID); +} + +/* + * ip4_fib_table_lookup + * + * Longest prefix match + */ +fib_node_index_t +ip4_mfib_table_lookup (const ip4_mfib_t *mfib, + const ip4_address_t *src, + const ip4_address_t *grp, + u32 len) +{ + uword * hash, * result; + i32 mask_len; + u64 key; + + mask_len = len; + + if (PREDICT_TRUE(64 == mask_len)) + { + hash = 
mfib->fib_entry_by_dst_address[mask_len]; + IP4_MFIB_MK_KEY(grp, src, mask_len, key); + + result = hash_get (hash, key); + + if (NULL != result) { + return (result[0]); + } + } + + for (mask_len = 32; mask_len >= 0; mask_len--) + { + hash = mfib->fib_entry_by_dst_address[mask_len]; + IP4_MFIB_MK_GRP_KEY(grp, mask_len, key); + + result = hash_get (hash, key); + + if (NULL != result) { + return (result[0]); + } + } + return (FIB_NODE_INDEX_INVALID); +} + +void +ip4_mfib_table_entry_insert (ip4_mfib_t *mfib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len, + fib_node_index_t fib_entry_index) +{ + uword * hash, * result; + u64 key; + + IP4_MFIB_MK_KEY(grp, src, len, key); + hash = mfib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) { + /* + * adding a new entry + */ + if (NULL == hash) { + hash = hash_create (32 /* elts */, sizeof (uword)); + hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK); + } + hash = hash_set(hash, key, fib_entry_index); + mfib->fib_entry_by_dst_address[len] = hash; + } + else + { + ASSERT(0); + } +} + +void +ip4_mfib_table_entry_remove (ip4_mfib_t *mfib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len) +{ + uword * hash, * result; + u64 key; + + IP4_MFIB_MK_KEY(grp, src, len, key); + hash = mfib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) + { + /* + * removing a non-existant entry. i'll allow it. 
+ */ + } + else + { + hash_unset(hash, key); + } + + mfib->fib_entry_by_dst_address[len] = hash; +} + +static void +ip4_mfib_table_show_all (ip4_mfib_t *mfib, + vlib_main_t * vm) +{ + fib_node_index_t *mfib_entry_indicies; + fib_node_index_t *mfib_entry_index; + int i; + + mfib_entry_indicies = NULL; + + for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++) + { + uword * hash = mfib->fib_entry_by_dst_address[i]; + + if (NULL != hash) + { + hash_pair_t * p; + + hash_foreach_pair (p, hash, + ({ + vec_add1(mfib_entry_indicies, p->value[0]); + })); + } + } + + vec_sort_with_function(mfib_entry_indicies, mfib_entry_cmp_for_sort); + + vec_foreach(mfib_entry_index, mfib_entry_indicies) + { + vlib_cli_output(vm, "%U", + format_mfib_entry, + *mfib_entry_index, + MFIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(mfib_entry_indicies); +} + +static void +ip4_mfib_table_show_one (ip4_mfib_t *mfib, + vlib_main_t * vm, + ip4_address_t *src, + ip4_address_t *grp, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_mfib_entry, + ip4_mfib_table_lookup(mfib, src, grp, mask_len), + MFIB_ENTRY_FORMAT_DETAIL); +} + +static clib_error_t * +ip4_show_mfib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_main_t * im4 = &ip4_main; + mfib_table_t *mfib_table; + int verbose, matching; + ip4_address_t grp, src = {{0}}; + u32 mask = 32; + int i, table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + verbose = 0; + + else if (unformat (input, "%U %U", + unformat_ip4_address, &src, + unformat_ip4_address, &grp)) + { + matching = 1; + mask = 64; + } + else if (unformat (input, "%U", unformat_ip4_address, &grp)) + { + matching = 1; + mask = 32; + } + else if (unformat (input, "%U/%d", + unformat_ip4_address, &grp, &mask)) + matching = 1; + else if (unformat (input, "table %d", 
&table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (mfib_table, im4->mfibs, + ({ + ip4_mfib_t *mfib = &mfib_table->v4; + + if (table_id >= 0 && table_id != (int)mfib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)mfib->index) + continue; + + vlib_cli_output (vm, "%U, fib_index %d", + format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP4, + mfib->index); + + /* Show summary? */ + if (! verbose) + { + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++) + { + uword * hash = mfib->fib_entry_by_dst_address[i]; + uword n_elts = hash_elts (hash); + if (n_elts > 0) + vlib_cli_output (vm, "%20d%16d", i, n_elts); + } + continue; + } + + if (!matching) + { + ip4_mfib_table_show_all(mfib, vm); + } + else + { + ip4_mfib_table_show_one(mfib, vm, &src, &grp, mask); + } + })); + + return 0; +} + +/*? + * This command displays the IPv4 MulticasrFIB Tables (VRF Tables) and + * the route entries for each table. + * + * @note This command will run for a long time when the FIB tables are + * comprised of millions of entries. For those senarios, consider displaying + * a single table or summary mode. 
+ * + * @cliexpar + * Example of how to display all the IPv4 Multicast FIB tables: + * @cliexstart{show ip fib} + * ipv4-VRF:0, fib_index 0 + * (*, 0.0.0.0/0): flags:D, + * Interfaces: + * multicast-ip4-chain + * [@1]: dpo-drop ip4 + * (*, 232.1.1.1/32): + * Interfaces: + * test-eth1: Forward, + * test-eth2: Forward, + * test-eth0: Accept, + * multicast-ip4-chain + * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]] + * [0] [@1]: ipv4-mcast: test-eth1: IP4: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00 + * [1] [@1]: ipv4-mcast: test-eth2: IP4: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00 + * + * @cliexend + * Example of how to display a summary of all IPv4 FIB tables: + * @cliexstart{show ip fib summary} + * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 32 4 + * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 24 2 + * 32 4 + * @cliexend + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { + .path = "show ip mfib", + .short_help = "show ip mfib [summary] [table ] [index ] [[/]] [] [ ]", + .function = ip4_show_mfib, +}; +/* *INDENT-ON* */ diff --git a/src/vnet/mfib/ip4_mfib.h b/src/vnet/mfib/ip4_mfib.h new file mode 100644 index 00000000000..6fc74a368bd --- /dev/null +++ b/src/vnet/mfib/ip4_mfib.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @brief The IPv4 Multicast-FIB + * + * FIXME + * + * This IPv4 FIB is used by the protocol independent FIB. So directly using + * this APIs in client code is not encouraged. However, this IPv4 FIB can be + * used if all the client wants is an IPv4 prefix data-base + */ + +#ifndef __IP4_MFIB_H__ +#define __IP4_MFIB_H__ + +#include +#include + +#include + +extern fib_node_index_t ip4_mfib_table_lookup(const ip4_mfib_t *fib, + const ip4_address_t *src, + const ip4_address_t *grp, + u32 len); +extern fib_node_index_t ip4_mfib_table_lookup_exact_match(const ip4_mfib_t *fib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len); + +extern void ip4_mfib_table_entry_remove(ip4_mfib_t *fib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len); + +extern void ip4_mfib_table_entry_insert(ip4_mfib_t *fib, + const ip4_address_t *grp, + const ip4_address_t *src, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip4_mfib_table_destroy(ip4_mfib_t *fib); + +/** + * @brief Get the FIB at the given index + */ +static inline ip4_mfib_t * +ip4_mfib_get (u32 index) +{ + return (&(pool_elt_at_index(ip4_main.mfibs, index)->v4)); +} + +/** + * @brief Get or create an IPv4 fib. + * + * Get or create an IPv4 fib with the provided table ID. + * + * @param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * @returns A pointer to the retrieved or created fib. 
+ * + */ +extern u32 ip4_mfib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip4_mfib_table_create_and_lock(void); + +static inline +u32 ip4_mfib_index_from_table_id (u32 table_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->mfib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip4_mfib_table_get_index_for_sw_if_index(u32 sw_if_index); + + +#endif + diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c new file mode 100644 index 00000000000..0c2e4c7b796 --- /dev/null +++ b/src/vnet/mfib/ip6_mfib.c @@ -0,0 +1,663 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +/** + * The number of bytes in an address/ask key in the radix tree + * First byte is the length in bytes. + */ +#define IP6_MFIB_KEY_LEN 33 + +/** + * Key and mask for radix + */ +typedef struct ip6_mfib_key_t_ +{ + u8 key[IP6_MFIB_KEY_LEN]; + u8 mask[IP6_MFIB_KEY_LEN]; +} ip6_mfib_key_t; + +/** + * An object that is inserted into the radix tree. + * Since it's in the tree and has pointers, it cannot realloc and so cannot + * come from a vlib pool. 
+ */ +typedef struct ip6_mfib_node_t_ +{ + struct radix_node i6mn_nodes[2]; + ip6_mfib_key_t i6mn_key; + index_t i6mn_entry; +} ip6_mfib_node_t; + +static const mfib_prefix_t all_zeros = { + /* (*,*) */ + .fp_src_addr = { + .ip6.as_u64 = {0, 0}, + }, + .fp_grp_addr = { + .ip6.as_u64 = {0, 0}, + }, + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP6, +}; + +typedef enum ip6_mfib_special_type_t_ { + IP6_MFIB_SPECIAL_TYPE_NONE, + IP6_MFIB_SPECIAL_TYPE_SOLICITED, +} ip6_mfib_special_type_t; + +typedef struct ip6_mfib_special_t_ { + /** + * @brief solicited or not + */ + ip6_mfib_special_type_t ims_type; + + /** + * @brief the Prefix length + */ + u8 ims_len; + + /** + * @brief The last byte of the mcast address + */ + u8 ims_byte; + /** + * @brief The scope of the address + */ + u8 ims_scope; +} ip6_mfib_special_t; + +static const ip6_mfib_special_t ip6_mfib_specials[] = +{ + { + /* + * Add ff02::1:ff00:0/104 via local route for all tables. + * This is required for neighbor discovery to work. + */ + .ims_type = IP6_MFIB_SPECIAL_TYPE_SOLICITED, + .ims_len = 104, + }, + { + /* + * all-routers multicast address + */ + .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE, + .ims_scope = IP6_MULTICAST_SCOPE_link_local, + .ims_byte = IP6_MULTICAST_GROUP_ID_all_routers, + .ims_len = 128, + }, + { + /* + * all-nodes multicast address + */ + .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE, + .ims_scope = IP6_MULTICAST_SCOPE_link_local, + .ims_byte = IP6_MULTICAST_GROUP_ID_all_hosts, + .ims_len = 128, + }, + { + /* + * Add all-mldv2 multicast address via local route for all tables + */ + .ims_type = IP6_MFIB_SPECIAL_TYPE_NONE, + .ims_len = 128, + .ims_scope = IP6_MULTICAST_SCOPE_link_local, + .ims_byte = IP6_MULTICAST_GROUP_ID_mldv2_routers, + } +}; + +#define FOR_EACH_IP6_SPECIAL(_pfx, _body) \ +{ \ + const ip6_mfib_special_t *_spec; \ + u8 _ii; \ + for (_ii = 0; \ + _ii < ARRAY_LEN(ip6_mfib_specials); \ + _ii++) \ + { \ + _spec = &ip6_mfib_specials[_ii]; \ + if (IP6_MFIB_SPECIAL_TYPE_SOLICITED == 
_spec->ims_type) \ + { \ + ip6_set_solicited_node_multicast_address( \ + &(_pfx)->fp_grp_addr.ip6, 0); \ + } \ + else \ + { \ + ip6_set_reserved_multicast_address ( \ + &(_pfx)->fp_grp_addr.ip6, \ + _spec->ims_scope, \ + _spec->ims_byte); \ + } \ + (_pfx)->fp_len = _spec->ims_len; \ + do { _body; } while (0); \ + } \ +} + + +static u32 +ip6_create_mfib_with_table_id (u32 table_id) +{ + mfib_table_t *mfib_table; + mfib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + }; + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + + pool_get_aligned(ip6_main.mfibs, mfib_table, CLIB_CACHE_LINE_BYTES); + memset(mfib_table, 0, sizeof(*mfib_table)); + + mfib_table->mft_proto = FIB_PROTOCOL_IP6; + mfib_table->mft_index = + mfib_table->v6.index = + (mfib_table - ip6_main.mfibs); + + hash_set (ip6_main.mfib_index_by_table_id, + table_id, + mfib_table->mft_index); + + mfib_table->mft_table_id = + mfib_table->v6.table_id = + table_id; + + mfib_table_lock(mfib_table->mft_index, FIB_PROTOCOL_IP6); + + mfib_table->v6.rhead = + clib_mem_alloc_aligned (sizeof(*mfib_table->v6.rhead), + CLIB_CACHE_LINE_BYTES); + rn_inithead0(mfib_table->v6.rhead, 8); + + /* + * add the special entries into the new FIB + */ + mfib_table_entry_update(mfib_table->mft_index, + &all_zeros, + MFIB_SOURCE_DEFAULT_ROUTE, + MFIB_ENTRY_FLAG_DROP); + + /* + * Add each of the specials + */ + FOR_EACH_IP6_SPECIAL(&pfx, + ({ + mfib_table_entry_path_update(mfib_table->mft_index, + &pfx, + MFIB_SOURCE_SPECIAL, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + })); + + return (mfib_table->mft_index); +} + +void +ip6_mfib_table_destroy (ip6_mfib_t *mfib) +{ + mfib_table_t *mfib_table = (mfib_table_t*)mfib; + fib_node_index_t mfei; + mfib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + }; + const fib_route_path_t path_for_us = { + .frp_proto = FIB_PROTOCOL_IP6, + 
.frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + + /* + * remove all the specials we added when the table was created. + */ + FOR_EACH_IP6_SPECIAL(&pfx, + { + mfib_table_entry_path_remove(mfib_table->mft_index, + &pfx, + MFIB_SOURCE_SPECIAL, + &path_for_us); + }); + + mfei = mfib_table_lookup_exact_match(mfib_table->mft_index, &all_zeros); + mfib_table_entry_delete_index(mfei, MFIB_SOURCE_DEFAULT_ROUTE); + + /* + * validate no more routes. + */ + ASSERT(0 == mfib_table->mft_total_route_counts); + ASSERT(~0 != mfib_table->mft_table_id); + + hash_unset (ip6_main.mfib_index_by_table_id, mfib_table->mft_table_id); + clib_mem_free(mfib_table->v6.rhead); + pool_put(ip6_main.mfibs, mfib_table); +} + +void +ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable) +{ + const fib_route_path_t path = { + .frp_proto = FIB_PROTOCOL_IP6, + .frp_addr = zero_addr, + .frp_sw_if_index = sw_if_index, + .frp_fib_index = ~0, + .frp_weight = 0, + }; + mfib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + }; + u32 mfib_index; + + vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index); + mfib_index = ip6_mfib_table_get_index_for_sw_if_index(sw_if_index); + + if (is_enable) + { + FOR_EACH_IP6_SPECIAL(&pfx, + { + mfib_table_entry_path_update(mfib_index, + &pfx, + MFIB_SOURCE_SPECIAL, + &path, + MFIB_ITF_FLAG_ACCEPT); + }); + } + else + { + FOR_EACH_IP6_SPECIAL(&pfx, + { + mfib_table_entry_path_remove(mfib_index, + &pfx, + MFIB_SOURCE_SPECIAL, + &path); + }); + } +} + +u32 +ip6_mfib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = ip6_mfib_index_from_table_id(table_id); + if (~0 == index) + return ip6_create_mfib_with_table_id(table_id); + mfib_table_lock(index, FIB_PROTOCOL_IP6); + + return (index); +} + +u32 +ip6_mfib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip6_main.mfib_index_by_sw_if_index)) + { + /* + * This is 
the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip6_main.mfib_index_by_sw_if_index[sw_if_index]); +} + +#define IP6_MFIB_MK_KEY(_grp, _src, _key) \ +{ \ + (_key)->key[0] = 33; \ + memcpy((_key)->key+1, _grp, 16); \ + memcpy((_key)->key+17, _src, 16); \ +} + +#define IP6_MFIB_MK_KEY_MASK(_grp, _src, _len, _key) \ +{ \ + IP6_MFIB_MK_KEY(_grp, _src, _key); \ + \ + (_key)->mask[0] = 33; \ + if (_len <= 128) \ + { \ + memcpy((_key)->mask+1, &ip6_main.fib_masks[_len], 16); \ + memset((_key)->mask+17, 0, 16); \ + } \ + else \ + { \ + ASSERT(_len == 256); \ + memcpy((_key)->mask+1, &ip6_main.fib_masks[128], 16); \ + memcpy((_key)->mask+17, &ip6_main.fib_masks[128], 16); \ + } \ +} + +/* + * ip6_fib_table_lookup_exact_match + * + * Exact match prefix lookup + */ +fib_node_index_t +ip6_mfib_table_lookup_exact_match (const ip6_mfib_t *mfib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len) +{ + ip6_mfib_node_t *i6mn; + ip6_mfib_key_t key; + + IP6_MFIB_MK_KEY_MASK(grp, src, len, &key); + + i6mn = (ip6_mfib_node_t*) rn_lookup(key.key, key.mask, + (struct radix_node_head *)mfib->rhead); + + if (NULL == i6mn) + { + return (INDEX_INVALID); + } + + return (i6mn->i6mn_entry); +} + +/* + * ip6_fib_table_lookup + * + * Longest prefix match + */ +fib_node_index_t +ip6_mfib_table_lookup (const ip6_mfib_t *mfib, + const ip6_address_t *src, + const ip6_address_t *grp, + u32 len) +{ + ip6_mfib_node_t *i6mn; + ip6_mfib_key_t key; + + IP6_MFIB_MK_KEY_MASK(grp, src, len, &key); + + i6mn = (ip6_mfib_node_t*) rn_search_m(key.key, + mfib->rhead->rnh_treetop, + key.mask); + + ASSERT(NULL != i6mn); + + return (i6mn->i6mn_entry); +} + +/* + * ip6_fib_table_lookup + * + * Longest prefix match no mask + */ +fib_node_index_t +ip6_mfib_table_lookup2 (const ip6_mfib_t *mfib, + const ip6_address_t *src, + const ip6_address_t *grp) +{ + ip6_mfib_node_t *i6mn; + ip6_mfib_key_t key; + + IP6_MFIB_MK_KEY(grp, src, &key); + + i6mn = 
(ip6_mfib_node_t*) rn_match(key.key, + (struct radix_node_head *)mfib->rhead); // const cast + + ASSERT(NULL != i6mn); + + return (i6mn->i6mn_entry); +} + +void +ip6_mfib_table_entry_insert (ip6_mfib_t *mfib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len, + fib_node_index_t mfib_entry_index) +{ + ip6_mfib_node_t *i6mn = clib_mem_alloc(sizeof(*i6mn)); + + memset(i6mn, 0, sizeof(*i6mn)); + + IP6_MFIB_MK_KEY_MASK(grp, src, len, &i6mn->i6mn_key); + i6mn->i6mn_entry = mfib_entry_index; + + if (NULL == rn_addroute(i6mn->i6mn_key.key, + i6mn->i6mn_key.mask, + mfib->rhead, + i6mn->i6mn_nodes)) + { + ASSERT(0); + } +} + +void +ip6_mfib_table_entry_remove (ip6_mfib_t *mfib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len) +{ + ip6_mfib_node_t *i6mn; + ip6_mfib_key_t key; + + IP6_MFIB_MK_KEY_MASK(grp, src, len, &key); + + i6mn = (ip6_mfib_node_t*) rn_delete(key.key, key.mask, mfib->rhead); + + clib_mem_free(i6mn); +} + +static clib_error_t * +ip6_mfib_module_init (vlib_main_t * vm) +{ + return (NULL); +} + +VLIB_INIT_FUNCTION(ip6_mfib_module_init); + +static void +ip6_mfib_table_show_one (ip6_mfib_t *mfib, + vlib_main_t * vm, + ip6_address_t *src, + ip6_address_t *grp, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_mfib_entry, + ip6_mfib_table_lookup(mfib, src, grp, mask_len), + MFIB_ENTRY_FORMAT_DETAIL); +} + +typedef struct ip6_mfib_show_ctx_t_ { + u32 fib_index; + fib_node_index_t *entries; +} ip6_mfib_show_ctx_t; + + +static int +ip6_mfib_table_collect_entries (struct radix_node *rn, void *arg) +{ + ip6_mfib_show_ctx_t *ctx = arg; + ip6_mfib_node_t *i6mn; + + i6mn = (ip6_mfib_node_t*) rn; + + vec_add1(ctx->entries, i6mn->i6mn_entry); + + return (0); +} + +static void +ip6_mfib_table_show_all (ip6_mfib_t *mfib, + vlib_main_t * vm) +{ + fib_node_index_t *mfib_entry_index; + ip6_mfib_show_ctx_t ctx = { + .fib_index = mfib->index, + .entries = NULL, + }; + + rn_walktree(mfib->rhead, + ip6_mfib_table_collect_entries, + &ctx); + + 
vec_sort_with_function(ctx.entries, mfib_entry_cmp_for_sort); + + vec_foreach(mfib_entry_index, ctx.entries) + { + vlib_cli_output(vm, "%U", + format_mfib_entry, + *mfib_entry_index, + MFIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(ctx.entries); +} + +static clib_error_t * +ip6_show_mfib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_main_t * im4 = &ip6_main; + mfib_table_t *mfib_table; + int verbose, matching; + ip6_address_t grp, src = {{0}}; + u32 mask = 32; + int table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + verbose = 0; + + else if (unformat (input, "%U %U", + unformat_ip6_address, &src, + unformat_ip6_address, &grp)) + { + matching = 1; + mask = 64; + } + else if (unformat (input, "%U", unformat_ip6_address, &grp)) + { + matching = 1; + mask = 32; + } + else if (unformat (input, "%U/%d", + unformat_ip6_address, &grp, &mask)) + matching = 1; + else if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (mfib_table, im4->mfibs, + ({ + ip6_mfib_t *mfib = &mfib_table->v6; + + if (table_id >= 0 && table_id != (int)mfib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)mfib->index) + continue; + + vlib_cli_output (vm, "%U, fib_index %d", + format_mfib_table_name, mfib->index, FIB_PROTOCOL_IP6, + mfib->index); + + /* Show summary? */ + if (! 
verbose) + { + /* vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); */ + /* for (i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++) */ + /* { */ + /* uword * hash = mfib->fib_entry_by_dst_address[i]; */ + /* uword n_elts = hash_elts (hash); */ + /* if (n_elts > 0) */ + /* vlib_cli_output (vm, "%20d%16d", i, n_elts); */ + /* } */ + continue; + } + + if (!matching) + { + ip6_mfib_table_show_all(mfib, vm); + } + else + { + ip6_mfib_table_show_one(mfib, vm, &src, &grp, mask); + } + })); + + return 0; +} + +/* + * This command displays the IPv6 Multicast FIB Tables (VRF Tables) and + * the route entries for each table. + * + * @note This command will run for a long time when the FIB tables are + * comprised of millions of entries. For those scenarios, consider displaying + * a single table or summary mode. + * + * @cliexpar + * Example of how to display all the IPv6 Multicast FIB tables: + * @cliexstart{show ip6 mfib} + * ipv4-VRF:0, fib_index 0 + * (*, 0.0.0.0/0): flags:D, + * Interfaces: + * multicast-ip6-chain + * [@1]: dpo-drop ip6 + * (*, 232.1.1.1/32): + * Interfaces: + * test-eth1: Forward, + * test-eth2: Forward, + * test-eth0: Accept, + * multicast-ip6-chain + * [@2]: dpo-replicate: [index:1 buckets:2 to:[0:0]] + * [0] [@1]: ipv4-mcast: test-eth1: IP6: d0:d1:d2:d3:d4:01 -> 01:00:05:00:00:00 + * [1] [@1]: ipv4-mcast: test-eth2: IP6: d0:d1:d2:d3:d4:02 -> 01:00:05:00:00:00 + * + * @cliexend + * Example of how to display a summary of all IPv6 FIB tables: + * @cliexstart{show ip6 mfib summary} + * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 32 4 + * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 24 2 + * 32 4 + * @cliexend + */ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { + .path = "show ip6 mfib", + .short_help = "show ip mfib [summary] [table ] [index ] [[/]] [] [ ]", + .function = ip6_show_mfib, +}; 
+/* *INDENT-ON* */ diff --git a/src/vnet/mfib/ip6_mfib.h b/src/vnet/mfib/ip6_mfib.h new file mode 100644 index 00000000000..d91af46dc93 --- /dev/null +++ b/src/vnet/mfib/ip6_mfib.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief The IPv4 Multicast-FIB + * + * FIXME + * + * This IPv4 FIB is used by the protocol independent FIB. So directly using + * this APIs in client code is not encouraged. 
However, this IPv4 FIB can be + * used if all the client wants is an IPv4 prefix data-base + */ + +#ifndef __IP6_MFIB_H__ +#define __IP6_MFIB_H__ + +#include +#include + +#include + +extern fib_node_index_t ip6_mfib_table_lookup(const ip6_mfib_t *fib, + const ip6_address_t *src, + const ip6_address_t *grp, + u32 len); +extern fib_node_index_t ip6_mfib_table_lookup_exact_match(const ip6_mfib_t *fib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len); + +extern void ip6_mfib_table_entry_remove(ip6_mfib_t *fib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len); + +extern void ip6_mfib_table_entry_insert(ip6_mfib_t *fib, + const ip6_address_t *grp, + const ip6_address_t *src, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip6_mfib_table_destroy(ip6_mfib_t *fib); + +/** + * @brief + * Add/remove the interface from the accepting list of the special MFIB entries + */ +extern void ip6_mfib_interface_enable_disable(u32 sw_if_index, + int is_enable); + +/** + * @brief Get the FIB at the given index + */ +static inline ip6_mfib_t * +ip6_mfib_get (u32 index) +{ + return (&(pool_elt_at_index(ip6_main.mfibs, index)->v6)); +} + +/** + * @brief Get or create an IPv4 fib. + * + * Get or create an IPv4 fib with the provided table ID. + * + * @param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * @returns A pointer to the retrieved or created fib. 
+ * + */ +extern u32 ip6_mfib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip6_mfib_table_create_and_lock(void); + + +static inline +u32 ip6_mfib_index_from_table_id (u32 table_id) +{ + ip6_main_t * im = &ip6_main; + uword * p; + + p = hash_get (im->mfib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip6_mfib_table_get_index_for_sw_if_index(u32 sw_if_index); + +/** + * @brief Data-plane lookup function + */ +extern fib_node_index_t ip6_mfib_table_lookup2(const ip6_mfib_t *mfib, + const ip6_address_t *src, + const ip6_address_t *grp); + +#endif + diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c new file mode 100644 index 00000000000..479ce5f1442 --- /dev/null +++ b/src/vnet/mfib/mfib_entry.c @@ -0,0 +1,1096 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include +#include + +/** + * Debug macro + */ +#ifdef MFIB_DEBUG +#define MFIB_ENTRY_DBG(_e, _fmt, _args...) 
\ +{ \ + u8*__tmp = NULL; \ + __tmp = format(__tmp, "e:[%d:%U", \ + mfib_entry_get_index(_e), \ + format_ip46_address, \ + &_e->mfe_prefix.fp_grp_addr, \ + IP46_TYPE_ANY); \ + __tmp = format(__tmp, "/%d,", \ + _e->mfe_prefix.fp_len); \ + __tmp = format(__tmp, "%U]", \ + mfib_entry_get_index(_e), \ + format_ip46_address, \ + &_e->mfe_prefix.fp_src_addr, \ + IP46_TYPE_ANY); \ + __tmp = format(__tmp, _fmt, ##_args); \ + clib_warning("%s", __tmp); \ + vec_free(__tmp); \ +} +#else +#define MFIB_ENTRY_DBG(_e, _fmt, _args...) +#endif + +/** + * The source of an MFIB entry + */ +typedef struct mfib_entry_src_t_ +{ + /** + * Which source this is + */ + mfib_source_t mfes_src; + + /** + * The path-list of forwarding interfaces + */ + fib_node_index_t mfes_pl; + + /** + * Route flags + */ + mfib_entry_flags_t mfes_flags; + + /** + * The hash table of all interfaces + */ + mfib_itf_t *mfes_itfs; +} mfib_entry_src_t; + +/** + * String names for each source + */ +static const char *mfib_source_names[] = MFIB_SOURCE_NAMES; + +/* + * Pool for all fib_entries + */ +mfib_entry_t *mfib_entry_pool; + +static fib_node_t * +mfib_entry_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)mfib_entry_get(index)); +} + +static fib_protocol_t +mfib_entry_get_proto (const mfib_entry_t * mfib_entry) +{ + return (mfib_entry->mfe_prefix.fp_proto); +} + +fib_forward_chain_type_t +mfib_entry_get_default_chain_type (const mfib_entry_t *mfib_entry) +{ + switch (mfib_entry->mfe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_MCAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_MCAST_IP6); + case FIB_PROTOCOL_MPLS: + ASSERT(0); + break; + } + return (FIB_FORW_CHAIN_TYPE_MCAST_IP4); +} + +static u8 * +format_mfib_entry_dpo (u8 * s, va_list * args) +{ + index_t fei = va_arg(*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg(*args, u32); + + return (format(s, "%U", + format_mfib_entry, fei, + MFIB_ENTRY_FORMAT_BRIEF)); +} + +u8 * +format_mfib_entry (u8 * 
s, va_list * args) +{ + fib_node_index_t fei, mfi; + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + u32 sw_if_index; + int level; + + fei = va_arg (*args, fib_node_index_t); + level = va_arg (*args, int); + mfib_entry = mfib_entry_get(fei); + + s = format (s, "%U", format_mfib_prefix, &mfib_entry->mfe_prefix); + s = format (s, ": %U", format_mfib_entry_flags, mfib_entry->mfe_flags); + + if (level >= MFIB_ENTRY_FORMAT_DETAIL) + { + s = format (s, "\n"); + s = format (s, " fib:%d", mfib_entry->mfe_fib_index); + s = format (s, " index:%d", mfib_entry_get_index(mfib_entry)); + s = format (s, " locks:%d\n", mfib_entry->mfe_node.fn_locks); + vec_foreach(msrc, mfib_entry->mfe_srcs) + { + s = format (s, " src:%s", mfib_source_names[msrc->mfes_src]); + s = format (s, ": %U\n", format_mfib_entry_flags, msrc->mfes_flags); + if (FIB_NODE_INDEX_INVALID != msrc->mfes_pl) + { + s = fib_path_list_format(msrc->mfes_pl, s); + } + hash_foreach(sw_if_index, mfi, msrc->mfes_itfs, + ({ + s = format(s, " %U\n", format_mfib_itf, mfi); + })); + } + } + + s = format(s, "\n Interfaces:"); + hash_foreach(sw_if_index, mfi, mfib_entry->mfe_itfs, + ({ + s = format(s, "\n %U", format_mfib_itf, mfi); + })); + + s = format(s, "\n %U-chain\n %U", + format_fib_forw_chain_type, + mfib_entry_get_default_chain_type(mfib_entry), + format_dpo_id, + &mfib_entry->mfe_rep, + 2); + s = format(s, "\n"); + + if (level >= MFIB_ENTRY_FORMAT_DETAIL2) + { + s = format(s, "\nchildren:"); + s = fib_node_children_format(mfib_entry->mfe_node.fn_children, s); + } + + return (s); +} + +static mfib_entry_t* +mfib_entry_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_MFIB_ENTRY == node->fn_type); +#endif + return ((mfib_entry_t*)node); +} + +static int +mfib_entry_src_cmp_for_sort (void * v1, + void * v2) +{ + mfib_entry_src_t *esrc1 = v1, *esrc2 = v2; + + return (esrc1->mfes_src - esrc2->mfes_src); +} + +static void +mfib_entry_src_init (mfib_entry_t *mfib_entry, + mfib_source_t 
source) + +{ + mfib_entry_src_t esrc = { + .mfes_pl = FIB_NODE_INDEX_INVALID, + .mfes_flags = MFIB_ENTRY_FLAG_NONE, + .mfes_src = source, + }; + + vec_add1(mfib_entry->mfe_srcs, esrc); + vec_sort_with_function(mfib_entry->mfe_srcs, + mfib_entry_src_cmp_for_sort); +} + +static mfib_entry_src_t * +mfib_entry_src_find (const mfib_entry_t *mfib_entry, + mfib_source_t source, + u32 *index) + +{ + mfib_entry_src_t *esrc; + int ii; + + ii = 0; + vec_foreach(esrc, mfib_entry->mfe_srcs) + { + if (esrc->mfes_src == source) + { + if (NULL != index) + { + *index = ii; + } + return (esrc); + } + else + { + ii++; + } + } + + return (NULL); +} + +static mfib_entry_src_t * +mfib_entry_src_find_or_create (mfib_entry_t *mfib_entry, + mfib_source_t source) +{ + mfib_entry_src_t *esrc; + + esrc = mfib_entry_src_find(mfib_entry, source, NULL); + + if (NULL == esrc) + { + mfib_entry_src_init(mfib_entry, source); + } + + return (mfib_entry_src_find(mfib_entry, source, NULL)); +} + +static mfib_entry_src_t* +mfib_entry_get_best_src (const mfib_entry_t *mfib_entry) +{ + mfib_entry_src_t *bsrc; + + /* + * the enum of sources is deliberately arranged in priority order + */ + if (0 == vec_len(mfib_entry->mfe_srcs)) + { + bsrc = NULL; + } + else + { + bsrc = vec_elt_at_index(mfib_entry->mfe_srcs, 0); + } + + return (bsrc); +} + +static void +mfib_entry_src_flush (mfib_entry_src_t *msrc) +{ + u32 sw_if_index; + index_t mfii; + + hash_foreach(sw_if_index, mfii, msrc->mfes_itfs, + ({ + mfib_itf_delete(mfib_itf_get(mfii)); + })); +} + +static void +mfib_entry_src_remove (mfib_entry_t *mfib_entry, + mfib_source_t source) + +{ + mfib_entry_src_t *msrc; + u32 index = ~0; + + msrc = mfib_entry_src_find(mfib_entry, source, &index); + + if (NULL != msrc) + { + mfib_entry_src_flush(msrc); + vec_del1(mfib_entry->mfe_srcs, index); + } +} + +static int +mfib_entry_src_n_itfs (const mfib_entry_src_t *msrc) +{ + return (hash_elts(msrc->mfes_itfs)); +} + + +static void +mfib_entry_last_lock_gone (fib_node_t 
*node) +{ + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + + mfib_entry = mfib_entry_from_fib_node(node); + + dpo_reset(&mfib_entry->mfe_rep); + + MFIB_ENTRY_DBG(mfib_entry, "last-lock"); + + vec_foreach(msrc, mfib_entry->mfe_srcs) + { + mfib_entry_src_flush(msrc); + } + + fib_path_list_unlock(mfib_entry->mfe_parent); + vec_free(mfib_entry->mfe_srcs); + + fib_node_deinit(&mfib_entry->mfe_node); + pool_put(mfib_entry_pool, mfib_entry); +} + +/* + * mfib_entry_back_walk_notify + * + * A back walk has reach this entry. + */ +static fib_node_back_walk_rc_t +mfib_entry_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + // FIXME - re-evalute + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +static void +mfib_entry_show_memory (void) +{ + fib_show_memory_usage("multicast-Entry", + pool_elts(mfib_entry_pool), + pool_len(mfib_entry_pool), + sizeof(mfib_entry_t)); +} + +/* + * The MFIB entry's graph node virtual function table + */ +static const fib_node_vft_t mfib_entry_vft = { + .fnv_get = mfib_entry_get_node, + .fnv_last_lock = mfib_entry_last_lock_gone, + .fnv_back_walk = mfib_entry_back_walk_notify, + .fnv_mem_show = mfib_entry_show_memory, +}; + +u32 +mfib_entry_child_add (fib_node_index_t mfib_entry_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + return (fib_node_child_add(FIB_NODE_TYPE_MFIB_ENTRY, + mfib_entry_index, + child_type, + child_index)); +}; + +void +mfib_entry_child_remove (fib_node_index_t mfib_entry_index, + u32 sibling_index) +{ + fib_node_child_remove(FIB_NODE_TYPE_MFIB_ENTRY, + mfib_entry_index, + sibling_index); +} + +static mfib_entry_t * +mfib_entry_alloc (u32 fib_index, + const mfib_prefix_t *prefix, + fib_node_index_t *mfib_entry_index) +{ + mfib_entry_t *mfib_entry; + + pool_get(mfib_entry_pool, mfib_entry); + memset(mfib_entry, 0, sizeof(*mfib_entry)); + + fib_node_init(&mfib_entry->mfe_node, + FIB_NODE_TYPE_MFIB_ENTRY); + + mfib_entry->mfe_fib_index = fib_index; + mfib_entry->mfe_prefix = 
*prefix; + mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; + + dpo_reset(&mfib_entry->mfe_rep); + + *mfib_entry_index = mfib_entry_get_index(mfib_entry); + + MFIB_ENTRY_DBG(mfib_entry, "alloc"); + + return (mfib_entry); +} + +typedef struct mfib_entry_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + fib_forward_chain_type_t fct; +} mfib_entry_collect_forwarding_ctx_t; + +static int +mfib_entry_src_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + mfib_entry_collect_forwarding_ctx_t *ctx; + load_balance_path_t *nh; + + ctx = arg; + + /* + * if the path is not resolved, don't include it. + */ + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } + + switch (ctx->fct) + { + case FIB_FORW_CHAIN_TYPE_MCAST_IP4: + case FIB_FORW_CHAIN_TYPE_MCAST_IP6: + /* + * EOS traffic with no label to stack, we need the IP Adj + */ + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo); + break; + + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; + } + + return (!0); +} + +static void +mfib_entry_stack (mfib_entry_t *mfib_entry) +{ + mfib_entry_collect_forwarding_ctx_t ctx = { + .next_hops = NULL, + .fct = mfib_entry_get_default_chain_type(mfib_entry), + }; + dpo_proto_t dp; + + dp = fib_proto_to_dpo(mfib_entry_get_proto(mfib_entry)); + + if (FIB_NODE_INDEX_INVALID != mfib_entry->mfe_parent) + { + fib_path_list_walk(mfib_entry->mfe_parent, + mfib_entry_src_collect_forwarding, + &ctx); + + if (!dpo_id_is_valid(&mfib_entry->mfe_rep) || + dpo_is_drop(&mfib_entry->mfe_rep)) + { + dpo_id_t tmp_dpo = DPO_INVALID; + + dpo_set(&tmp_dpo, + DPO_REPLICATE, dp, + replicate_create(0, dp)); + + dpo_stack(DPO_MFIB_ENTRY, dp, + 
&mfib_entry->mfe_rep, + &tmp_dpo); + + dpo_reset(&tmp_dpo); + } + replicate_multipath_update(&mfib_entry->mfe_rep, + ctx.next_hops); + } + else + { + dpo_stack(DPO_MFIB_ENTRY, dp, + &mfib_entry->mfe_rep, + drop_dpo_get(dp)); + } +} + +static void +mfib_entry_forwarding_path_add (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) +{ + fib_node_index_t old_pl_index; + fib_route_path_t *rpaths; + + /* + * path-lists require a vector of paths + */ + rpaths = NULL; + vec_add1(rpaths, rpath[0]); + + old_pl_index = msrc->mfes_pl; + + if (FIB_NODE_INDEX_INVALID == msrc->mfes_pl) + { + msrc->mfes_pl = + fib_path_list_create(FIB_PATH_LIST_FLAG_NO_URPF, + rpaths); + } + else + { + msrc->mfes_pl = + fib_path_list_copy_and_path_add(msrc->mfes_pl, + FIB_PATH_LIST_FLAG_NO_URPF, + rpaths); + } + fib_path_list_lock(msrc->mfes_pl); + fib_path_list_unlock(old_pl_index); + + vec_free(rpaths); +} + +static int +mfib_entry_forwarding_path_remove (mfib_entry_src_t *msrc, + const fib_route_path_t *rpath) +{ + fib_node_index_t old_pl_index; + fib_route_path_t *rpaths; + + /* + * path-lists require a vector of paths + */ + rpaths = NULL; + vec_add1(rpaths, rpath[0]); + + old_pl_index = msrc->mfes_pl; + + msrc->mfes_pl = + fib_path_list_copy_and_path_remove(msrc->mfes_pl, + FIB_PATH_LIST_FLAG_NONE, + rpaths); + + fib_path_list_lock(msrc->mfes_pl); + fib_path_list_unlock(old_pl_index); + + vec_free(rpaths); + + return (FIB_NODE_INDEX_INVALID != msrc->mfes_pl); +} + +static void +mfib_entry_recalculate_forwarding (mfib_entry_t *mfib_entry) +{ + fib_node_index_t old_pl_index; + mfib_entry_src_t *bsrc; + + old_pl_index = mfib_entry->mfe_parent; + + /* + * copy the forwarding data from the bast source + */ + bsrc = mfib_entry_get_best_src(mfib_entry); + + if (NULL == bsrc) + { + mfib_entry->mfe_parent = FIB_NODE_INDEX_INVALID; + } + else + { + mfib_entry->mfe_parent = bsrc->mfes_pl; + mfib_entry->mfe_flags = bsrc->mfes_flags; + mfib_entry->mfe_itfs = bsrc->mfes_itfs; + } + + /* + * re-stack 
the entry on the best forwarding info. + */ + if (old_pl_index != mfib_entry->mfe_parent || + FIB_NODE_INDEX_INVALID == old_pl_index) + { + mfib_entry_stack(mfib_entry); + + fib_path_list_lock(mfib_entry->mfe_parent); + fib_path_list_unlock(old_pl_index); + } +} + + +fib_node_index_t +mfib_entry_create (u32 fib_index, + mfib_source_t source, + const mfib_prefix_t *prefix, + mfib_entry_flags_t entry_flags) +{ + fib_node_index_t mfib_entry_index; + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + + mfib_entry = mfib_entry_alloc(fib_index, prefix, + &mfib_entry_index); + msrc = mfib_entry_src_find_or_create(mfib_entry, source); + msrc->mfes_flags = entry_flags; + + mfib_entry_recalculate_forwarding(mfib_entry); + + return (mfib_entry_index); +} + +static int +mfib_entry_ok_for_delete (mfib_entry_t *mfib_entry) +{ + return (0 == vec_len(mfib_entry->mfe_srcs)); +} + +static int +mfib_entry_src_ok_for_delete (const mfib_entry_src_t *msrc) +{ + return ((MFIB_ENTRY_FLAG_NONE == msrc->mfes_flags && + 0 == mfib_entry_src_n_itfs(msrc))); +} + +int +mfib_entry_update (fib_node_index_t mfib_entry_index, + mfib_source_t source, + mfib_entry_flags_t entry_flags) +{ + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + + mfib_entry = mfib_entry_get(mfib_entry_index); + msrc = mfib_entry_src_find_or_create(mfib_entry, source); + msrc->mfes_flags = entry_flags; + + if (mfib_entry_src_ok_for_delete(msrc)) + { + /* + * this source has no interfaces and no flags. 
+ * it has nothing left to give - remove it + */ + mfib_entry_src_remove(mfib_entry, source); + } + + mfib_entry_recalculate_forwarding(mfib_entry); + + return (mfib_entry_ok_for_delete(mfib_entry)); +} + +static void +mfib_entry_itf_add (mfib_entry_src_t *msrc, + u32 sw_if_index, + index_t mi) +{ + hash_set(msrc->mfes_itfs, sw_if_index, mi); +} + +static void +mfib_entry_itf_remove (mfib_entry_src_t *msrc, + u32 sw_if_index) +{ + mfib_itf_t *mfi; + + mfi = mfib_entry_itf_find(msrc->mfes_itfs, sw_if_index); + + mfib_itf_delete(mfi); + + hash_unset(msrc->mfes_itfs, sw_if_index); +} + +void +mfib_entry_path_update (fib_node_index_t mfib_entry_index, + mfib_source_t source, + const fib_route_path_t *rpath, + mfib_itf_flags_t itf_flags) +{ + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + mfib_itf_t *mfib_itf; + + mfib_entry = mfib_entry_get(mfib_entry_index); + ASSERT(NULL != mfib_entry); + msrc = mfib_entry_src_find_or_create(mfib_entry, source); + + /* + * search for the interface in the current set + */ + mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, + rpath[0].frp_sw_if_index); + + if (NULL == mfib_itf) + { + /* + * this is a path we do not yet have. If it is forwarding then we + * add it to the replication set + */ + if (itf_flags & MFIB_ITF_FLAG_FORWARD) + { + mfib_entry_forwarding_path_add(msrc, rpath); + } + /* + * construct a new ITF for this entry's list + */ + mfib_entry_itf_add(msrc, + rpath[0].frp_sw_if_index, + mfib_itf_create(rpath[0].frp_sw_if_index, + itf_flags)); + } + else + { + int was_forwarding = !!(mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD); + int is_forwarding = !!(itf_flags & MFIB_ITF_FLAG_FORWARD); + + if (!was_forwarding && is_forwarding) + { + mfib_entry_forwarding_path_add(msrc, rpath); + } + else if (was_forwarding && !is_forwarding) + { + mfib_entry_forwarding_path_remove(msrc, rpath); + } + /* + * packets in flight see these updates. 
+ */ + mfib_itf->mfi_flags = itf_flags; + } + + mfib_entry_recalculate_forwarding(mfib_entry); +} + +/* + * mfib_entry_path_remove + * + * remove a path from the entry. + * return the mfib_entry's index if it is still present, INVALID otherwise. + */ +int +mfib_entry_path_remove (fib_node_index_t mfib_entry_index, + mfib_source_t source, + const fib_route_path_t *rpath) +{ + mfib_entry_t *mfib_entry; + mfib_entry_src_t *msrc; + mfib_itf_t *mfib_itf; + + mfib_entry = mfib_entry_get(mfib_entry_index); + ASSERT(NULL != mfib_entry); + msrc = mfib_entry_src_find(mfib_entry, source, NULL); + + if (NULL == msrc) + { + /* + * there are no paths left for this source + */ + return (mfib_entry_ok_for_delete(mfib_entry)); + } + + /* + * search for the interface in the current set + */ + mfib_itf = mfib_entry_itf_find(msrc->mfes_itfs, + rpath[0].frp_sw_if_index); + + if (NULL == mfib_itf) + { + /* + * removing a path that does not exist + */ + return (mfib_entry_ok_for_delete(mfib_entry)); + } + + /* + * we have this path. If it is forwarding then we + * remove it to the replication set + */ + if (mfib_itf->mfi_flags & MFIB_ITF_FLAG_FORWARD) + { + mfib_entry_forwarding_path_remove(msrc, rpath); + } + + /* + * remove the interface/path from this entry's list + */ + mfib_entry_itf_remove(msrc, rpath[0].frp_sw_if_index); + + if (mfib_entry_src_ok_for_delete(msrc)) + { + /* + * this source has no interfaces and no flags. 
+ * it has nothing left to give - remove it + */ + mfib_entry_src_remove(mfib_entry, source); + } + + mfib_entry_recalculate_forwarding(mfib_entry); + + return (mfib_entry_ok_for_delete(mfib_entry)); +} + +/** + * mfib_entry_delete + * + * The source is withdrawing all the paths it provided + */ +int +mfib_entry_delete (fib_node_index_t mfib_entry_index, + mfib_source_t source) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + mfib_entry_src_remove(mfib_entry, source); + + mfib_entry_recalculate_forwarding(mfib_entry); + + return (mfib_entry_ok_for_delete(mfib_entry)); +} + +static int +fib_ip4_address_compare (ip4_address_t * a1, + ip4_address_t * a2) +{ + /* + * IP addresses are unsiged ints. the return value here needs to be signed + * a simple subtraction won't cut it. + * If the addresses are the same, the sort order is undefiend, so phoey. + */ + return ((clib_net_to_host_u32(a1->data_u32) > + clib_net_to_host_u32(a2->data_u32) ) ? + 1 : -1); +} + +static int +fib_ip6_address_compare (ip6_address_t * a1, + ip6_address_t * a2) +{ + int i; + for (i = 0; i < ARRAY_LEN (a1->as_u16); i++) + { + int cmp = (clib_net_to_host_u16 (a1->as_u16[i]) - + clib_net_to_host_u16 (a2->as_u16[i])); + if (cmp != 0) + return cmp; + } + return 0; +} + +static int +mfib_entry_cmp (fib_node_index_t mfib_entry_index1, + fib_node_index_t mfib_entry_index2) +{ + mfib_entry_t *mfib_entry1, *mfib_entry2; + int cmp = 0; + + mfib_entry1 = mfib_entry_get(mfib_entry_index1); + mfib_entry2 = mfib_entry_get(mfib_entry_index2); + + switch (mfib_entry1->mfe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + cmp = fib_ip4_address_compare(&mfib_entry1->mfe_prefix.fp_grp_addr.ip4, + &mfib_entry2->mfe_prefix.fp_grp_addr.ip4); + + if (0 == cmp) + { + cmp = fib_ip4_address_compare(&mfib_entry1->mfe_prefix.fp_src_addr.ip4, + &mfib_entry2->mfe_prefix.fp_src_addr.ip4); + } + break; + case FIB_PROTOCOL_IP6: + cmp = 
fib_ip6_address_compare(&mfib_entry1->mfe_prefix.fp_grp_addr.ip6, + &mfib_entry2->mfe_prefix.fp_grp_addr.ip6); + + if (0 == cmp) + { + cmp = fib_ip6_address_compare(&mfib_entry1->mfe_prefix.fp_src_addr.ip6, + &mfib_entry2->mfe_prefix.fp_src_addr.ip6); + } + break; + case FIB_PROTOCOL_MPLS: + ASSERT(0); + cmp = 0; + break; + } + + if (0 == cmp) { + cmp = (mfib_entry1->mfe_prefix.fp_len - mfib_entry2->mfe_prefix.fp_len); + } + return (cmp); +} + +int +mfib_entry_cmp_for_sort (void *i1, void *i2) +{ + fib_node_index_t *mfib_entry_index1 = i1, *mfib_entry_index2 = i2; + + return (mfib_entry_cmp(*mfib_entry_index1, + *mfib_entry_index2)); +} + +void +mfib_entry_lock (fib_node_index_t mfib_entry_index) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + fib_node_lock(&mfib_entry->mfe_node); +} + +void +mfib_entry_unlock (fib_node_index_t mfib_entry_index) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + fib_node_unlock(&mfib_entry->mfe_node); +} + +static void +mfib_entry_dpo_lock (dpo_id_t *dpo) +{ +} +static void +mfib_entry_dpo_unlock (dpo_id_t *dpo) +{ +} + +const static dpo_vft_t mfib_entry_dpo_vft = { + .dv_lock = mfib_entry_dpo_lock, + .dv_unlock = mfib_entry_dpo_unlock, + .dv_format = format_mfib_entry_dpo, + .dv_mem_show = mfib_entry_show_memory, +}; + +const static char* const mfib_entry_ip4_nodes[] = +{ + "ip4-mfib-forward-rpf", + NULL, +}; +const static char* const mfib_entry_ip6_nodes[] = +{ + "ip6-mfib-forward-rpf", + NULL, +}; + +const static char* const * const mfib_entry_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = mfib_entry_ip4_nodes, + [DPO_PROTO_IP6] = mfib_entry_ip6_nodes, +}; + +void +mfib_entry_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_MFIB_ENTRY, &mfib_entry_vft); + dpo_register(DPO_MFIB_ENTRY, &mfib_entry_dpo_vft, mfib_entry_nodes); +} + +void +mfib_entry_encode (fib_node_index_t mfib_entry_index, + fib_route_path_encode_t **api_rpaths) +{ + mfib_entry_t 
*mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + fib_path_list_walk(mfib_entry->mfe_parent, fib_path_encode, api_rpaths); +} + +void +mfib_entry_get_prefix (fib_node_index_t mfib_entry_index, + mfib_prefix_t *pfx) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + *pfx = mfib_entry->mfe_prefix; +} + +u32 +mfib_entry_get_fib_index (fib_node_index_t mfib_entry_index) +{ + mfib_entry_t *mfib_entry; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + return (mfib_entry->mfe_fib_index); +} + +void +mfib_entry_contribute_forwarding (fib_node_index_t mfib_entry_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo) +{ + /* + * An IP mFIB entry can only provide a forwarding chain that + * is the same IP proto as the prefix. + * No use-cases (i know of) for other combinations. + */ + mfib_entry_t *mfib_entry; + dpo_proto_t dp; + + mfib_entry = mfib_entry_get(mfib_entry_index); + + dp = fib_proto_to_dpo(mfib_entry->mfe_prefix.fp_proto); + + if (type == fib_forw_chain_type_from_dpo_proto(dp)) + { + dpo_copy(dpo, &mfib_entry->mfe_rep); + } + else + { + dpo_copy(dpo, drop_dpo_get(dp)); + } +} + +u32 +mfib_entry_pool_size (void) +{ + return (pool_elts(mfib_entry_pool)); +} + +static clib_error_t * +show_mfib_entry_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_node_index_t fei; + + if (unformat (input, "%d", &fei)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(mfib_entry_pool, fei)) + { + vlib_cli_output (vm, "%d@%U", + fei, + format_mfib_entry, fei, + MFIB_ENTRY_FORMAT_DETAIL2); + } + else + { + vlib_cli_output (vm, "entry %d invalid", fei); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB Entries:"); + pool_foreach_index(fei, mfib_entry_pool, + ({ + vlib_cli_output (vm, "%d@%U", + fei, + format_mfib_entry, fei, + MFIB_ENTRY_FORMAT_BRIEF); + })); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (show_mfib_entry, static) = { + .path = "show mfib 
entry", + .function = show_mfib_entry_command, + .short_help = "show mfib entry", +}; diff --git a/src/vnet/mfib/mfib_entry.h b/src/vnet/mfib/mfib_entry.h new file mode 100644 index 00000000000..cc5d5326ef6 --- /dev/null +++ b/src/vnet/mfib/mfib_entry.h @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MFIB_ENTRY_H__ +#define __MFIB_ENTRY_H__ + +#include +#include +#include +#include +#include + +/** + * An entry in a FIB table. + * + * This entry represents a route added to the FIB that is stored + * in one of the FIB tables. + */ +typedef struct mfib_entry_t_ { + CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); + /** + * Base class. The entry's node representation in the graph. + */ + fib_node_t mfe_node; + /** + * The prefix of the route + */ + mfib_prefix_t mfe_prefix; + /** + * The index of the FIB table this entry is in + */ + u32 mfe_fib_index; + /** + * the path-list for which this entry is a child. This is also the path-list + * that is contributing forwarding for this entry. + */ + fib_node_index_t mfe_parent; + /** + * index of this entry in the parent's child list. + * This is set when this entry is added as a child, but can also + * be changed by the parent as it manages its list. 
+ */ + u32 mfe_sibling; + + /** + * A vector of sources contributing forwarding + */ + struct mfib_entry_src_t_ *mfe_srcs; + + /** + * 2nd cache line has the members used in the data plane + */ + CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); + + /** + * The Replicate used for forwarding. + */ + dpo_id_t mfe_rep; + + /** + * Route flags + */ + mfib_entry_flags_t mfe_flags; + + /** + * A hash table of interfaces + */ + mfib_itf_t *mfe_itfs; +} mfib_entry_t; + +#define MFIB_ENTRY_FORMAT_BRIEF (0x0) +#define MFIB_ENTRY_FORMAT_DETAIL (0x1) +#define MFIB_ENTRY_FORMAT_DETAIL2 (0x2) + +extern u8 *format_mfib_entry(u8 * s, va_list * args); + + +extern fib_node_index_t mfib_entry_create(u32 fib_index, + mfib_source_t source, + const mfib_prefix_t *prefix, + mfib_entry_flags_t entry_flags); + +extern int mfib_entry_update(fib_node_index_t fib_entry_index, + mfib_source_t source, + mfib_entry_flags_t entry_flags); + +extern void mfib_entry_path_update(fib_node_index_t fib_entry_index, + mfib_source_t source, + const fib_route_path_t *rpath, + mfib_itf_flags_t itf_flags); + + +extern int mfib_entry_path_remove(fib_node_index_t fib_entry_index, + mfib_source_t source, + const fib_route_path_t *rpath); + +extern int mfib_entry_delete(fib_node_index_t mfib_entry_index, + mfib_source_t source); + +extern int mfib_entry_cmp_for_sort(void *i1, void *i2); + +extern u32 mfib_entry_child_add(fib_node_index_t mfib_entry_index, + fib_node_type_t type, + fib_node_index_t child_index); +extern void mfib_entry_child_remove(fib_node_index_t mfib_entry_index, + u32 sibling_index); + +extern void mfib_entry_lock(fib_node_index_t fib_entry_index); +extern void mfib_entry_unlock(fib_node_index_t fib_entry_index); + +extern void mfib_entry_get_prefix(fib_node_index_t fib_entry_index, + mfib_prefix_t *pfx); +extern u32 mfib_entry_get_fib_index(fib_node_index_t fib_entry_index); + +extern void mfib_entry_contribute_forwarding( + fib_node_index_t mfib_entry_index, + fib_forward_chain_type_t type, + 
dpo_id_t *dpo); + +extern void mfib_entry_module_init(void); + + +extern mfib_entry_t *mfib_entry_pool; + +static inline mfib_entry_t * +mfib_entry_get (fib_node_index_t index) +{ + return (pool_elt_at_index(mfib_entry_pool, index)); +} +static inline fib_node_index_t +mfib_entry_get_index (const mfib_entry_t *mfe) +{ + return (mfe - mfib_entry_pool); +} + + +static inline mfib_itf_t * +mfib_entry_itf_find (mfib_itf_t *itfs, + u32 sw_if_index) +{ + uword *p; + + p = hash_get(itfs, sw_if_index); + + if (NULL != p) + { + return (mfib_itf_get(p[0])); + } + + return (NULL); +} + +static inline mfib_itf_t * +mfib_entry_get_itf (const mfib_entry_t *mfe, + u32 sw_if_index) +{ + return (mfib_entry_itf_find(mfe->mfe_itfs, sw_if_index)); +} + +#endif diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c new file mode 100644 index 00000000000..5fe0a57c03b --- /dev/null +++ b/src/vnet/mfib/mfib_forward.c @@ -0,0 +1,512 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef struct mfib_forward_lookup_trace_t_ { + u32 entry_index; + u32 fib_index; +} mfib_forward_lookup_trace_t; + +static u8 * +format_mfib_forward_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mfib_forward_lookup_trace_t * t = va_arg (*args, mfib_forward_lookup_trace_t *); + + s = format (s, "fib %d entry %d", t->fib_index, t->entry_index); + return s; +} + +/* Common trace function for all ip4-forward next nodes. */ +void +mfib_forward_lookup_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + u32 * from, n_left; + ip4_main_t * im = &ip4_main; + + n_left = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left >= 4) + { + mfib_forward_lookup_trace_t * t0, * t1; + vlib_buffer_t * b0, * b1; + u32 bi0, bi1; + + /* Prefetch next iteration. 
*/ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->entry_index = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + t1->fib_index = vec_elt (im->mfib_index_by_sw_if_index, + vnet_buffer(b1)->sw_if_index[VLIB_RX]); + } + from += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + mfib_forward_lookup_trace_t * t0; + vlib_buffer_t * b0; + u32 bi0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + from += 1; + n_left -= 1; + } +} + +typedef enum mfib_forward_lookup_next_t_ { + MFIB_FORWARD_LOOKUP_NEXT_RPF, + MFIB_FORWARD_LOOKUP_N_NEXT, +} mfib_forward_lookup_next_t; + +static uword +mfib_forward_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_v4) +{ + u32 n_left_from, n_left_to_next, * from, * to_next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, MFIB_FORWARD_LOOKUP_NEXT_RPF, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + fib_node_index_t mfei0; + vlib_buffer_t * p0; + u32 fib_index0; + u32 pi0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + 
p0 = vlib_get_buffer (vm, pi0); + + if (is_v4) + { + ip4_header_t * ip0; + + fib_index0 = vec_elt (ip4_main.mfib_index_by_sw_if_index, + vnet_buffer(p0)->sw_if_index[VLIB_RX]); + ip0 = vlib_buffer_get_current (p0); + mfei0 = ip4_mfib_table_lookup(ip4_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address, + 64); + } + else + { + ip6_header_t * ip0; + + fib_index0 = vec_elt (ip6_main.mfib_index_by_sw_if_index, + vnet_buffer(p0)->sw_if_index[VLIB_RX]); + ip0 = vlib_buffer_get_current (p0); + mfei0 = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index0), + &ip0->src_address, + &ip0->dst_address); + } + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = mfei0; + } + + vlib_put_next_frame(vm, node, + MFIB_FORWARD_LOOKUP_NEXT_RPF, + n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + mfib_forward_lookup_trace(vm, node, frame); + + return frame->n_vectors; +} + +static uword +ip4_mfib_forward_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mfib_forward_lookup (vm, node, frame, 1)); +} + +VLIB_REGISTER_NODE (ip4_mfib_forward_lookup_node, static) = { + .function = ip4_mfib_forward_lookup, + .name = "ip4-mfib-forward-lookup", + .vector_size = sizeof (u32), + + .format_trace = format_mfib_forward_lookup_trace, + + .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT, + .next_nodes = { + [MFIB_FORWARD_LOOKUP_NEXT_RPF] = "ip4-mfib-forward-rpf", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_lookup_node, + ip4_mfib_forward_lookup) + +static uword +ip6_mfib_forward_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mfib_forward_lookup (vm, node, frame, 0)); +} + +VLIB_REGISTER_NODE (ip6_mfib_forward_lookup_node, static) = { + .function = ip6_mfib_forward_lookup, + .name = "ip6-mfib-forward-lookup", + .vector_size = sizeof (u32), + + .format_trace = format_mfib_forward_lookup_trace, + + .n_next_nodes = MFIB_FORWARD_LOOKUP_N_NEXT, + .next_nodes = { + [MFIB_FORWARD_LOOKUP_NEXT_RPF] = 
"ip6-mfib-forward-rpf", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_lookup_node, + ip6_mfib_forward_lookup) + + +typedef struct mfib_forward_rpf_trace_t_ { + u32 entry_index; + u32 sw_if_index; + mfib_itf_flags_t itf_flags; +} mfib_forward_rpf_trace_t; + +typedef enum mfib_forward_rpf_next_t_ { + MFIB_FORWARD_RPF_NEXT_DROP, + MFIB_FORWARD_RPF_N_NEXT, +} mfib_forward_rpf_next_t; + +static u8 * +format_mfib_forward_rpf_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mfib_forward_rpf_trace_t * t = va_arg (*args, mfib_forward_rpf_trace_t *); + + s = format (s, "entry %d", t->entry_index); + s = format (s, " %d", t->sw_if_index); + s = format (s, " %U", format_mfib_itf_flags, t->itf_flags); + + return s; +} + +static int +mfib_forward_connected_check (vlib_buffer_t * b0, + u32 sw_if_index, + int is_v4) +{ + /* + * Lookup the source of the IP packet in the + * FIB. return true if the entry is attached. + */ + index_t lbi0; + + if (is_v4) + { + load_balance_t *lb0; + ip4_header_t *ip0; + + ip0 = vlib_buffer_get_current(b0); + + lbi0 = ip4_fib_forwarding_lookup( + ip4_fib_table_get_index_for_sw_if_index( + sw_if_index), + &ip0->src_address); + lb0 = load_balance_get(lbi0); + + return (FIB_ENTRY_FLAG_ATTACHED & + lb0->lb_fib_entry_flags); + } + else + { + ASSERT(0); + } + return (0); +} + +static void +mfib_forward_itf_signal (vlib_main_t *vm, + const mfib_entry_t *mfe, + mfib_itf_t *mfi, + vlib_buffer_t *b0) +{ + mfib_itf_flags_t old_flags; + + old_flags = __sync_fetch_and_or(&mfi->mfi_flags, + MFIB_ITF_FLAG_SIGNAL_PRESENT); + + if (!(old_flags & MFIB_ITF_FLAG_SIGNAL_PRESENT)) + { + /* + * we were the lucky ones to set the signal present flag + */ + if (!(old_flags & MFIB_ITF_FLAG_DONT_PRESERVE)) + { + /* + * preserve a copy of the packet for the control + * plane to examine. 
+ * Only allow one preserved packet at at time, since + * when the signal present flag is cleared so is the + * preserved packet. + */ + mfib_signal_push(mfe, mfi, b0); + } + else + { + /* + * The control plane just wants the signal, not the packet as well + */ + mfib_signal_push(mfe, mfi, NULL); + } + } + /* + * else + * there is already a signal present on this interface that the + * control plane has not yet acknowledged + */ +} + +always_inline uword +mfib_forward_rpf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_v4) +{ + u32 n_left_from, n_left_to_next, * from, * to_next; + mfib_forward_rpf_next_t next; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = MFIB_FORWARD_RPF_NEXT_DROP; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + fib_node_index_t mfei0; + const mfib_entry_t *mfe0; + mfib_itf_t *mfi0; + vlib_buffer_t * b0; + u32 pi0, next0; + mfib_itf_flags_t iflags0; + mfib_entry_flags_t eflags0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + b0 = vlib_get_buffer (vm, pi0); + mfei0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + mfe0 = mfib_entry_get(mfei0); + mfi0 = mfib_entry_get_itf(mfe0, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + /* + * throughout this function we are 'PREDICT' optimising + * for the case of throughput traffic that is not replicated + * to the host stack nor sets local flags + */ + if (PREDICT_TRUE(NULL != mfi0)) + { + iflags0 = mfi0->mfi_flags; + } + else + { + iflags0 = MFIB_ITF_FLAG_NONE; + } + eflags0 = mfe0->mfe_flags; + + if (PREDICT_FALSE(eflags0 & MFIB_ENTRY_FLAG_CONNECTED)) + { + /* + * lookup the source in the unicast FIB - check it + * matches a connected. 
+ */ + if (mfib_forward_connected_check( + b0, + vnet_buffer(b0)->sw_if_index[VLIB_RX], + is_v4)) + { + mfib_forward_itf_signal(vm, mfe0, mfi0, b0); + } + } + if (PREDICT_FALSE((eflags0 & MFIB_ENTRY_FLAG_SIGNAL) ^ + (iflags0 & MFIB_ITF_FLAG_NEGATE_SIGNAL))) + { + /* + * Entry signal XOR interface negate-signal + */ + if (NULL != mfi0) + { + mfib_forward_itf_signal(vm, mfe0, mfi0, b0); + } + } + + if (PREDICT_TRUE((iflags0 & MFIB_ITF_FLAG_ACCEPT) || + (eflags0 & MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF))) + { + /* + * This interface is accepting packets for the matching entry + */ + next0 = mfe0->mfe_rep.dpoi_next_node; + + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = + mfe0->mfe_rep.dpoi_index; + } + else + { + next0 = MFIB_FORWARD_RPF_NEXT_DROP; + } + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + mfib_forward_rpf_trace_t *t0; + + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->entry_index = mfei0; + if (NULL == mfi0) + { + t0->sw_if_index = ~0; + t0->itf_flags = MFIB_ITF_FLAG_NONE; + } + else + { + t0->sw_if_index = mfi0->mfi_sw_if_index; + t0->itf_flags = mfi0->mfi_flags; + } + } + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame(vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip4_mfib_forward_rpf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mfib_forward_rpf(vm, node, frame, 1)); +} + + +VLIB_REGISTER_NODE (ip4_mfib_forward_rpf_node, static) = { + .function = ip4_mfib_forward_rpf, + .name = "ip4-mfib-forward-rpf", + .vector_size = sizeof (u32), + + .format_trace = format_mfib_forward_rpf_trace, + + .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, + .next_nodes = { + [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mfib_forward_rpf_node, + ip4_mfib_forward_rpf) + +static uword +ip6_mfib_forward_rpf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 
(mfib_forward_rpf(vm, node, frame, 0)); +} + + +VLIB_REGISTER_NODE (ip6_mfib_forward_rpf_node, static) = { + .function = ip6_mfib_forward_rpf, + .name = "ip6-mfib-forward-rpf", + .vector_size = sizeof (u32), + + .format_trace = format_mfib_forward_rpf_trace, + + .n_next_nodes = MFIB_FORWARD_RPF_N_NEXT, + .next_nodes = { + [MFIB_FORWARD_RPF_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mfib_forward_rpf_node, + ip6_mfib_forward_rpf) + diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c new file mode 100644 index 00000000000..b9fa1ec6be7 --- /dev/null +++ b/src/vnet/mfib/mfib_itf.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +mfib_itf_t *mfib_itf_pool; + +index_t +mfib_itf_create (u32 sw_if_index, + mfib_itf_flags_t mfi_flags) +{ + mfib_itf_t *mfib_itf; + + pool_get_aligned(mfib_itf_pool, mfib_itf, + CLIB_CACHE_LINE_BYTES); + + mfib_itf->mfi_sw_if_index = sw_if_index; + mfib_itf->mfi_flags = mfi_flags; + mfib_itf->mfi_si = INDEX_INVALID; + + return (mfib_itf - mfib_itf_pool); +} + +void +mfib_itf_delete (mfib_itf_t *mfi) +{ + mfib_signal_remove_itf(mfi); + pool_put(mfib_itf_pool, mfi); +} + +u8 * +format_mfib_itf (u8 * s, va_list * args) +{ + mfib_itf_t *mfib_itf; + vnet_main_t *vnm; + index_t mfi; + + mfi = va_arg (*args, index_t); + + vnm = vnet_get_main(); + mfib_itf = mfib_itf_get(mfi); + + if (~0 != mfib_itf->mfi_sw_if_index) + { + return (format(s, " %U: %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, + mfib_itf->mfi_sw_if_index), + format_mfib_itf_flags, mfib_itf->mfi_flags)); + } + else + { + return (format(s, " local: %U", + format_mfib_itf_flags, mfib_itf->mfi_flags)); + } + return (s); +} + +static clib_error_t * +show_mfib_itf_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t mfii; + + if (unformat (input, "%d", &mfii)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(mfib_itf_pool, mfii)) + { + vlib_cli_output (vm, "%d@%U", + mfii, + format_mfib_itf, mfii); + } + else + { + vlib_cli_output (vm, "itf %d invalid", mfii); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "mFIB interfaces::"); + pool_foreach_index(mfii, mfib_itf_pool, + ({ + vlib_cli_output (vm, "%d@%U", + mfii, + format_mfib_itf, mfii); + })); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (show_mfib_itf, static) = { + .path = "show mfib interface", + .function = show_mfib_itf_command, + .short_help = "show mfib interface", +}; diff --git a/src/vnet/mfib/mfib_itf.h b/src/vnet/mfib/mfib_itf.h new file mode 100644 index 00000000000..5f26a476525 --- /dev/null +++ 
b/src/vnet/mfib/mfib_itf.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MFIB_ITF_H__ +#define __MFIB_ITF_H__ + +#include +#include + +/** + * @brief An interface associated with a particular MFIB entry + */ +typedef struct mfib_itf_t_ +{ + /** + * @brief Falags on the entry + */ + mfib_itf_flags_t mfi_flags; + + /** + * The SW IF index that this MFIB interface represents + */ + u32 mfi_sw_if_index; + + /** + * The index of the signal in the pending list + */ + u32 mfi_si; +} mfib_itf_t; + + +extern index_t mfib_itf_create(u32 sw_if_index, + mfib_itf_flags_t mfi_flags); +extern void mfib_itf_delete(mfib_itf_t *mfi); + +extern u8 *format_mfib_itf(u8 * s, va_list * args); + +extern mfib_itf_t *mfib_itf_pool; + +static inline mfib_itf_t * +mfib_itf_get (index_t mi) +{ + return (pool_elt_at_index(mfib_itf_pool, mi)); +} +static inline index_t +mfib_itf_get_index (const mfib_itf_t *mfi) +{ + return (mfi - mfib_itf_pool); +} + +#endif diff --git a/src/vnet/mfib/mfib_signal.c b/src/vnet/mfib/mfib_signal.c new file mode 100644 index 00000000000..9f6205de419 --- /dev/null +++ b/src/vnet/mfib/mfib_signal.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include +#include +#include + +/** + * @brief Pool of signals + */ +static mfib_signal_t *mfib_signal_pool; + +/** + * @brief pool of dlist elements + */ +static dlist_elt_t *mfib_signal_dlist_pool; + +/** + * the list/set of interfaces with signals pending + */ +typedef struct mfib_signal_q_t_ +{ + /** + * the dlist indext that is the head of the list + */ + u32 mip_head; + + /** + * Spin lock to protect the list + */ + int mip_lock; +} mfib_signal_q_t; + +/** + * @brief The pending queue of signals to deliver to the control plane + */ +static mfib_signal_q_t mfib_signal_pending ; + +static void +mfib_signal_list_init (void) +{ + dlist_elt_t *head; + u32 hi; + + pool_get(mfib_signal_dlist_pool, head); + hi = head - mfib_signal_dlist_pool; + + mfib_signal_pending.mip_head = hi; + clib_dlist_init(mfib_signal_dlist_pool, hi); +} + +void +mfib_signal_module_init (void) +{ + mfib_signal_list_init(); +} + +int +mfib_signal_send_one (struct _unix_shared_memory_queue *q, + u32 context) +{ + u32 li, si; + + /* + * with the lock held, pop a signal from the q. 
+ */ + while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1)) + ; + { + li = clib_dlist_remove_head(mfib_signal_dlist_pool, + mfib_signal_pending.mip_head); + } + mfib_signal_pending.mip_lock = 0; + + if (~0 != li) + { + mfib_signal_t *mfs; + mfib_itf_t *mfi; + dlist_elt_t *elt; + + elt = pool_elt_at_index(mfib_signal_dlist_pool, li); + si = elt->value; + + mfs = pool_elt_at_index(mfib_signal_pool, si); + mfi = mfib_itf_get(mfs->mfs_itf); + mfi->mfi_si = INDEX_INVALID; + __sync_fetch_and_and(&mfi->mfi_flags, + ~MFIB_ITF_FLAG_SIGNAL_PRESENT); + + + vl_mfib_signal_send_one(q, context, mfs); + + /* + * with the lock held, return the resoruces of the signals posted + */ + while (__sync_lock_test_and_set(&mfib_signal_pending.mip_lock, 1)) + ; + { + pool_put_index(mfib_signal_pool, si); + pool_put_index(mfib_signal_dlist_pool, li); + } + mfib_signal_pending.mip_lock = 0; + + return (1); + } + return (0); +} + +void +mfib_signal_push (const mfib_entry_t *mfe, + mfib_itf_t *mfi, + vlib_buffer_t *b0) +{ + mfib_signal_t *mfs; + dlist_elt_t *elt; + u32 si, li; + + while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1)) + ; + { + pool_get(mfib_signal_pool, mfs); + pool_get(mfib_signal_dlist_pool, elt); + + si = mfs - mfib_signal_pool; + li = elt - mfib_signal_dlist_pool; + + elt->value = si; + mfi->mfi_si = li; + + clib_dlist_addhead(mfib_signal_dlist_pool, + mfib_signal_pending.mip_head, + li); + } + mfib_signal_pending.mip_lock = 0; + + mfs->mfs_entry = mfib_entry_get_index(mfe); + mfs->mfs_itf = mfib_itf_get_index(mfi); + + if (NULL != b0) + { + mfs->mfs_buffer_len = b0->current_length; + memcpy(mfs->mfs_buffer, + vlib_buffer_get_current(b0), + (mfs->mfs_buffer_len > MFIB_SIGNAL_BUFFER_SIZE ? + MFIB_SIGNAL_BUFFER_SIZE : + mfs->mfs_buffer_len)); + } + else + { + mfs->mfs_buffer_len = 0; + } +} + +void +mfib_signal_remove_itf (const mfib_itf_t *mfi) +{ + u32 li; + + /* + * lock the queue to prevent further additions while we fiddle. 
+ */ + li = mfi->mfi_si; + + if (INDEX_INVALID != li) + { + /* + * it's in the pending q + */ + while (__sync_lock_test_and_set (&mfib_signal_pending.mip_lock, 1)) + ; + { + dlist_elt_t *elt; + + /* + * with the lock held; + * - remove the signal from the pending list + * - free up the signal and list entry obejcts + */ + clib_dlist_remove(mfib_signal_dlist_pool, li); + + elt = pool_elt_at_index(mfib_signal_dlist_pool, li); + pool_put_index(mfib_signal_pool, elt->value); + pool_put(mfib_signal_dlist_pool, elt); + } + + mfib_signal_pending.mip_lock = 0; + } +} diff --git a/src/vnet/mfib/mfib_signal.h b/src/vnet/mfib/mfib_signal.h new file mode 100644 index 00000000000..732d8aff3e9 --- /dev/null +++ b/src/vnet/mfib/mfib_signal.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MFIB_SIGNAL_H__ +#define __MFIB_SIGNAL_H__ + +#include +#include +#include +#include + +#define MFIB_SIGNAL_BUFFER_SIZE 255 + +/** + * A pair of indicies, for the entry and interface resp. 
+ */ +typedef struct mfib_signal_t_ +{ + fib_node_index_t mfs_entry; + index_t mfs_itf; + + /** + * @brief A buffer copied from the DP plane that triggered the signal + */ + u8 mfs_buffer[MFIB_SIGNAL_BUFFER_SIZE]; + + u8 mfs_buffer_len; +} mfib_signal_t; + + +extern void mfib_signal_push(const mfib_entry_t *mfe, + mfib_itf_t *mfi, + vlib_buffer_t *b0); +extern void mfib_signal_remove_itf(const mfib_itf_t *mfi); + +extern void mfib_signal_module_init(void); + +struct _unix_shared_memory_queue; + +extern void vl_mfib_signal_send_one(struct _unix_shared_memory_queue *q, + u32 context, + const mfib_signal_t *mfs); +extern int mfib_signal_send_one(struct _unix_shared_memory_queue *q, + u32 context); + +#endif + diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c new file mode 100644 index 00000000000..e4c0936d6c9 --- /dev/null +++ b/src/vnet/mfib/mfib_table.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include + +mfib_table_t * +mfib_table_get (fib_node_index_t index, + fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (pool_elt_at_index(ip4_main.mfibs, index)); + case FIB_PROTOCOL_IP6: + return (pool_elt_at_index(ip6_main.mfibs, index)); + case FIB_PROTOCOL_MPLS: + break; + } + ASSERT(0); + return (NULL); +} + +static inline fib_node_index_t +mfib_table_lookup_i (const mfib_table_t *mfib_table, + const mfib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_mfib_table_lookup(&mfib_table->v4, + &prefix->fp_src_addr.ip4, + &prefix->fp_grp_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_mfib_table_lookup(&mfib_table->v6, + &prefix->fp_src_addr.ip6, + &prefix->fp_grp_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + break; + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +mfib_table_lookup (u32 fib_index, + const mfib_prefix_t *prefix) +{ + return (mfib_table_lookup_i(mfib_table_get(fib_index, prefix->fp_proto), prefix)); +} + +static inline fib_node_index_t +mfib_table_lookup_exact_match_i (const mfib_table_t *mfib_table, + const mfib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_mfib_table_lookup_exact_match(&mfib_table->v4, + &prefix->fp_grp_addr.ip4, + &prefix->fp_src_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_mfib_table_lookup_exact_match(&mfib_table->v6, + &prefix->fp_grp_addr.ip6, + &prefix->fp_src_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + break; + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +mfib_table_lookup_exact_match (u32 fib_index, + const mfib_prefix_t *prefix) +{ + return (mfib_table_lookup_exact_match_i(mfib_table_get(fib_index, + prefix->fp_proto), + prefix)); +} + +static void +mfib_table_entry_remove (mfib_table_t *mfib_table, + const mfib_prefix_t *prefix, + 
fib_node_index_t fib_entry_index) +{ + vlib_smp_unsafe_warning(); + + mfib_table->mft_total_route_counts--; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_mfib_table_entry_remove(&mfib_table->v4, + &prefix->fp_grp_addr.ip4, + &prefix->fp_src_addr.ip4, + prefix->fp_len); + break; + case FIB_PROTOCOL_IP6: + ip6_mfib_table_entry_remove(&mfib_table->v6, + &prefix->fp_grp_addr.ip6, + &prefix->fp_src_addr.ip6, + prefix->fp_len); + break; + case FIB_PROTOCOL_MPLS: + ASSERT(0); + break; + } + + mfib_entry_unlock(fib_entry_index); +} + +static void +mfib_table_entry_insert (mfib_table_t *mfib_table, + const mfib_prefix_t *prefix, + fib_node_index_t mfib_entry_index) +{ + vlib_smp_unsafe_warning(); + + mfib_entry_lock(mfib_entry_index); + mfib_table->mft_total_route_counts++; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_mfib_table_entry_insert(&mfib_table->v4, + &prefix->fp_grp_addr.ip4, + &prefix->fp_src_addr.ip4, + prefix->fp_len, + mfib_entry_index); + break; + case FIB_PROTOCOL_IP6: + ip6_mfib_table_entry_insert(&mfib_table->v6, + &prefix->fp_grp_addr.ip6, + &prefix->fp_src_addr.ip6, + prefix->fp_len, + mfib_entry_index); + break; + case FIB_PROTOCOL_MPLS: + break; + } +} + +fib_node_index_t +mfib_table_entry_update (u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + mfib_entry_flags_t entry_flags) +{ + fib_node_index_t mfib_entry_index; + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, prefix->fp_proto); + mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == mfib_entry_index) + { + if (MFIB_ENTRY_FLAG_NONE != entry_flags) + { + /* + * update to a non-existing entry with non-zero flags + */ + mfib_entry_index = mfib_entry_create(fib_index, source, + prefix, entry_flags); + + mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); + } + /* + * else + * the entry doesn't exist and the request is to set no flags + * the result would be 
an entry that doesn't exist - so do nothing + */ + } + else + { + mfib_entry_lock(mfib_entry_index); + + if (mfib_entry_update(mfib_entry_index, source, entry_flags)) + { + /* + * this update means we can now remove the entry. + */ + mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index); + } + + mfib_entry_unlock(mfib_entry_index); + } + + return (mfib_entry_index); +} + +fib_node_index_t +mfib_table_entry_path_update (u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + const fib_route_path_t *rpath, + mfib_itf_flags_t itf_flags) +{ + fib_node_index_t mfib_entry_index; + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, prefix->fp_proto); + mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == mfib_entry_index) + { + mfib_entry_index = mfib_entry_create(fib_index, + source, + prefix, + MFIB_ENTRY_FLAG_NONE); + + mfib_table_entry_insert(mfib_table, prefix, mfib_entry_index); + } + + mfib_entry_path_update(mfib_entry_index, + source, + rpath, + itf_flags); + + return (mfib_entry_index); +} + +void +mfib_table_entry_path_remove (u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + const fib_route_path_t *rpath) +{ + fib_node_index_t mfib_entry_index; + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, prefix->fp_proto); + mfib_entry_index = mfib_table_lookup_exact_match_i(mfib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == mfib_entry_index) + { + /* + * removing an etry that does not exist. i'll allow it. + */ + } + else + { + int no_more_sources; + + /* + * don't nobody go nowhere + */ + mfib_entry_lock(mfib_entry_index); + + no_more_sources = mfib_entry_path_remove(mfib_entry_index, + source, + rpath); + + if (no_more_sources) + { + /* + * last source gone. 
remove from the table + */ + mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index); + } + + mfib_entry_unlock(mfib_entry_index); + } +} + +static void +mfib_table_entry_delete_i (u32 fib_index, + fib_node_index_t mfib_entry_index, + const mfib_prefix_t *prefix, + mfib_source_t source) +{ + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, prefix->fp_proto); + + /* + * don't nobody go nowhere + */ + mfib_entry_lock(mfib_entry_index); + + if (mfib_entry_delete(mfib_entry_index, source)) + { + /* + * last source gone. remove from the table + */ + mfib_table_entry_remove(mfib_table, prefix, mfib_entry_index); + } + /* + * else + * still has sources, leave it be. + */ + + mfib_entry_unlock(mfib_entry_index); +} + +void +mfib_table_entry_delete (u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source) +{ + fib_node_index_t mfib_entry_index; + + mfib_entry_index = mfib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == mfib_entry_index) + { + /* + * removing an etry that does not exist. + * i'll allow it, but i won't like it. 
+ */ + clib_warning("%U not in FIB", format_mfib_prefix, prefix); + } + else + { + mfib_table_entry_delete_i(fib_index, mfib_entry_index, + prefix, source); + } +} + +void +mfib_table_entry_delete_index (fib_node_index_t mfib_entry_index, + mfib_source_t source) +{ + mfib_prefix_t prefix; + + mfib_entry_get_prefix(mfib_entry_index, &prefix); + + mfib_table_entry_delete_i(mfib_entry_get_fib_index(mfib_entry_index), + mfib_entry_index, &prefix, source); +} + +u32 +mfib_table_get_index_for_sw_if_index (fib_protocol_t proto, + u32 sw_if_index) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_mfib_table_get_index_for_sw_if_index(sw_if_index)); + case FIB_PROTOCOL_IP6: + return (ip6_mfib_table_get_index_for_sw_if_index(sw_if_index)); + case FIB_PROTOCOL_MPLS: + ASSERT(0); + break; + } + return (~0); +} + +u32 +mfib_table_find (fib_protocol_t proto, + u32 table_id) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_mfib_index_from_table_id(table_id)); + case FIB_PROTOCOL_IP6: + return (ip6_mfib_index_from_table_id(table_id)); + case FIB_PROTOCOL_MPLS: + ASSERT(0); + break; + } + return (~0); +} + +u32 +mfib_table_find_or_create_and_lock (fib_protocol_t proto, + u32 table_id) +{ + mfib_table_t *mfib_table; + fib_node_index_t fi; + + switch (proto) + { + case FIB_PROTOCOL_IP4: + fi = ip4_mfib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_IP6: + fi = ip6_mfib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_MPLS: + default: + return (~0); + } + + mfib_table = mfib_table_get(fi, proto); + + mfib_table->mft_desc = format(NULL, "%U-VRF:%d", + format_fib_protocol, proto, + table_id); + + return (fi); +} + +static void +mfib_table_destroy (mfib_table_t *mfib_table) +{ + vec_free(mfib_table->mft_desc); + + switch (mfib_table->mft_proto) + { + case FIB_PROTOCOL_IP4: + ip4_mfib_table_destroy(&mfib_table->v4); + break; + case FIB_PROTOCOL_IP6: + ip6_mfib_table_destroy(&mfib_table->v6); + break; + case 
FIB_PROTOCOL_MPLS: + ASSERT(0); + break; + } +} + +void +mfib_table_unlock (u32 fib_index, + fib_protocol_t proto) +{ + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, proto); + mfib_table->mft_locks--; + + if (0 == mfib_table->mft_locks) + { + mfib_table_destroy(mfib_table); + } +} + +void +mfib_table_lock (u32 fib_index, + fib_protocol_t proto) +{ + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, proto); + mfib_table->mft_locks++; +} + +u8* +format_mfib_table_name (u8* s, va_list ap) +{ + fib_node_index_t fib_index = va_arg(ap, fib_node_index_t); + fib_protocol_t proto = va_arg(ap, int); // int promotion + mfib_table_t *mfib_table; + + mfib_table = mfib_table_get(fib_index, proto); + + s = format(s, "%v", mfib_table->mft_desc); + + return (s); +} + +static clib_error_t * +mfib_module_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, fib_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, rn_module_init))) + return (error); + + mfib_entry_module_init(); + mfib_signal_module_init(); + + return (error); +} + +VLIB_INIT_FUNCTION(mfib_module_init); diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h new file mode 100644 index 00000000000..4faa69ee999 --- /dev/null +++ b/src/vnet/mfib/mfib_table.h @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MFIB_TABLE_H__ +#define __MFIB_TABLE_H__ + +#include +#include + +#include + +/** + * @brief + * A protocol Independent IP multicast FIB table + */ +typedef struct mfib_table_t_ +{ + /** + * A union of the protocol specific FIBs that provide the + * underlying LPM mechanism. + * This element is first in the struct so that it is in the + * first cache line. + */ + union { + ip4_mfib_t v4; + ip6_mfib_t v6; + }; + + /** + * Which protocol this table serves. Used to switch on the union above. + */ + fib_protocol_t mft_proto; + + /** + * number of locks on the table + */ + u16 mft_locks; + + /** + * Table ID (hash key) for this FIB. + */ + u32 mft_table_id; + + /** + * Index into FIB vector. + */ + fib_node_index_t mft_index; + + /** + * Total route counters + */ + u32 mft_total_route_counts; + + /** + * Table description + */ + u8* mft_desc; +} mfib_table_t; + +/** + * @brief + * Format the description/name of the table + */ +extern u8* format_mfib_table_name(u8* s, va_list ap); + +/** + * @brief + * Perform a longest prefix match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the best match, which may be the default route + */ +extern fib_node_index_t mfib_table_lookup(u32 fib_index, + const mfib_prefix_t *prefix); + +/** + * @brief + * Perform an exact match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the exact match, or INVALID + * if there is no match. + */ +extern fib_node_index_t mfib_table_lookup_exact_match(u32 fib_index, + const mfib_prefix_t *prefix); + +/** + * @brief + * Add a new (with no replication) or lock an existing entry + * + * @param prefix + * The prefix for the entry to add + * + * @return + * the index of the fib_entry_t that is created (or existed already). 
+ */ +extern fib_node_index_t mfib_table_entry_update(u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + mfib_entry_flags_t flags); + +/** + * @brief + * Add n paths to an entry (aka route) in the FIB. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param rpaths + * A vector of paths. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t mfib_table_entry_path_update(u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + const fib_route_path_t *rpath, + mfib_itf_flags_t flags); + +/** + * @brief + * Remove n paths to an entry (aka route) in the FIB. If this is the entry's + * last path, then the entry will be removed, unless it has other sources. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param rpaths + * A vector of paths. + */ +extern void mfib_table_entry_path_remove(u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source, + const fib_route_path_t *paths); + + + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to remove + * + * @param source + * The ID of the client/source adding the entry. 
+ */ +extern void mfib_table_entry_delete(u32 fib_index, + const mfib_prefix_t *prefix, + mfib_source_t source); + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param entry_index + * The index of the FIB entry + * + * @param source + * The ID of the client/source adding the entry. + */ +extern void mfib_table_entry_delete_index(fib_node_index_t entry_index, + mfib_source_t source); + +/** + * @brief + * Flush all entries from a table for the source + * + * @param fib_index + * The index of the FIB + * + * @paran proto + * The protocol of the entries in the table + * + * @param source + * the source to flush + */ +extern void mfib_table_flush(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Get the index of the FIB bound to the interface + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param sw_if_index + * The interface index + * + * @return fib_index + * The index of the FIB + */ +extern u32 mfib_table_get_index_for_sw_if_index(fib_protocol_t proto, + u32 sw_if_index); + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES NOT create the + * FIB if it does not exist. + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table-id + * The Table-ID + * + * @return fib_index + * The index of the FIB, which may be INVALID. + */ +extern u32 mfib_table_find(fib_protocol_t proto, u32 table_id); + + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES create the + * FIB if it does not exist. 
+ * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table-id + * The Table-ID + * + * @return fib_index + * The index of the FIB + */ +extern u32 mfib_table_find_or_create_and_lock(fib_protocol_t proto, + u32 table_id); + + +/** + * @brief + * Release a reference counting lock on the table. When the last lock + * has gone, the FIB is deleted. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void mfib_table_unlock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Take a reference counting lock on the table + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void mfib_table_lock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Return the number of entries in the FIB added by a given source. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @return number of sourced entries. + */ +extern u32 mfib_table_get_num_entries(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Get a pointer to a FIB table + */ +extern mfib_table_t *mfib_table_get(fib_node_index_t index, + fib_protocol_t proto); + +#endif diff --git a/src/vnet/mfib/mfib_test.c b/src/vnet/mfib/mfib_test.c new file mode 100644 index 00000000000..8735bfa73fc --- /dev/null +++ b/src/vnet/mfib/mfib_test.c @@ -0,0 +1,1225 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include +#include + +#define MFIB_TEST_I(_cond, _comment, _args...) \ +({ \ + int _evald = (_cond); \ + if (!(_evald)) { \ + fformat(stderr, "FAIL:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } else { \ + fformat(stderr, "PASS:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } \ + _evald; \ +}) +#define MFIB_TEST(_cond, _comment, _args...) \ +{ \ + if (!MFIB_TEST_I(_cond, _comment, ##_args)) { \ + return 1;\ + ASSERT(!("FAIL: " _comment)); \ + } \ +} +#define MFIB_TEST_NS(_cond) \ +{ \ + if (!MFIB_TEST_I(_cond, "")) { \ + return 1;\ + ASSERT(!("FAIL: ")); \ + } \ +} + +/** + * A 'i'm not fussed is this is not efficient' store of test data + */ +typedef struct test_main_t_ { + /** + * HW if indicies + */ + u32 hw_if_indicies[4]; + /** + * HW interfaces + */ + vnet_hw_interface_t * hw[4]; + +} test_main_t; +static test_main_t test_main; + +/* fake ethernet device class, distinct from "fake-ethX" */ +static u8 * format_test_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "test-eth%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +static clib_error_t * +test_interface_admin_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? 
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0; + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + return 0; +} + +VNET_DEVICE_CLASS (test_interface_device_class,static) = { + .name = "Test interface", + .format_device_name = format_test_interface_name, + .tx_function = dummy_interface_tx, + .admin_up_down_function = test_interface_admin_up_down, +}; + +static u8 *hw_address; + +static int +mfib_test_mk_intf (u32 ninterfaces) +{ + clib_error_t * error = NULL; + test_main_t *tm = &test_main; + u8 byte; + u32 i; + + ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies)); + + for (i=0; i<6; i++) + { + byte = 0xd0+i; + vec_add1(hw_address, byte); + } + + for (i = 0; i < ninterfaces; i++) + { + hw_address[5] = i; + + error = ethernet_register_interface(vnet_get_main(), + test_interface_device_class.index, + i /* instance */, + hw_address, + &tm->hw_if_indicies[i], + /* flag change */ 0); + + MFIB_TEST((NULL == error), "ADD interface %d", i); + + error = vnet_hw_interface_set_flags(vnet_get_main(), + tm->hw_if_indicies[i], + VNET_HW_INTERFACE_FLAG_LINK_UP); + tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + vec_validate (ip4_main.fib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + vec_validate (ip6_main.fib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + + vec_validate (ip4_main.mfib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + vec_validate (ip6_main.mfib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[i]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + MFIB_TEST((NULL == error), "UP interface %d", i); + } + /* + * re-eval after the inevitable realloc + */ + for (i = 0; i < ninterfaces; i++) + { + 
tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + } + + return (0); +} + +#define MFIB_TEST_REP(_cond, _comment, _args...) \ +{ \ + if (!MFIB_TEST_I(_cond, _comment, ##_args)) { \ + return (0); \ + } \ +} + +static int +mfib_test_validate_rep_v (const replicate_t *rep, + u16 n_buckets, + va_list ap) +{ + const dpo_id_t *dpo; + adj_index_t ai; + dpo_type_t dt; + int bucket; + + MFIB_TEST_REP((n_buckets == rep->rep_n_buckets), + "n_buckets = %d", rep->rep_n_buckets); + + for (bucket = 0; bucket < n_buckets; bucket++) + { + dt = va_arg(ap, int); // type promotion + ai = va_arg(ap, adj_index_t); + dpo = replicate_get_bucket_i(rep, bucket); + + MFIB_TEST_REP((dt == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + if (DPO_RECEIVE != dt) + { + MFIB_TEST_REP((ai == dpo->dpoi_index), + "bucket %d stacks on %U", + bucket, + format_dpo_id, dpo, 0); + } + } + return (!0); +} + +static fib_forward_chain_type_t +fib_forw_chain_type_from_fib_proto (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + default: + break; + } + ASSERT(0); + return (0); +} + + +static int +mfib_test_entry (fib_node_index_t fei, + mfib_entry_flags_t eflags, + u16 n_buckets, + ...) 
+{ + const mfib_entry_t *mfe; + const replicate_t *rep; + mfib_prefix_t pfx; + va_list ap; + + va_start(ap, n_buckets); + + mfe = mfib_entry_get(fei); + mfib_entry_get_prefix(fei, &pfx); + + MFIB_TEST_REP((eflags == mfe->mfe_flags), + "%U has %U expect %U", + format_mfib_prefix, &pfx, + format_mfib_entry_flags, mfe->mfe_flags, + format_mfib_entry_flags, eflags); + + if (0 == n_buckets) + { + MFIB_TEST_REP((DPO_DROP == mfe->mfe_rep.dpoi_type), + "%U links to %U", + format_mfib_prefix, &pfx, + format_dpo_id, &mfe->mfe_rep, 0); + return (!0); + } + else + { + dpo_id_t tmp = DPO_INVALID; + int res; + + mfib_entry_contribute_forwarding( + fei, + fib_forw_chain_type_from_fib_proto(pfx.fp_proto), + &tmp); + rep = replicate_get(tmp.dpoi_index); + + MFIB_TEST_REP((DPO_REPLICATE == tmp.dpoi_type), + "%U links to %U", + format_mfib_prefix, &pfx, + format_dpo_type, tmp.dpoi_type); + + res = mfib_test_validate_rep_v(rep, n_buckets, ap); + + dpo_reset(&tmp); + + return (res); + } +} + +static int +mfib_test_entry_itf (fib_node_index_t fei, + u32 sw_if_index, + mfib_itf_flags_t flags) +{ + const mfib_entry_t *mfe; + const mfib_itf_t *mfi; + mfib_prefix_t pfx; + + mfe = mfib_entry_get(fei); + mfi = mfib_entry_get_itf(mfe, sw_if_index); + mfib_entry_get_prefix(fei, &pfx); + + MFIB_TEST_REP((NULL != mfi), + "%U has interface %d", + format_mfib_prefix, &pfx, sw_if_index); + + MFIB_TEST_REP((flags == mfi->mfi_flags), + "%U interface %d has flags %U expect %U", + format_mfib_prefix, &pfx, sw_if_index, + format_mfib_itf_flags, flags, + format_mfib_itf_flags, mfi->mfi_flags); + + return (!0); +} + +static int +mfib_test_entry_no_itf (fib_node_index_t fei, + u32 sw_if_index) +{ + const mfib_entry_t *mfe; + const mfib_itf_t *mfi; + mfib_prefix_t pfx; + + mfe = mfib_entry_get(fei); + mfi = mfib_entry_get_itf(mfe, sw_if_index); + mfib_entry_get_prefix(fei, &pfx); + + MFIB_TEST_REP((NULL == mfi), + "%U has no interface %d", + format_mfib_prefix, &pfx, sw_if_index); + + return (!0); +} + 
+static int +mfib_test_i (fib_protocol_t PROTO, + vnet_link_t LINKT, + const mfib_prefix_t *pfx_no_forward, + const mfib_prefix_t *pfx_s_g, + const mfib_prefix_t *pfx_star_g_1, + const mfib_prefix_t *pfx_star_g_2, + const mfib_prefix_t *pfx_star_g_3, + const mfib_prefix_t *pfx_star_g_slash_m) +{ + fib_node_index_t mfei, mfei_dflt, mfei_no_f, mfei_s_g, mfei_g_1, mfei_g_2, mfei_g_3, mfei_g_m; + u32 fib_index, n_entries, n_itfs, n_reps; + fib_node_index_t ai_1, ai_2, ai_3; + test_main_t *tm; + + mfib_prefix_t all_1s; + memset(&all_1s, 0xfd, sizeof(all_1s)); + + n_entries = pool_elts(mfib_entry_pool); + n_itfs = pool_elts(mfib_itf_pool); + n_reps = pool_elts(replicate_pool); + tm = &test_main; + + ai_1 = adj_mcast_add_or_lock(PROTO, + LINKT, + tm->hw[1]->sw_if_index); + ai_2 = adj_mcast_add_or_lock(PROTO, + LINKT, + tm->hw[2]->sw_if_index); + ai_3 = adj_mcast_add_or_lock(PROTO, + LINKT, + tm->hw[3]->sw_if_index); + + MFIB_TEST(3 == adj_mcast_db_size(), "3 MCAST adjs"); + + /* Find or create FIB table 11 */ + fib_index = mfib_table_find_or_create_and_lock(PROTO, 11); + + mfib_prefix_t pfx_dft = { + .fp_len = 0, + .fp_proto = PROTO, + }; + mfei_dflt = mfib_table_lookup_exact_match(fib_index, &pfx_dft); + MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_dflt, "(*,*) presnet"); + MFIB_TEST(mfib_test_entry(mfei_dflt, + MFIB_ENTRY_FLAG_DROP, + 0), + "(*,*) no replcaitions"); + + MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_dflt, "(*,*) presnet"); + MFIB_TEST(mfib_test_entry(mfei_dflt, + MFIB_ENTRY_FLAG_DROP, + 0), + "(*,*) no replcaitions"); + + + fib_route_path_t path_via_if0 = { + .frp_proto = PROTO, + .frp_addr = zero_addr, + .frp_sw_if_index = tm->hw[0]->sw_if_index, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = 0, + }; + + mfib_table_entry_path_update(fib_index, + pfx_no_forward, + MFIB_SOURCE_API, + &path_via_if0, + MFIB_ITF_FLAG_ACCEPT); + + mfei_no_f = mfib_table_lookup_exact_match(fib_index, pfx_no_forward); + MFIB_TEST(mfib_test_entry(mfei_no_f, + 
MFIB_ENTRY_FLAG_NONE, + 0), + "%U no replcaitions", + format_mfib_prefix, pfx_no_forward); + MFIB_TEST_NS(mfib_test_entry_itf(mfei_no_f, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + + fib_route_path_t path_via_if1 = { + .frp_proto = PROTO, + .frp_addr = zero_addr, + .frp_sw_if_index = tm->hw[1]->sw_if_index, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = 0, + }; + fib_route_path_t path_via_if2 = { + .frp_proto = PROTO, + .frp_addr = zero_addr, + .frp_sw_if_index = tm->hw[2]->sw_if_index, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = 0, + }; + fib_route_path_t path_via_if3 = { + .frp_proto = PROTO, + .frp_addr = zero_addr, + .frp_sw_if_index = tm->hw[3]->sw_if_index, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = 0, + }; + fib_route_path_t path_for_us = { + .frp_proto = PROTO, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + + /* + * An (S,G) with 1 accepting and 3 forwarding paths + */ + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if0, + MFIB_ITF_FLAG_ACCEPT); + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if1, + MFIB_ITF_FLAG_FORWARD); + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if2, + MFIB_ITF_FLAG_FORWARD); + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if3, + (MFIB_ITF_FLAG_FORWARD | + MFIB_ITF_FLAG_NEGATE_SIGNAL)); + + mfei_s_g = mfib_table_lookup_exact_match(fib_index, pfx_s_g); + + MFIB_TEST(FIB_NODE_INDEX_INVALID != mfei_s_g, + "%U present", + format_mfib_prefix, pfx_s_g); + MFIB_TEST(mfib_test_entry(mfei_s_g, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate ok", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + 
MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[1]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei_s_g, tm->hw[3]->sw_if_index, + (MFIB_ITF_FLAG_FORWARD | + MFIB_ITF_FLAG_NEGATE_SIGNAL))); + + /* + * A (*,G), which the same G as the (S,G). + * different paths. test our LPM. + */ + mfei_g_1 = mfib_table_entry_path_update(fib_index, + pfx_star_g_1, + MFIB_SOURCE_API, + &path_via_if0, + MFIB_ITF_FLAG_ACCEPT); + mfib_table_entry_path_update(fib_index, + pfx_star_g_1, + MFIB_SOURCE_API, + &path_via_if1, + MFIB_ITF_FLAG_FORWARD); + + /* + * test we find the *,G and S,G via LPM and exact matches + */ + mfei = mfib_table_lookup_exact_match(fib_index, + pfx_star_g_1); + MFIB_TEST(mfei == mfei_g_1, + "%U found via exact match", + format_mfib_prefix, pfx_star_g_1); + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_ADJACENCY_MCAST, ai_1), + "%U replicate ok", + format_mfib_prefix, pfx_star_g_1); + + mfei = mfib_table_lookup(fib_index, + pfx_star_g_1); + MFIB_TEST(mfei == mfei_g_1, + "%U found via LP match", + format_mfib_prefix, pfx_star_g_1); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_ADJACENCY_MCAST, ai_1), + "%U replicate ok", + format_mfib_prefix, pfx_star_g_1); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g); + MFIB_TEST(mfei == mfei_s_g, + "%U found via exact match", + format_mfib_prefix, pfx_s_g); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + mfei = mfib_table_lookup(fib_index, pfx_s_g); + MFIB_TEST(mfei == mfei_s_g, + "%U found via LP match", + format_mfib_prefix, pfx_s_g); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, 
ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * A (*,G/m), which the same root G as the (*,G). + * different paths. test our LPM. + */ + mfei_g_m = mfib_table_entry_path_update(fib_index, + pfx_star_g_slash_m, + MFIB_SOURCE_API, + &path_via_if2, + MFIB_ITF_FLAG_ACCEPT); + mfib_table_entry_path_update(fib_index, + pfx_star_g_slash_m, + MFIB_SOURCE_API, + &path_via_if3, + MFIB_ITF_FLAG_FORWARD); + + /* + * test we find the (*,G/m), (*,G) and (S,G) via LPM and exact matches + */ + mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1); + MFIB_TEST((mfei_g_1 == mfei), + "%U found via DP LPM: %d", + format_mfib_prefix, pfx_star_g_1, mfei); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_ADJACENCY_MCAST, ai_1), + "%U replicate ok", + format_mfib_prefix, pfx_star_g_1); + + mfei = mfib_table_lookup(fib_index, pfx_star_g_1); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_ADJACENCY_MCAST, ai_1), + "%U replicate ok", + format_mfib_prefix, pfx_star_g_1); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_s_g); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + mfei = mfib_table_lookup(fib_index, pfx_s_g); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m); + MFIB_TEST(mfei = mfei_g_m, + "%U Found via exact match", + format_mfib_prefix, pfx_star_g_slash_m); + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 1, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_star_g_slash_m); + MFIB_TEST(mfei_g_m == mfib_table_lookup(fib_index, pfx_star_g_slash_m), + "%U found via LPM", + 
format_mfib_prefix, pfx_star_g_slash_m); + + /* + * Add a for-us path + */ + mfei = mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_for_us, + MFIB_ITF_FLAG_FORWARD); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 4, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3, + DPO_RECEIVE, 0), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * remove a for-us path + */ + mfib_table_entry_path_remove(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_for_us); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * update an existing forwarding path to be only accepting + * - expect it to be removed from the replication set. + */ + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if3, + MFIB_ITF_FLAG_ACCEPT); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 2, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + /* + * Make the path forwarding again + * - expect it to be added back to the replication set + */ + mfib_table_entry_path_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if3, + (MFIB_ITF_FLAG_FORWARD | + MFIB_ITF_FLAG_ACCEPT | + MFIB_ITF_FLAG_NEGATE_SIGNAL)); + + mfei = mfib_table_lookup_exact_match(fib_index, + pfx_s_g); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_NONE, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, 
+ DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[3]->sw_if_index, + (MFIB_ITF_FLAG_FORWARD | + MFIB_ITF_FLAG_ACCEPT | + MFIB_ITF_FLAG_NEGATE_SIGNAL))); + + /* + * update flags on the entry + */ + mfib_table_entry_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + MFIB_ENTRY_FLAG_SIGNAL); + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_SIGNAL, + 3, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2, + DPO_ADJACENCY_MCAST, ai_3), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + + /* + * remove paths + */ + mfib_table_entry_path_remove(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if3); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_SIGNAL, + 2, + DPO_ADJACENCY_MCAST, ai_1, + DPO_ADJACENCY_MCAST, ai_2), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[1]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index)); + + mfib_table_entry_path_remove(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if1); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_SIGNAL, + 1, + DPO_ADJACENCY_MCAST, ai_2), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[0]->sw_if_index, + MFIB_ITF_FLAG_ACCEPT)); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index)); + + /* + * 
remove the accepting only interface + */ + mfib_table_entry_path_remove(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if0); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_SIGNAL, + 1, + DPO_ADJACENCY_MCAST, ai_2), + "%U replicate OK", + format_mfib_prefix, pfx_s_g); + MFIB_TEST_NS(mfib_test_entry_itf(mfei, tm->hw[2]->sw_if_index, + MFIB_ITF_FLAG_FORWARD)); + MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[0]->sw_if_index)); + MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[1]->sw_if_index)); + MFIB_TEST_NS(mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index)); + + /* + * remove the last path, the entry still has flags so it remains + */ + mfib_table_entry_path_remove(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + &path_via_if2); + + MFIB_TEST(mfib_test_entry(mfei, + MFIB_ENTRY_FLAG_SIGNAL, + 0), + "%U no replications", + format_mfib_prefix, pfx_s_g); + + /* + * update flags on the entry + */ + mfib_table_entry_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + (MFIB_ENTRY_FLAG_SIGNAL | + MFIB_ENTRY_FLAG_CONNECTED)); + MFIB_TEST(mfib_test_entry(mfei, + (MFIB_ENTRY_FLAG_SIGNAL | + MFIB_ENTRY_FLAG_CONNECTED), + 0), + "%U no replications", + format_mfib_prefix, pfx_s_g); + + /* + * An entry with a NS interface + */ + mfei_g_2 = mfib_table_entry_path_update(fib_index, + pfx_star_g_2, + MFIB_SOURCE_API, + &path_via_if0, + (MFIB_ITF_FLAG_ACCEPT | + MFIB_ITF_FLAG_NEGATE_SIGNAL)); + MFIB_TEST(mfib_test_entry(mfei_g_2, + MFIB_ENTRY_FLAG_NONE, + 0), + "%U No replications", + format_mfib_prefix, pfx_star_g_2); + + /* + * Simulate a signal from the data-plane + */ + { + mfib_entry_t *mfe; + mfib_itf_t *mfi; + + mfe = mfib_entry_get(mfei_g_2); + mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index); + + mfib_signal_push(mfe, mfi, NULL); + } + + /* + * An entry with a NS interface + */ + mfei_g_3 = mfib_table_entry_path_update(fib_index, + pfx_star_g_3, + MFIB_SOURCE_API, + &path_via_if0, + (MFIB_ITF_FLAG_ACCEPT | + MFIB_ITF_NEGATE_SIGNAL)); + 
MFIB_TEST(mfib_test_entry(mfei_g_3, + MFIB_ENTRY_FLAG_NONE, + 0), + "%U No replications", + format_mfib_prefix, pfx_star_g_3); + + /* + * Simulate a signal from the data-plane + */ + { + mfib_entry_t *mfe; + mfib_itf_t *mfi; + + mfe = mfib_entry_get(mfei_g_3); + mfi = mfib_entry_get_itf(mfe, path_via_if0.frp_sw_if_index); + + mfib_signal_push(mfe, mfi, NULL); + } + + if (FIB_PROTOCOL_IP6 == PROTO) + { + /* + * All the entries are present. let's ensure we can find them all + * via exact and longest prefix matches. + */ + /* + * A source address we will never match + */ + ip6_address_t src = { + .as_u64[0] = clib_host_to_net_u64(0x3001000000000000), + .as_u64[1] = clib_host_to_net_u64(0xffffffffffffffff), + }; + + /* + * Find the (*,G/m) + */ + MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2( + ip6_mfib_get(fib_index), + &src, + &pfx_star_g_slash_m->fp_grp_addr.ip6)), + "%U found via DP LPM grp=%U", + format_mfib_prefix, pfx_star_g_slash_m, + format_ip6_address, &pfx_star_g_slash_m->fp_grp_addr.ip6); + + ip6_address_t tmp = pfx_star_g_slash_m->fp_grp_addr.ip6; + tmp.as_u8[15] = 0xff; + + MFIB_TEST((mfei_g_m == ip6_mfib_table_lookup2( + ip6_mfib_get(fib_index), + &pfx_s_g->fp_src_addr.ip6, + &tmp)), + "%U found via DP LPM grp=%U", + format_mfib_prefix, pfx_star_g_slash_m, + format_ip6_address, &tmp); + + /* + * Find the (S,G). 
+ */ + mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index), + &pfx_s_g->fp_src_addr.ip6, + &pfx_s_g->fp_grp_addr.ip6); + MFIB_TEST((mfei_s_g == mfei), + "%U found via DP LPM: %d", + format_mfib_prefix, pfx_s_g, mfei); + + /* + * Find the 3 (*,G) s + */ + mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index), + &src, + &pfx_star_g_1->fp_grp_addr.ip6); + MFIB_TEST((mfei_g_1 == mfei), + "%U found via DP LPM: %d", + format_mfib_prefix, pfx_star_g_1, mfei); + mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index), + &src, + &pfx_star_g_2->fp_grp_addr.ip6); + MFIB_TEST((mfei_g_2 == mfei), + "%U found via DP LPM: %d", + format_mfib_prefix, pfx_star_g_2, mfei); + mfei = ip6_mfib_table_lookup2(ip6_mfib_get(fib_index), + &src, + &pfx_star_g_3->fp_grp_addr.ip6); + MFIB_TEST((mfei_g_3 == mfei), + "%U found via DP LPM: %d", + format_mfib_prefix, pfx_star_g_3, mfei); + } + + /* + * remove flags on the entry. This is the last of the + * state associated with the entry, so now it goes. + */ + mfib_table_entry_update(fib_index, + pfx_s_g, + MFIB_SOURCE_API, + MFIB_ENTRY_FLAG_NONE); + mfei = mfib_table_lookup_exact_match(fib_index, + pfx_s_g); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U gone", + format_mfib_prefix, pfx_s_g); + + /* + * remove the last path on the no forward entry - the last entry + */ + mfib_table_entry_path_remove(fib_index, + pfx_no_forward, + MFIB_SOURCE_API, + &path_via_if0); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_no_forward); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U gone", + format_mfib_prefix, pfx_no_forward); + + /* + * hard delete the (*,232.1.1.1) + */ + mfib_table_entry_delete(fib_index, + pfx_star_g_1, + MFIB_SOURCE_API); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_1); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U gone", + format_mfib_prefix, pfx_star_g_1); + /* + * remove the entry whilst the signal is pending + */ + mfib_table_entry_delete(fib_index, + pfx_star_g_2, + MFIB_SOURCE_API); + + mfei = 
mfib_table_lookup_exact_match(fib_index, pfx_star_g_2); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U Gone", + format_mfib_prefix, pfx_star_g_2); + mfib_table_entry_delete(fib_index, + pfx_star_g_3, + MFIB_SOURCE_API); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_3); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U Gone", + format_mfib_prefix, pfx_star_g_3); + + mfib_table_entry_delete(fib_index, + pfx_star_g_slash_m, + MFIB_SOURCE_API); + + mfei = mfib_table_lookup_exact_match(fib_index, pfx_star_g_slash_m); + MFIB_TEST(FIB_NODE_INDEX_INVALID == mfei, + "%U Gone", + format_mfib_prefix, pfx_star_g_slash_m); + + /* + * Unlock the table - it's the last lock so should be gone thereafter + */ + mfib_table_unlock(fib_index, PROTO); + + MFIB_TEST((FIB_NODE_INDEX_INVALID == + mfib_table_find(PROTO, fib_index)), + "MFIB table %d gone", fib_index); + + adj_unlock(ai_1); + adj_unlock(ai_2); + adj_unlock(ai_3); + + /* + * test we've leaked no resources + */ + MFIB_TEST(0 == adj_mcast_db_size(), "%d MCAST adjs", adj_mcast_db_size()); + MFIB_TEST(n_reps == pool_elts(replicate_pool), "%d=%d replicates", + n_reps, pool_elts(replicate_pool)); + MFIB_TEST(n_entries == pool_elts(mfib_entry_pool), + " No more entries %d!=%d", + n_entries, pool_elts(mfib_entry_pool)); + MFIB_TEST(n_itfs == pool_elts(mfib_itf_pool), + " No more Interfaces %d!=%d", + n_itfs, pool_elts(mfib_itf_pool)); + + return (0); +} + +static int +mfib_test_v4 (void) +{ + const mfib_prefix_t pfx_224_s_8 = { + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xe0000000), + } + }; + const mfib_prefix_t pfx_1_1_1_1_c_239_1_1_1 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xef010101), + }, + .fp_src_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }, + }; + const mfib_prefix_t pfx_239_1_1_1 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + 
.ip4.as_u32 = clib_host_to_net_u32(0xef010101), + }, + .fp_src_addr = { + .ip4.as_u32 = 0, + }, + }; + const mfib_prefix_t pfx_239_1_1_2 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xef010102), + }, + .fp_src_addr = { + .ip4.as_u32 = 0, + }, + }; + const mfib_prefix_t pfx_239_1_1_3 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xef010103), + }, + .fp_src_addr = { + .ip4.as_u32 = 0, + }, + }; + const mfib_prefix_t pfx_239 = { + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_grp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0xef000000), + }, + .fp_src_addr = { + .ip4.as_u32 = 0, + }, + }; + + return (mfib_test_i(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &pfx_224_s_8, + &pfx_1_1_1_1_c_239_1_1_1, + &pfx_239_1_1_1, + &pfx_239_1_1_2, + &pfx_239_1_1_3, + &pfx_239)); +} + +static int +mfib_test_v6 (void) +{ + const mfib_prefix_t pfx_ffd_s_12 = { + .fp_len = 12, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xffd0000000000000), + } + }; + const mfib_prefix_t pfx_2001_1_c_ff_1 = { + .fp_len = 256, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001), + }, + .fp_src_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001), + }, + }; + const mfib_prefix_t pfx_ff_1 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000001), + }, + }; + const mfib_prefix_t pfx_ff_2 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002), + }, + }; + const mfib_prefix_t 
pfx_ff_3 = { + /* + * this is the ALL DHCP routers address + */ + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xff02000100000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000002), + }, + }; + const mfib_prefix_t pfx_ff = { + .fp_len = 16, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_grp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0xff01000000000000), + .ip6.as_u64[1] = clib_host_to_net_u64(0x0000000000000000), + }, + }; + + return (mfib_test_i(FIB_PROTOCOL_IP6, + VNET_LINK_IP6, + &pfx_ffd_s_12, + &pfx_2001_1_c_ff_1, + &pfx_ff_1, + &pfx_ff_2, + &pfx_ff_3, + &pfx_ff)); +} + +static clib_error_t * +mfib_test (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + int res = 0; + + res += mfib_test_mk_intf(4); + res += mfib_test_v4(); + res += mfib_test_v6(); + + if (res) + { + return clib_error_return(0, "MFIB Unit Test Failed"); + } + else + { + return (NULL); + } +} + +VLIB_CLI_COMMAND (test_fib_command, static) = { + .path = "test mfib", + .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM", + .function = mfib_test, +}; + +clib_error_t * +mfib_test_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (mfib_test_init); diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c new file mode 100644 index 00000000000..6d77c3d88d1 --- /dev/null +++ b/src/vnet/mfib/mfib_types.c @@ -0,0 +1,213 @@ + /* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +/** + * String names for each flag + */ +static const char *mfib_flag_names[] = MFIB_ENTRY_NAMES_SHORT; +static const char *mfib_flag_names_long[] = MFIB_ENTRY_NAMES_LONG; + +static const char *mfib_itf_flag_long_names[] = MFIB_ITF_NAMES_LONG; +static const char *mfib_itf_flag_names[] = MFIB_ITF_NAMES_SHORT; + +u8 * +format_mfib_prefix (u8 * s, va_list * args) +{ + mfib_prefix_t *fp = va_arg (*args, mfib_prefix_t *); + + /* + * protocol specific so it prints ::/0 correctly. + */ + switch (fp->fp_proto) + { + case FIB_PROTOCOL_IP6: + { + ip6_address_t p6 = fp->fp_grp_addr.ip6; + u32 len = (fp->fp_len > 128 ? 128 : fp->fp_len); + + ip6_address_mask(&p6, &(ip6_main.fib_masks[len])); + + if (ip6_address_is_zero(&fp->fp_src_addr.ip6)) + { + s = format(s, "(*, "); + } + else + { + s = format (s, "(%U, ", format_ip6_address, &fp->fp_src_addr.ip6); + } + s = format (s, "%U", format_ip6_address, &p6); + s = format (s, "/%d)", len); + break; + } + case FIB_PROTOCOL_IP4: + { + ip4_address_t p4 = fp->fp_grp_addr.ip4; + u32 len = (fp->fp_len > 32 ? 
32 : fp->fp_len); + + p4.as_u32 &= ip4_main.fib_masks[len]; + + if (0 == fp->fp_src_addr.ip4.as_u32) + { + s = format(s, "(*, "); + } + else + { + s = format (s, "(%U, ", format_ip4_address, &fp->fp_src_addr.ip4); + } + s = format (s, "%U", format_ip4_address, &p4); + s = format (s, "/%d)", len); + break; + } + case FIB_PROTOCOL_MPLS: + break; + } + + return (s); +} + +u8 * +format_mfib_entry_flags (u8 * s, va_list * args) +{ + mfib_entry_attribute_t attr; + mfib_entry_flags_t flags; + + flags = va_arg (*args, mfib_entry_flags_t); + + if (MFIB_ENTRY_FLAG_NONE != flags) { + s = format(s, " flags:"); + FOR_EACH_MFIB_ATTRIBUTE(attr) { + if ((1< + +/** + * Aggregate type for a prefix + */ +typedef struct mfib_prefix_t_ { + /** + * The mask length + */ + u16 fp_len; + + /** + * protocol type + */ + fib_protocol_t fp_proto; + + /** + * Pad to keep the address 4 byte aligned + */ + u8 ___fp___pad; + + /** + * The address type is not derivable from the fp_addr member. + * If it's v4, then the first 3 u32s of the address will be 0. + * v6 addresses (even v4 mapped ones) have at least 2 u32s assigned + * to non-zero values. true. but when it's all zero, one cannot decide. + */ + ip46_address_t fp_grp_addr; + ip46_address_t fp_src_addr; +} mfib_prefix_t; + +typedef enum mfib_entry_attribute_t_ +{ + MFIB_ENTRY_ATTRIBUTE_FIRST = 0, + /** + * The control plane needs packets matching this entry to generate + * a signal. 
+ */ + MFIB_ENTRY_SIGNAL = MFIB_ENTRY_ATTRIBUTE_FIRST, + /** + * Drop all traffic to this route + */ + MFIB_ENTRY_DROP, + /** + * The control plane needs to be informed of connected sources + */ + MFIB_ENTRY_CONNECTED, + /** + * Accept packets from any incoming interface + * Use with extreme caution + */ + MFIB_ENTRY_ACCEPT_ALL_ITF, + MFIB_ENTRY_INHERIT_ACCEPT, + MFIB_ENTRY_ATTRIBUTE_LAST = MFIB_ENTRY_INHERIT_ACCEPT, +} mfib_entry_attribute_t; + +#define FOR_EACH_MFIB_ATTRIBUTE(_item) \ + for (_item = MFIB_ENTRY_ATTRIBUTE_FIRST; \ + _item <= MFIB_ENTRY_ATTRIBUTE_LAST; \ + _item++) + +#define MFIB_ENTRY_NAMES_SHORT { \ + [MFIB_ENTRY_SIGNAL] = "S", \ + [MFIB_ENTRY_CONNECTED] = "C", \ + [MFIB_ENTRY_DROP] = "D", \ + [MFIB_ENTRY_ACCEPT_ALL_ITF] = "AA", \ + [MFIB_ENTRY_INHERIT_ACCEPT] = "IA", \ +} + +#define MFIB_ENTRY_NAMES_LONG { \ + [MFIB_ENTRY_SIGNAL] = "Signal", \ + [MFIB_ENTRY_CONNECTED] = "Connected", \ + [MFIB_ENTRY_DROP] = "Drop", \ + [MFIB_ENTRY_ACCEPT_ALL_ITF] = "Accept-all-itf", \ + [MFIB_ENTRY_INHERIT_ACCEPT] = "Inherit-Accept", \ +} + +typedef enum mfib_entry_flags_t_ +{ + MFIB_ENTRY_FLAG_NONE, + MFIB_ENTRY_FLAG_SIGNAL = (1 << MFIB_ENTRY_SIGNAL), + MFIB_ENTRY_FLAG_DROP = (1 << MFIB_ENTRY_DROP), + MFIB_ENTRY_FLAG_CONNECTED = (1 << MFIB_ENTRY_CONNECTED), + MFIB_ENTRY_FLAG_INHERIT_ACCEPT = (1 << MFIB_ENTRY_INHERIT_ACCEPT), + MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF = (1 << MFIB_ENTRY_ACCEPT_ALL_ITF), +} mfib_entry_flags_t; + +typedef enum mfib_itf_attribute_t_ +{ + MFIB_ITF_ATTRIBUTE_FIRST, + MFIB_ITF_NEGATE_SIGNAL = MFIB_ITF_ATTRIBUTE_FIRST, + MFIB_ITF_ACCEPT, + MFIB_ITF_FORWARD, + MFIB_ITF_SIGNAL_PRESENT, + MFIB_ITF_DONT_PRESERVE, + MFIB_ITF_ATTRIBUTE_LAST = MFIB_ITF_DONT_PRESERVE, +} mfib_itf_attribute_t; + +#define FOR_EACH_MFIB_ITF_ATTRIBUTE(_item) \ + for (_item = MFIB_ITF_ATTRIBUTE_FIRST; \ + _item <= MFIB_ITF_ATTRIBUTE_LAST; \ + _item++) + +#define MFIB_ITF_NAMES_SHORT { \ + [MFIB_ITF_NEGATE_SIGNAL] = "NS", \ + [MFIB_ITF_ACCEPT] = "A", \ + [MFIB_ITF_FORWARD] = 
"F", \ + [MFIB_ITF_SIGNAL_PRESENT] = "SP", \ + [MFIB_ITF_DONT_PRESERVE] = "DP", \ +} + +#define MFIB_ITF_NAMES_LONG { \ + [MFIB_ITF_NEGATE_SIGNAL] = "Negate-Signal", \ + [MFIB_ITF_ACCEPT] = "Accept", \ + [MFIB_ITF_FORWARD] = "Forward", \ + [MFIB_ITF_SIGNAL_PRESENT] = "Signal-Present", \ + [MFIB_ITF_DONT_PRESERVE] = "Don't-Preserve", \ +} + +typedef enum mfib_itf_flags_t_ +{ + MFIB_ITF_FLAG_NONE, + MFIB_ITF_FLAG_NEGATE_SIGNAL = (1 << MFIB_ITF_NEGATE_SIGNAL), + MFIB_ITF_FLAG_ACCEPT = (1 << MFIB_ITF_ACCEPT), + MFIB_ITF_FLAG_FORWARD = (1 << MFIB_ITF_FORWARD), + MFIB_ITF_FLAG_SIGNAL_PRESENT = (1 << MFIB_ITF_SIGNAL_PRESENT), + MFIB_ITF_FLAG_DONT_PRESERVE = (1 << MFIB_ITF_DONT_PRESERVE), +} mfib_itf_flags_t; + +/** + * Possible [control plane] sources of MFIB entries + */ +typedef enum mfib_source_t_ +{ + MFIB_SOURCE_SPECIAL, + MFIB_SOURCE_API, + MFIB_SOURCE_CLI, + MFIB_SOURCE_VXLAN, + MFIB_SOURCE_DHCP, + MFIB_SOURCE_DEFAULT_ROUTE, +} mfib_source_t; + +#define MFIB_SOURCE_NAMES { \ + [MFIB_SOURCE_SPECIAL] = "Special", \ + [MFIB_SOURCE_API] = "API", \ + [MFIB_SOURCE_CLI] = "CLI", \ + [MFIB_SOURCE_DHCP] = "DHCP", \ + [MFIB_SOURCE_VXLAN] = "VXLAN", \ + [MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \ +} + +/** + * \brief Compare two prefixes for equality + */ +extern int mfib_prefix_cmp(const mfib_prefix_t *p1, + const mfib_prefix_t *p2); + +extern u8 * format_mfib_prefix(u8 * s, va_list * args); + +extern u8 *format_mfib_entry_flags(u8 * s, va_list * args); +extern u8 *format_mfib_itf_flags(u8 * s, va_list * args); +extern uword unformat_mfib_itf_flags(unformat_input_t * input, + va_list * args); +extern uword unformat_mfib_entry_flags(unformat_input_t * input, + va_list * args); + +#endif diff --git a/src/vnet/misc.c b/src/vnet/misc.c index 4c8c4cad5a7..9cfe83940f1 100644 --- a/src/vnet/misc.c +++ b/src/vnet/misc.c @@ -83,6 +83,9 @@ vnet_main_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, fib_module_init))) return error; + if ((error = 
vlib_call_init_function (vm, mfib_module_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; diff --git a/src/vnet/rewrite.h b/src/vnet/rewrite.h index 00c1efbdc5e..ce2bce3a3c8 100644 --- a/src/vnet/rewrite.h +++ b/src/vnet/rewrite.h @@ -64,6 +64,16 @@ typedef CLIB_PACKED (struct { Used for MTU check after packet rewrite. */ u16 max_l3_packet_bytes; + /* When dynamically writing a multicast destination L2 address + * this is the offset within the address to start writing n + * bytes of the IP mcast address */ + u8 dst_mcast_offset; + + /* When dynamically writing a multicast destination L2 address + * this is the number of bytes of the dest IP address to write into + * the MAC rewrite */ + u8 dst_mcast_n_bytes; + /* Rewrite string starting at end and going backwards. */ u8 data[0]; }) vnet_rewrite_header_t; @@ -261,6 +271,27 @@ _vnet_rewrite_two_headers (vnet_rewrite_header_t * h0, sizeof ((rw0).rewrite_data), \ (most_likely_size)) +always_inline void +_vnet_fixup_one_header (vnet_rewrite_header_t * h0, + u8 * addr, u32 addr_len, + u8 * packet0, int clear_first_bit) +{ + /* location to write to in the packet */ + u8 *p0 = packet0 - h0->dst_mcast_offset; + u8 *p1 = p0; + /* location to write from in the L3 dest address */ + u8 *a0 = addr + addr_len - h0->dst_mcast_n_bytes; + + clib_memcpy (p0, a0, h0->dst_mcast_n_bytes); + if (clear_first_bit) + *p1 &= 0x7f; +} + +#define vnet_fixup_one_header(rw0,addr,p0,clear_first_bit) \ + _vnet_fixup_one_header (&((rw0).rewrite_header), \ + (u8*)(addr), sizeof((*addr)), \ + (u8*)(p0), (clear_first_bit)) + #define VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST ((void *) 0) /** Deprecated */ void vnet_rewrite_for_sw_interface (struct vnet_main_t *vnm, diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c index 5d0275d992a..f30c0da940e 100644 --- a/src/vnet/sr/sr.c +++ b/src/vnet/sr/sr.c @@ -2161,7 +2161,7 @@ sr_fix_dst_addr (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, vnet_buffer 
(b0)->ip.adj_index[VLIB_TX]); - next0 = adj0->mcast_group_index; + next0 = adj0->if_address_index; /* We should be pointing at an Ethernet header... */ eh0 = vlib_buffer_get_current (b0); @@ -3088,7 +3088,7 @@ set_ip6_sr_rewrite_fn (vlib_main_t * vm, adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; /* $$$$$ hack... steal the mcast group index */ - adj->mcast_group_index = + adj->if_address_index = vlib_node_add_next (vm, sr_fix_dst_addr_node.index, hi->output_node_index); diff --git a/src/vnet/util/radix.c b/src/vnet/util/radix.c new file mode 100644 index 00000000000..ff0b0f7bf38 --- /dev/null +++ b/src/vnet/util/radix.c @@ -0,0 +1,1104 @@ +/* $NetBSD: radix.c,v 1.47 2016/12/12 03:55:57 ozaki-r Exp $ */ + +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.c 8.6 (Berkeley) 10/17/95 + */ + +/* + * Routines to build and maintain radix trees for routing lookups. + */ + +#include + +typedef void (*rn_printer_t)(void *, const char *fmt, ...); + +static int max_keylen = 33; // me +struct radix_mask *rn_mkfreelist; +struct radix_node_head *mask_rnhead; +static char *addmask_key; +static const char normal_chars[] = + {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1}; +static char *rn_zeros, *rn_ones; + +#define rn_masktop (mask_rnhead->rnh_treetop) + +static int rn_satisfies_leaf(const char *, struct radix_node *, int); +static int rn_lexobetter(const void *, const void *); +static struct radix_mask *rn_new_radix_mask(struct radix_node *, + struct radix_mask *); +static struct radix_node *rn_walknext(struct radix_node *, rn_printer_t, + void *); +static struct radix_node *rn_walkfirst(struct radix_node *, rn_printer_t, + void *); +static void rn_nodeprint(struct radix_node *, rn_printer_t, void *, + const char *); + +#define SUBTREE_OPEN "[ " +#define SUBTREE_CLOSE " ]" + +#ifdef RN_DEBUG +static void rn_treeprint(struct radix_node_head *, rn_printer_t, void *); +#endif /* RN_DEBUG */ + +#define MIN(x,y) (((x)<(y))?(x):(y)) + +static struct radix_mask* +rm_alloc (void) +{ + struct radix_mask *rm = clib_mem_alloc(sizeof(struct radix_mask)); + + memset(rm, 0, sizeof(*rm)); + + return (rm); +} + +static void +rm_free (struct radix_mask *rm) +{ + clib_mem_free(rm); +} + +#define 
R_Malloc(p, t, n) \ +{ \ + p = (t) clib_mem_alloc((unsigned int)(n)); \ + memset(p, 0, n); \ +} +#define Free(p) clib_mem_free((p)) +#define log(a,b, c...) +#define bool i32 + +/* + * The data structure for the keys is a radix tree with one way + * branching removed. The index rn_b at an internal node n represents a bit + * position to be tested. The tree is arranged so that all descendants + * of a node n have keys whose bits all agree up to position rn_b - 1. + * (We say the index of n is rn_b.) + * + * There is at least one descendant which has a one bit at position rn_b, + * and at least one with a zero there. + * + * A route is determined by a pair of key and mask. We require that the + * bit-wise logical and of the key and mask to be the key. + * We define the index of a route to associated with the mask to be + * the first bit number in the mask where 0 occurs (with bit number 0 + * representing the highest order bit). + * + * We say a mask is normal if every bit is 0, past the index of the mask. + * If a node n has a descendant (k, m) with index(m) == index(n) == rn_b, + * and m is a normal mask, then the route applies to every descendant of n. + * If the index(m) < rn_b, this implies the trailing last few bits of k + * before bit b are all 0, (and hence consequently true of every descendant + * of n), so the route applies to all descendants of the node as well. + * + * Similar logic shows that a non-normal mask m such that + * index(m) <= index(n) could potentially apply to many children of n. + * Thus, for each non-host route, we attach its mask to a list at an internal + * node as high in the tree as we can go. + * + * The present version of the code makes use of normal routes in short- + * circuiting an explicit mask and compare operation when testing whether + * a key satisfies a normal route, and also in remembering the unique leaf + * that governs a subtree. 
+ */ + +struct radix_node * +rn_search( + const void *v_arg, + struct radix_node *head) +{ + const u8 * const v = v_arg; + struct radix_node *x; + + for (x = head; x->rn_b >= 0;) { + if (x->rn_bmask & v[x->rn_off]) + x = x->rn_r; + else + x = x->rn_l; + } + return x; +} + +struct radix_node * +rn_search_m( + const void *v_arg, + struct radix_node *head, + const void *m_arg) +{ + struct radix_node *x; + const u8 * const v = v_arg; + const u8 * const m = m_arg; + + for (x = head; x->rn_b >= 0;) { + if ((x->rn_bmask & m[x->rn_off]) && + (x->rn_bmask & v[x->rn_off])) + x = x->rn_r; + else + x = x->rn_l; + } + return x; +} + +int +rn_refines( + const void *m_arg, + const void *n_arg) +{ + const char *m = m_arg; + const char *n = n_arg; + const char *lim = n + *(const u8 *)n; + const char *lim2 = lim; + int longer = (*(const u8 *)n++) - (int)(*(const u8 *)m++); + int masks_are_equal = 1; + + if (longer > 0) + lim -= longer; + while (n < lim) { + if (*n & ~(*m)) + return 0; + if (*n++ != *m++) + masks_are_equal = 0; + } + while (n < lim2) + if (*n++) + return 0; + if (masks_are_equal && (longer < 0)) + for (lim2 = m - longer; m < lim2; ) + if (*m++) + return 1; + return !masks_are_equal; +} + +struct radix_node * +rn_lookup( + const void *v_arg, + const void *m_arg, + struct radix_node_head *head) +{ + struct radix_node *x; + const char *netmask = NULL; + + if (m_arg) { + if ((x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off)) == 0) + return NULL; + netmask = x->rn_key; + } + x = rn_match(v_arg, head); + if (x != NULL && netmask != NULL) { + while (x != NULL && x->rn_mask != netmask) + x = x->rn_dupedkey; + } + return x; +} + +static int +rn_satisfies_leaf( + const char *trial, + struct radix_node *leaf, + int skip) +{ + const char *cp = trial; + const char *cp2 = leaf->rn_key; + const char *cp3 = leaf->rn_mask; + const char *cplim; + int length = MIN(*(const u8 *)cp, *(const u8 *)cp2); + + if (cp3 == 0) + cp3 = rn_ones; + else + length = MIN(length, *(const u8 *)cp3); 
+ cplim = cp + length; cp3 += skip; cp2 += skip; + for (cp += skip; cp < cplim; cp++, cp2++, cp3++) + if ((*cp ^ *cp2) & *cp3) + return 0; + return 1; +} + +struct radix_node * +rn_match( + const void *v_arg, + struct radix_node_head *head) +{ + const char * const v = v_arg; + struct radix_node *t = head->rnh_treetop; + struct radix_node *top = t; + struct radix_node *x; + struct radix_node *saved_t; + const char *cp = v; + const char *cp2; + const char *cplim; + int off = t->rn_off; + int vlen = *(const u8 *)cp; + int matched_off; + int test, b, rn_b; + + /* + * Open code rn_search(v, top) to avoid overhead of extra + * subroutine call. + */ + for (; t->rn_b >= 0; ) { + if (t->rn_bmask & cp[t->rn_off]) + t = t->rn_r; + else + t = t->rn_l; + } + /* + * See if we match exactly as a host destination + * or at least learn how many bits match, for normal mask finesse. + * + * It doesn't hurt us to limit how many bytes to check + * to the length of the mask, since if it matches we had a genuine + * match and the leaf we have is the most specific one anyway; + * if it didn't match with a shorter length it would fail + * with a long one. This wins big for class B&C netmasks which + * are probably the most common case... + */ + if (t->rn_mask) + vlen = *(const u8 *)t->rn_mask; + cp += off; cp2 = t->rn_key + off; cplim = v + vlen; + for (; cp < cplim; cp++, cp2++) + if (*cp != *cp2) + goto on1; + /* + * This extra grot is in case we are explicitly asked + * to look up the default. Ugh! + */ + if ((t->rn_flags & RNF_ROOT) && t->rn_dupedkey) + t = t->rn_dupedkey; + return t; +on1: + test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ + for (b = 7; (test >>= 1) > 0;) + b--; + matched_off = cp - v; + b += matched_off << 3; + rn_b = -1 - b; + /* + * If there is a host route in a duped-key chain, it will be first. 
+ */ + if ((saved_t = t)->rn_mask == 0) + t = t->rn_dupedkey; + for (; t; t = t->rn_dupedkey) + /* + * Even if we don't match exactly as a host, + * we may match if the leaf we wound up at is + * a route to a net. + */ + if (t->rn_flags & RNF_NORMAL) { + if (rn_b <= t->rn_b) + return t; + } else if (rn_satisfies_leaf(v, t, matched_off)) + return t; + t = saved_t; + /* start searching up the tree */ + do { + struct radix_mask *m; + t = t->rn_p; + m = t->rn_mklist; + if (m) { + /* + * If non-contiguous masks ever become important + * we can restore the masking and open coding of + * the search and satisfaction test and put the + * calculation of "off" back before the "do". + */ + do { + if (m->rm_flags & RNF_NORMAL) { + if (rn_b <= m->rm_b) + return m->rm_leaf; + } else { + off = MIN(t->rn_off, matched_off); + x = rn_search_m(v, t, m->rm_mask); + while (x && x->rn_mask != m->rm_mask) + x = x->rn_dupedkey; + if (x && rn_satisfies_leaf(v, x, off)) + return x; + } + m = m->rm_mklist; + } while (m); + } + } while (t != top); + return NULL; +} + +static void +rn_nodeprint(struct radix_node *rn, rn_printer_t printer, void *arg, + const char *delim) +{ + (*printer)(arg, "%s(%s%p: p<%p> l<%p> r<%p>)", + delim, ((void *)rn == arg) ? "*" : "", rn, rn->rn_p, + rn->rn_l, rn->rn_r); +} + +#ifdef RN_DEBUG +int rn_debug = 1; + +static void +rn_dbg_print(void *arg, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vlog(LOG_DEBUG, fmt, ap); + va_end(ap); +} + +static void +rn_treeprint(struct radix_node_head *h, rn_printer_t printer, void *arg) +{ + struct radix_node *dup, *rn; + const char *delim; + + if (printer == NULL) + return; + + rn = rn_walkfirst(h->rnh_treetop, printer, arg); + for (;;) { + /* Process leaves */ + delim = ""; + for (dup = rn; dup != NULL; dup = dup->rn_dupedkey) { + if ((dup->rn_flags & RNF_ROOT) != 0) + continue; + rn_nodeprint(dup, printer, arg, delim); + delim = ", "; + } + rn = rn_walknext(rn, printer, arg); + if (rn->rn_flags & RNF_ROOT) + return; + } + /* NOTREACHED */ +} + +#define traverse(__head, __rn) rn_treeprint((__head), rn_dbg_print, (__rn)) +#endif /* RN_DEBUG */ + +struct radix_node * +rn_newpair( + const void *v, + int b, + struct radix_node nodes[2]) +{ + struct radix_node *tt = nodes; + struct radix_node *t = tt + 1; + t->rn_b = b; t->rn_bmask = 0x80 >> (b & 7); + t->rn_l = tt; t->rn_off = b >> 3; + tt->rn_b = -1; tt->rn_key = v; tt->rn_p = t; + tt->rn_flags = t->rn_flags = RNF_ACTIVE; + return t; +} + +struct radix_node * +rn_insert( + const void *v_arg, + struct radix_node_head *head, + int *dupentry, + struct radix_node nodes[2]) +{ + struct radix_node *top = head->rnh_treetop; + struct radix_node *t = rn_search(v_arg, top); + struct radix_node *tt; + const char *v = v_arg; + int head_off = top->rn_off; + int vlen = *((const u8 *)v); + const char *cp = v + head_off; + int b; + /* + * Find first bit at which v and t->rn_key differ + */ + { + const char *cp2 = t->rn_key + head_off; + const char *cplim = v + vlen; + int cmp_res; + + while (cp < cplim) + if (*cp2++ != *cp++) + goto on1; + *dupentry = 1; + return t; +on1: + *dupentry = 0; + cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; + for (b = (cp - v) << 3; cmp_res; b--) + cmp_res >>= 1; + } + { + struct radix_node *p, *x = top; + cp = v; + do { + p = x; + if (cp[x->rn_off] & x->rn_bmask) + x = x->rn_r; + else x = x->rn_l; + } while (b > (unsigned) 
x->rn_b); /* x->rn_b < b && x->rn_b >= 0 */ +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, p); +#endif + t = rn_newpair(v_arg, b, nodes); tt = t->rn_l; + if ((cp[p->rn_off] & p->rn_bmask) == 0) + p->rn_l = t; + else + p->rn_r = t; + x->rn_p = t; t->rn_p = p; /* frees x, p as temp vars below */ + if ((cp[t->rn_off] & t->rn_bmask) == 0) { + t->rn_r = x; + } else { + t->rn_r = tt; t->rn_l = x; + } +#ifdef RN_DEBUG + if (rn_debug) { + log(LOG_DEBUG, "%s: Coming Out:\n", __func__), + traverse(head, p); + } +#endif /* RN_DEBUG */ + } + return tt; +} + +struct radix_node * +rn_addmask( + const void *n_arg, + int search, + int skip) +{ + const char *netmask = n_arg; + const char *cp; + const char *cplim; + struct radix_node *x; + struct radix_node *saved_x; + int b = 0, mlen, j; + int maskduplicated, m0, isnormal; + static int last_zeroed = 0; + + if ((mlen = *(const u8 *)netmask) > max_keylen) + mlen = max_keylen; + if (skip == 0) + skip = 1; + if (mlen <= skip) + return mask_rnhead->rnh_nodes; + if (skip > 1) + memmove(addmask_key + 1, rn_ones + 1, skip - 1); + if ((m0 = mlen) > skip) + memmove(addmask_key + skip, netmask + skip, mlen - skip); + /* + * Trim trailing zeroes. 
+ */ + for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) + cp--; + mlen = cp - addmask_key; + if (mlen <= skip) { + if (m0 >= last_zeroed) + last_zeroed = mlen; + return mask_rnhead->rnh_nodes; + } + if (m0 < last_zeroed) + memset(addmask_key + m0, 0, last_zeroed - m0); + *addmask_key = last_zeroed = mlen; + x = rn_search(addmask_key, rn_masktop); + if (memcmp(addmask_key, x->rn_key, mlen) != 0) + x = 0; + if (x || search) + return x; + R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); + if ((saved_x = x) == NULL) + return NULL; + memset(x, 0, max_keylen + 2 * sizeof (*x)); + cp = netmask = (void *)(x + 2); + memmove(x + 2, addmask_key, mlen); + x = rn_insert(cp, mask_rnhead, &maskduplicated, x); + if (maskduplicated) { + log(LOG_ERR, "rn_addmask: mask impossibly already in tree\n"); + Free(saved_x); + return x; + } + /* + * Calculate index of mask, and check for normalcy. + */ + cplim = netmask + mlen; isnormal = 1; + for (cp = netmask + skip; (cp < cplim) && *(const u8 *)cp == 0xff;) + cp++; + if (cp != cplim) { + for (j = 0x80; (j & *cp) != 0; j >>= 1) + b++; + if (*cp != normal_chars[b] || cp != (cplim - 1)) + isnormal = 0; + } + b += (cp - netmask) << 3; + x->rn_b = -1 - b; + if (isnormal) + x->rn_flags |= RNF_NORMAL; + return x; +} + +static int /* XXX: arbitrary ordering for non-contiguous masks */ +rn_lexobetter( + const void *m_arg, + const void *n_arg) +{ + const u8 *mp = m_arg; + const u8 *np = n_arg; + const u8 *lim; + + if (*mp > *np) + return 1; /* not really, but need to check longer one first */ + if (*mp == *np) + for (lim = mp + *mp; mp < lim;) + if (*mp++ > *np++) + return 1; + return 0; +} + +static struct radix_mask * +rn_new_radix_mask( + struct radix_node *tt, + struct radix_mask *next) +{ + struct radix_mask *m; + + m = rm_alloc(); + if (m == NULL) { + log(LOG_ERR, "Mask for route not entered\n"); + return NULL; + } + memset(m, 0, sizeof(*m)); + m->rm_b = tt->rn_b; + m->rm_flags = tt->rn_flags; + if 
(tt->rn_flags & RNF_NORMAL) + m->rm_leaf = tt; + else + m->rm_mask = tt->rn_mask; + m->rm_mklist = next; + tt->rn_mklist = m; + return m; +} + +struct radix_node * +rn_addroute( + const void *v_arg, + const void *n_arg, + struct radix_node_head *head, + struct radix_node treenodes[2]) +{ + const char *v = v_arg, *netmask = n_arg; + struct radix_node *t, *x = NULL, *tt; + struct radix_node *saved_tt, *top = head->rnh_treetop; + short b = 0, b_leaf = 0; + int keyduplicated; + const char *mmask; + struct radix_mask *m, **mp; + + /* + * In dealing with non-contiguous masks, there may be + * many different routes which have the same mask. + * We will find it useful to have a unique pointer to + * the mask to speed avoiding duplicate references at + * nodes and possibly save time in calculating indices. + */ + if (netmask != NULL) { + if ((x = rn_addmask(netmask, 0, top->rn_off)) == NULL) + return NULL; + b_leaf = x->rn_b; + b = -1 - x->rn_b; + netmask = x->rn_key; + } + /* + * Deal with duplicated keys: attach node to previous instance + */ + saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); + if (keyduplicated) { + for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) { + if (tt->rn_mask == netmask) + return NULL; + if (netmask == NULL || + (tt->rn_mask != NULL && + (b_leaf < tt->rn_b || /* index(netmask) > node */ + rn_refines(netmask, tt->rn_mask) || + rn_lexobetter(netmask, tt->rn_mask)))) + break; + } + /* + * If the mask is not duplicated, we wouldn't + * find it among possible duplicate key entries + * anyway, so the above test doesn't hurt. + * + * We sort the masks for a duplicated key the same way as + * in a masklist -- most specific to least specific. + * This may require the unfortunate nuisance of relocating + * the head of the list. + * + * We also reverse, or doubly link the list through the + * parent pointer. 
+ */ + if (tt == saved_tt) { + struct radix_node *xx = x; + /* link in at head of list */ + (tt = treenodes)->rn_dupedkey = t; + tt->rn_flags = t->rn_flags; + tt->rn_p = x = t->rn_p; + t->rn_p = tt; + if (x->rn_l == t) + x->rn_l = tt; + else + x->rn_r = tt; + saved_tt = tt; + x = xx; + } else { + (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; + t->rn_dupedkey = tt; + tt->rn_p = t; + if (tt->rn_dupedkey) + tt->rn_dupedkey->rn_p = tt; + } + tt->rn_key = v; + tt->rn_b = -1; + tt->rn_flags = RNF_ACTIVE; + } + /* + * Put mask in tree. + */ + if (netmask != NULL) { + tt->rn_mask = netmask; + tt->rn_b = x->rn_b; + tt->rn_flags |= x->rn_flags & RNF_NORMAL; + } + t = saved_tt->rn_p; + if (keyduplicated) + goto on2; + b_leaf = -1 - t->rn_b; + if (t->rn_r == saved_tt) + x = t->rn_l; + else + x = t->rn_r; + /* Promote general routes from below */ + if (x->rn_b < 0) { + for (mp = &t->rn_mklist; x != NULL; x = x->rn_dupedkey) { + if (x->rn_mask != NULL && x->rn_b >= b_leaf && + x->rn_mklist == NULL) { + *mp = m = rn_new_radix_mask(x, NULL); + if (m != NULL) + mp = &m->rm_mklist; + } + } + } else if (x->rn_mklist != NULL) { + /* + * Skip over masks whose index is > that of new node + */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + if (m->rm_b >= b_leaf) + break; + t->rn_mklist = m; + *mp = NULL; + } +on2: + /* Add new route to highest possible ancestor's list */ + if (netmask == NULL || b > t->rn_b) + return tt; /* can't lift at all */ + b_leaf = tt->rn_b; + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + /* + * Search through routes associated with node to + * insert new route according to index. + * Need same criteria as when sorting dupedkeys to avoid + * double loop on deletion. 
+ */ + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { + if (m->rm_b < b_leaf) + continue; + if (m->rm_b > b_leaf) + break; + if (m->rm_flags & RNF_NORMAL) { + mmask = m->rm_leaf->rn_mask; + if (tt->rn_flags & RNF_NORMAL) { + log(LOG_ERR, "Non-unique normal route," + " mask not entered\n"); + return tt; + } + } else + mmask = m->rm_mask; + if (mmask == netmask) { + m->rm_refs++; + tt->rn_mklist = m; + return tt; + } + if (rn_refines(netmask, mmask) || rn_lexobetter(netmask, mmask)) + break; + } + *mp = rn_new_radix_mask(tt, *mp); + return tt; +} + +struct radix_node * +rn_delete1( + const void *v_arg, + const void *netmask_arg, + struct radix_node_head *head, + struct radix_node *rn) +{ + struct radix_node *t, *p, *x, *tt; + struct radix_mask *m, *saved_m, **mp; + struct radix_node *dupedkey, *saved_tt, *top; + const char *v, *netmask; + int b, head_off, vlen; + + v = v_arg; + netmask = netmask_arg; + x = head->rnh_treetop; + tt = rn_search(v, x); + head_off = x->rn_off; + vlen = *(const u8 *)v; + saved_tt = tt; + top = x; + if (tt == NULL || + memcmp(v + head_off, tt->rn_key + head_off, vlen - head_off) != 0) + return NULL; + /* + * Delete our route from mask lists. 
+ */ + if (netmask != NULL) { + if ((x = rn_addmask(netmask, 1, head_off)) == NULL) + return NULL; + netmask = x->rn_key; + while (tt->rn_mask != netmask) + if ((tt = tt->rn_dupedkey) == NULL) + return NULL; + } + if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL) + goto on1; + if (tt->rn_flags & RNF_NORMAL) { + if (m->rm_leaf != tt || m->rm_refs > 0) { + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); + return NULL; /* dangling ref could cause disaster */ + } + } else { + if (m->rm_mask != tt->rn_mask) { + log(LOG_ERR, "rn_delete: inconsistent annotation\n"); + goto on1; + } + if (--m->rm_refs >= 0) + goto on1; + } + b = -1 - tt->rn_b; + t = saved_tt->rn_p; + if (b > t->rn_b) + goto on1; /* Wasn't lifted at all */ + do { + x = t; + t = t->rn_p; + } while (b <= t->rn_b && x != top); + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) { + if (m == saved_m) { + *mp = m->rm_mklist; + rm_free(m); + break; + } + } + if (m == NULL) { + log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); + if (tt->rn_flags & RNF_NORMAL) + return NULL; /* Dangling ref to us */ + } +on1: + /* + * Eliminate us from tree + */ + if (tt->rn_flags & RNF_ROOT) + return NULL; +#ifdef RN_DEBUG + if (rn_debug) + log(LOG_DEBUG, "%s: Going In:\n", __func__), traverse(head, tt); +#endif + t = tt->rn_p; + dupedkey = saved_tt->rn_dupedkey; + if (dupedkey != NULL) { + /* + * Here, tt is the deletion target, and + * saved_tt is the head of the dupedkey chain. 
+ */ + if (tt == saved_tt) { + x = dupedkey; + x->rn_p = t; + if (t->rn_l == tt) + t->rn_l = x; + else + t->rn_r = x; + } else { + /* find node in front of tt on the chain */ + for (x = p = saved_tt; + p != NULL && p->rn_dupedkey != tt;) + p = p->rn_dupedkey; + if (p != NULL) { + p->rn_dupedkey = tt->rn_dupedkey; + if (tt->rn_dupedkey != NULL) + tt->rn_dupedkey->rn_p = p; + } else + log(LOG_ERR, "rn_delete: couldn't find us\n"); + } + t = tt + 1; + if (t->rn_flags & RNF_ACTIVE) { + *++x = *t; + p = t->rn_p; + if (p->rn_l == t) + p->rn_l = x; + else + p->rn_r = x; + x->rn_l->rn_p = x; + x->rn_r->rn_p = x; + } + goto out; + } + if (t->rn_l == tt) + x = t->rn_r; + else + x = t->rn_l; + p = t->rn_p; + if (p->rn_r == t) + p->rn_r = x; + else + p->rn_l = x; + x->rn_p = p; + /* + * Demote routes attached to us. + */ + if (t->rn_mklist == NULL) + ; + else if (x->rn_b >= 0) { + for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_mklist) + ; + *mp = t->rn_mklist; + } else { + /* If there are any key,mask pairs in a sibling + duped-key chain, some subset will appear sorted + in the same order attached to our mklist */ + for (m = t->rn_mklist; + m != NULL && x != NULL; + x = x->rn_dupedkey) { + if (m == x->rn_mklist) { + struct radix_mask *mm = m->rm_mklist; + x->rn_mklist = NULL; + if (--(m->rm_refs) < 0) + rm_free(m); + m = mm; + } + } + if (m != NULL) { + log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n", + m, x); + } + } + /* + * We may be holding an active internal node in the tree. 
+ */ + x = tt + 1; + if (t != x) { + *t = *x; + t->rn_l->rn_p = t; + t->rn_r->rn_p = t; + p = x->rn_p; + if (p->rn_l == x) + p->rn_l = t; + else + p->rn_r = t; + } +out: +#ifdef RN_DEBUG + if (rn_debug) { + log(LOG_DEBUG, "%s: Coming Out:\n", __func__), + traverse(head, tt); + } +#endif /* RN_DEBUG */ + tt->rn_flags &= ~RNF_ACTIVE; + tt[1].rn_flags &= ~RNF_ACTIVE; + return tt; +} + +struct radix_node * +rn_delete( + const void *v_arg, + const void *netmask_arg, + struct radix_node_head *head) +{ + return rn_delete1(v_arg, netmask_arg, head, NULL); +} + +static struct radix_node * +rn_walknext(struct radix_node *rn, rn_printer_t printer, void *arg) +{ + /* If at right child go back up, otherwise, go right */ + while (rn->rn_p->rn_r == rn && (rn->rn_flags & RNF_ROOT) == 0) { + if (printer != NULL) + (*printer)(arg, SUBTREE_CLOSE); + rn = rn->rn_p; + } + if (printer) + rn_nodeprint(rn->rn_p, printer, arg, ""); + /* Find the next *leaf* since next node might vanish, too */ + for (rn = rn->rn_p->rn_r; rn->rn_b >= 0;) { + if (printer != NULL) + (*printer)(arg, SUBTREE_OPEN); + rn = rn->rn_l; + } + return rn; +} + +static struct radix_node * +rn_walkfirst(struct radix_node *rn, rn_printer_t printer, void *arg) +{ + /* First time through node, go left */ + while (rn->rn_b >= 0) { + if (printer != NULL) + (*printer)(arg, SUBTREE_OPEN); + rn = rn->rn_l; + } + return rn; +} + +int +rn_walktree( + struct radix_node_head *h, + int (*f)(struct radix_node *, void *), + void *w) +{ + int error; + struct radix_node *base, *next, *rn; + /* + * This gets complicated because we may delete the node + * while applying the function f to it, so we need to calculate + * the successor node in advance. 
+ */ + rn = rn_walkfirst(h->rnh_treetop, NULL, NULL); + for (;;) { + base = rn; + next = rn_walknext(rn, NULL, NULL); + /* Process leaves */ + while ((rn = base) != NULL) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w))) + return error; + } + rn = next; + if (rn->rn_flags & RNF_ROOT) + return 0; + } + /* NOTREACHED */ +} + +struct radix_node * +rn_search_matched(struct radix_node_head *h, + int (*matcher)(struct radix_node *, void *), void *w) +{ + bool matched; + struct radix_node *base, *next, *rn; + /* + * This gets complicated because we may delete the node + * while applying the function f to it, so we need to calculate + * the successor node in advance. + */ + rn = rn_walkfirst(h->rnh_treetop, NULL, NULL); + for (;;) { + base = rn; + next = rn_walknext(rn, NULL, NULL); + /* Process leaves */ + while ((rn = base) != NULL) { + base = rn->rn_dupedkey; + if (!(rn->rn_flags & RNF_ROOT)) { + matched = (*matcher)(rn, w); + if (matched) + return rn; + } + } + rn = next; + if (rn->rn_flags & RNF_ROOT) + return NULL; + } + /* NOTREACHED */ +} + +int +rn_inithead(void **head, int off) +{ + struct radix_node_head *rnh; + + if (*head != NULL) + return 1; + R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); + if (rnh == NULL) + return 0; + *head = rnh; + return rn_inithead0(rnh, off); +} + +int +rn_inithead0(struct radix_node_head *rnh, int off) +{ + struct radix_node *t; + struct radix_node *tt; + struct radix_node *ttt; + + memset(rnh, 0, sizeof(*rnh)); + t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); + ttt = rnh->rnh_nodes + 2; + t->rn_r = ttt; + t->rn_p = t; + tt = t->rn_l; + tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; + tt->rn_b = -1 - off; + *ttt = *tt; + ttt->rn_key = rn_ones; + rnh->rnh_addaddr = rn_addroute; + rnh->rnh_deladdr = rn_delete; + rnh->rnh_matchaddr = rn_match; + rnh->rnh_lookup = rn_lookup; + rnh->rnh_treetop = t; + return 1; +} + +static clib_error_t * +rn_module_init (vlib_main_t * vm) +{ + char *cp, 
*cplim; + + R_Malloc(rn_zeros, char *, 3 * max_keylen); + if (rn_zeros == NULL) + return (clib_error_return (0, "RN Zeros...")); + + memset(rn_zeros, 0, 3 * max_keylen); + rn_ones = cp = rn_zeros + max_keylen; + addmask_key = cplim = rn_ones + max_keylen; + while (cp < cplim) + *cp++ = -1; + if (rn_inithead((void *)&mask_rnhead, 0) == 0) + return (clib_error_return (0, "RN Init 2")); + + return (NULL); +} + +VLIB_INIT_FUNCTION(rn_module_init); diff --git a/src/vnet/util/radix.h b/src/vnet/util/radix.h new file mode 100644 index 00000000000..d9ba66592ac --- /dev/null +++ b/src/vnet/util/radix.h @@ -0,0 +1,147 @@ +/* $NetBSD: radix.h,v 1.23 2016/11/15 01:50:06 ozaki-r Exp $ */ + +/* + * Copyright (c) 1988, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)radix.h 8.2 (Berkeley) 10/31/94 + */ + +#ifndef _NET_RADIX_H_ +#define _NET_RADIX_H_ + +#include + +/* + * Radix search tree node layout. + */ + +struct radix_node { + struct radix_mask *rn_mklist; /* list of masks contained in subtree */ + struct radix_node *rn_p; /* parent */ + i16 rn_b; /* bit offset; -1-index(netmask) */ + u8 rn_bmask; /* node: mask for bit test*/ + u8 rn_flags; /* enumerated next */ +#define RNF_NORMAL 1 /* leaf contains normal route */ +#define RNF_ROOT 2 /* leaf is root leaf for tree */ +#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */ + union { + struct { /* leaf only data: */ + const char *rn_Key; /* object of search */ + const char *rn_Mask; /* netmask, if present */ + struct radix_node *rn_Dupedkey; + } rn_leaf; + struct { /* node only data: */ + int rn_Off; /* where to start compare */ + struct radix_node *rn_L;/* progeny */ + struct radix_node *rn_R;/* progeny */ + } rn_node; + } rn_u; +#ifdef RN_DEBUG + i32 rn_info; + struct radix_node *rn_twin; + struct radix_node *rn_ybro; +#endif +}; + +#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey +#define rn_key rn_u.rn_leaf.rn_Key +#define rn_mask rn_u.rn_leaf.rn_Mask +#define rn_off rn_u.rn_node.rn_Off +#define rn_l rn_u.rn_node.rn_L +#define rn_r rn_u.rn_node.rn_R + +/* + * Annotations to tree concerning potential routes applying to subtrees. + */ + +struct radix_mask { + i16 rm_b; /* bit offset; -1-index(netmask) */ + i8 rm_unused; /* cf. 
rn_bmask */ + u8 rm_flags; /* cf. rn_flags */ + struct radix_mask *rm_mklist; /* more masks to try */ + union { + const char *rmu_mask; /* the mask */ + struct radix_node *rmu_leaf; /* for normal routes */ + } rm_rmu; + i32 rm_refs; /* # of references to this struct */ +}; + +#define rm_mask rm_rmu.rmu_mask +#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */ + +struct radix_node_head { + struct radix_node *rnh_treetop; + i32 rnh_addrsize; /* permit, but not require fixed keys */ + i32 rnh_pktsize; /* permit, but not require fixed keys */ + struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ + (const void *v, const void *mask, + struct radix_node_head *head, struct radix_node nodes[]); + struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ + (const void *v, const void *mask, + struct radix_node_head *head, struct radix_node nodes[]); + struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ + (const void *v, const void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ + (const void *v, const void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ + (const void *v, struct radix_node_head *head); + struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ + (const void *v, const void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ + (const void *v, struct radix_node_head *head); + struct radix_node rnh_nodes[3]; /* empty tree for common case */ +}; + +void rn_init(void); +int rn_inithead(void **, int); +void rn_delayedinit(void **, int); +int rn_inithead0(struct radix_node_head *, int); +int rn_refines(const void *, const void *); +int rn_walktree(struct radix_node_head *, + int (*)(struct radix_node *, void *), + void *); +struct radix_node * + rn_search_matched(struct radix_node_head *, + int (*)(struct radix_node *, void *), + void *); +struct 
radix_node + *rn_addmask(const void *, int, int), + *rn_addroute(const void *, const void *, struct radix_node_head *, + struct radix_node [2]), + *rn_delete1(const void *, const void *, struct radix_node_head *, + struct radix_node *), + *rn_delete(const void *, const void *, struct radix_node_head *), + *rn_insert(const void *, struct radix_node_head *, int *, + struct radix_node [2]), + *rn_lookup(const void *, const void *, struct radix_node_head *), + *rn_match(const void *, struct radix_node_head *), + *rn_newpair(const void *, int, struct radix_node[2]), + *rn_search(const void *, struct radix_node *), + *rn_search_m(const void *, struct radix_node *, const void *); + +#endif /* !_NET_RADIX_H_ */ diff --git a/src/vnet/vxlan/vxlan.c b/src/vnet/vxlan/vxlan.c index f749429f28d..9ed42875a35 100644 --- a/src/vnet/vxlan/vxlan.c +++ b/src/vnet/vxlan/vxlan.c @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include #include /** @@ -337,7 +338,7 @@ vtep_addr_unref(ip46_address_t *ip) typedef CLIB_PACKED(union { struct { - fib_node_index_t fib_entry_index; + fib_node_index_t mfib_entry_index; adj_index_t mcast_adj_index; }; u64 as_u64; @@ -353,11 +354,28 @@ mcast_shared_get(ip46_address_t * ip) } static inline void -ip46_multicast_ethernet_address(u8 * ethernet_address, ip46_address_t * ip) { - if (ip46_address_is_ip4(ip)) - ip4_multicast_ethernet_address(ethernet_address, &ip->ip4); - else - ip6_multicast_ethernet_address(ethernet_address, ip->ip6.as_u32[0]); +mcast_shared_add(ip46_address_t *dst, + fib_node_index_t mfei, + adj_index_t ai) +{ + mcast_shared_t new_ep = { + .mcast_adj_index = ai, + .mfib_entry_index = mfei, + }; + + hash_set_key_copy (&vxlan_main.mcast_shared, dst, new_ep.as_u64); +} + +static inline void +mcast_shared_remove(ip46_address_t *dst) +{ + mcast_shared_t ep = mcast_shared_get(dst); + + adj_unlock(ep.mcast_adj_index); + mfib_table_entry_delete_index(ep.mfib_entry_index, + MFIB_SOURCE_VXLAN); + + hash_unset_key_free 
(&vxlan_main.mcast_shared, dst); } int vnet_vxlan_add_del_tunnel @@ -503,28 +521,65 @@ int vnet_vxlan_add_del_tunnel */ fib_protocol_t fp = (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4; dpo_id_t dpo = DPO_INVALID; - dpo_proto_t dproto = fib_proto_to_dpo(fp); + mcast_shared_t ep; if (vtep_addr_ref(&t->dst) == 1) - { - u8 mcast_mac[6]; - - ip46_multicast_ethernet_address(mcast_mac, &t->dst); - receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo); - mcast_shared_t new_ep = { - .mcast_adj_index = adj_rewrite_add_and_lock - (fp, fib_proto_to_link(fp), a->mcast_sw_if_index, mcast_mac), - /* Add VRF local mcast adj. */ - .fib_entry_index = fib_table_entry_special_dpo_add - (t->encap_fib_index, &tun_dst_pfx, - FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo) - }; - hash_set_key_copy (&vxm->mcast_shared, &t->dst, new_ep.as_u64); - dpo_reset(&dpo); - } + { + fib_node_index_t mfei; + adj_index_t ai; + fib_route_path_t path = { + .frp_proto = fp, + .frp_addr = zero_addr, + .frp_sw_if_index = 0xffffffff, + .frp_fib_index = ~0, + .frp_weight = 0, + .frp_flags = FIB_ROUTE_PATH_LOCAL, + }; + const mfib_prefix_t mpfx = { + .fp_proto = fp, + .fp_len = (is_ip6 ? 
128 : 32), + .fp_grp_addr = tun_dst_pfx.fp_addr, + }; + + /* + * Setup the (*,G) to receive traffic on the mcast group + * - the forwarding interface is for-us + * - the accepting interface is that from the API + */ + mfib_table_entry_path_update(t->encap_fib_index, + &mpfx, + MFIB_SOURCE_VXLAN, + &path, + MFIB_ITF_FLAG_FORWARD); + + path.frp_sw_if_index = a->mcast_sw_if_index; + path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE; + mfei = mfib_table_entry_path_update(t->encap_fib_index, + &mpfx, + MFIB_SOURCE_VXLAN, + &path, + MFIB_ITF_FLAG_ACCEPT); + + /* + * Create the mcast adjacency to send traffic to the group + */ + ai = adj_mcast_add_or_lock(fp, + fib_proto_to_link(fp), + a->mcast_sw_if_index); + + /* + * create a new end-point + */ + mcast_shared_add(&t->dst, mfei, ai); + } + + ep = mcast_shared_get(&t->dst); + /* Stack shared mcast dst mac addr rewrite on encap */ - mcast_shared_t ep = mcast_shared_get(&t->dst); - dpo_set (&dpo, DPO_ADJACENCY, dproto, ep.mcast_adj_index); + dpo_set (&dpo, DPO_ADJACENCY, + fib_proto_to_dpo(fp), + ep.mcast_adj_index); + dpo_stack_from_node (encap_index, &t->next_dpo, &dpo); dpo_reset (&dpo); flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER; @@ -563,10 +618,7 @@ int vnet_vxlan_add_del_tunnel } else if (vtep_addr_unref(&t->dst) == 0) { - mcast_shared_t ep = mcast_shared_get(&t->dst); - adj_unlock(ep.mcast_adj_index); - fib_table_entry_delete_index(ep.fib_entry_index, FIB_SOURCE_SPECIAL); - hash_unset_key_free (&vxm->mcast_shared, &t->dst); + mcast_shared_remove(&t->dst); } fib_node_deinit(&t->node); -- cgit 1.2.3-korg