diff options
author | Neale Ranns <nranns@cisco.com> | 2016-08-25 15:29:12 +0100 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2016-09-21 17:37:39 +0000 |
commit | 0bfe5d8c792abcdbcf27bfcc7b7b353fba04aee2 (patch) | |
tree | d600b0e2e693e766e722936744930d3bebac493c /vnet | |
parent | 60537f3d83e83d0ce10a620ca99aad4eddf85f5e (diff) |
A Protocol Independent Hierarchical FIB (VPP-352)
Main Enhancements:
- Protocol Independent FIB API
- Hierarchical FIB entries. Dynamic recursive route resolution.
- Extranet Support.
- Integration of IP and MPLS forwarding.
- Separation of FIB and Adjacency databases.
- Data-Plane Object forwarding model.
Change-Id: I52dc815c0d0aa8b493e3cf6b978568f3cc82296c
Signed-off-by: Neale Ranns <nranns@cisco.com>
Diffstat (limited to 'vnet')
172 files changed, 39115 insertions, 10658 deletions
diff --git a/vnet/Makefile.am b/vnet/Makefile.am index 1c47c658ac7..41568e06045 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -13,7 +13,7 @@ AUTOMAKE_OPTIONS = foreign subdir-objects -AM_CFLAGS = -Wall @DPDK@ @IPSEC@ @IPV6SR@ +AM_CFLAGS = -Wall -Werror @DPDK@ @IPSEC@ @IPV6SR@ libvnet_la_SOURCES = libvnetplugin_la_SOURCES = @@ -264,7 +264,6 @@ nobase_include_HEADERS += \ # Layer 3 protocol: IP v4/v6 ######################################## libvnet_la_SOURCES += \ - vnet/ip/adj_alloc.c \ vnet/ip/format.c \ vnet/ip/icmp4.c \ vnet/ip/icmp6.c \ @@ -296,7 +295,6 @@ libvnet_la_SOURCES += \ vnet/ip/ip_frag.c nobase_include_HEADERS += \ - vnet/ip/adj_alloc.h \ vnet/ip/format.h \ vnet/ip/icmp46_packet.h \ vnet/ip/icmp4.h \ @@ -369,13 +367,15 @@ nobase_include_HEADERS += \ ######################################## libvnet_la_SOURCES += \ vnet/map/map.c \ + vnet/map/map_dpo.c \ vnet/map/ip4_map.c \ vnet/map/ip6_map.c \ vnet/map/ip4_map_t.c \ vnet/map/ip6_map_t.c nobase_include_HEADERS += \ - vnet/map/map.h + vnet/map/map.h \ + vnet/map/map_dpo.h if ENABLE_TESTS TESTS += test_map @@ -422,16 +422,20 @@ nobase_include_HEADERS += \ # Tunnel protocol: gre+mpls ######################################## libvnet_la_SOURCES += \ - vnet/mpls-gre/mpls.c \ - vnet/mpls-gre/node.c \ - vnet/mpls-gre/interface.c \ - vnet/mpls-gre/policy_encap.c \ - vnet/mpls-gre/pg.c + vnet/mpls/mpls.c \ + vnet/mpls/mpls_lookup.c \ + vnet/mpls/mpls_output.c \ + vnet/mpls/mpls_features.c \ + vnet/mpls/node.c \ + vnet/mpls/interface.c \ + vnet/mpls/policy_encap.c \ + vnet/mpls/pg.c nobase_include_HEADERS += \ - vnet/mpls-gre/mpls.h \ - vnet/mpls-gre/packet.h \ - vnet/mpls-gre/error.def + vnet/mpls/mpls.h \ + vnet/mpls/mpls_types.h \ + vnet/mpls/packet.h \ + vnet/mpls/error.def ######################################## @@ -466,6 +470,7 @@ nobase_include_HEADERS += \ libvnet_la_SOURCES += \ vnet/lisp-cp/lisp_types.c \ + vnet/lisp-cp/lisp_cp_dpo.c \ vnet/lisp-cp/control.c \ vnet/lisp-cp/gid_dictionary.c \ 
vnet/lisp-cp/lisp_msg_serdes.c \ @@ -513,6 +518,9 @@ endif libvnet_la_SOURCES += \ vnet/lisp-gpe/lisp_gpe.c \ + vnet/lisp-gpe/lisp_gpe_sub_interface.c \ + vnet/lisp-gpe/lisp_gpe_adjacency.c \ + vnet/lisp-gpe/lisp_gpe_tunnel.c \ vnet/lisp-gpe/interface.c \ vnet/lisp-gpe/ip_forward.c \ vnet/lisp-gpe/decap.c @@ -720,6 +728,90 @@ nobase_include_HEADERS += \ vnet/unix/tapcli.h ######################################## +# FIB +######################################## + +libvnet_la_SOURCES += \ + vnet/fib/fib.c \ + vnet/fib/fib_test.c \ + vnet/fib/ip4_fib.c \ + vnet/fib/ip6_fib.c \ + vnet/fib/mpls_fib.c \ + vnet/fib/fib_table.c \ + vnet/fib/fib_walk.c \ + vnet/fib/fib_types.c \ + vnet/fib/fib_node.c \ + vnet/fib/fib_node_list.c \ + vnet/fib/fib_entry.c \ + vnet/fib/fib_entry_src.c \ + vnet/fib/fib_entry_src_rr.c \ + vnet/fib/fib_entry_src_interface.c \ + vnet/fib/fib_entry_src_default_route.c \ + vnet/fib/fib_entry_src_special.c \ + vnet/fib/fib_entry_src_api.c \ + vnet/fib/fib_entry_src_adj.c \ + vnet/fib/fib_entry_src_mpls.c \ + vnet/fib/fib_entry_src_lisp.c \ + vnet/fib/fib_entry_cover.c \ + vnet/fib/fib_path_list.c \ + vnet/fib/fib_path.c \ + vnet/fib/fib_path_ext.c \ + vnet/fib/fib_attached_export.c + +nobase_include_HEADERS += \ + vnet/fib/fib.h \ + vnet/fib/ip4_fib.h \ + vnet/fib/ip6_fib.h \ + vnet/fib/fib_types.h \ + vnet/fib/fib_table.h \ + vnet/fib/fib_node.h \ + vnet/fib/fib_node_list.h \ + vnet/fib/fib_entry.h + +######################################## +# ADJ +######################################## + +libvnet_la_SOURCES += \ + vnet/adj/adj_alloc.c \ + vnet/adj/adj_nbr.c \ + vnet/adj/adj_rewrite.c \ + vnet/adj/adj_glean.c \ + vnet/adj/adj_midchain.c \ + vnet/adj/adj.c + +nobase_include_HEADERS += \ + vnet/adj/adj.h \ + vnet/adj/adj_types.h \ + vnet/adj/adj_rewrite.h \ + vnet/adj/adj_glean.h \ + vnet/adj/adj_nbr.h + +######################################## +# Data-Plane Objects +######################################## + +libvnet_la_SOURCES += \ + 
vnet/dpo/dpo.c \ + vnet/dpo/drop_dpo.c \ + vnet/dpo/punt_dpo.c \ + vnet/dpo/receive_dpo.c \ + vnet/dpo/load_balance.c \ + vnet/dpo/load_balance_map.c \ + vnet/dpo/lookup_dpo.c \ + vnet/dpo/classify_dpo.c \ + vnet/dpo/mpls_label_dpo.c + +nobase_include_HEADERS += \ + vnet/dpo/load_balance.h \ + vnet/dpo/drop_dpo.h \ + vnet/dpo/lookup_dpo.h \ + vnet/dpo/punt_dpo.h \ + vnet/dpo/classify_dpo.h \ + vnet/dpo/receive_dpo.h \ + vnet/dpo/dpo.h + +######################################## # Plugin client library ######################################## diff --git a/vnet/etc/scripts/arp4-mpls b/vnet/etc/scripts/arp4-mpls new file mode 100644 index 00000000000..d3d39f3b921 --- /dev/null +++ b/vnet/etc/scripts/arp4-mpls @@ -0,0 +1,24 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.0.0.2 -> 2.2.2.2 + ICMP echo_request + incrementing 100 + } +} + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33 + +trace add pg-input 100 diff --git a/vnet/etc/scripts/lfib/ip4-to-mpls b/vnet/etc/scripts/lfib/ip4-to-mpls new file mode 100644 index 00000000000..85753797751 --- /dev/null +++ b/vnet/etc/scripts/lfib/ip4-to-mpls @@ -0,0 +1,26 @@ +packet-generator new { + name x + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + ICMP: 1.0.0.2 -> 2.2.2.2 + ICMP echo_request + incrementing 100 + } +} + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + +set ip arp static loop1 2.0.0.2 dead.beef.babe +set int mpls loop1 enable +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33 + +trace add pg-input 100 diff --git a/vnet/etc/scripts/lfib/mpls-pop-to-mpls b/vnet/etc/scripts/lfib/mpls-pop-to-mpls new file mode 100644 index 00000000000..2818ac133e1 --- /dev/null +++ 
b/vnet/etc/scripts/lfib/mpls-pop-to-mpls @@ -0,0 +1,28 @@ +packet-generator new { + name x + limit 1 + node mpls-input + size 72-72 + no-recycle + data { + hex 0x0001e0ff0001f1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 + } +} + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + +set ip arp static loop1 2.0.0.2 dead.beef.babe +set int mpls loop1 enable + +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33 + +mpls local-label add 30 non-eos mpls-lookup-in-table 0 +mpls local-label add 31 2.2.2.2/32 + +trace add pg-input 100 diff --git a/vnet/etc/scripts/lfib/mpls-to-ip4 b/vnet/etc/scripts/lfib/mpls-to-ip4 new file mode 100644 index 00000000000..24e235e01db --- /dev/null +++ b/vnet/etc/scripts/lfib/mpls-to-ip4 @@ -0,0 +1,27 @@ +packet-generator new { + name x + limit 1 + node mpls-input + size 68-68 + no-recycle + data { + hex 0x0001e1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 + } +} + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + +set ip arp static loop1 2.0.0.2 dead.beef.babe +set int mpls loop1 enable + +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33 + +mpls local-label add 30 eos ip4-lookup-in-table 0 + +trace add pg-input 100 diff --git a/vnet/etc/scripts/lfib/mpls-to-mpls b/vnet/etc/scripts/lfib/mpls-to-mpls new file mode 100644 index 00000000000..497dbab324f --- /dev/null +++ b/vnet/etc/scripts/lfib/mpls-to-mpls @@ -0,0 +1,26 @@ +packet-generator new { + name x + limit 1 + node mpls-input + size 68-68 + no-recycle + data { + hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 + } +} + 
+loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + +set ip arp static loop1 2.0.0.2 dead.beef.babe +set int mpls loop1 enable + +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 out-label 33 +mpls local-label add 30 2.2.2.2/32 + +trace add pg-input 100 diff --git a/vnet/etc/scripts/mpls-o-ethernet/pg b/vnet/etc/scripts/mpls-o-ethernet/pg new file mode 100644 index 00000000000..ba5397f7648 --- /dev/null +++ b/vnet/etc/scripts/mpls-o-ethernet/pg @@ -0,0 +1,10 @@ +packet-generator new { + name x + limit 1 + node mpls-ethernet-input + size 68-68 + no-recycle + data { + hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 + } +}
\ No newline at end of file diff --git a/vnet/etc/scripts/mpls-o-ethernet/single.conf b/vnet/etc/scripts/mpls-o-ethernet/single.conf new file mode 100644 index 00000000000..2a25d35512c --- /dev/null +++ b/vnet/etc/scripts/mpls-o-ethernet/single.conf @@ -0,0 +1,17 @@ +comment { single node configuration } + +loop create +loop create +set int state loop0 up +set int state loop1 up + +set int ip address loop0 1.0.0.1/24 +set int ip address loop1 2.0.0.1/24 + + +ip route add 2.2.2.2/32 via 2.0.0.2 loop1 + +mpls encap add label 30 fib 0 dest 2.2.2.2 +mpls decap add label 30 fib 0 + +create mpls ethernet tunnel dst 00:50:56:b7:05:cb adj 2.2.2.2/32 tx-intfc loop1 fib-id 0 diff --git a/vnet/etc/scripts/source_and_port_range_check b/vnet/etc/scripts/source_and_port_range_check new file mode 100644 index 00000000000..dce227b4315 --- /dev/null +++ b/vnet/etc/scripts/source_and_port_range_check @@ -0,0 +1,63 @@ + +create loop int + +set int state loop0 up +set int ip addr loop0 10.10.10.10/32 + +packet-generator new { + name deny-from-default-route + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + UDP: 1.2.3.4 -> 5.6.7.8 + UDP: 3000 -> 3001 + length 128 checksum 0 incrementing 1 + } +} + +packet-generator new { + name allow + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + UDP: 1.1.1.1 -> 5.6.7.8 + UDP: 3000 -> 3001 + length 128 checksum 0 incrementing 1 + } +} + +packet-generator new { + name deny-from-port-range + limit 1 + node ip4-input + size 64-64 + no-recycle + data { + UDP: 1.1.1.1 -> 5.6.7.8 + UDP: 6000 -> 6001 + length 128 checksum 0 incrementing 1 + } +} + +set ip source-and-port-range-check 1.1.1.0/24 range 2000 - 3000 vrf 99 + +set interface ip source-and-port-range-check pg0 udp-out-vrf 99 + + show ip source-and-port-range-check vrf 99 1.1.1.1 + +set ip source-and-port-range-check 1.1.1.0/24 range 4000 - 5000 vrf 99 + +set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99 + +show ip source-and-port-range-check vrf 99 
1.1.1.1 +show ip source-and-port-range-check vrf 99 1.1.2.1 + +set ip source-and-port-range-check 1.1.2.0/24 range 4000 - 5000 vrf 99 del + +show ip source-and-port-range-check vrf 99 1.1.2.1 + +tr add pg-input 100 diff --git a/vnet/vnet/adj/adj.c b/vnet/vnet/adj/adj.c new file mode 100644 index 00000000000..b552fdb2bbc --- /dev/null +++ b/vnet/vnet/adj/adj.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj.h> +#include <vnet/adj/adj_alloc.h> +#include <vnet/adj/adj_internal.h> +#include <vnet/adj/adj_glean.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/fib/fib_node_list.h> + +/* + * Special Adj with index zero. we need to define this since the v4 mtrie + * assumes an index of 0 implies the ply is empty. therefore all 'real' + * adjs need a non-zero index. + */ +static ip_adjacency_t *special_v4_miss_adj_with_index_zero; + +/* Adjacency packet/byte counters indexed by adjacency index. */ +vlib_combined_counter_main_t adjacency_counters; + +always_inline void +adj_poison (ip_adjacency_t * adj) +{ + if (CLIB_DEBUG > 0) + { + u32 save_handle = adj->heap_handle;; + + memset (adj, 0xfe, sizeof (adj[0])); + + adj->heap_handle = save_handle; + } +} + +ip_adjacency_t * +adj_alloc (fib_protocol_t proto) +{ + ip_adjacency_t *adj; + + adj = aa_alloc(); + + adj_poison(adj); + + /* Make sure certain fields are always initialized. 
*/ + /* Validate adjacency counters. */ + vlib_validate_combined_counter(&adjacency_counters, + adj->heap_handle); + + adj->rewrite_header.sw_if_index = ~0; + adj->mcast_group_index = ~0; + adj->saved_lookup_next_index = 0; + adj->n_adj = 1; + + fib_node_init(&adj->ia_node, + FIB_NODE_TYPE_ADJ); + adj->ia_nh_proto = proto; + + return (adj); +} + +static int +adj_index_is_special (adj_index_t adj_index) +{ + if (ADJ_INDEX_INVALID == adj_index) + return (!0); + + return (0); +} + +/** + * @brief Pretty print helper function for formatting specific adjacencies. + * @param s - input string to format + * @param args - other args passed to format function such as: + * - vnet_main_t + * - ip_lookup_main_t + * - adj_index + */ +u8 * +format_ip_adjacency (u8 * s, va_list * args) +{ + vnet_main_t * vnm = va_arg (*args, vnet_main_t *); + u32 adj_index = va_arg (*args, u32); + format_ip_adjacency_flags_t fiaf = va_arg (*args, format_ip_adjacency_flags_t); + ip_adjacency_t * adj = adj_get(adj_index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + s = format (s, "%U", format_adj_nbr, adj_index, 0); + break; + case IP_LOOKUP_NEXT_ARP: + s = format (s, "%U", format_adj_nbr_incomplete, adj_index, 0); + break; + case IP_LOOKUP_NEXT_GLEAN: + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, + adj->rewrite_header.sw_if_index)); + break; + + case IP_LOOKUP_NEXT_MIDCHAIN: + s = format (s, "%U", format_adj_midchain, adj_index, 2); + break; + default: + break; + } + s = format (s, " index:%d", adj_index); + + if (fiaf & FORMAT_IP_ADJACENCY_DETAIL) + { + s = format (s, " locks:%d", adj->ia_node.fn_locks); + s = format(s, "\nchildren:\n "); + s = fib_node_children_format(adj->ia_node.fn_children, s); + } + + return s; +} + +/* + * adj_last_lock_gone + * + * last lock/reference to the adj has gone, we no longer need it. 
+ */ +static void +adj_last_lock_gone (ip_adjacency_t *adj) +{ + ASSERT(0 == fib_node_list_get_size(adj->ia_node.fn_children)); + ADJ_DBG(adj, "last-lock-gone"); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_MIDCHAIN: + dpo_reset(&adj->sub_type.midchain.next_dpo); + /* FALL THROUGH */ + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_REWRITE: + /* + * complete and incomplete nbr adjs + */ + adj_nbr_remove(adj->ia_nh_proto, + adj->ia_link, + &adj->sub_type.nbr.next_hop, + adj->rewrite_header.sw_if_index); + break; + case IP_LOOKUP_NEXT_GLEAN: + adj_glean_remove(adj->ia_nh_proto, + adj->rewrite_header.sw_if_index); + break; + default: + /* + * type not stored in any DB from which we need to remove it + */ + break; + } + + fib_node_deinit(&adj->ia_node); + aa_free(adj); +} + +void +adj_lock (adj_index_t adj_index) +{ + ip_adjacency_t *adj; + + if (adj_index_is_special(adj_index)) + { + return; + } + + adj = adj_get(adj_index); + ASSERT(adj); + ASSERT(adj->heap_handle!=0); + + ADJ_DBG(adj, "lock"); + fib_node_lock(&adj->ia_node); +} + +void +adj_unlock (adj_index_t adj_index) +{ + ip_adjacency_t *adj; + + if (adj_index_is_special(adj_index)) + { + return; + } + + adj = adj_get(adj_index); + ASSERT(adj); + ASSERT(adj->heap_handle!=0); + + ADJ_DBG(adj, "unlock"); + ASSERT(adj); + ASSERT(adj->heap_handle!=0); + + fib_node_unlock(&adj->ia_node); +} + +u32 +adj_child_add (adj_index_t adj_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + ASSERT(ADJ_INDEX_INVALID != adj_index); + if (adj_index_is_special(adj_index)) + { + return (~0); + } + + return (fib_node_child_add(FIB_NODE_TYPE_ADJ, + adj_index, + child_type, + child_index)); +} + +void +adj_child_remove (adj_index_t adj_index, + u32 sibling_index) +{ + if (adj_index_is_special(adj_index)) + { + return; + } + + fib_node_child_remove(FIB_NODE_TYPE_ADJ, + adj_index, + sibling_index); +} + +static fib_node_t * +adj_get_node (fib_node_index_t index) +{ + ip_adjacency_t *adj; + + adj = 
adj_get(index); + + return (&adj->ia_node); +} + +#define ADJ_FROM_NODE(_node) \ + ((ip_adjacency_t*)((char*)_node - STRUCT_OFFSET_OF(ip_adjacency_t, ia_node))) + +static void +adj_node_last_lock_gone (fib_node_t *node) +{ + adj_last_lock_gone(ADJ_FROM_NODE(node)); +} + +static fib_node_back_walk_rc_t +adj_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + /* + * Que pasa. yo soj en el final! + */ + ASSERT(0); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * Adjacency's graph node virtual function table + */ +static const fib_node_vft_t adj_vft = { + .fnv_get = adj_get_node, + .fnv_last_lock = adj_node_last_lock_gone, + .fnv_back_walk = adj_back_walk_notify, +}; + +static clib_error_t * +adj_module_init (vlib_main_t * vm) +{ + fib_node_register_type(FIB_NODE_TYPE_ADJ, &adj_vft); + + adj_nbr_module_init(); + adj_glean_module_init(); + adj_midchain_module_init(); + + /* + * 4 special adjs for v4 and v6 resp. + */ + aa_bootstrap(8); + special_v4_miss_adj_with_index_zero = adj_alloc(FIB_PROTOCOL_IP4); + + return (NULL); +} + +VLIB_INIT_FUNCTION (adj_module_init); + +/* + * DEPRECATED: DO NOT USE + * + * Create new block of given number of contiguous adjacencies. + */ +ip_adjacency_t * +ip_add_adjacency (ip_lookup_main_t * lm, + ip_adjacency_t * copy_adj, + u32 n_adj, + u32 * adj_index_return) +{ + ip_adjacency_t * adj; + u32 ai, i, handle; + + ASSERT(1==n_adj); + + adj = aa_alloc (); + handle = ai = adj->heap_handle; + + /* Validate adjacency counters. */ + vlib_validate_combined_counter (&adjacency_counters, ai + n_adj - 1); + + for (i = 0; i < n_adj; i++) + { + /* Make sure certain fields are always initialized. */ + adj[i].rewrite_header.sw_if_index = ~0; + adj[i].mcast_group_index = ~0; + adj[i].saved_lookup_next_index = 0; + + if (copy_adj) + adj[i] = copy_adj[i]; + + adj[i].heap_handle = handle; + adj[i].n_adj = n_adj; + + /* Zero possibly stale counters for re-used adjacencies. 
*/ + vlib_zero_combined_counter (&adjacency_counters, ai + i); + } + + *adj_index_return = ai; + return adj; +} diff --git a/vnet/vnet/adj/adj.h b/vnet/vnet/adj/adj.h new file mode 100644 index 00000000000..3a1236497e1 --- /dev/null +++ b/vnet/vnet/adj/adj.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * An adjacency is a representation of an attached L3 peer. + * + * Adjacency Sub-types: + * - neighbour: a representation of an attached L3 peer. + * Key:{addr,interface,link/ether-type} + * SHARED + * - glean: used to drive ARP/ND for packets destined to a local sub-net. + * 'glean' means use the packet's destination address as the target + * address in the ARP packet. + * UNSHARED. Only one per-interface. + * - midchain: a neighbour adj on a virtual/tunnel interface. + * - rewrite: an adj with no key, but with a rewrite string. + * + * The API to create and update the adjacency is very sub-type specific. This + * is intentional as it encourages the user to carefully consider which adjacency + * sub-type they are really using, and hence assign it data in the appropriate + * sub-type space in the union of sub-types. This prevents the adj becoming a + * disorganised dumping ground for 'my feature needs a u16 somewhere' data. It + * is important to enforce this approach as space in the adjacency is at a premium, + * as we need it to fit in 1 cache line. 
+ * + * the API is also based around an index to an adjacency not a raw pointer. This + * is so the user doesn't suffer the same limp inducing firearm injuries that + * the author suffered as the adjacencies can realloc. + */ + +#ifndef __ADJ_H__ +#define __ADJ_H__ + +#include <vnet/ip/lookup.h> +#include <vnet/adj/adj_types.h> +#include <vnet/adj/adj_nbr.h> +#include <vnet/adj/adj_rewrite.h> +#include <vnet/adj/adj_glean.h> + +/** + * @brief + * Take a reference counting lock on the adjacency + */ +extern void adj_lock(adj_index_t adj_index); +/** + * @brief + * Release a reference counting lock on the adjacency + */ +extern void adj_unlock(adj_index_t adj_index); + +/** + * @brief + * Add a child dependent to an adjacency. The child will + * thus be informed via its registered back-walk function + * when the adjacency state changes. + */ +extern u32 adj_child_add(adj_index_t adj_index, + fib_node_type_t type, + fib_node_index_t child_index); +/** + * @brief + * Remove a child dependent + */ +extern void adj_child_remove(adj_index_t adj_index, + u32 sibling_index); + +/** + * @brief + * The global adjacency heap. Exposed for fast/inline data-plane access + */ +extern ip_adjacency_t *adj_heap; + +/** + * @brief + * Adjacency packet counters + */ +extern vlib_combined_counter_main_t adjacency_counters; + +/** + * @brief + * Get a pointer to an adjacency object from its index + */ +static inline ip_adjacency_t * +adj_get (adj_index_t adj_index) +{ + return (vec_elt_at_index(adj_heap, adj_index)); +} + +#endif diff --git a/vnet/vnet/ip/adj_alloc.c b/vnet/vnet/adj/adj_alloc.c index 3ae7a199f19..5cc8cf6ef04 100644 --- a/vnet/vnet/ip/adj_alloc.c +++ b/vnet/vnet/adj/adj_alloc.c @@ -13,14 +13,18 @@ * limitations under the License. 
*/ -#include <vnet/ip/adj_alloc.h> +#include <vnet/adj/adj_alloc.h> #include <vnet/ip/ip.h> +/* + * the single adj heap + */ +ip_adjacency_t *adj_heap; + /* * any operation which could cause the adj vector to be reallocated * must have a worker thread barrier */ - static inline int will_reallocate (ip_adjacency_t * adjs, u32 n) { uword aligned_header_bytes, new_data_bytes; @@ -45,13 +49,14 @@ static inline int will_reallocate (ip_adjacency_t * adjs, u32 n) } ip_adjacency_t * -aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n) +aa_alloc (void) { vlib_main_t * vm = &vlib_global_main; - aa_header_t * ah = aa_header (adjs); + aa_header_t * ah = aa_header (adj_heap); ip_adjacency_t * adj_block; u32 freelist_length; int need_barrier_sync = 0; + u32 n = 1; ASSERT(os_get_cpu_number() == 0); ASSERT (clib_mem_is_heap_object (_vec_find(ah))); @@ -59,14 +64,14 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n) /* If we don't have a freelist of size N, fresh allocation is required */ if (vec_len (ah->free_indices_by_size) <= n) { - if (will_reallocate (adjs, n)) + if (will_reallocate (adj_heap, n)) { need_barrier_sync = 1; vlib_worker_thread_barrier_sync (vm); } /* Workers wont look at the freelists... 
*/ vec_validate (ah->free_indices_by_size, n); - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, + vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes, CLIB_CACHE_LINE_BYTES); if (need_barrier_sync) vlib_worker_thread_barrier_release (vm); @@ -77,17 +82,17 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n) { u32 index = ah->free_indices_by_size[n][freelist_length-1]; - adj_block = &adjs[index]; + adj_block = &adj_heap[index]; _vec_len(ah->free_indices_by_size[n]) -= 1; goto out; } /* Allocate a new block of size N */ - if (will_reallocate (adjs, n)) + if (will_reallocate (adj_heap, n)) { need_barrier_sync = 1; vlib_worker_thread_barrier_sync (vm); } - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, + vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes, CLIB_CACHE_LINE_BYTES); if (need_barrier_sync) @@ -95,40 +100,45 @@ aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n) out: memset (adj_block, 0, n * (sizeof(*adj_block))); - adj_block->heap_handle = adj_block - adjs; + adj_block->heap_handle = adj_block - adj_heap; adj_block->n_adj = n; - *blockp = adj_block; - return adjs; + + /* + * the adj heap may have realloc'd. recache. 
+ */ + ip4_main.lookup_main.adjacency_heap = adj_heap; + ip6_main.lookup_main.adjacency_heap = adj_heap; + + return (adj_block); } -void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj) +void aa_free (ip_adjacency_t * adj) { - aa_header_t * ah = aa_header (adjs); + aa_header_t * ah = aa_header (adj_heap); - ASSERT (adjs && adj && (adj->heap_handle < vec_len (adjs))); - ASSERT (adj->n_adj < vec_len (ah->free_indices_by_size)); + ASSERT (adj_heap && adj && (adj->heap_handle < vec_len (adj_heap))); ASSERT (adj->heap_handle != 0); vec_add1 (ah->free_indices_by_size[adj->n_adj], adj->heap_handle); adj->heap_handle = 0; } -ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n) +void aa_bootstrap (u32 n) { ip_adjacency_t * adj_block; aa_header_t * ah; int i; - vec_add2_ha (adjs, adj_block, n, aa_aligned_header_bytes, + vec_add2_ha (adj_heap, adj_block, n, aa_aligned_header_bytes, CLIB_CACHE_LINE_BYTES); memset (adj_block, 0, n * sizeof(*adj_block)); - ah = aa_header (adjs); + ah = aa_header (adj_heap); memset (ah, 0, sizeof (*ah)); vec_validate (ah->free_indices_by_size, 1); - for (i = 0 ; i < vec_len (adjs); i++) + for (i = 0 ; i < vec_len (adj_heap); i++) { adj_block->n_adj = 1; adj_block->heap_handle = ~0; @@ -136,24 +146,23 @@ ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n) vec_add1 (ah->free_indices_by_size[1], n - (i+1)); } - return adjs; + ip4_main.lookup_main.adjacency_heap = adj_heap; + ip6_main.lookup_main.adjacency_heap = adj_heap; } u8 * format_adjacency_alloc (u8 * s, va_list * args) { vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - ip_adjacency_t * adjs = va_arg (*args, ip_adjacency_t *); int verbose = va_arg (*args, int); ip_adjacency_t * adj; u32 inuse = 0, freed = 0; u32 on_freelist = 0; int i, j; - aa_header_t * ah = aa_header (adjs); + aa_header_t * ah = aa_header (adj_heap); - for (i = 0; i < vec_len (adjs); i += adj->n_adj) + for (i = 0; i < vec_len (adj_heap); 
i += adj->n_adj) { - adj = adjs + i; + adj = adj_heap + i; if ((i == 0) || adj->heap_handle) inuse += adj->n_adj; else @@ -164,19 +173,19 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args) { for (j = 0; j < vec_len(ah->free_indices_by_size[i]); j++) { - adj = adjs + ah->free_indices_by_size[i][j]; + adj = adj_heap + ah->free_indices_by_size[i][j]; ASSERT(adj->heap_handle == 0); on_freelist += adj->n_adj; } } - s = format (s, "adjs: %d total, %d in use, %d free, %d on freelists\n", - vec_len(adjs), inuse, freed, on_freelist); + s = format (s, "adj_heap: %d total, %d in use, %d free, %d on freelists\n", + vec_len(adj_heap), inuse, freed, on_freelist); if (verbose) { - for (i = 0; i < vec_len (adjs); i += adj->n_adj) + for (i = 0; i < vec_len (adj_heap); i += adj->n_adj) { - adj = adjs + i; + adj = adj_heap + i; if ((i == 0) || adj->heap_handle) { if (adj->n_adj > 1) @@ -190,7 +199,7 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args) s = format (s, " "); s = format(s, "%U\n", format_ip_adjacency, - vnm, lm, i+j); + vnm, i+j, FORMAT_IP_ADJACENCY_NONE); } } } @@ -200,36 +209,22 @@ u8 * format_adjacency_alloc (u8 * s, va_list * args) static clib_error_t * show_adjacency_alloc_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) + unformat_input_t * input, + vlib_cli_command_t * cmd) { int verbose = 0; vnet_main_t *vnm = vnet_get_main(); - ip_lookup_main_t *lm = 0; - ip_adjacency_t * adjs = 0; - int is_ip4 = 1; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "verbose")) verbose = 1; - else if (unformat (input, "ip4")) - ; - else if (unformat (input, "ip6")) - is_ip4 = 0; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } - if (is_ip4) - lm = &ip4_main.lookup_main; - else - lm = &ip6_main.lookup_main; - - adjs = lm->adjacency_heap; - - vlib_cli_output (vm, "%U", format_adjacency_alloc, vnm, lm, adjs, verbose); + vlib_cli_output (vm, "%U", 
format_adjacency_alloc, vnm, verbose); return 0; } diff --git a/vnet/vnet/ip/adj_alloc.h b/vnet/vnet/adj/adj_alloc.h index a10146c53a5..7d1a3fb3133 100644 --- a/vnet/vnet/ip/adj_alloc.h +++ b/vnet/vnet/adj/adj_alloc.h @@ -16,7 +16,8 @@ #ifndef __adj_alloc_h__ #define __adj_alloc_h__ -/* +/** + * @brief * Adjacency allocator: heap-like in that the code * will dole out contiguous chunks of n items. In the interests of * thread safety, we don't bother about coalescing free blocks of size r @@ -43,10 +44,9 @@ static inline aa_header_t * aa_header (void * v) return vec_aligned_header (v, sizeof (aa_header_t), sizeof (void *)); } -ip_adjacency_t * -aa_alloc (ip_adjacency_t * adjs, ip_adjacency_t **blockp, u32 n); -void aa_free (ip_adjacency_t * adjs, ip_adjacency_t * adj); -ip_adjacency_t * aa_bootstrap (ip_adjacency_t * adjs, u32 n); +extern ip_adjacency_t *aa_alloc(void); +extern void aa_free (ip_adjacency_t * adj); +extern void aa_bootstrap (u32 n); format_function_t format_adj_allocation; diff --git a/vnet/vnet/adj/adj_glean.c b/vnet/vnet/adj/adj_glean.c new file mode 100644 index 00000000000..6eb6718e216 --- /dev/null +++ b/vnet/vnet/adj/adj_glean.c @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj.h> +#include <vnet/adj/adj_alloc.h> +#include <vnet/adj/adj_internal.h> +#include <vnet/fib/fib_walk.h> + +/* + * The 'DB' of all glean adjs. 
+ * There is only one glean per-interface per-protocol, so this is a per-interface + * vector + */ +static adj_index_t *adj_gleans[FIB_PROTOCOL_MAX]; + +static inline vlib_node_registration_t* +adj_get_glean_node (fib_protocol_t proto) +{ + switch (proto) { + case FIB_PROTOCOL_IP4: + return (&ip4_glean_node); + case FIB_PROTOCOL_IP6: + return (&ip6_glean_node); + case FIB_PROTOCOL_MPLS: + break; + } + ASSERT(0); + return (NULL); +} + +/* + * adj_glean_add_or_lock + * + * The next_hop address here is used for source address selection in the DP. + * The glean adj is added to an interface's connected prefix, the next-hop + * passed here is the local prefix on the same interface. + */ +adj_index_t +adj_glean_add_or_lock (fib_protocol_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr) +{ + ip_adjacency_t * adj; + + vec_validate_init_empty(adj_gleans[proto], sw_if_index, ADJ_INDEX_INVALID); + + if (ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index]) + { + adj = adj_alloc(proto); + + adj->lookup_next_index = IP_LOOKUP_NEXT_GLEAN; + adj->ia_nh_proto = proto; + adj_gleans[proto][sw_if_index] = adj->heap_handle; + + if (NULL != nh_addr) + { + adj->sub_type.glean.receive_addr = *nh_addr; + } + + adj->rewrite_header.data_bytes = 0; + + vnet_rewrite_for_sw_interface(vnet_get_main(), + adj_fib_proto_2_nd(proto), + sw_if_index, + adj_get_glean_node(proto)->index, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + } + else + { + adj = adj_get(adj_gleans[proto][sw_if_index]); + } + + adj_lock(adj->heap_handle); + + return (adj->heap_handle); +} + +void +adj_glean_remove (fib_protocol_t proto, + u32 sw_if_index) +{ + ASSERT(sw_if_index < vec_len(adj_gleans[proto])); + + adj_gleans[proto][sw_if_index] = ADJ_INDEX_INVALID; +} + +static clib_error_t * +adj_glean_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + /* + * for each glean on the interface trigger a walk back to the children + */ 
+ fib_protocol_t proto; + ip_adjacency_t *adj; + + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_gleans[proto]) || + ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index]) + continue; + + adj = adj_get(adj_gleans[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? + FIB_NODE_BW_REASON_FLAG_INTERFACE_UP : + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj->heap_handle, &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_glean_interface_state_change); + +static clib_error_t * +adj_glean_interface_delete (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + /* + * for each glean on the interface trigger a walk back to the children + */ + fib_protocol_t proto; + ip_adjacency_t *adj; + + if (is_add) + { + /* + * not interested in interface additions. we will not back walk + * to resolve paths through newly added interfaces. Why? The control + * plane should have the brains to add interfaces first, then routes. + * So the case where there are paths with a interface that matches + * one just created is the case where the path resolved through an + * interface that was deleted, and still has not been removed. The + * new interface added, is NO GUARANTEE that the interface being + * added now, even though it may have the same sw_if_index, is the + * same interface that the path needs. So tough! + * If the control plane wants these routes to resolve it needs to + * remove and add them again. 
+ */ + return (NULL); + } + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (sw_if_index >= vec_len(adj_gleans[proto]) || + ADJ_INDEX_INVALID == adj_gleans[proto][sw_if_index]) + continue; + + adj = adj_get(adj_gleans[proto][sw_if_index]); + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj->heap_handle, &bw_ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete); + +u8* +format_adj_glean (u8* s, va_list *ap) +{ + index_t index = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + return (format(s, " glean: %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, + adj->rewrite_header.sw_if_index))); +} + + +static void +adj_dpo_lock (dpo_id_t *dpo) +{ + adj_lock(dpo->dpoi_index); +} +static void +adj_dpo_unlock (dpo_id_t *dpo) +{ + adj_unlock(dpo->dpoi_index); +} + +const static dpo_vft_t adj_glean_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_glean, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a glean + * object. + * + * this means that these graph nodes are ones from which a glean is the + * parent object in the DPO-graph. 
+ */ +const static char* const glean_ip4_nodes[] = +{ + "ip4-glean", + NULL, +}; +const static char* const glean_ip6_nodes[] = +{ + "ip6-glean", + NULL, +}; + +const static char* const * const glean_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = glean_ip4_nodes, + [DPO_PROTO_IP6] = glean_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +adj_glean_module_init (void) +{ + dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes); +} diff --git a/vnet/vnet/adj/adj_glean.h b/vnet/vnet/adj/adj_glean.h new file mode 100644 index 00000000000..ce3534ecee6 --- /dev/null +++ b/vnet/vnet/adj/adj_glean.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @brief Glean Adjacency + * + * A gleean adjacency represent the need to discover new peers on an + * attached link. Packets that hit a glean adjacency will generate an + * ARP/ND packet addessesed to the packet's destination address. + * Note this is different to an incomplete neighbour adjacency, which + * does not send ARP/ND requests to the packet's destination address, + * but instead to the next-hop address of the adjacency itself. 
+ */ + +#ifndef __ADJ_GLEAN_H__ +#define __ADJ_GLEAN_H__ + +#include <vnet/adj/adj_types.h> + +/** + * @brief + * Add (and lock) a new or lock an existing glean adjacency + * + * @param proto + * The protocol for the neighbours that we wish to glean + * + * @param sw_if_index + * The interface on which to glean + * + * @param nh_addr + * the address applied to the interface on which to glean. This + * as the source address in packets when the ARP/ND packet is sent + */ +extern adj_index_t adj_glean_add_or_lock(fib_protocol_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr); + +/** + * @brief + * Module initialisation + */ +extern void adj_glean_module_init(void); + +#endif diff --git a/vnet/vnet/adj/adj_internal.h b/vnet/vnet/adj/adj_internal.h new file mode 100644 index 00000000000..79042d1fd2a --- /dev/null +++ b/vnet/vnet/adj/adj_internal.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ADJ_INTERNAL_H__ +#define __ADJ_INTERNAL_H__ + +#include <vnet/adj/adj.h> +#include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> + + +/** + * big switch to turn on Adjacency debugging + */ +#undef ADJ_DEBUG + +/* + * Debug macro + */ +#ifdef ADJ_DEBUG +#define ADJ_DBG(_adj, _fmt, _args...) \ +{ \ + clib_warning("adj:[%d:%p]:" _fmt, \ + _adj->heap_handle, _adj, \ + ##_args); \ +} +#else +#define ADJ_DBG(_e, _fmt, _args...) 
+#endif + +static inline vlib_node_registration_t* +adj_get_rewrite_node (fib_link_t linkt) +{ + switch (linkt) { + case FIB_LINK_IP4: + return (&ip4_rewrite_node); + case FIB_LINK_IP6: + return (&ip6_rewrite_node); + case FIB_LINK_MPLS: + return (&mpls_output_node); + } + ASSERT(0); + return (NULL); +} + +static inline vnet_l3_packet_type_t +adj_fib_link_2_vnet (fib_link_t linkt) +{ + switch (linkt) + { + case FIB_LINK_IP4: + return (VNET_L3_PACKET_TYPE_IP4); + case FIB_LINK_IP6: + return (VNET_L3_PACKET_TYPE_IP6); + case FIB_LINK_MPLS: + return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + } + return (0); +} + +static inline vnet_l3_packet_type_t +adj_fib_proto_2_nd (fib_protocol_t fp) +{ + switch (fp) + { + case FIB_PROTOCOL_IP4: + return (VNET_L3_PACKET_TYPE_ARP); + case FIB_PROTOCOL_IP6: + return (VNET_L3_PACKET_TYPE_IP6); + case FIB_PROTOCOL_MPLS: + return (VNET_L3_PACKET_TYPE_MPLS_UNICAST); + } + return (0); +} + +extern ip_adjacency_t * adj_alloc(fib_protocol_t proto); + +extern void adj_nbr_remove(fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index); +extern void adj_glean_remove(fib_protocol_t proto, + u32 sw_if_index); + +#endif diff --git a/vnet/vnet/adj/adj_midchain.c b/vnet/vnet/adj/adj_midchain.c new file mode 100644 index 00000000000..4b9b6a414d2 --- /dev/null +++ b/vnet/vnet/adj/adj_midchain.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj_nbr.h> +#include <vnet/adj/adj_internal.h> +#include <vnet/ethernet/arp_packet.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/fib/fib_walk.h> + +static inline u32 +adj_get_midchain_node (fib_link_t link) +{ + switch (link) { + case FIB_LINK_IP4: + return (ip4_midchain_node.index); + case FIB_LINK_IP6: + return (ip6_midchain_node.index); + case FIB_LINK_MPLS: + return (mpls_midchain_node.index); + } + ASSERT(0); + return (0); +} + +/** + * adj_nbr_midchain_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewrite is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_nbr_midchain_update_rewrite (adj_index_t adj_index, + u32 post_rewrite_node, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + adj->lookup_next_index = IP_LOOKUP_NEXT_MIDCHAIN; + adj->sub_type.midchain.tx_function_node = post_rewrite_node; + + if (NULL != rewrite) + { + /* + * new rewrite provided. + * use a dummy rewrite header to get the interface to print into. + */ + ip_adjacency_t dummy; + dpo_id_t tmp = DPO_NULL; + + vnet_rewrite_for_tunnel(vnet_get_main(), + adj->rewrite_header.sw_if_index, + adj_get_midchain_node(adj->ia_link), + adj->sub_type.midchain.tx_function_node, + &dummy.rewrite_header, + rewrite, + vec_len(rewrite)); + + /* + * this is an update of an existing rewrite. + * packets are in flight. we'll need to briefly stack on the drop DPO + * whilst the rewrite is written, so any packets that see the partial update + * are binned. + */ + if (!dpo_id_is_valid(&adj->sub_type.midchain.next_dpo)) + { + /* + * not stacked yet. 
stack on the drop + */ + dpo_stack(DPO_ADJACENCY_MIDCHAIN, + fib_proto_to_dpo(adj->ia_nh_proto), + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto))); + } + + dpo_copy(&tmp, &adj->sub_type.midchain.next_dpo); + dpo_stack(DPO_ADJACENCY_MIDCHAIN, + fib_proto_to_dpo(adj->ia_nh_proto), + &adj->sub_type.midchain.next_dpo, + drop_dpo_get(fib_proto_to_dpo(adj->ia_nh_proto))); + + CLIB_MEMORY_BARRIER(); + + clib_memcpy(&adj->rewrite_header, + &dummy.rewrite_header, + VLIB_BUFFER_PRE_DATA_SIZE); + + CLIB_MEMORY_BARRIER(); + + /* + * The graph arc used/created here is from the post-rewirte node to the + * child's registered node. This is because post adj processing the next + * node is the interface's specific node, then the post-write-node (aka + * the interface's tx-function) - from there we need to get to the stacked + * child's node. + */ + dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + &adj->sub_type.midchain.next_dpo, + &tmp); + dpo_reset(&tmp); + } + else + { + ASSERT(0); + } + + /* + * time for walkies fido. 
+ */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_ADJ_UPDATE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj->heap_handle, &bw_ctx); +} + +/** + * adj_nbr_midchain_stack + */ +void +adj_nbr_midchain_stack (adj_index_t adj_index, + const dpo_id_t *next) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + ASSERT(IP_LOOKUP_NEXT_MIDCHAIN == adj->lookup_next_index); + + dpo_stack_from_node(adj->sub_type.midchain.tx_function_node, + &adj->sub_type.midchain.next_dpo, + next); +} + +u8* +format_adj_midchain (u8* s, va_list *ap) +{ + index_t index = va_arg(ap, index_t); + u32 indent = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format (s, "%U", format_fib_link, adj->ia_link); + s = format (s, " via %U ", + format_ip46_address, &adj->sub_type.nbr.next_hop); + s = format (s, " %U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, + sizeof (adj->rewrite_data), indent); + s = format (s, "\n%Ustacked-on:\n%U%U", + format_white_space, indent, + format_white_space, indent+2, + format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); + + return (s); +} + +static void +adj_dpo_lock (dpo_id_t *dpo) +{ + adj_lock(dpo->dpoi_index); +} +static void +adj_dpo_unlock (dpo_id_t *dpo) +{ + adj_unlock(dpo->dpoi_index); +} + +const static dpo_vft_t adj_midchain_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_midchain, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a midchain + * object. + * + * this means that these graph nodes are ones from which a midchain is the + * parent object in the DPO-graph. 
+ */ +const static char* const midchain_ip4_nodes[] = +{ + "ip4-midchain", + NULL, +}; +const static char* const midchain_ip6_nodes[] = +{ + "ip6-midchain", + NULL, +}; +const static char* const midchain_mpls_nodes[] = +{ + "mpls-midchain", + NULL, +}; + +const static char* const * const midchain_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = midchain_ip4_nodes, + [DPO_PROTO_IP6] = midchain_ip6_nodes, + [DPO_PROTO_MPLS] = midchain_mpls_nodes, +}; + +void +adj_midchain_module_init (void) +{ + dpo_register(DPO_ADJACENCY_MIDCHAIN, &adj_midchain_dpo_vft, midchain_nodes); +} diff --git a/vnet/vnet/adj/adj_midchain.h b/vnet/vnet/adj/adj_midchain.h new file mode 100644 index 00000000000..adf86f1d007 --- /dev/null +++ b/vnet/vnet/adj/adj_midchain.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Midchain Adjacency sub-type. These adjs represent an L3 peer on a + * tunnel interface. The tunnel's adjacency is thus not the end of the chain, + * and needs to stack on/link to another chain (or portion of the graph) to + * reach the tunnel's destination. + */ + +#ifndef __ADJ_MIDCHAIN_H__ +#define __ADJ_MIDCHAIN_H__ + +#include <vnet/adj/adj.h> + +/** + * @brief + * Convert an existing neighbour adjacency into a midchain + * + * @param adj_index + * The index of the neighbour adjacency. + * + * @param post_rewrite_node + * The VLIB graph node that provides the post-encap fixup. 
+ * where 'fixup' is e.g., correcting chksum, length, etc. + * + * @param rewrite + * The rewrite. + */ +extern void adj_nbr_midchain_update_rewrite(adj_index_t adj_index, + u32 post_rewrite_node, + u8 *rewrite); + +/** + * @brief + * [re]stack a midchain. 'Stacking' is the act of forming parent-child + * relationships in the data-plane graph. + * + * @param adj_index + * The index of the midchain to stack + * + * @param dpo + * The parent DPO to stack onto (i.e. become a child of). + */ +extern void adj_nbr_midchain_stack(adj_index_t adj_index, + const dpo_id_t *dpo); + +/** + * @brief + * Module initialisation + */ +extern void adj_midchain_module_init(void); + +/** + * @brief + * Format a midchain adjacency + */ +extern u8* format_adj_midchain(u8* s, va_list *ap); + +#endif diff --git a/vnet/vnet/adj/adj_nbr.c b/vnet/vnet/adj/adj_nbr.c new file mode 100644 index 00000000000..7da1becd4c1 --- /dev/null +++ b/vnet/vnet/adj/adj_nbr.c @@ -0,0 +1,835 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj_nbr.h> +#include <vnet/adj/adj_internal.h> +#include <vnet/ethernet/arp_packet.h> +#include <vnet/fib/fib_walk.h> + +/* + * Vector Hash tables of neighbour (traditional) adjacencies + * Key: interface(for the vector index), address (and its proto), + * link-type/ether-type. + */ +static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX]; + +// FIXME SIZE APPROPRIATELY. ASK DAVEB. 
+#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64) +#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20) + + +#define ADJ_NBR_SET_KEY(_key, _lt, _nh) \ +{ \ + _key.key[0] = (_nh)->as_u64[0]; \ + _key.key[1] = (_nh)->as_u64[1]; \ + _key.key[2] = (_lt); \ +} + +#define ADJ_NBR_ITF_OK(_proto, _itf) \ + (((_itf) < vec_len(adj_nbr_tables[_proto])) && \ + (NULL != adj_nbr_tables[_proto][sw_if_index])) + +static void +adj_nbr_insert (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index, + adj_index_t adj_index) +{ + BVT(clib_bihash_kv) kv; + + if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto])) + { + vec_validate(adj_nbr_tables[nh_proto], sw_if_index); + } + if (NULL == adj_nbr_tables[nh_proto][sw_if_index]) + { + adj_nbr_tables[nh_proto][sw_if_index] = + clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)), + CLIB_CACHE_LINE_BYTES); + memset(adj_nbr_tables[nh_proto][sw_if_index], + 0, + sizeof(BVT(clib_bihash))); + + BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index], + "Adjacency Neighbour table", + ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS, + ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE); + } + + ADJ_NBR_SET_KEY(kv, link_type, nh_addr); + kv.value = adj_index; + + BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1); +} + +void +adj_nbr_remove (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index) +{ + BVT(clib_bihash_kv) kv; + + if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index)) + return; + + ADJ_NBR_SET_KEY(kv, link_type, nh_addr); + + BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0); +} + +static adj_index_t +adj_nbr_find (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index) +{ + BVT(clib_bihash_kv) kv; + + ADJ_NBR_SET_KEY(kv, link_type, nh_addr); + + if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index)) + return (ADJ_INDEX_INVALID); + + if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index], + &kv, &kv) < 0) 
+ { + return (ADJ_INDEX_INVALID); + } + else + { + return (kv.value); + } +} + +static inline vlib_node_registration_t* +adj_get_nd_node (fib_protocol_t proto) +{ + switch (proto) { + case FIB_PROTOCOL_IP4: + return (&ip4_arp_node); + case FIB_PROTOCOL_IP6: + return (&ip6_discover_neighbor_node); + case FIB_PROTOCOL_MPLS: + break; + } + ASSERT(0); + return (NULL); +} + +static void +adj_ip4_nbr_probe (ip_adjacency_t *adj) +{ + vnet_main_t * vnm = vnet_get_main(); + ip4_main_t * im = &ip4_main; + ip_interface_address_t * ia; + ethernet_arp_header_t * h; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + ip4_address_t * src; + vlib_buffer_t * b; + vlib_main_t * vm; + u32 bi = 0; + + vm = vlib_get_main(); + + si = vnet_get_sw_interface (vnm, + adj->rewrite_header.sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return; + } + + src = + ip4_interface_address_matching_destination(im, + &adj->sub_type.nbr.next_hop.ip4, + adj->rewrite_header.sw_if_index, + &ia); + if (! src) + { + return; + } + + h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi); + + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + + clib_memcpy (h->ip4_over_ethernet[0].ethernet, + hi->hw_address, + sizeof (h->ip4_over_ethernet[0].ethernet)); + + h->ip4_over_ethernet[0].ip4 = src[0]; + h->ip4_over_ethernet[1].ip4 = adj->sub_type.nbr.next_hop.ip4; + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = + adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +static void +adj_ip6_nbr_probe (ip_adjacency_t *adj) +{ + icmp6_neighbor_solicitation_header_t * h; + vnet_main_t * vnm = vnet_get_main(); + ip6_main_t * im = &ip6_main; + ip_interface_address_t * ia; + ip6_address_t * dst, *src; + vnet_hw_interface_t * hi; + vnet_sw_interface_t * si; + vlib_buffer_t * b; + int bogus_length; + vlib_main_t * vm; + u32 bi = 0; + + vm = vlib_get_main(); + + si = vnet_get_sw_interface(vnm, adj->rewrite_header.sw_if_index); + dst = &adj->sub_type.nbr.next_hop.ip6; + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return; + } + src = ip6_interface_address_matching_destination(im, dst, + adj->rewrite_header.sw_if_index, + &ia); + if (! src) + { + return; + } + + h = vlib_packet_template_get_packet(vm, + &im->discover_neighbor_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface(vnm, adj->rewrite_header.sw_if_index); + + h->ip.dst_address.as_u8[13] = dst->as_u8[13]; + h->ip.dst_address.as_u8[14] = dst->as_u8[14]; + h->ip.dst_address.as_u8[15] = dst->as_u8[15]; + h->ip.src_address = src[0]; + h->neighbor.target_address = dst[0]; + + clib_memcpy (h->link_layer_option.ethernet_address, + hi->hw_address, + vec_len(hi->hw_address)); + + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum(vm, 0, &h->ip, &bogus_length); + ASSERT(bogus_length == 0); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = + adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). 
*/ + vnet_rewrite_one_header(adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance(b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t * f = vlib_get_frame_to_node(vm, hi->output_node_index); + u32 * to_next = vlib_frame_vector_args(f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node(vm, hi->output_node_index, f); + } +} + +static ip_adjacency_t* +adj_nbr_alloc (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index) +{ + ip_adjacency_t *adj; + + adj = adj_alloc(nh_proto); + + adj_nbr_insert(nh_proto, link_type, nh_addr, + sw_if_index, + adj->heap_handle); + + /* + * since we just added the ADJ we have no rewrite string for it, + * so its for ARP + */ + adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; + adj->sub_type.nbr.next_hop = *nh_addr; + adj->ia_link = link_type; + adj->ia_nh_proto = nh_proto; + memset(&adj->sub_type.midchain.next_dpo, 0, + sizeof(adj->sub_type.midchain.next_dpo)); + + return (adj); +} + +/* + * adj_add_for_nbr + * + * Add an adjacency for the neighbour requested. + * + * The key for an adj is: + * - the Next-hops protocol (i.e. v4 or v6) + * - the address of the next-hop + * - the interface the next-hop is reachable through + * - fib_index; this is broken. i will fix it. + * the adj lookup currently occurs in the FIB. + */ +adj_index_t +adj_nbr_add_or_lock (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index) +{ + adj_index_t adj_index; + ip_adjacency_t *adj; + + adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index); + + if (ADJ_INDEX_INVALID == adj_index) + { + adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index); + + /* + * If there is no next-hop, this is the 'auto-adj' used on p2p + * links instead of a glean. 
+ */ + if (ip46_address_is_zero(nh_addr)) + { + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + vnet_rewrite_for_sw_interface(vnet_get_main(), + adj_fib_link_2_vnet(link_type), + sw_if_index, + adj_get_rewrite_node(link_type)->index, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + } + else + { + vnet_rewrite_for_sw_interface(vnet_get_main(), + adj_fib_proto_2_nd(nh_proto), + sw_if_index, + adj_get_nd_node(nh_proto)->index, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + + switch (nh_proto) + { + case FIB_PROTOCOL_IP4: + adj_ip4_nbr_probe(adj); + break; + case FIB_PROTOCOL_IP6: + adj_ip6_nbr_probe(adj); + break; + case FIB_PROTOCOL_MPLS: + break; + } + } + } + else + { + adj = adj_get(adj_index); + } + + adj_lock(adj->heap_handle); + + return (adj->heap_handle); +} + +adj_index_t +adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index, + u8 *rewrite) +{ + adj_index_t adj_index; + ip_adjacency_t *adj; + + adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index); + + if (ADJ_INDEX_INVALID == adj_index) + { + adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index); + adj->rewrite_header.sw_if_index = sw_if_index; + } + else + { + adj = adj_get(adj_index); + } + + adj_lock(adj->heap_handle); + adj_nbr_update_rewrite(adj->heap_handle, rewrite); + + return (adj->heap_handle); +} + +/** + * adj_nbr_update_rewrite + * + * Update the adjacency's rewrite string. A NULL string implies the + * rewirte is reset (i.e. when ARP/ND etnry is gone). + * NB: the adj being updated may be handling traffic in the DP. + */ +void +adj_nbr_update_rewrite (adj_index_t adj_index, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + ASSERT(ADJ_INDEX_INVALID != adj_index); + + adj = adj_get(adj_index); + + if (NULL != rewrite) + { + /* + * new rewrite provided. 
+ * use a dummy rewrite header to get the interface to print into. + */ + ip_adjacency_t dummy; + + vnet_rewrite_for_sw_interface(vnet_get_main(), + adj_fib_link_2_vnet(adj->ia_link), + adj->rewrite_header.sw_if_index, + adj_get_rewrite_node(adj->ia_link)->index, + rewrite, + &dummy.rewrite_header, + sizeof (dummy.rewrite_data)); + + if (IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index) + { + /* + * this is an update of an existing rewrite. + * we can't just paste in the new rewrite as that is not atomic. + * So we briefly swap the ADJ to ARP type, paste, then swap back. + */ + adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; + CLIB_MEMORY_BARRIER(); + } + /* + * else + * this is the first time the rewrite is added. + * paste it on then swap the next type. + */ + clib_memcpy(&adj->rewrite_header, + &dummy.rewrite_header, + VLIB_BUFFER_PRE_DATA_SIZE); + + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + } + else + { + /* + * clear the rewrite. + */ + adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; + CLIB_MEMORY_BARRIER(); + + adj->rewrite_header.data_bytes = 0; + } + + /* + * time for walkies fido. + * The link type MPLS Adj never has children. So if it is this adj + * that is updated, we need to walk from its IP sibling. + */ + if (FIB_LINK_MPLS == adj->ia_link) + { + adj_index = adj_nbr_find(adj->ia_nh_proto, + fib_proto_to_link(adj->ia_nh_proto), + &adj->sub_type.nbr.next_hop, + adj->rewrite_header.sw_if_index); + + ASSERT(ADJ_INDEX_INVALID != adj_index); + } + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE, + /* + * This walk only needs to go back one level, but there is no control here. 
+ * the first receiving fib_entry_t will quash the walk + */ + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_index, &bw_ctx); +} + +typedef struct adj_db_count_ctx_t_ { + u64 count; +} adj_db_count_ctx_t; + +static void +adj_db_count (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + adj_db_count_ctx_t * ctx = arg; + ctx->count++; +} + +u32 +adj_nbr_db_size (void) +{ + adj_db_count_ctx_t ctx = { + .count = 0, + }; + fib_protocol_t proto; + u32 sw_if_index = 0; + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + vec_foreach_index(sw_if_index, adj_nbr_tables[proto]) + { + if (NULL != adj_nbr_tables[proto][sw_if_index]) + { + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[proto][sw_if_index], + adj_db_count, + &ctx); + } + } + } + return (ctx.count); +} + +/** + * Context for the state change walk of the DB + */ +typedef struct adj_nbr_interface_state_change_ctx_t_ +{ + /** + * Flags passed from the vnet notifiy function + */ + int flags; +} adj_nbr_interface_state_change_ctx_t; + +static void +adj_nbr_interface_state_change_one (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + /* + * Back walk the graph to inform the forwarding entries + * that this interface state has changed. + */ + adj_nbr_interface_state_change_ctx_t *ctx = arg; + + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = (ctx->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? 
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP : + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx); +} + +static clib_error_t * +adj_nbr_interface_state_change (vnet_main_t * vnm, + u32 sw_if_index, + u32 flags) +{ + fib_protocol_t proto; + + /* + * walk each adj on the interface and trigger a walk from that adj + */ + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) + continue; + + adj_nbr_interface_state_change_ctx_t ctx = { + .flags = flags, + }; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[proto][sw_if_index], + adj_nbr_interface_state_change_one, + &ctx); + } + + return (NULL); +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_nbr_interface_state_change); + +static void +adj_nbr_interface_delete_one (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + /* + * Back walk the graph to inform the forwarding entries + * that this interface has been deleted. + */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ADJ, kvp->value, &bw_ctx); +} + +/** + * adj_nbr_interface_add_del + * + * Registered to receive interface Add and delete notifications + */ +static clib_error_t * +adj_nbr_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + fib_protocol_t proto; + + if (is_add) + { + /* + * not interested in interface additions. we will not back walk + * to resolve paths through newly added interfaces. Why? The control + * plane should have the brains to add interfaces first, then routes. + * So the case where there are paths with a interface that matches + * one just created is the case where the path resolved through an + * interface that was deleted, and still has not been removed. 
The + * new interface added, is NO GUARANTEE that the interface being + * added now, even though it may have the same sw_if_index, is the + * same interface that the path needs. So tough! + * If the control plane wants these routes to resolve it needs to + * remove and add them again. + */ + return (NULL); + } + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) + continue; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[proto][sw_if_index], + adj_nbr_interface_delete_one, + NULL); + } + + return (NULL); + +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del); + + +static void +adj_nbr_show_one (BVT(clib_bihash_kv) * kvp, + void *arg) +{ + vlib_cli_output (arg, "[@%d] %U", + kvp->value, + format_ip_adjacency, + vnet_get_main(), kvp->value, + FORMAT_IP_ADJACENCY_NONE); +} + +static clib_error_t * +adj_nbr_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + adj_index_t ai = ADJ_INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &ai)) + ; + else + break; + } + + if (ADJ_INDEX_INVALID != ai) + { + vlib_cli_output (vm, "[@%d] %U", + ai, + + format_ip_adjacency, + vnet_get_main(), ai, + FORMAT_IP_ADJACENCY_DETAIL); + } + else + { + fib_protocol_t proto; + + for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) + { + u32 sw_if_index; + + vec_foreach_index(sw_if_index, adj_nbr_tables[proto]) + { + if (!ADJ_NBR_ITF_OK(proto, sw_if_index)) + continue; + + BV(clib_bihash_foreach_key_value_pair) ( + adj_nbr_tables[proto][sw_if_index], + adj_nbr_show_one, + vm); + } + } + } + + return 0; +} + +VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { + .path = "show adj nbr", + .short_help = "show adj nbr [<adj_index>] [sw_if_index <index>]", + .function = adj_nbr_show, +}; + +u8* +format_adj_nbr_incomplete (u8* s, va_list *ap) +{ + index_t index = va_arg(ap, index_t); + 
CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format (s, "arp-%U", format_fib_link, adj->ia_link); + s = format (s, ": via %U", + format_ip46_address, &adj->sub_type.nbr.next_hop); + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface(vnm, + adj->rewrite_header.sw_if_index)); + + return (s); +} + +u8* +format_adj_nbr (u8* s, va_list *ap) +{ + index_t index = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + ip_adjacency_t * adj = adj_get(index); + + s = format (s, "%U", format_fib_link, adj->ia_link); + s = format (s, " via %U ", + format_ip46_address, &adj->sub_type.nbr.next_hop); + s = format (s, "%U", + format_vnet_rewrite, + vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0); + + return (s); +} + +static void +adj_dpo_lock (dpo_id_t *dpo) +{ + adj_lock(dpo->dpoi_index); +} +static void +adj_dpo_unlock (dpo_id_t *dpo) +{ + adj_unlock(dpo->dpoi_index); +} + +const static dpo_vft_t adj_nbr_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_nbr, +}; +const static dpo_vft_t adj_nbr_incompl_dpo_vft = { + .dv_lock = adj_dpo_lock, + .dv_unlock = adj_dpo_unlock, + .dv_format = format_adj_nbr_incomplete, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency + * object. + * + * this means that these graph nodes are ones from which a nbr is the + * parent object in the DPO-graph. 
+ */ +const static char* const nbr_ip4_nodes[] = +{ + "ip4-rewrite-transit", + NULL, +}; +const static char* const nbr_ip6_nodes[] = +{ + "ip6-rewrite", + NULL, +}; +const static char* const nbr_mpls_nodes[] = +{ + "mpls-output", + NULL, +}; +const static char* const * const nbr_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = nbr_ip4_nodes, + [DPO_PROTO_IP6] = nbr_ip6_nodes, + [DPO_PROTO_MPLS] = nbr_mpls_nodes, +}; + +const static char* const nbr_incomplete_ip4_nodes[] = +{ + "ip4-arp", + NULL, +}; +const static char* const nbr_incomplete_ip6_nodes[] = +{ + "ip6-discover-neighbor", + NULL, +}; +const static char* const nbr_incomplete_mpls_nodes[] = +{ + "mpls-adj-incomplete", + NULL, +}; + +const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes, + [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes, + [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes, +}; + +void +adj_nbr_module_init (void) +{ + dpo_register(DPO_ADJACENCY, + &adj_nbr_dpo_vft, + nbr_nodes); + dpo_register(DPO_ADJACENCY_INCOMPLETE, + &adj_nbr_incompl_dpo_vft, + nbr_incomplete_nodes); +} diff --git a/vnet/vnet/adj/adj_nbr.h b/vnet/vnet/adj/adj_nbr.h new file mode 100644 index 00000000000..331423bd036 --- /dev/null +++ b/vnet/vnet/adj/adj_nbr.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * Neighbour Adjacency sub-type. 
These adjs represent an L3 peer on a + * connected link. + */ + +#ifndef __ADJ_NBR_H__ +#define __ADJ_NBR_H__ + +#include <vnet/vnet.h> +#include <vnet/adj/adj_types.h> +#include <vnet/fib/fib_node.h> +#include <vnet/dpo/dpo.h> + +/** + * @brief + * Add (and lock) a new or lock an existing neighbour adjacency + * + * @param nh_proto + * The protocol for the next-hop address (v4 or v6) + * + * @param link_type + * A description of the protocol of the packets that will forward + * through this adj. On an ethernet interface this is the MAC header's + * ether-type + * + * @param nh_addr + * The address of the next-hop/peer to send the packet to + * + * @param sw_if_index + * The interface on which the peer resides + */ +extern adj_index_t adj_nbr_add_or_lock(fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index); + +/** + * @brief + * Add (and lock) a new or lock an existing neighbour adjacency + * + * @param nh_proto + * The protocol for the next-hop address (v4 or v6) + * + * @param link_type + * A description of the protocol of the packets that will forward + * through this adj. On an ethernet interface this is the MAC header's + * ether-type + * + * @param nh_addr + * The address of the next-hop/peer to send the packet to + * + * @param sw_if_index + * The interface on which the peer resides + * + * @param rewrite + * The rewrite to prepend to packets + */ +extern adj_index_t adj_nbr_add_or_lock_w_rewrite(fib_protocol_t nh_proto, + fib_link_t link_type, + const ip46_address_t *nh_addr, + u32 sw_if_index, + u8 *rewrite); + +/** + * @brief + * Update the rewrite string for an existing adjacecny. 
+ * + * @param + * The index of the adj to update + * + * @param + * The new rewrite + */ +extern void adj_nbr_update_rewrite(adj_index_t adj_index, + u8 *rewrite); + +/** + * @brief + * Format aa incomplete neigbour (ARP) adjacency + */ +extern u8* format_adj_nbr_incomplete(u8* s, va_list *ap); + +/** + * @brief + * Format a neigbour (REWRITE) adjacency + */ +extern u8* format_adj_nbr(u8* s, va_list *ap); + +/** + * @brief + * Module initialisation + */ +extern void adj_nbr_module_init(void); + +/** + * @brief + * Return the size of the adjacency database. for testing purposes + */ +extern u32 adj_nbr_db_size(void); + +#endif diff --git a/vnet/vnet/adj/adj_rewrite.c b/vnet/vnet/adj/adj_rewrite.c new file mode 100644 index 00000000000..db802e33665 --- /dev/null +++ b/vnet/vnet/adj/adj_rewrite.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/adj/adj.h> +#include <vnet/adj/adj_alloc.h> +#include <vnet/adj/adj_internal.h> + +/** + * adj_rewrite_add_and_lock + * + * A rewrite sub-type has the rewrite string provided, but no key + */ +adj_index_t +adj_rewrite_add_and_lock (fib_protocol_t nh_proto, + fib_link_t link_type, + u32 sw_if_index, + u8 *rewrite) +{ + ip_adjacency_t *adj; + + adj = adj_alloc(nh_proto); + + adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + adj->ia_link = link_type; + adj->rewrite_header.sw_if_index = sw_if_index; + + ASSERT(NULL != rewrite); + + vnet_rewrite_for_sw_interface(vnet_get_main(), + adj_fib_link_2_vnet(link_type), + adj->rewrite_header.sw_if_index, + adj_get_rewrite_node(link_type)->index, + rewrite, + &adj->rewrite_header, + sizeof (adj->rewrite_data)); + + adj_lock(adj->heap_handle); + + return (adj->heap_handle); +} diff --git a/vnet/vnet/adj/adj_rewrite.h b/vnet/vnet/adj/adj_rewrite.h new file mode 100644 index 00000000000..f8df255150d --- /dev/null +++ b/vnet/vnet/adj/adj_rewrite.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A rewrite adjacency has no key, and thus cannot be 'found' from the + * FIB resolution code. 
the client therefore needs to maange these adjacencies + */ + +#ifndef __ADJ_REWRITE_H__ +#define __ADJ_REWRITE_H__ + +#include <vnet/adj/adj_types.h> + +/** + * @brief + * Add (and lock) a new or lock an existing neighbour adjacency + * + * @param nh_proto + * The protocol for the next-hop address (v4 or v6) + * + * @param link_type + * A description of the protocol of the packets that will forward + * through this adj. On an ethernet interface this is the MAC header's + * ether-type + * + * @param sw_if_index + * The interface on which the peer resides + * + * @param rewrite + * The rewrite to prepend to packets + */ +extern adj_index_t adj_rewrite_add_and_lock(fib_protocol_t nh_proto, + fib_link_t link_type, + u32 sw_if_index, + u8 *rewrite); + +#endif diff --git a/vnet/vnet/adj/adj_types.h b/vnet/vnet/adj/adj_types.h new file mode 100644 index 00000000000..a7234663d29 --- /dev/null +++ b/vnet/vnet/adj/adj_types.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ADJ_TYPES_H__ +#define __ADJ_TYPES_H__ + +#include <vnet/vnet.h> + +/** + * @brief An index for adjacencies. + * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of + * an adi_index_t. 
However, for us humans, we can glean much more intent + * from the declaration + * foo bar(adj_index_t t); + * than we can from + * foo bar(u32 t); + */ +typedef u32 adj_index_t; + +/** + * @brief Invalid ADJ index - used when no adj is known + * likewise blazoned capitals INVALID speak volumes where ~0 does not. + */ +#define ADJ_INDEX_INVALID ((u32)~0) + +#endif diff --git a/vnet/vnet/classify/ip_classify.c b/vnet/vnet/classify/ip_classify.c index c44f25e2add..44973ae5e99 100644 --- a/vnet/vnet/classify/ip_classify.c +++ b/vnet/vnet/classify/ip_classify.c @@ -15,6 +15,7 @@ #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ #include <vnet/classify/vnet_classify.h> +#include <vnet/dpo/classify_dpo.h> typedef struct { u32 next_index; @@ -63,7 +64,6 @@ ip_classify_inline (vlib_main_t * vm, u32 n_left_from, * from, * to_next; ip_lookup_next_t next_index; vnet_classify_main_t * vcm = &vnet_classify_main; - ip_lookup_main_t * lm; f64 now = vlib_time_now (vm); u32 hits = 0; u32 misses = 0; @@ -71,10 +71,8 @@ ip_classify_inline (vlib_main_t * vm, u32 n_next; if (is_ip4) { - lm = &ip4_main.lookup_main; n_next = IP4_LOOKUP_N_NEXT; } else { - lm = &ip6_main.lookup_main; n_next = IP6_LOOKUP_N_NEXT; } @@ -88,8 +86,8 @@ ip_classify_inline (vlib_main_t * vm, vlib_buffer_t * b0, * b1; u32 bi0, bi1; u8 * h0, * h1; - u32 adj_index0, adj_index1; - ip_adjacency_t * adj0, * adj1; + u32 cd_index0, cd_index1; + classify_dpo_t *cd0, * cd1; u32 table_index0, table_index1; vnet_classify_table_t * t0, * t1; @@ -116,13 +114,13 @@ ip_classify_inline (vlib_main_t * vm, h1 = (void *)vlib_buffer_get_current(b1) - ethernet_buffer_header_size(b1); - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - adj0 = ip_get_adjacency (lm, adj_index0); - table_index0 = adj0->classify.table_index; + cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + cd0 = classify_dpo_get(cd_index0); + table_index0 = cd0->cd_table_index; - adj_index1 = vnet_buffer 
(b1)->ip.adj_index[VLIB_TX]; - adj1 = ip_get_adjacency (lm, adj_index1); - table_index1 = adj1->classify.table_index; + cd_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + cd1 = classify_dpo_get(cd_index1); + table_index1 = cd1->cd_table_index; t0 = pool_elt_at_index (vcm->tables, table_index0); @@ -151,8 +149,8 @@ ip_classify_inline (vlib_main_t * vm, vlib_buffer_t * b0; u32 bi0; u8 * h0; - u32 adj_index0; - ip_adjacency_t * adj0; + u32 cd_index0; + classify_dpo_t *cd0; u32 table_index0; vnet_classify_table_t * t0; @@ -161,9 +159,9 @@ ip_classify_inline (vlib_main_t * vm, h0 = (void *)vlib_buffer_get_current(b0) - ethernet_buffer_header_size(b0); - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - adj0 = ip_get_adjacency (lm, adj_index0); - table_index0 = adj0->classify.table_index; + cd_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + cd0 = classify_dpo_get(cd_index0); + table_index0 = cd0->cd_table_index; t0 = pool_elt_at_index (vcm->tables, table_index0); vnet_buffer(b0)->l2_classify.hash = @@ -192,7 +190,7 @@ ip_classify_inline (vlib_main_t * vm, { u32 bi0; vlib_buffer_t * b0; - u32 next0 = IP_LOOKUP_NEXT_MISS; + u32 next0 = IP_LOOKUP_NEXT_DROP; u32 table_index0; vnet_classify_table_t * t0; vnet_classify_entry_t * e0; diff --git a/vnet/vnet/classify/vnet_classify.c b/vnet/vnet/classify/vnet_classify.c index 2eee0f5671e..7716fc986f2 100644 --- a/vnet/vnet/classify/vnet_classify.c +++ b/vnet/vnet/classify/vnet_classify.c @@ -1106,9 +1106,7 @@ uword unformat_l2_output_next_index (unformat_input_t * input, va_list * args) } #define foreach_ip_next \ -_(miss, MISS) \ _(drop, DROP) \ -_(local, LOCAL) \ _(rewrite, REWRITE) uword unformat_ip_next_index (unformat_input_t * input, va_list * args) @@ -2121,7 +2119,7 @@ test_classify_command_fn (vlib_main_t * vm, memory_size, 0 /* skip */, 3 /* vectors to match */); - t->miss_next_index = IP_LOOKUP_NEXT_LOCAL; + t->miss_next_index = IP_LOOKUP_NEXT_DROP; vlib_cli_output (vm, "Create table %d", t - cm->tables); 
} diff --git a/vnet/vnet/config.h b/vnet/vnet/config.h index d80ff19ec28..b77a7794a6e 100644 --- a/vnet/vnet/config.h +++ b/vnet/vnet/config.h @@ -161,6 +161,10 @@ u32 vnet_config_del_feature (vlib_main_t * vm, void *feature_config, u32 n_feature_config_bytes); +u8 *vnet_config_format_features (vlib_main_t * vm, + vnet_config_main_t * cm, + u32 config_index, u8 * s); + #endif /* included_vnet_config_h */ /* diff --git a/vnet/vnet/cop/ip4_whitelist.c b/vnet/vnet/cop/ip4_whitelist.c index 5578558c4b1..d5121e72980 100644 --- a/vnet/vnet/cop/ip4_whitelist.c +++ b/vnet/vnet/cop/ip4_whitelist.c @@ -13,6 +13,8 @@ * limitations under the License. */ #include <vnet/cop/cop.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> typedef struct { u32 next_index; @@ -57,9 +59,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, u32 n_left_from, * from, * to_next; cop_feature_type_t next_index; cop_main_t *cm = &cop_main; - ip4_main_t * im4 = &ip4_main; - ip_lookup_main_t * lm4 = &im4->lookup_main; - vlib_combined_counter_main_t * vcm = &im4->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; u32 cpu_index = vm->cpu_index; from = vlib_frame_vector_args (frame); @@ -74,7 +74,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) - { + { u32 bi0, bi1; vlib_buffer_t * b0, * b1; u32 next0, next1; @@ -82,147 +82,142 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, ip4_header_t * ip0, * ip1; cop_config_main_t * ccm0, * ccm1; cop_config_data_t * c0, * c1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - u32 adj_index0, adj_index1; - ip_adjacency_t * adj0, * adj1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t * p2, * p3; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + u32 lb_index0, lb_index1; + const load_balance_t * lb0, *lb1; + const dpo_id_t *dpo0, *dpo1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } /* speculatively enqueue b0 and b1 to the current next frame */ - to_next[0] = bi0 = from[0]; - to_next[1] = bi1 = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX]; - ip0 = vlib_buffer_get_current (b0); + ip0 = vlib_buffer_get_current (b0); - ccm0 = cm->cop_config_mains + VNET_COP_IP4; + ccm0 = cm->cop_config_mains + VNET_COP_IP4; - c0 = vnet_get_config_data + c0 = vnet_get_config_data (&ccm0->config_main, &vnet_buffer (b0)->cop.current_config_index, &next0, sizeof (c0[0])); - mtrie0 = &vec_elt_at_index (im4->fibs, c0->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, + leaf0 = ip4_fib_mtrie_lookup_step 
(mtrie0, leaf0, &ip0->src_address, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - ASSERT (adj_index0 - == ip4_fib_lookup_with_table (im4, c0->fib_index, - &ip0->src_address, - 1 /* no_default_route */)); - adj0 = ip_get_adjacency (lm4, adj_index0); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + ASSERT (lb_index0 + == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index), + &ip0->src_address)); + lb0 = load_balance_get (lb_index0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE)) { b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED]; next0 = RX_COP_DROP; } - b1 = vlib_get_buffer (vm, bi1); + b1 = vlib_get_buffer (vm, bi1); sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX]; - ip1 = vlib_buffer_get_current (b1); + ip1 = vlib_buffer_get_current (b1); - ccm1 = cm->cop_config_mains + VNET_COP_IP4; + ccm1 = cm->cop_config_mains + VNET_COP_IP4; - c1 = vnet_get_config_data + c1 = vnet_get_config_data (&ccm1->config_main, &vnet_buffer (b1)->cop.current_config_index, &next1, sizeof (c1[0])); + mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; - mtrie1 = &vec_elt_at_index (im4->fibs, c1->fib_index)->mtrie; - - leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); - leaf1 = 
ip4_fib_mtrie_lookup_step (mtrie1, leaf1, + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - - ASSERT (adj_index1 - == ip4_fib_lookup_with_table (im4, c1->fib_index, - &ip1->src_address, - 1 /* no_default_route */)); - adj1 = ip_get_adjacency (lm4, adj_index1); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + ASSERT (lb_index1 + == ip4_fib_table_lookup_lb (ip4_fib_get(c1->fib_index), + &ip1->src_address)); + lb1 = load_balance_get (lb_index1); + dpo1 = load_balance_get_bucket_i(lb1, 0); - vlib_increment_combined_counter - (vcm, cpu_index, adj_index0, 1, - vlib_buffer_length_in_chain (vm, b0) + vlib_increment_combined_counter + (vcm, cpu_index, lb_index0, 1, + vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); - vlib_increment_combined_counter - (vcm, cpu_index, adj_index1, 1, + vlib_increment_combined_counter + (vcm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, b1) + sizeof(ethernet_header_t)); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) - { - b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED]; - next0 = RX_COP_DROP; - } - if (PREDICT_FALSE(adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE)) { b1->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED]; next1 = RX_COP_DROP; } - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) { - ip4_cop_whitelist_trace_t *t = + ip4_cop_whitelist_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t)); t->sw_if_index = sw_if_index0; t->next_index = next0; } - if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) - && (b1->flags & VLIB_BUFFER_IS_TRACED))) + if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) + && (b1->flags & VLIB_BUFFER_IS_TRACED))) { - ip4_cop_whitelist_trace_t *t 
= + ip4_cop_whitelist_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t)); t->sw_if_index = sw_if_index1; t->next_index = next1; @@ -245,8 +240,9 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, cop_config_data_t *c0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; - u32 adj_index0; - ip_adjacency_t * adj0; + u32 lb_index0; + const load_balance_t * lb0; + const dpo_id_t *dpo0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -269,7 +265,7 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, &next0, sizeof (c0[0])); - mtrie0 = &vec_elt_at_index (im4->fibs, c0->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -285,20 +281,21 @@ ip4_cop_whitelist_node_fn (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + ASSERT (lb_index0 + == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index), + &ip0->src_address)); - ASSERT (adj_index0 - == ip4_fib_lookup_with_table (im4, c0->fib_index, - &ip0->src_address, - 1 /* no_default_route */)); - adj0 = ip_get_adjacency (lm4, adj_index0); + lb0 = load_balance_get (lb_index0); + dpo0 = load_balance_get_bucket_i(lb0, 0); vlib_increment_combined_counter - (vcm, cpu_index, adj_index0, 1, + (vcm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE)) { b0->error = node->errors[IP4_COP_WHITELIST_ERROR_DROPPED]; next0 = RX_COP_DROP; diff --git a/vnet/vnet/cop/ip6_whitelist.c b/vnet/vnet/cop/ip6_whitelist.c index 4a8f33fb727..c2e16ccfe54 100644 --- a/vnet/vnet/cop/ip6_whitelist.c +++ b/vnet/vnet/cop/ip6_whitelist.c @@ -13,6 +13,8 @@ * limitations under the License. 
*/ #include <vnet/cop/cop.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/load_balance.h> typedef struct { u32 next_index; @@ -58,8 +60,7 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, cop_feature_type_t next_index; cop_main_t *cm = &cop_main; ip6_main_t * im6 = &ip6_main; - ip_lookup_main_t * lm6 = &im6->lookup_main; - vlib_combined_counter_main_t * vcm = &im6->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * vcm = &load_balance_main.lbm_via_counters; u32 cpu_index = vm->cpu_index; from = vlib_frame_vector_args (frame); @@ -82,9 +83,10 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, ip6_header_t * ip0, * ip1; cop_config_main_t * ccm0, * ccm1; cop_config_data_t * c0, * c1; - u32 adj_index0, adj_index1; - ip_adjacency_t * adj0, * adj1; - + u32 lb_index0, lb_index1; + const load_balance_t * lb0, *lb1; + const dpo_id_t *dpo0, *dpo1; + /* Prefetch next iteration. */ { vlib_buffer_t * p2, * p3; @@ -120,10 +122,12 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, &next0, sizeof (c0[0])); - adj_index0 = ip6_fib_lookup_with_table (im6, c0->fib_index, - &ip0->src_address); - adj0 = ip_get_adjacency (lm6, adj_index0); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index, + &ip0->src_address); + lb0 = load_balance_get (lb_index0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE)) { b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED]; next0 = RX_COP_DROP; @@ -142,28 +146,23 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, &next1, sizeof (c1[0])); - adj_index1 = ip6_fib_lookup_with_table (im6, c1->fib_index, - &ip1->src_address); + lb_index1 = ip6_fib_table_fwding_lookup (im6, c1->fib_index, + &ip1->src_address); - adj1 = ip_get_adjacency (lm6, adj_index1); + lb1 = load_balance_get (lb_index1); + dpo1 = load_balance_get_bucket_i(lb1, 0); vlib_increment_combined_counter - (vcm, cpu_index, adj_index0, 1, + (vcm, cpu_index, 
lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); vlib_increment_combined_counter - (vcm, cpu_index, adj_index1, 1, + (vcm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, b1) + sizeof(ethernet_header_t)); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) - { - b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED]; - next0 = RX_COP_DROP; - } - - if (PREDICT_FALSE(adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + if (PREDICT_FALSE(dpo1->dpoi_type != DPO_RECEIVE)) { b1->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED]; next1 = RX_COP_DROP; @@ -202,8 +201,9 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, ip6_header_t * ip0; cop_config_main_t *ccm0; cop_config_data_t *c0; - u32 adj_index0; - ip_adjacency_t * adj0; + u32 lb_index0; + const load_balance_t * lb0; + const dpo_id_t *dpo0; /* speculatively enqueue b0 to the current next frame */ bi0 = from[0]; @@ -226,17 +226,18 @@ ip6_cop_whitelist_node_fn (vlib_main_t * vm, &next0, sizeof (c0[0])); - adj_index0 = ip6_fib_lookup_with_table (im6, c0->fib_index, - &ip0->src_address); + lb_index0 = ip6_fib_table_fwding_lookup (im6, c0->fib_index, + &ip0->src_address); - adj0 = ip_get_adjacency (lm6, adj_index0); + lb0 = load_balance_get (lb_index0); + dpo0 = load_balance_get_bucket_i(lb0, 0); vlib_increment_combined_counter - (vcm, cpu_index, adj_index0, 1, + (vcm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, b0) + sizeof(ethernet_header_t)); - if (PREDICT_FALSE(adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL)) + if (PREDICT_FALSE(dpo0->dpoi_type != DPO_RECEIVE)) { b0->error = node->errors[IP6_COP_WHITELIST_ERROR_DROPPED]; next0 = RX_COP_DROP; diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c index 2ffb95884d3..9e8fed44efb 100644 --- a/vnet/vnet/devices/dpdk/cli.c +++ b/vnet/vnet/devices/dpdk/cli.c @@ -21,7 +21,7 @@ #include <vnet/ethernet/ethernet.h> #include <vnet/devices/dpdk/dpdk.h> #include 
<vnet/classify/vnet_classify.h> -#include <vnet/mpls-gre/packet.h> +#include <vnet/mpls/packet.h> #include "dpdk_priv.h" diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index a9e286e56eb..63e7e559286 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -21,7 +21,7 @@ #include <vnet/ethernet/ethernet.h> #include <vnet/devices/dpdk/dpdk.h> #include <vnet/classify/vnet_classify.h> -#include <vnet/mpls-gre/packet.h> +#include <vnet/mpls/packet.h> #include <vnet/handoff.h> #include "dpdk_priv.h" @@ -687,7 +687,7 @@ poll_rate_limit (dpdk_main_t * dm) <em>Next Nodes:</em> - Static arcs to: error-drop, ethernet-input, - ip4-input-no-checksum, ip6-input, mpls-gre-input + ip4-input-no-checksum, ip6-input, mpls-input - per-interface redirection, controlled by <code>xd->per_interface_next_index</code> */ @@ -791,7 +791,7 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { [DPDK_RX_NEXT_ETHERNET_INPUT] = "ethernet-input", [DPDK_RX_NEXT_IP4_INPUT] = "ip4-input-no-checksum", [DPDK_RX_NEXT_IP6_INPUT] = "ip6-input", - [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-gre-input", + [DPDK_RX_NEXT_MPLS_INPUT] = "mpls-input", }, }; @@ -805,7 +805,6 @@ VLIB_NODE_FUNCTION_MULTIARCH_CLONE(dpdk_input_efd) CLIB_MULTIARCH_SELECT_FN(dpdk_input); CLIB_MULTIARCH_SELECT_FN(dpdk_input_rss); CLIB_MULTIARCH_SELECT_FN(dpdk_input_efd); -/* *INDENT-ON* */ /* * Override the next nodes for the dpdk input nodes. 
@@ -876,11 +875,3 @@ efd_config (u32 enabled, set_efd_bitmap (&tm->efd.mpls_exp_bitmap, mpls_exp, mpls_op); set_efd_bitmap (&tm->efd.vlan_cos_bitmap, vlan_cos, vlan_op); } - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/vnet/vnet/devices/ssvm/node.c b/vnet/vnet/devices/ssvm/node.c index e7d9792bd65..e613cc9cb01 100644 --- a/vnet/vnet/devices/ssvm/node.c +++ b/vnet/vnet/devices/ssvm/node.c @@ -330,7 +330,7 @@ VLIB_REGISTER_NODE (ssvm_eth_input_node) = { [SSVM_ETH_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", [SSVM_ETH_INPUT_NEXT_IP4_INPUT] = "ip4-input", [SSVM_ETH_INPUT_NEXT_IP6_INPUT] = "ip6-input", - [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-gre-input", + [SSVM_ETH_INPUT_NEXT_MPLS_INPUT] = "mpls-input", }, }; diff --git a/vnet/vnet/dhcp/client.c b/vnet/vnet/dhcp/client.c index 5916cfdb2fa..ffe6e8dab7c 100644 --- a/vnet/vnet/dhcp/client.c +++ b/vnet/vnet/dhcp/client.c @@ -14,19 +14,12 @@ */ #include <vlib/vlib.h> #include <vnet/dhcp/proxy.h> +#include <vnet/fib/fib_table.h> dhcp_client_main_t dhcp_client_main; static u8 * format_dhcp_client_state (u8 * s, va_list * va); static vlib_node_registration_t dhcp_client_process_node; -void __attribute__((weak)) -api_config_default_ip_route (u8 is_ipv6, u8 is_add, u32 vrf_id, - u32 sw_if_index, u8 *next_hop_addr) -{ - /* dummy function */ - return; -} - static void dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c) { @@ -214,14 +207,34 @@ int dhcp_client_for_us (u32 bi, vlib_buffer_t * b, /* * Configure default IP route: - * - vrf_id is 0 by default. 
*/ if (c->router_address.as_u32) - api_config_default_ip_route (0 /* is_ipv6 */, - 1 /* is_add */, - 0 /* vrf_id */, - c->sw_if_index, - (u8 *)&c->router_address); + { + fib_prefix_t all_0s = + { + .fp_len = 0, + .fp_addr.ip4.as_u32 = 0x0, + .fp_proto = FIB_PROTOCOL_IP4, + }; + ip46_address_t nh = + { + .ip4 = c->router_address, + }; + + fib_table_entry_path_add (fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &all_0s, + FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh, + c->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } /* * Call the user's event callback to report DHCP information @@ -496,11 +509,29 @@ dhcp_bound_state (dhcp_client_main_t * dcm, dhcp_client_t * c, f64 now) if (now > c->lease_expires) { if (c->router_address.as_u32) - api_config_default_ip_route (0 /* is_ipv6 */, - 0 /* is_add */, - 0 /* vrf_id */, - c->sw_if_index, - (u8 *)&c->router_address); + { + fib_prefix_t all_0s = + { + .fp_len = 0, + .fp_addr.ip4.as_u32 = 0x0, + .fp_proto = FIB_PROTOCOL_IP4, + }; + ip46_address_t nh = { + .ip4 = c->router_address, + }; + + fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &all_0s, + FIB_SOURCE_DHCP, + FIB_PROTOCOL_IP4, + &nh, + c->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + } dhcp_client_release_address (dcm, c); c->state = DHCP_DISCOVER; @@ -689,7 +720,7 @@ show_dhcp_client_command_fn (vlib_main_t * vm, p = hash_get (dcm->client_by_sw_if_index, sw_if_index); if (p == 0) return clib_error_return (0, "dhcp client not configured"); - c = pool_elt_at_index (dcm->clients, sw_if_index); + c = pool_elt_at_index (dcm->clients, p[0]); vlib_cli_output (vm, "%U", format_dhcp_client, dcm, c, verbose); return 0; } @@ -715,6 +746,18 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) vlib_main_t * vm = dcm->vlib_main; dhcp_client_t * c; uword * p; + fib_prefix_t all_1s = + { + .fp_len = 32, + .fp_addr.ip4.as_u32 = 
0xffffffff, + .fp_proto = FIB_PROTOCOL_IP4, + }; + fib_prefix_t all_0s = + { + .fp_len = 0, + .fp_addr.ip4.as_u32 = 0x0, + .fp_proto = FIB_PROTOCOL_IP4, + }; p = hash_get (dcm->client_by_sw_if_index, a->sw_if_index); @@ -738,6 +781,22 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) } while (c->transaction_id == 0); set_l2_rewrite (dcm, c); hash_set (dcm->client_by_sw_if_index, a->sw_if_index, c - dcm->clients); + + /* this add is ref counted by FIB so we can add for each itf */ + fib_table_entry_special_add(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &all_1s, + FIB_SOURCE_DHCP, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * enable the interface to RX IPv4 packets + * this is also ref counted + */ + ip4_sw_interface_enable_disable (c->sw_if_index, 1); + vlib_process_signal_event (vm, dhcp_client_process_node.index, EVENT_DHCP_CLIENT_WAKEUP, c - dcm->clients); } @@ -745,12 +804,32 @@ int dhcp_client_add_del (dhcp_client_add_del_args_t * a) { c = pool_elt_at_index (dcm->clients, p[0]); + fib_table_entry_special_remove(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &all_1s, + FIB_SOURCE_DHCP); + if (c->router_address.as_u32) - api_config_default_ip_route (0 /* is_ipv6 */, - 0 /* is_add */, - 0 /* vrf_id */, - c->sw_if_index, - (u8 *)&c->router_address); + { + ip46_address_t nh = { + .ip4 = c->router_address, + }; + + fib_table_entry_path_remove(fib_table_get_index_for_sw_if_index( + FIB_PROTOCOL_IP4, + c->sw_if_index), + &all_0s, + FIB_SOURCE_DHCP, + FIB_PROTOCOL_IP4, + &nh, + c->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + } + ip4_sw_interface_enable_disable (c->sw_if_index, 0); + vec_free (c->option_55_data); vec_free (c->hostname); vec_free (c->client_identifier); diff --git a/vnet/vnet/dhcp/proxy_node.c b/vnet/vnet/dhcp/proxy_node.c index 2073b3f7bf6..7018fc3958b 100644 --- a/vnet/vnet/dhcp/proxy_node.c +++ b/vnet/vnet/dhcp/proxy_node.c @@ -18,6 +18,7 @@ #include 
<vlib/vlib.h> #include <vnet/pg/pg.h> #include <vnet/dhcp/proxy.h> +#include <vnet/fib/ip4_fib.h> static char * dhcp_proxy_error_strings[] = { #define dhcp_proxy_error(n,s) s, @@ -225,7 +226,7 @@ dhcp_proxy_to_server_input (vlib_main_t * vm, fib_index = im->fib_index_by_sw_if_index [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - fib = vec_elt_at_index (im->fibs, fib_index); + fib = ip4_fib_get (fib_index); fib_id = fib->table_id; end = b0->data + b0->current_data + b0->current_length; @@ -699,9 +700,7 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, int insert_option_82, int is_del) { dhcp_proxy_main_t * dpm = &dhcp_proxy_main; - ip4_main_t * im = &ip4_main; dhcp_server_t * server = 0; - ip4_fib_t *rx_fib, *server_fib; u32 server_index = 0; u32 rx_fib_index = 0; @@ -711,18 +710,11 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, if (src_address->as_u32 == 0) return VNET_API_ERROR_INVALID_SRC_ADDRESS; - rx_fib = find_ip4_fib_by_table_index_or_id - (&ip4_main, rx_fib_id, IP4_ROUTE_FLAG_TABLE_ID); - - if (rx_fib == 0) - return VNET_API_ERROR_NO_SUCH_INNER_FIB; - - server_fib = find_ip4_fib_by_table_index_or_id - (&ip4_main, server_fib_id, IP4_ROUTE_FLAG_TABLE_ID); - - if (server_fib == 0) - return VNET_API_ERROR_NO_SUCH_FIB; - + rx_fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + rx_fib_id); + server_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, + server_fib_id); + if (rx_fib_id == 0) { server = pool_elt_at_index (dpm->dhcp_servers, 0); @@ -735,8 +727,6 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, goto initialize_it; } - rx_fib_index = rx_fib - im->fibs; - if (is_del) { if (rx_fib_index >= vec_len(dpm->dhcp_server_index_by_rx_fib_index)) @@ -768,7 +758,7 @@ int dhcp_proxy_set_server_2 (ip4_address_t *addr, ip4_address_t *src_address, initialize_it: server->dhcp_server.as_u32 = addr->as_u32; - server->server_fib_index = server_fib - im->fibs; + 
server->server_fib_index = server_index; server->dhcp_src_address.as_u32 = src_address->as_u32; server->insert_option_82 = insert_option_82; server->valid = 1; @@ -883,14 +873,12 @@ u8 * format_dhcp_proxy_server (u8 * s, va_list * args) return s; } - server_fib = find_ip4_fib_by_table_index_or_id - (&ip4_main, server->server_fib_index, IP4_ROUTE_FLAG_FIB_INDEX); + server_fib = ip4_fib_get(server->server_fib_index); if (server_fib) server_fib_id = server_fib->table_id; - rx_fib = find_ip4_fib_by_table_index_or_id - (&ip4_main, rx_fib_index, IP4_ROUTE_FLAG_FIB_INDEX); + rx_fib = ip4_fib_get(rx_fib_index); if (rx_fib) rx_fib_id = rx_fib->table_id; diff --git a/vnet/vnet/dhcpv6/proxy_node.c b/vnet/vnet/dhcpv6/proxy_node.c index 4dc746f6936..323bdf9b730 100644 --- a/vnet/vnet/dhcpv6/proxy_node.c +++ b/vnet/vnet/dhcpv6/proxy_node.c @@ -18,6 +18,7 @@ #include <vlib/vlib.h> #include <vnet/pg/pg.h> #include <vnet/dhcpv6/proxy.h> +#include <vnet/fib/ip6_fib.h> static char * dhcpv6_proxy_error_strings[] = { #define dhcpv6_proxy_error(n,s) s, @@ -323,7 +324,7 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm, fib_index = im->fib_index_by_sw_if_index [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - fib = vec_elt_at_index (im->fibs, fib_index); + fib = ip6_fib_get (fib_index); fib_id = fib->table_id; p_vss = hash_get (dpm->vss_index_by_vrf_id, @@ -573,7 +574,7 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm, svr_fib_index = im->fib_index_by_sw_if_index [vnet_buffer(b0)->sw_if_index[VLIB_RX]]; - svr_fib = vec_elt_at_index (im->fibs, svr_fib_index); + svr_fib = ip6_fib_get (svr_fib_index); svr_fib_id = svr_fib->table_id; if (svr_fib_id != dpm->server_fib_index || @@ -831,8 +832,7 @@ u8 * format_dhcpv6_proxy_server (u8 * s, va_list * args) return s; } - f = find_ip6_fib_by_table_index_or_id (&ip6_main, dm->server_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); + f = ip6_fib_get (dm->server_fib_index); if (f) fib_id = f->table_id; diff --git a/vnet/vnet/dpo/classify_dpo.c 
b/vnet/vnet/dpo/classify_dpo.c new file mode 100644 index 00000000000..3b7b98f9da8 --- /dev/null +++ b/vnet/vnet/dpo/classify_dpo.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/mpls/mpls.h> + +/* + * pool of all MPLS Label DPOs + */ +classify_dpo_t *classify_dpo_pool; + +static classify_dpo_t * +classify_dpo_alloc (void) +{ + classify_dpo_t *cd; + + pool_get_aligned(classify_dpo_pool, cd, CLIB_CACHE_LINE_BYTES); + memset(cd, 0, sizeof(*cd)); + + return (cd); +} + +static index_t +classify_dpo_get_index (classify_dpo_t *cd) +{ + return (cd - classify_dpo_pool); +} + +index_t +classify_dpo_create (fib_protocol_t proto, + u32 classify_table_index) +{ + classify_dpo_t *cd; + + cd = classify_dpo_alloc(); + cd->cd_proto = proto; + cd->cd_table_index = classify_table_index; + + return (classify_dpo_get_index(cd)); +} + +u8* +format_classify_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + classify_dpo_t *cd; + + cd = classify_dpo_get(index); + + return (format(s, "classify:[%d]:table:%d", + index, cd->cd_table_index)); +} + +static void +classify_dpo_lock (dpo_id_t *dpo) +{ + classify_dpo_t *cd; + + cd = classify_dpo_get(dpo->dpoi_index); + + cd->cd_locks++; +} + +static void +classify_dpo_unlock (dpo_id_t *dpo) +{ + classify_dpo_t 
*cd; + + cd = classify_dpo_get(dpo->dpoi_index); + + cd->cd_locks--; + + if (0 == cd->cd_locks) + { + pool_put(classify_dpo_pool, cd); + } +} + +const static dpo_vft_t cd_vft = { + .dv_lock = classify_dpo_lock, + .dv_unlock = classify_dpo_unlock, + .dv_format = format_classify_dpo, +}; + +const static char* const classify_ip4_nodes[] = +{ + "ip4-classify", + NULL, +}; +const static char* const classify_ip6_nodes[] = +{ + "ip6-classify", + NULL, +}; +const static char* const * const classify_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = classify_ip4_nodes, + [DPO_PROTO_IP6] = classify_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +classify_dpo_module_init (void) +{ + dpo_register(DPO_CLASSIFY, &cd_vft, classify_nodes); +} diff --git a/vnet/vnet/dpo/classify_dpo.h b/vnet/vnet/dpo/classify_dpo.h new file mode 100644 index 00000000000..cd35c3c440b --- /dev/null +++ b/vnet/vnet/dpo/classify_dpo.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CLASSIFY_DPO_H__ +#define __CLASSIFY_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of an MPLS label for imposition in the data-path + */ +typedef struct classify_dpo_t +{ + fib_protocol_t cd_proto; + + u32 cd_table_index; + + /** + * Number of locks/users of the label + */ + u16 cd_locks; +} classify_dpo_t; + +extern index_t classify_dpo_create(fib_protocol_t proto, + u32 classify_table_index); + +extern u8* format_classify_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern classify_dpo_t *classify_dpo_pool; + +static inline classify_dpo_t * +classify_dpo_get (index_t index) +{ + return (pool_elt_at_index(classify_dpo_pool, index)); +} + +extern void classify_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/dpo.c b/vnet/vnet/dpo/dpo.c new file mode 100644 index 00000000000..5eff52b7b8a --- /dev/null +++ b/vnet/vnet/dpo/dpo.c @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets are they are switched through VPP. + * + * The DPO is a base class that is specialised by other objects to provide + * concreate actions + * + * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances. 
+ */ + +#include <vnet/dpo/dpo.h> +#include <vnet/ip/lookup.h> +#include <vnet/ip/format.h> +#include <vnet/adj/adj.h> + +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/classify_dpo.h> + +/** + * Array of char* names for the DPO types and protos + */ +static const char* dpo_type_names[] = DPO_TYPES; +static const char* dpo_proto_names[] = DPO_PROTOS; + +/** + * @brief Vector of virtual function tables for the DPO types + * + * This is a vector so we can dynamically register new DPO types in plugins. + */ +static dpo_vft_t *dpo_vfts; + +/** + * @brief vector of graph node names associated with each DPO type and protocol. + * + * dpo_nodes[child_type][child_proto][node_X] = node_name; + * i.e. + * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][0] = "ip4-lookup" + * dpo_node[DPO_LOAD_BALANCE][DPO_PROTO_IP4][1] = "ip4-load-balance" + * + * This is a vector so we can dynamically register new DPO types in plugins. + */ +static const char* const * const ** dpo_nodes; + +/** + * @brief Vector of edge indicies from parent DPO nodes to child + * + * dpo_edges[child_type][child_proto][parent_type] = edge_index + * + * This array is derived at init time from the dpo_nodes above. Note that + * the third dimension in dpo_nodes is lost, hence, the edge index from each + * node MUST be the same. + * + * Note that this array is child type specific, not child instance specific. + */ +static u32 ***dpo_edges; + +/** + * @brief The DPO type value that can be assigend to the next dynamic + * type registration. 
+ */ +static dpo_type_t dpo_dynamic = DPO_LAST; + +u8 * +format_dpo_type (u8 * s, va_list * args) +{ + dpo_type_t type = va_arg (*args, int); + + s = format(s, "%s", dpo_type_names[type]); + + return (s); +} + +u8 * +format_dpo_id (u8 * s, va_list * args) +{ + dpo_id_t *dpo = va_arg (*args, dpo_id_t*); + u32 indent = va_arg (*args, u32); + + s = format(s, "[@%d]: ", dpo->dpoi_next_node); + + if (NULL != dpo_vfts[dpo->dpoi_type].dv_format) + { + return (format(s, "%U", + dpo_vfts[dpo->dpoi_type].dv_format, + dpo->dpoi_index, + indent)); + } + + switch (dpo->dpoi_type) + { + case DPO_FIRST: + s = format(s, "unset"); + break; + default: + s = format(s, "unknown"); + break; + } + return (s); +} + +u8 * +format_dpo_proto (u8 * s, va_list * args) +{ + dpo_proto_t proto = va_arg (*args, int); + + return (format(s, "%s", dpo_proto_names[proto])); +} + +void +dpo_set (dpo_id_t *dpo, + dpo_type_t type, + dpo_proto_t proto, + index_t index) +{ + dpo_id_t tmp = *dpo; + + dpo->dpoi_type = type; + dpo->dpoi_proto = proto, + dpo->dpoi_index = index; + + if (DPO_ADJACENCY == type) + { + /* + * set the adj subtype + */ + ip_adjacency_t *adj; + + adj = adj_get(index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + dpo->dpoi_type = DPO_ADJACENCY_INCOMPLETE; + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + dpo->dpoi_type = DPO_ADJACENCY_MIDCHAIN; + break; + default: + break; + } + } + dpo_lock(dpo); + dpo_unlock(&tmp); +} + +void +dpo_reset (dpo_id_t *dpo) +{ + dpo_set(dpo, DPO_FIRST, DPO_PROTO_NONE, INDEX_INVALID); +} + +/** + * \brief + * Compare two Data-path objects + * + * like memcmp, return 0 is matching, !0 otherwise. 
+ */ +int +dpo_cmp (const dpo_id_t *dpo1, + const dpo_id_t *dpo2) +{ + int res; + + res = dpo1->dpoi_type - dpo2->dpoi_type; + + if (0 != res) return (res); + + return (dpo1->dpoi_index - dpo2->dpoi_index); +} + +void +dpo_copy (dpo_id_t *dst, + const dpo_id_t *src) +{ + dpo_id_t tmp = *dst; + + /* + * the destination is written in a single u64 write - hence atomically w.r.t + * any packets inflight. + */ + *((u64*)dst) = *(u64*)src; + + dpo_lock(dst); + dpo_unlock(&tmp); +} + +int +dpo_is_adj (const dpo_id_t *dpo) +{ + return ((dpo->dpoi_type == DPO_ADJACENCY) || + (dpo->dpoi_type == DPO_ADJACENCY_INCOMPLETE) || + (dpo->dpoi_type == DPO_ADJACENCY_MIDCHAIN) || + (dpo->dpoi_type == DPO_ADJACENCY_GLEAN)); +} + +void +dpo_register (dpo_type_t type, + const dpo_vft_t *vft, + const char * const * const * nodes) +{ + vec_validate(dpo_vfts, type); + dpo_vfts[type] = *vft; + + vec_validate(dpo_nodes, type); + dpo_nodes[type] = nodes; +} + +dpo_type_t +dpo_register_new_type (const dpo_vft_t *vft, + const char * const * const * nodes) +{ + dpo_type_t type = dpo_dynamic++; + + dpo_register(type, vft, nodes); + + return (type); +} + +void +dpo_lock (dpo_id_t *dpo) +{ + if (!dpo_id_is_valid(dpo)) + return; + + dpo_vfts[dpo->dpoi_type].dv_lock(dpo); +} + +void +dpo_unlock (dpo_id_t *dpo) +{ + if (!dpo_id_is_valid(dpo)) + return; + + dpo_vfts[dpo->dpoi_type].dv_unlock(dpo); +} + + +static u32 +dpo_get_next_node (dpo_type_t child_type, + dpo_proto_t child_proto, + const dpo_id_t *parent_dpo) +{ + dpo_proto_t parent_proto; + dpo_type_t parent_type; + + parent_type = parent_dpo->dpoi_type; + parent_proto = parent_dpo->dpoi_proto; + + vec_validate(dpo_edges, child_type); + vec_validate(dpo_edges[child_type], child_proto); + vec_validate_init_empty(dpo_edges[child_type][child_proto], + parent_dpo->dpoi_type, ~0); + + /* + * if the edge index has not yet been created for this node to node transistion + */ + if (~0 == dpo_edges[child_type][child_proto][parent_type]) + { + vlib_node_t 
*parent_node, *child_node; + vlib_main_t *vm; + u32 edge ,pp, cc; + + vm = vlib_get_main(); + + ASSERT(NULL != dpo_nodes[child_type]); + ASSERT(NULL != dpo_nodes[child_type][child_proto]); + ASSERT(NULL != dpo_nodes[parent_type]); + ASSERT(NULL != dpo_nodes[parent_type][parent_proto]); + + pp = 0; + + /* + * create a graph arc from each of the parent's registered node types, + * to each of the childs. + */ + while (NULL != dpo_nodes[child_type][child_proto][pp]) + { + parent_node = + vlib_get_node_by_name(vm, + (u8*) dpo_nodes[child_type][child_proto][pp]); + + cc = 0; + + while (NULL != dpo_nodes[parent_type][child_proto][cc]) + { + child_node = + vlib_get_node_by_name(vm, + (u8*) dpo_nodes[parent_type][parent_proto][cc]); + + edge = vlib_node_add_next(vm, + parent_node->index, + child_node->index); + + if (~0 == dpo_edges[child_type][child_proto][parent_type]) + { + dpo_edges[child_type][child_proto][parent_type] = edge; + } + else + { + ASSERT(dpo_edges[child_type][child_proto][parent_type] == edge); + } + cc++; + } + pp++; + } + } + + return (dpo_edges[child_type][child_proto][parent_type]); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child parent + * relationship. The VLIB graph arc used is taken from the parent and child types + * passed. + */ +static void +dpo_stack_i (u32 edge, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + /* + * in order to get an atomic update of the parent we create a temporary, + * from a copy of the child, and add the next_node. then we copy to the parent + */ + dpo_id_t tmp = DPO_NULL; + dpo_copy(&tmp, parent); + + /* + * get the edge index for the parent to child VLIB graph transisition + */ + tmp.dpoi_next_node = edge; + + /* + * this update is atomic. + */ + dpo_copy(dpo, &tmp); + + dpo_reset(&tmp); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child-parent + * relationship. The VLIB graph arc used is taken from the parent and child types + * passed. 
+ */ +void +dpo_stack (dpo_type_t child_type, + dpo_proto_t child_proto, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + dpo_stack_i(dpo_get_next_node(child_type, child_proto, parent), dpo, parent); +} + +/** + * @brief Stack one DPO object on another, and thus establish a child parent + * relationship. A new VLIB graph arc is created from the child node passed + * to the nodes registered by the parent. The VLIB infra will ensure this arc + * is added only once. + */ +void +dpo_stack_from_node (u32 child_node_index, + dpo_id_t *dpo, + const dpo_id_t *parent) +{ + dpo_proto_t parent_proto; + vlib_node_t *parent_node; + dpo_type_t parent_type; + vlib_main_t *vm; + u32 edge; + + parent_type = parent->dpoi_type; + parent_proto = parent->dpoi_proto; + + vm = vlib_get_main(); + + ASSERT(NULL != dpo_nodes[parent_type]); + ASSERT(NULL != dpo_nodes[parent_type][parent_proto]); + + parent_node = + vlib_get_node_by_name(vm, (u8*) dpo_nodes[parent_type][parent_proto][0]); + + edge = vlib_node_add_next(vm, + child_node_index, + parent_node->index); + + dpo_stack_i(edge, dpo, parent); +} + +static clib_error_t * +dpo_module_init (vlib_main_t * vm) +{ + drop_dpo_module_init(); + punt_dpo_module_init(); + receive_dpo_module_init(); + load_balance_module_init(); + mpls_label_dpo_module_init(); + classify_dpo_module_init(); + lookup_dpo_module_init(); + + return (NULL); +} + +VLIB_INIT_FUNCTION(dpo_module_init); diff --git a/vnet/vnet/dpo/dpo.h b/vnet/vnet/dpo/dpo.h new file mode 100644 index 00000000000..8c22f00b091 --- /dev/null +++ b/vnet/vnet/dpo/dpo.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets are they are switched through VPP's data-path. + * + * The DPO can be considered to be like is a base class that is specialised + * by other objects to provide concreate actions + * + * The VLIB graph nodes are graph of DPO types, the DPO graph is a graph of + * instances. + */ + +#ifndef __DPO_H__ +#define __DPO_H__ + +#include <vnet/vnet.h> + +/** + * @brief An index for adjacencies. + * Alas 'C' is not typesafe enough to b0rk when a u32 is used instead of + * an index_t. However, for us humans, we can glean much more intent + * from the declaration + * foo barindex_t t); + * than we can from + * foo bar(u32 t); + */ +typedef u32 index_t; + +/** + * @brief Invalid index - used when no index is known + * blazoned capitals INVALID speak volumes where ~0 does not. + */ +#define INDEX_INVALID ((index_t)(~0)) + +/** + * @brief Data path protocol. + * Actions performed on packets in the data-plane can be described and represented + * by protocol independent objects, i.e. ADJACENCY, but the spceifics actions + * required during ADJACENCY processing can be protocol dependent. For example, + * the adjacency rewrite node performs a ip4 checksum calculation, ip6 and MPLS + * do not, all 3 perform a TTL decrement. The VLIB graph nodes are thus protocol + * dependent, and thus each graph edge/arc is too. 
+ * When programming a DPO's next node arc from child to parent it is thus required + * to know the parent's data-path protocol so the correct arc index can be used. + */ +typedef enum dpo_proto_t_ +{ +#if CLIB_DEBUG > 0 + DPO_PROTO_IP4 = 1, +#else + DPO_PROTO_IP4 = 0, +#endif + DPO_PROTO_IP6, + DPO_PROTO_MPLS, +} __attribute__((packed)) dpo_proto_t; + +#define DPO_PROTO_NUM (DPO_PROTO_MPLS+1) +#define DPO_PROTO_NONE (DPO_PROTO_NUM+1) + +#define DPO_PROTOS { \ + [DPO_PROTO_IP4] = "ip4", \ + [DPO_PROTO_IP6] = "ip6", \ + [DPO_PROTO_MPLS] = "mpls", \ +} + +/** + * @brief Common types of data-path objects + * New types can be dynamically added using dpo_register_new_type() + */ +typedef enum dpo_type_t_ { + /** + * A non-zero value first so we can spot unitialisation errors + */ + DPO_FIRST, + DPO_DROP, + DPO_PUNT, + /** + * @brief load-balancing over a choice of [un]equal cost paths + */ + DPO_LOAD_BALANCE, + DPO_ADJACENCY, + DPO_ADJACENCY_INCOMPLETE, + DPO_ADJACENCY_MIDCHAIN, + DPO_ADJACENCY_GLEAN, + DPO_RECEIVE, + DPO_LOOKUP, + DPO_LISP_CP, + DPO_CLASSIFY, + DPO_MPLS_LABEL, + DPO_LAST, +} __attribute__((packed)) dpo_type_t; + +#define DPO_TYPE_NUM DPO_LAST + +#define DPO_TYPES { \ + [DPO_FIRST] = "dpo-invalid", \ + [DPO_DROP] = "dpo-drop", \ + [DPO_PUNT] = "dpo-punt", \ + [DPO_ADJACENCY] = "dpo-adjacency", \ + [DPO_ADJACENCY_INCOMPLETE] = "dpo-adjacency-incomplete", \ + [DPO_ADJACENCY_MIDCHAIN] = "dpo-adjacency-midcahin", \ + [DPO_ADJACENCY_GLEAN] = "dpo-glean", \ + [DPO_RECEIVE] = "dpo-receive", \ + [DPO_LOOKUP] = "dpo-lookup", \ + [DPO_LOAD_BALANCE] = "dpo-load-balance", \ + [DPO_LISP_CP] = "dpo-lisp-cp", \ + [DPO_CLASSIFY] = "dpo-classify", \ + [DPO_MPLS_LABEL] = "dpo-mpls-label", \ +} + +/** + * @brief The identity of a DPO is a combination of its type and its + * instance number/index of objects of that type + */ +typedef struct dpo_id_t_ { + /** + * the type + */ + dpo_type_t dpoi_type; + /** + * the data-path protocol of the type. 
+ */ + dpo_proto_t dpoi_proto; + /** + * The next VLIB node to follow. + */ + u16 dpoi_next_node; + /** + * the index of objects of that type + */ + index_t dpoi_index; +} __attribute__ ((aligned(sizeof(u64)))) dpo_id_t; + +_Static_assert(sizeof(dpo_id_t) <= sizeof(u64), + "DPO ID is greater than sizeof u64 " + "atomic updates need to be revisited"); + +/** + * @brief An initialiser for DPos declared on the stack. + */ +#define DPO_NULL {0} + +/** + * @brief Return true if the DPO object is valid, i.e. has been initialised. + */ +static inline int +dpo_id_is_valid (const dpo_id_t *dpoi) +{ + return (dpoi->dpoi_type != DPO_FIRST && + dpoi->dpoi_index != INDEX_INVALID); +} + +/** + * @brief + * Take a reference counting lock on the DPO + */ +extern void dpo_lock(dpo_id_t *dpo); + +/** + * @brief + * Release a reference counting lock on the DPO + */ +extern void dpo_unlock(dpo_id_t *dpo); + +/** + * @brief Set/create a DPO ID + * The DPO will be locked. + * + * @param dpo + * The DPO object to configure + * + * @param type + * The dpo_type_t of the DPO + * + * @param proto + * The dpo_proto_t of the DPO + * + * @param index + * The type specific index of the DPO + */ +extern void dpo_set(dpo_id_t *dpo, + dpo_type_t type, + dpo_proto_t proto, + index_t index); + +/** + * @brief reset a DPO ID + * The DPO will be unlocked. + * + * @param dpo + * The DPO object to reset + */ +extern void dpo_reset(dpo_id_t *dpo); + +/** + * @brief compare two DPOs for equality + */ +extern int dpo_cmp(const dpo_id_t *dpo1, + const dpo_id_t *dpo2); + +/** + * @brief + * atomic copy a data-plane object. 
+ * This is safe to use when the dst DPO is currently switching packets + */ +extern void dpo_copy(dpo_id_t *dst, + const dpo_id_t *src); + +/** + * @brief Return TRUE is the DPO is any type of adjacency + */ +extern int dpo_is_adj(const dpo_id_t *dpo); + +/** + * @biref Format a DPO_id_t oject + */ +extern u8 *format_dpo_id(u8 * s, va_list * args); + +/** + * @biref format a DPO type + */ +extern u8 *format_dpo_type(u8 * s, va_list * args); + +/** + * @brief format a DPO protocol + */ +extern u8 *format_dpo_proto(u8 * s, va_list * args); + +/** + * @brief + * Set and stack a DPO. + * The DPO passed is set to the parent DPO and the necessary + * VLIB graph arcs are created. The child_type and child_proto + * are used to get the VLID nodes from which the arcs are added. + * + * @param child_type + * Child DPO type. + * + * @param child_proto + * Child DPO proto + * + * @parem dpo + * This is the DPO to stack and set. + * + * @paren parent_dpo + * The parent DPO to stack onto. + */ +extern void dpo_stack(dpo_type_t child_type, + dpo_proto_t child_proto, + dpo_id_t *dpo, + const dpo_id_t *parent_dpo); + +/** + * @brief + * Set and stack a DPO. + * The DPO passed is set to the parent DPO and the necessary + * VLIB graph arcs are created, from the child_node passed. + * + * @param child_node + * The VLIB grpah node index to create an arc from to the parent + * + * @parem dpo + * This is the DPO to stack and set. + * + * @paren parent_dpo + * The parent DPO to stack onto. 
+ */ +extern void dpo_stack_from_node(u32 child_node, + dpo_id_t *dpo, + const dpo_id_t *parent); + +/** + * @brief A lock function registered for a DPO type + */ +typedef void (*dpo_lock_fn_t)(dpo_id_t *dpo); + +/** + * @brief An unlock function registered for a DPO type + */ +typedef void (*dpo_unlock_fn_t)(dpo_id_t *dpo); + +/** + * @brief A virtual function table regisitered for a DPO type + */ +typedef struct dpo_vft_t_ +{ + /** + * A reference counting lock function + */ + dpo_lock_fn_t dv_lock; + /** + * A reference counting unlock function + */ + dpo_lock_fn_t dv_unlock; + /** + * A format function + */ + format_function_t *dv_format; +} dpo_vft_t; + + +/** + * @brief For a given DPO type Register: + * - a virtual function table + * - a NULL terminated array of graph nodes from which that object type + * will originate packets, i.e. the nodes in which the object type will be + * the parent DPO in the DP graph. The ndoes are per-data-path protocol + * (see above). + * + * @param type + * The type being registered. + * + * @param vft + * The virtual function table to register for the type. + * + * @param nodes + * The string description of the per-protocol VLIB graph nodes. + */ +void dpo_register(dpo_type_t type, + const dpo_vft_t *vft, + const char * const * const * nodes); + +/** + * @brief Create and register a new DPO type. + * + * This can be used by plugins to create new DPO types that are not listed + * in dpo_type_t enum + * + * @param vft + * The virtual function table to register for the type. + * + * @param nodes + * The string description of the per-protocol VLIB graph nodes. 
+ * + * @return The new dpo_type_t + */ +dpo_type_t dpo_register_new_type(const dpo_vft_t *vft, + const char * const * const * nodes); + +#endif diff --git a/vnet/vnet/dpo/drop_dpo.c b/vnet/vnet/dpo/drop_dpo.c new file mode 100644 index 00000000000..62f56488a01 --- /dev/null +++ b/vnet/vnet/dpo/drop_dpo.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing dropping the packet + */ + +#include <vnet/dpo/dpo.h> + +static dpo_id_t drop_dpos[DPO_PROTO_NUM]; + +const dpo_id_t * +drop_dpo_get (dpo_proto_t proto) +{ + dpo_set(&drop_dpos[proto], DPO_DROP, proto, 1); + + return (&drop_dpos[proto]); +} + +int +dpo_is_drop (const dpo_id_t *dpo) +{ + return (dpo->dpoi_type == DPO_DROP); +} + +static void +drop_dpo_lock (dpo_id_t *dpo) +{ + /* + * not maintaining a lock count on the drop + * more trouble than it's worth. + * There always needs to be one around. 
no point it managaing its lifetime + */ +} +static void +drop_dpo_unlock (dpo_id_t *dpo) +{ +} + +static u8* +format_drop_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + + return (format(s, "dpo-drop")); +} + +const static dpo_vft_t drop_vft = { + .dv_lock = drop_dpo_lock, + .dv_unlock = drop_dpo_unlock, + .dv_format = format_drop_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a drop + * object. + * + * this means that these graph nodes are ones from which a drop is the + * parent object in the DPO-graph. + */ +const static char* const drop_ip4_nodes[] = +{ + "ip4-drop", + NULL, +}; +const static char* const drop_ip6_nodes[] = +{ + "ip6-drop", + NULL, +}; +const static char* const drop_mpls_nodes[] = +{ + "mpls-drop", + NULL, +}; +const static char* const * const drop_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = drop_ip4_nodes, + [DPO_PROTO_IP6] = drop_ip6_nodes, + [DPO_PROTO_MPLS] = drop_mpls_nodes, +}; + +void +drop_dpo_module_init (void) +{ + dpo_register(DPO_DROP, &drop_vft, drop_nodes); +} diff --git a/vnet/vnet/dpo/drop_dpo.h b/vnet/vnet/dpo/drop_dpo.h new file mode 100644 index 00000000000..e7bd8f5156e --- /dev/null +++ b/vnet/vnet/dpo/drop_dpo.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @brief + * A Data-Path Object is an object that represents actions that are + * applied to packets as they are switched through VPP. + * + * The DPO is a base class that is specialised by other objects to provide + * concrete actions + * + * The VLIB graph nodes are graph of types, the DPO graph is a graph of instances. + */ + +#ifndef __DROP_DPO_H__ +#define __DROP_DPO_H__ + +#include <vnet/dpo/dpo.h> + +extern int dpo_is_drop(const dpo_id_t *dpo); + +extern const dpo_id_t *drop_dpo_get(dpo_proto_t proto); + +extern void drop_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/load_balance.c b/vnet/vnet/dpo/load_balance.c new file mode 100644 index 00000000000..963ff0ba160 --- /dev/null +++ b/vnet/vnet/dpo/load_balance.c @@ -0,0 +1,760 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/lookup.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/drop_dpo.h> +#include <vppinfra/math.h> /* for fabs */ +#include <vnet/adj/adj.h> +#include <vnet/adj/adj_alloc.h> +#include <vnet/adj/adj_internal.h> + +/* + * distribution error tolerance for load-balancing + */ +const f64 multipath_next_hop_error_tolerance = 0.1; + +#undef LB_DEBUG + +#ifdef LB_DEBUG +#define LB_DBG(_lb, _fmt, _args...)
\ +{ \ + u8* _tmp =NULL; \ + clib_warning("lb:[%s]:" _fmt, \ + load_balance_format(load_balance_get_index((_lb)), \ + 0, _tmp), \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define LB_DBG(_p, _fmt, _args...) +#endif + + +/** + * Pool of all DPOs. It's not static so the DP can have fast access + */ +load_balance_t *load_balance_pool; + +/** + * The one instance of load-balance main + */ +load_balance_main_t load_balance_main; + +f64 +load_balance_get_multipath_tolerance (void) +{ + return (multipath_next_hop_error_tolerance); +} + +static inline index_t +load_balance_get_index (const load_balance_t *lb) +{ + return (lb - load_balance_pool); +} + +static inline dpo_id_t* +load_balance_get_buckets (load_balance_t *lb) +{ + if (LB_HAS_INLINE_BUCKETS(lb)) + { + return (lb->lb_buckets_inline); + } + else + { + return (lb->lb_buckets); + } +} + +static load_balance_t * +load_balance_alloc_i (void) +{ + load_balance_t *lb; + + pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES); + memset(lb, 0, sizeof(*lb)); + + lb->lb_map = INDEX_INVALID; + vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters), + load_balance_get_index(lb)); + vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters), + load_balance_get_index(lb)); + vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters), + load_balance_get_index(lb)); + vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters), + load_balance_get_index(lb)); + + return (lb); +} + +static u8* +load_balance_format (index_t lbi, + load_balance_format_flags_t flags, + u32 indent, + u8 *s) +{ + vlib_counter_t to, via; + load_balance_t *lb; + dpo_id_t *buckets; + u32 i; + + lb = load_balance_get(lbi); + vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to); + vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via); + buckets = load_balance_get_buckets(lb); + + s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE); + s = format(s, "[index:%d 
buckets:%d ", lbi, lb->lb_n_buckets); + s = format(s, "locks:%d ", lb->lb_locks); + s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes); + if (0 != via.packets) + { + s = format(s, " via:[%Ld:%Ld]", + via.packets, via.bytes); + } + s = format(s, "]"); + + if (INDEX_INVALID != lb->lb_map) + { + s = format(s, "\n%U%U", + format_white_space, indent+4, + format_load_balance_map, lb->lb_map, indent+4); + } + for (i = 0; i < lb->lb_n_buckets; i++) + { + s = format(s, "\n%U[%d] %U", + format_white_space, indent+2, + i, + format_dpo_id, + &buckets[i], indent+6); + } + return (s); +} + +u8* +format_load_balance (u8 * s, va_list * args) +{ + index_t lbi = va_arg(args, index_t); + load_balance_format_flags_t flags = va_arg(args, load_balance_format_flags_t); + + return (load_balance_format(lbi, flags, 0, s)); +} +static u8* +format_load_balance_dpo (u8 * s, va_list * args) +{ + index_t lbi = va_arg(args, index_t); + u32 indent = va_arg(args, u32); + + return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s)); +} + + +static load_balance_t * +load_balance_create_i (u32 num_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc) +{ + load_balance_t *lb; + + lb = load_balance_alloc_i(); + lb->lb_hash_config = fhc; + lb->lb_n_buckets = num_buckets; + lb->lb_n_buckets_minus_1 = num_buckets-1; + lb->lb_proto = lb_proto; + + if (!LB_HAS_INLINE_BUCKETS(lb)) + { + vec_validate_aligned(lb->lb_buckets, + lb->lb_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + } + + LB_DBG(lb, "create"); + + return (lb); +} + +index_t +load_balance_create (u32 n_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc) +{ + return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc))); +} + +static inline void +load_balance_set_bucket_i (load_balance_t *lb, + u32 bucket, + dpo_id_t *buckets, + const dpo_id_t *next) +{ + dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next); +} + +void +load_balance_set_bucket (index_t lbi, + u32 bucket, + const dpo_id_t *next) 
+{ + load_balance_t *lb; + dpo_id_t *buckets; + + lb = load_balance_get(lbi); + buckets = load_balance_get_buckets(lb); + + ASSERT(bucket < lb->lb_n_buckets); + + load_balance_set_bucket_i(lb, bucket, buckets, next); +} + +int +load_balance_is_drop (const dpo_id_t *dpo) +{ + load_balance_t *lb; + + if (DPO_LOAD_BALANCE != dpo->dpoi_type) + return (0); + + lb = load_balance_get(dpo->dpoi_index); + + if (1 == lb->lb_n_buckets) + { + return (dpo_is_drop(load_balance_get_bucket_i(lb, 0))); + } + return (0); +} + +const dpo_id_t * +load_balance_get_bucket (index_t lbi, + u32 bucket) +{ + load_balance_t *lb; + + lb = load_balance_get(lbi); + + return (load_balance_get_bucket_i(lb, bucket)); +} + +static int +next_hop_sort_by_weight (load_balance_path_t * n1, + load_balance_path_t * n2) +{ + return ((int) n1->path_weight - (int) n2->path_weight); +} + +/* Given next hop vector is over-written with normalized one with sorted weights and + with weights corresponding to the number of adjacencies for each next hop. + Returns number of adjacencies in block. */ +u32 +ip_multipath_normalize_next_hops (load_balance_path_t * raw_next_hops, + load_balance_path_t ** normalized_next_hops, + u32 *sum_weight_in, + f64 multipath_next_hop_error_tolerance) +{ + load_balance_path_t * nhs; + uword n_nhs, n_adj, n_adj_left, i, sum_weight; + f64 norm, error; + + n_nhs = vec_len (raw_next_hops); + ASSERT (n_nhs > 0); + if (n_nhs == 0) + return 0; + + /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */ + nhs = *normalized_next_hops; + vec_validate (nhs, 2*n_nhs - 1); + + /* Fast path: 1 next hop in block. */ + n_adj = n_nhs; + if (n_nhs == 1) + { + nhs[0] = raw_next_hops[0]; + nhs[0].path_weight = 1; + _vec_len (nhs) = 1; + sum_weight = 1; + goto done; + } + + else if (n_nhs == 2) + { + int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0; + + /* Fast sort. 
*/ + nhs[0] = raw_next_hops[cmp]; + nhs[1] = raw_next_hops[cmp ^ 1]; + + /* Fast path: equal cost multipath with 2 next hops. */ + if (nhs[0].path_weight == nhs[1].path_weight) + { + nhs[0].path_weight = nhs[1].path_weight = 1; + _vec_len (nhs) = 2; + sum_weight = 2; + goto done; + } + } + else + { + clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); + qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); + } + + /* Find total weight to normalize weights. */ + sum_weight = 0; + for (i = 0; i < n_nhs; i++) + sum_weight += nhs[i].path_weight; + + /* In the unlikely case that all weights are given as 0, set them all to 1. */ + if (sum_weight == 0) + { + for (i = 0; i < n_nhs; i++) + nhs[i].path_weight = 1; + sum_weight = n_nhs; + } + + /* Save copies of all next hop weights to avoid being overwritten in loop below. */ + for (i = 0; i < n_nhs; i++) + nhs[n_nhs + i].path_weight = nhs[i].path_weight; + + /* Try larger and larger power of 2 sized adjacency blocks until we + find one where traffic flows to within 1% of specified weights. */ + for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) + { + error = 0; + + norm = n_adj / ((f64) sum_weight); + n_adj_left = n_adj; + for (i = 0; i < n_nhs; i++) + { + f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */ + word n = flt_round_nearest (nf); + + n = n > n_adj_left ? n_adj_left : n; + n_adj_left -= n; + error += fabs (nf - n); + nhs[i].path_weight = n; + } + + nhs[0].path_weight += n_adj_left; + + /* Less than 5% average error per adjacency with this size adjacency block? */ + if (error <= multipath_next_hop_error_tolerance*n_adj) + { + /* Truncate any next hops with zero weight. */ + _vec_len (nhs) = i; + break; + } + } + +done: + /* Save vector for next call. 
*/ + *normalized_next_hops = nhs; + *sum_weight_in = sum_weight; + return n_adj; +} + +static load_balance_path_t * +load_balance_multipath_next_hop_fixup (load_balance_path_t *nhs, + dpo_proto_t drop_proto) +{ + if (0 == vec_len(nhs)) + { + load_balance_path_t *nh; + + /* + * we need something for the load-balance. so use the drop + */ + vec_add2(nhs, nh, 1); + + nh->path_weight = 1; + dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto)); + } + + return (nhs); +} + +/* + * Fill in adjacencies in block based on corresponding + * next hop adjacencies. + */ +static void +load_balance_fill_buckets (load_balance_t *lb, + load_balance_path_t *nhs, + dpo_id_t *buckets, + u32 n_buckets) +{ + load_balance_path_t * nh; + u16 ii, bucket; + + bucket = 0; + + /* + * the next-hops have normalised weights. that means their sum is the number + * of buckets we need to fill. + */ + vec_foreach (nh, nhs) + { + for (ii = 0; ii < nh->path_weight; ii++) + { + ASSERT(bucket < n_buckets); + load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo); + } + } +} + +static inline void +load_balance_set_n_buckets (load_balance_t *lb, + u32 n_buckets) +{ + lb->lb_n_buckets = n_buckets; + lb->lb_n_buckets_minus_1 = n_buckets-1; +} + +void +load_balance_multipath_update (const dpo_id_t *dpo, + load_balance_path_t * raw_next_hops, + load_balance_flags_t flags) +{ + u32 sum_of_weights,n_buckets, ii; + load_balance_path_t * nh, * nhs; + index_t lbmi, old_lbmi; + load_balance_t *lb; + dpo_id_t *tmp_dpo; + + nhs = NULL; + + ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); + lb = load_balance_get(dpo->dpoi_index); + raw_next_hops = + load_balance_multipath_next_hop_fixup(raw_next_hops, + lb->lb_proto); + n_buckets = + ip_multipath_normalize_next_hops(raw_next_hops, + &nhs, + &sum_of_weights, + multipath_next_hop_error_tolerance); + + ASSERT (n_buckets >= vec_len (raw_next_hops)); + + /* + * Save the old load-balance map used, and get a new one if required. 
+ */ + old_lbmi = lb->lb_map; + if (flags & LOAD_BALANCE_FLAG_USES_MAP) + { + lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs); + } + else + { + lbmi = INDEX_INVALID; + } + + if (0 == lb->lb_n_buckets) + { + /* + * first time initialisation. no packets inflight, so we can write + * at leisure. + */ + load_balance_set_n_buckets(lb, n_buckets); + + if (!LB_HAS_INLINE_BUCKETS(lb)) + vec_validate_aligned(lb->lb_buckets, + lb->lb_n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + lb->lb_map = lbmi; + } + else + { + /* + * This is a modification of an existing load-balance. + * We need to ensure that packets inflight see a consistent state, that + * is the number of reported buckets the LB has (read from + * lb_n_buckets_minus_1) is not more than it actually has. So if the + * number of buckets is increasing, we must update the bucket array first, + * then the reported number. vice-versa if the number of buckets goes down. + */ + if (n_buckets == lb->lb_n_buckets) + { + /* + * no change in the number of buckets. we can simply fill what + * is new over what is old. + */ + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + lb->lb_map = lbmi; + } + else if (n_buckets > lb->lb_n_buckets) + { + /* + * we have more buckets. the old load-balance map (if there is one) + * will remain valid, i.e. mapping to indices within range, so we + * update it last. + */ + if (n_buckets > LB_NUM_INLINE_BUCKETS && + lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS) + { + /* + * the new increased number of buckets is crossing the threshold + * from the inline storage to out-line. Alloc the outline buckets + * first, then fixup the number. then reset the inlines. 
+ */ + ASSERT(NULL == lb->lb_buckets); + vec_validate_aligned(lb->lb_buckets, + n_buckets - 1, + CLIB_CACHE_LINE_BYTES); + + load_balance_fill_buckets(lb, nhs, + lb->lb_buckets, + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + + CLIB_MEMORY_BARRIER(); + + for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++) + { + dpo_reset(&lb->lb_buckets_inline[ii]); + } + } + else + { + /* + * we are not crossing the threshold. we can write the new on the + * old, whether they be inline or not. + */ + load_balance_fill_buckets(lb, nhs, + load_balance_get_buckets(lb), + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + } + + /* + * buckets fixed. ready for the MAP update. + */ + lb->lb_map = lbmi; + } + else + { + /* + * bucket size shrinkage. + * Any map we have will be based on the old + * larger number of buckets, so will be translating to indices + * out of range. So the new MAP must be installed first. + */ + lb->lb_map = lbmi; + CLIB_MEMORY_BARRIER(); + + + if (n_buckets <= LB_NUM_INLINE_BUCKETS && + lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS) + { + /* + * the new decreased number of buckets is crossing the threshold + * from out-line storage to inline: + * 1 - Fill the inline buckets, + * 2 - fixup the number (and this point the inline buckets are + * used). + * 3 - free the outline buckets + */ + load_balance_fill_buckets(lb, nhs, + lb->lb_buckets_inline, + n_buckets); + CLIB_MEMORY_BARRIER(); + load_balance_set_n_buckets(lb, n_buckets); + CLIB_MEMORY_BARRIER(); + + vec_foreach(tmp_dpo, lb->lb_buckets) + { + dpo_reset(tmp_dpo); + } + vec_free(lb->lb_buckets); + } + else + { + /* + * not crossing the threshold. + * 1 - update the number to the smaller size + * 2 - write the new buckets + * 3 - reset those no longer used. 
+ */ + dpo_id_t *buckets; + u32 old_n_buckets; + + old_n_buckets = lb->lb_n_buckets; + buckets = load_balance_get_buckets(lb); + + load_balance_set_n_buckets(lb, n_buckets); + CLIB_MEMORY_BARRIER(); + + load_balance_fill_buckets(lb, nhs, + buckets, + n_buckets); + + for (ii = old_n_buckets-n_buckets; ii < old_n_buckets; ii++) + { + dpo_reset(&buckets[ii]); + } + } + } + } + + vec_foreach (nh, nhs) + { + dpo_reset(&nh->path_dpo); + } + + load_balance_map_unlock(old_lbmi); +} + +static void +load_balance_lock (dpo_id_t *dpo) +{ + load_balance_t *lb; + + lb = load_balance_get(dpo->dpoi_index); + + lb->lb_locks++; +} + +static void +load_balance_destroy (load_balance_t *lb) +{ + dpo_id_t *buckets; + int i; + + buckets = load_balance_get_buckets(lb); + + for (i = 0; i < lb->lb_n_buckets; i++) + { + dpo_reset(&buckets[i]); + } + + LB_DBG(lb, "destroy"); + if (!LB_HAS_INLINE_BUCKETS(lb)) + { + vec_free(lb->lb_buckets); + } + + pool_put(load_balance_pool, lb); +} + +static void +load_balance_unlock (dpo_id_t *dpo) +{ + load_balance_t *lb; + + lb = load_balance_get(dpo->dpoi_index); + + lb->lb_locks--; + + if (0 == lb->lb_locks) + { + load_balance_destroy(lb); + } +} + +const static dpo_vft_t lb_vft = { + .dv_lock = load_balance_lock, + .dv_unlock = load_balance_unlock, + .dv_format = format_load_balance_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance + * object. + * + * this means that these graph nodes are ones from which a load-balance is the + * parent object in the DPO-graph. + * + * We do not list all the load-balance nodes, such as the *-lookup. instead + * we are relying on the correct use of the .sibling_of field when setting + * up these sibling nodes. 
+ */ +const static char* const load_balance_ip4_nodes[] = +{ + "ip4-load-balance", + NULL, +}; +const static char* const load_balance_ip6_nodes[] = +{ + "ip6-load-balance", + NULL, +}; +const static char* const load_balance_mpls_nodes[] = +{ + "mpls-load-balance", + NULL, +}; +const static char* const * const load_balance_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = load_balance_ip4_nodes, + [DPO_PROTO_IP6] = load_balance_ip6_nodes, + [DPO_PROTO_MPLS] = load_balance_mpls_nodes, +}; + +void +load_balance_module_init (void) +{ + dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes); + + load_balance_map_module_init(); +} + +static clib_error_t * +load_balance_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t lbi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &lbi)) + ; + else + break; + } + + if (INDEX_INVALID != lbi) + { + vlib_cli_output (vm, "%U", format_load_balance, lbi, + LOAD_BALANCE_FORMAT_DETAIL); + } + else + { + load_balance_t *lb; + + pool_foreach(lb, load_balance_pool, + ({ + vlib_cli_output (vm, "%U", format_load_balance, + load_balance_get_index(lb), + LOAD_BALANCE_FORMAT_NONE); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (load_balance_show_command, static) = { + .path = "show load-balance", + .short_help = "show load-balance [<index>]", + .function = load_balance_show, +}; diff --git a/vnet/vnet/dpo/load_balance.h b/vnet/vnet/dpo/load_balance.h new file mode 100644 index 00000000000..d630a2c2d75 --- /dev/null +++ b/vnet/vnet/dpo/load_balance.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * \brief + * The load-balance object represents an ECMP choice. The buckets of a load + * balance object point to the sub-graph after the choice is made. + * THe load-balance object is also object type returned from a FIB table lookup. + * As such it needs to represent the case where there is only one coice. It may + * seem like overkill to use a load-balance object in this case, but the reason + * is for performance. If the load-balance object were not the result of the FIB + * lookup, then some other object would be. The case where there was ECMP + * this other object would need a load-balance as a parent and hence just add + * an unnecessary indirection. + * + * It is also the object in the DP that represents a via-fib-entry in a recursive + * route. + * + */ + +#ifndef __LOAD_BALANCE_H__ +#define __LOAD_BALANCE_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/lookup.h> +#include <vnet/dpo/dpo.h> +#include <vnet/fib/fib_types.h> + +/** + * Load-balance main + */ +typedef struct load_balance_main_t_ +{ + vlib_combined_counter_main_t lbm_to_counters; + vlib_combined_counter_main_t lbm_via_counters; +} load_balance_main_t; + +extern load_balance_main_t load_balance_main; + +/** + * The number of buckets that a load-balance object can have and still + * fit in one cache-line + */ +#define LB_NUM_INLINE_BUCKETS 4 + +/** + * @brief One path from an [EU]CMP set that the client wants to add to a + * load-balance object + */ +typedef struct load_balance_path_t_ { + /** + * ID of the Data-path object. 
+ */ + dpo_id_t path_dpo; + + /** + * The index of the FIB path + */ + fib_node_index_t path_index; + + /** + * weight for the path. + */ + u32 path_weight; +} load_balance_path_t; + +/** + * The FIB DPO provides: + * - load-balancing over the next DPOs in the chain/graph + * - per-route counters + */ +typedef struct load_balance_t_ { + /** + * number of buckets in the load-balance. always a power of 2. + */ + u16 lb_n_buckets; + /** + * number of buckets in the load-balance - 1. used in the switch path + * as part of the hash calculation. + */ + u16 lb_n_buckets_minus_1; + + /** + * The protocol of packets that traverse this LB. + * needed in combination with the flow hash config to determine how to hash. + * u8. + */ + dpo_proto_t lb_proto; + + /** + * The number of locks, which is approximately the number of users, + * of this load-balance. + * Load-balance objects of via-entries are heavily shared by recursives, + * so the lock count is a u32. + */ + u32 lb_locks; + + /** + * index of the load-balance map, INVALID if this LB does not use one + */ + index_t lb_map; + + /** + * the hash config to use when selecting a bucket. this is a u16 + */ + flow_hash_config_t lb_hash_config; + + /** + * Vector of buckets containing the next DPOs, sized as lbo_num + */ + dpo_id_t *lb_buckets; + + /** + * The rest of the cache line is used for buckets. In the common case + * where there are less than 4 buckets, then the buckets are + * on the same cache-line and we save ourselves a pointer dereference in + * the data-path.
+ */ + dpo_id_t lb_buckets_inline[LB_NUM_INLINE_BUCKETS]; +} load_balance_t; + +_Static_assert(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES, + "A load_balance object size exceeds one cachline"); + +/** + * Flags controlling load-balance formatting/display + */ +typedef enum load_balance_format_flags_t_ { + LOAD_BALANCE_FORMAT_NONE, + LOAD_BALANCE_FORMAT_DETAIL = (1 << 0), +} load_balance_format_flags_t; + +/** + * Flags controlling load-balance creation and modification + */ +typedef enum load_balance_flags_t_ { + LOAD_BALANCE_FLAG_NONE = 0, + LOAD_BALANCE_FLAG_USES_MAP = (1 << 0), +} load_balance_flags_t; + +extern index_t load_balance_create(u32 num_buckets, + dpo_proto_t lb_proto, + flow_hash_config_t fhc); +extern void load_balance_multipath_update( + const dpo_id_t *dpo, + load_balance_path_t * raw_next_hops, + load_balance_flags_t flags); + +extern void load_balance_set_bucket(index_t lbi, + u32 bucket, + const dpo_id_t *next); + +extern u8* format_load_balance(u8 * s, va_list * args); + +extern const dpo_id_t *load_balance_get_bucket(index_t lbi, + u32 bucket); +extern int load_balance_is_drop(const dpo_id_t *dpo); + +extern f64 load_balance_get_multipath_tolerance(void); + +/** + * The encapsulation breakages are for fast DP access + */ +extern load_balance_t *load_balance_pool; +static inline load_balance_t* +load_balance_get (index_t lbi) +{ + return (pool_elt_at_index(load_balance_pool, lbi)); +} + +#define LB_HAS_INLINE_BUCKETS(_lb) \ + ((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS) + +static inline const dpo_id_t * +load_balance_get_bucket_i (const load_balance_t *lb, + u32 bucket) +{ + ASSERT(bucket < lb->lb_n_buckets); + + if (PREDICT_TRUE(LB_HAS_INLINE_BUCKETS(lb))) + { + return (&lb->lb_buckets_inline[bucket]); + } + else + { + return (&lb->lb_buckets[bucket]); + } +} + +extern void load_balance_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/load_balance_map.c b/vnet/vnet/dpo/load_balance_map.c new file mode 100644 index 
00000000000..f08801f1ce7 --- /dev/null +++ b/vnet/vnet/dpo/load_balance_map.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + */ +#include <vnet/fib/fib_path.h> +#include <vnet/fib/fib_node_list.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/load_balance.h> + +/** + * A hash-table of load-balance maps by path index. + * this provides the fast lookup of the LB map when a path goes down + */ +static uword *lb_maps_by_path_index; + +/** + * A hash-table of load-balance maps by set of paths. + * This provides the LB map sharing. + * LB maps do not necessarily use all the paths in the list, since + * the entry that is requesting the map, may not have an out-going + * label for each of the paths. 
+ */ +static uword *load_balance_map_db; + +typedef enum load_balance_map_path_flags_t_ +{ + LOAD_BALANCE_MAP_PATH_UP = (1 << 0), + LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1), +} __attribute__ ((packed)) load_balance_map_path_flags_t; + +typedef struct load_balance_map_path_t_ { + /** + * Index of the path + */ + fib_node_index_t lbmp_index; + + /** + * Sibling Index in the list of all maps with this path index + */ + fib_node_index_t lbmp_sibling; + + /** + * the normalised weight of the path + */ + u32 lbmp_weight; + + /** + * The state of the path + */ + load_balance_map_path_flags_t lbmp_flags; +} load_balance_map_path_t; + +/** + * The global pool of LB maps + */ +load_balance_map_t *load_balance_map_pool; + +/* + * Debug macro + */ +#ifdef FIB_DEBUG +#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) \ + { \ + clib_warning("lbm: FIXME" _fmt, \ + ##_args); \ + } +#else +#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...) +#endif + +static index_t +load_balance_map_get_index (load_balance_map_t *lbm) +{ + return (lbm - load_balance_map_pool); +} + +u8* +format_load_balance_map (u8 *s, va_list ap) +{ + index_t lbmi = va_arg(ap, index_t); + u32 indent = va_arg(ap, u32); + load_balance_map_t *lbm; + u32 n_buckets, ii; + + lbm = load_balance_map_get(lbmi); + n_buckets = vec_len(lbm->lbm_buckets); + + s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets); + s = format(s, "\n%U index:", format_white_space, indent+2); + for (ii = 0; ii < n_buckets; ii++) + { + s = format(s, "%5d", ii); + } + s = format(s, "\n%U map:", format_white_space, indent+2); + for (ii = 0; ii < n_buckets; ii++) + { + s = format(s, "%5d", lbm->lbm_buckets[ii]); + } + + return (s); +} + + +static uword +load_balance_map_hash (load_balance_map_t *lbm) +{ + u32 old_lbm_hash, new_lbm_hash, hash; + load_balance_map_path_t *lb_path; + + new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths); + + vec_foreach (lb_path, lbm->lbm_paths) + { + hash = lb_path->lbmp_index; + hash_mix32(hash,
old_lbm_hash, new_lbm_hash); + } + + return (new_lbm_hash); +} + +always_inline uword +load_balance_map_db_hash_key_from_index (uword index) +{ + return 1 + 2*index; +} + +always_inline uword +load_balance_map_db_hash_key_is_index (uword key) +{ + return key & 1; +} + +always_inline uword +load_balance_map_db_hash_key_2_index (uword key) +{ + ASSERT (load_balance_map_db_hash_key_is_index (key)); + return key / 2; +} + +static load_balance_map_t* +load_balance_map_db_get_from_hash_key (uword key) +{ + load_balance_map_t *lbm; + + if (load_balance_map_db_hash_key_is_index (key)) + { + index_t lbm_index; + + lbm_index = load_balance_map_db_hash_key_2_index(key); + lbm = load_balance_map_get(lbm_index); + } + else + { + lbm = uword_to_pointer (key, load_balance_map_t *); + } + + return (lbm); +} + +static uword +load_balance_map_db_hash_key_sum (hash_t * h, + uword key) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_db_get_from_hash_key(key); + + return (load_balance_map_hash(lbm)); +} + +static uword +load_balance_map_db_hash_key_equal (hash_t * h, + uword key1, + uword key2) +{ + load_balance_map_t *lbm1, *lbm2; + + lbm1 = load_balance_map_db_get_from_hash_key(key1); + lbm2 = load_balance_map_db_get_from_hash_key(key2); + + return (load_balance_map_hash(lbm1) == + load_balance_map_hash(lbm2)); +} + +static index_t +load_balance_map_db_find (load_balance_map_t *lbm) +{ + uword *p; + + p = hash_get(load_balance_map_db, lbm); + + if (NULL != p) + { + return p[0]; + } + + return (FIB_NODE_INDEX_INVALID); +} + +static void +load_balance_map_db_insert (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + fib_node_list_t list; + uword *p; + + ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm)); + + /* + * insert into the DB based on the set of paths. + */ + hash_set (load_balance_map_db, + load_balance_map_db_hash_key_from_index( + load_balance_map_get_index(lbm)), + load_balance_map_get_index(lbm)); + + /* + * insert into each per-path list. 
+ */ + vec_foreach(lbmp, lbm->lbm_paths) + { + p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index); + + if (NULL == p) + { + list = fib_node_list_create(); + hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list); + } + else + { + list = p[0]; + } + + lbmp->lbmp_sibling = + fib_node_list_push_front(list, + 0, FIB_NODE_TYPE_FIRST, + load_balance_map_get_index(lbm)); + } + + LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted"); +} + +static void +load_balance_map_db_remove (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + uword *p; + + ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm)); + + hash_unset(load_balance_map_db, + load_balance_map_db_hash_key_from_index( + load_balance_map_get_index(lbm))); + + /* + * remove from each per-path list. + */ + vec_foreach(lbmp, lbm->lbm_paths) + { + p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index); + + ASSERT(NULL != p); + + fib_node_list_remove(p[0], lbmp->lbmp_sibling); + } + + LOAD_BALANCE_MAP_DBG(lbm, "DB-removed"); +} + +/** + * @brief From the paths that are usable, fill the Map. + */ +static void +load_balance_map_fill (load_balance_map_t *lbm) +{ + load_balance_map_path_t *lbmp; + u32 n_buckets, bucket, ii, jj; + u16 *tmp_buckets; + + tmp_buckets = NULL; + n_buckets = vec_len(lbm->lbm_buckets); + + /* + * run through the set of paths once, and build a vector of the + * indices that are usable. we do this in a scratch space, since we + * need to refer to it multiple times as we build the real buckets.
+ */ + vec_validate(tmp_buckets, n_buckets-1); + + bucket = jj = 0; + vec_foreach (lbmp, lbm->lbm_paths) + { + if (fib_path_is_resolved(lbmp->lbmp_index)) + { + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + tmp_buckets[jj++] = bucket++; + } + } + else + { + bucket += lbmp->lbmp_weight; + } + } + _vec_len(tmp_buckets) = jj; + + /* + * If the number of temporaries written is as many as we need, implying + * all paths were up, then we can simply copy the scratch area over the + * actual buckets' memory + */ + if (jj == n_buckets) + { + memcpy(lbm->lbm_buckets, + tmp_buckets, + sizeof(lbm->lbm_buckets[0]) * n_buckets); + } + else + { + /* + * one or more paths are down. + */ + if (0 == vec_len(tmp_buckets)) + { + /* + * if the scratch area is empty, then no paths are usable. + * they will all drop. so use them all, lest we account drops + * against only one. + */ + for (bucket = 0; bucket < n_buckets; bucket++) + { + lbm->lbm_buckets[bucket] = bucket; + } + } + else + { + bucket = jj = 0; + vec_foreach (lbmp, lbm->lbm_paths) + { + if (fib_path_is_resolved(lbmp->lbmp_index)) + { + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + lbm->lbm_buckets[bucket] = bucket; + bucket++; + } + } + else + { + /* + * path is unusable + * cycle through the scratch space selecting a index. + * this means we load balance, in the intended ratio, + * over the paths that are still usable. 
+ */ + for (ii = 0; ii < lbmp->lbmp_weight; ii++) + { + lbm->lbm_buckets[bucket] = tmp_buckets[jj]; + jj = (jj + 1) % vec_len(tmp_buckets); + bucket++; + } + } + } + } + } + + vec_free(tmp_buckets); +} + +static load_balance_map_t* +load_balance_map_alloc (const load_balance_path_t *paths) +{ + load_balance_map_t *lbm; + u32 ii; + + pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES); + memset(lbm, 0, sizeof(*lbm)); + + vec_validate(lbm->lbm_paths, vec_len(paths)-1); + + vec_foreach_index(ii, paths) + { + lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index; + lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight; + } + + return (lbm); +} + +static load_balance_map_t * +load_balance_map_init (load_balance_map_t *lbm, + u32 n_buckets, + u32 sum_of_weights) +{ + lbm->lbm_sum_of_norm_weights = sum_of_weights; + vec_validate(lbm->lbm_buckets, n_buckets-1); + + load_balance_map_db_insert(lbm); + + load_balance_map_fill(lbm); + + return (lbm); +} + +index_t +load_balance_map_add_or_lock (u32 n_buckets, + u32 sum_of_weights, + const load_balance_path_t *paths) +{ + load_balance_map_t *tmp, *lbm; + index_t lbmi; + + tmp = load_balance_map_alloc(paths); + + lbmi = load_balance_map_db_find(tmp); + + if (INDEX_INVALID == lbmi) + { + lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights); + } + else + { + lbm = load_balance_map_get(lbmi); + } + + lbm->lbm_locks++; + + return (load_balance_map_get_index(lbm)); +} + +void +load_balance_map_lock (index_t lbmi) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_get(lbmi); + + lbm->lbm_locks++; +} + +void +load_balance_map_unlock (index_t lbmi) +{ + load_balance_map_t *lbm; + + if (INDEX_INVALID == lbmi) + { + return; + } + + lbm = load_balance_map_get(lbmi); + + lbm->lbm_locks--; + + if (0 == lbm->lbm_locks) + { + load_balance_map_db_remove(lbm); + vec_free(lbm->lbm_paths); + vec_free(lbm->lbm_buckets); + pool_put(load_balance_map_pool, lbm); + } +} + +static int +load_balance_map_path_state_change_walk 
(fib_node_ptr_t *fptr, + void *ctx) +{ + load_balance_map_t *lbm; + + lbm = load_balance_map_get(fptr->fnp_index); + + load_balance_map_fill(lbm); + + return (!0); +} + +/** + * @brief The state of a path has changed (it has no doubt gone down). + * This is the trigger to perform a PIC edge cutover and update the maps + * to exclude this path. + */ +void +load_balance_map_path_state_change (fib_node_index_t path_index) +{ + uword *p; + + /* + * re-stripe the buckets for each affected MAP + */ + p = hash_get(lb_maps_by_path_index, path_index); + + if (NULL == p) + return; + + fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL); +} + +/** + * @brief Initialise the Load-balance map module + */ +void +load_balance_map_module_init (void) +{ + load_balance_map_db = + hash_create2 (/* elts */ 0, + /* user */ 0, + /* value_bytes */ sizeof (index_t), + load_balance_map_db_hash_key_sum, + load_balance_map_db_hash_key_equal, + /* format pair/arg */ + 0, 0); + + lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t)); +} + +static clib_error_t * +load_balance_map_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t lbmi = INDEX_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &lbmi)) + ; + else + break; + } + + if (INDEX_INVALID != lbmi) + { + vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0); + } + else + { + load_balance_map_t *lbm; + + pool_foreach(lbm, load_balance_map_pool, + ({ + vlib_cli_output (vm, "%U", format_load_balance_map, + load_balance_map_get_index(lbm), 0); + })); + } + + return 0; +} + +VLIB_CLI_COMMAND (load_balance_map_show_command, static) = { + .path = "show load-balance-map", + .short_help = "show load-balance-map [<index>]", + .function = load_balance_map_show, +}; diff --git a/vnet/vnet/dpo/load_balance_map.h b/vnet/vnet/dpo/load_balance_map.h new file mode 100644 index 00000000000..f080e97ccad ---
/dev/null +++ b/vnet/vnet/dpo/load_balance_map.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + */ + +#ifndef __LOAD_BALANCE_MAP_H__ +#define __LOAD_BALANCE_MAP_H__ + +#include <vlib/vlib.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/load_balance.h> + +struct load_balance_map_path_t_; + +/** + */ +typedef struct load_balance_map_t_ { + /** + * The buckets of the map that provide the index to index translation. + * In the first cacheline. + */ + u16 *lbm_buckets; + + /** + * the vector of paths this MAP represents + */ + struct load_balance_map_path_t_ *lbm_paths; + + /** + * the sum of the normalised weights. cache for convenience + */ + u32 lbm_sum_of_norm_weights; + + /** + * Number of locks. 
Maps are shared by a large number of recursive fib_entry_ts + */ + u32 lbm_locks; +} load_balance_map_t; + +extern index_t load_balance_map_add_or_lock(u32 n_buckets, + u32 sum_of_weights, + const load_balance_path_t *norm_paths); + +extern void load_balance_map_lock(index_t lmbi); +extern void load_balance_map_unlock(index_t lbmi); + +extern void load_balance_map_path_state_change(fib_node_index_t path_index); + +extern u8* format_load_balance_map(u8 *s, va_list ap); + +/** + * The encapsulation breakages are for fast DP access + */ +extern load_balance_map_t *load_balance_map_pool; + +static inline load_balance_map_t* +load_balance_map_get (index_t lbmi) +{ + return (pool_elt_at_index(load_balance_map_pool, lbmi)); +} + + +extern void load_balance_map_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/lookup_dpo.c b/vnet/vnet/dpo/lookup_dpo.c new file mode 100644 index 00000000000..0bfc0651a63 --- /dev/null +++ b/vnet/vnet/dpo/lookup_dpo.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/mpls_fib.h> + +static const char *const lookup_input_names[] = LOOKUP_INPUTS; + +/** + * @brief Enumeration of the lookup subtypes + */ +typedef enum lookup_sub_type_t_ +{ + LOOKUP_SUB_TYPE_SRC, + LOOKUP_SUB_TYPE_DST, + LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE, +} lookup_sub_type_t; +#define LOOKUP_SUB_TYPE_NUM (LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE+1) + +#define FOR_EACH_LOOKUP_SUB_TYPE(_st) \ + for (_st = LOOKUP_SUB_TYPE_IP4_SRC; _st < LOOKUP_SUB_TYPE_NUM; _st++) + +/** + * @brief pool of all MPLS Label DPOs + */ +lookup_dpo_t *lookup_dpo_pool; + +/** + * @brief An array of registered DPO type values for the sub-types + */ +static dpo_type_t lookup_dpo_sub_types[LOOKUP_SUB_TYPE_NUM]; + +static lookup_dpo_t * +lookup_dpo_alloc (void) +{ + lookup_dpo_t *lkd; + + pool_get_aligned(lookup_dpo_pool, lkd, CLIB_CACHE_LINE_BYTES); + + return (lkd); +} + +static index_t +lookup_dpo_get_index (lookup_dpo_t *lkd) +{ + return (lkd - lookup_dpo_pool); +} + +static void +lookup_dpo_add_or_lock_i (fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + dpo_type_t type; + + lkd = lookup_dpo_alloc(); + lkd->lkd_fib_index = fib_index; + lkd->lkd_proto = proto; + lkd->lkd_input = input; + lkd->lkd_table = table_config; + + /* + * use the input type to select the lookup sub-type + */ + type = 0; + + switch (input) + { + case LOOKUP_INPUT_SRC_ADDR: + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC]; + break; + case LOOKUP_INPUT_DST_ADDR: + switch (table_config) + { + case LOOKUP_TABLE_FROM_INPUT_INTERFACE: + type = lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE]; + break; + case LOOKUP_TABLE_FROM_CONFIG: + type = 
lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST]; + break; + } + } + + if (0 == type) + { + dpo_reset(dpo); + } + else + { + dpo_set(dpo, type, proto, lookup_dpo_get_index(lkd)); + } +} + +void +lookup_dpo_add_or_lock_w_fib_index (fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + if (LOOKUP_TABLE_FROM_CONFIG == table_config) + { + fib_table_lock(fib_index, dpo_proto_to_fib(proto)); + } + lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); +} + +void +lookup_dpo_add_or_lock_w_table_id (u32 table_id, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table_config, + dpo_id_t *dpo) +{ + fib_node_index_t fib_index = FIB_NODE_INDEX_INVALID; + + if (LOOKUP_TABLE_FROM_CONFIG == table_config) + { + fib_index = + fib_table_find_or_create_and_lock(dpo_proto_to_fib(proto), + table_id); + } + + ASSERT(FIB_NODE_INDEX_INVALID != fib_index); + lookup_dpo_add_or_lock_i(fib_index, proto, input, table_config, dpo); +} + +u8* +format_lookup_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(index); + + if (LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table) + { + s = format(s, "%s lookup in interface's %U table", + lookup_input_names[lkd->lkd_input], + format_dpo_proto, lkd->lkd_proto); + } + else + { + s = format(s, "%s lookup in %U", + lookup_input_names[lkd->lkd_input], + format_fib_table_name, lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + return (s); +} + +static void +lookup_dpo_lock (dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(dpo->dpoi_index); + + lkd->lkd_locks++; +} + +static void +lookup_dpo_unlock (dpo_id_t *dpo) +{ + lookup_dpo_t *lkd; + + lkd = lookup_dpo_get(dpo->dpoi_index); + + lkd->lkd_locks--; + + if (0 == lkd->lkd_locks) + { + if (LOOKUP_TABLE_FROM_CONFIG == lkd->lkd_table) + { + fib_table_unlock(lkd->lkd_fib_index, + dpo_proto_to_fib(lkd->lkd_proto)); + } + 
pool_put(lookup_dpo_pool, lkd); + } +} + +always_inline void +ip4_src_fib_lookup_one (u32 src_fib_index0, + const ip4_address_t * addr0, + u32 * src_adj_index0) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0; + + mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); +} + +always_inline void +ip4_src_fib_lookup_two (u32 src_fib_index0, + u32 src_fib_index1, + const ip4_address_t * addr0, + const ip4_address_t * addr1, + u32 * src_adj_index0, + u32 * src_adj_index1) +{ + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + + mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 1); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); + + /* Handle default route. */ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); + src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); +} + +/** + * @brief Lookup trace data + */ +typedef struct lookup_trace_t_ +{ + union { + ip46_address_t addr; + mpls_unicast_header_t hdr; + }; + fib_node_index_t fib_index; + index_t lbi; +} lookup_trace_t; + + +always_inline uword +lookup_dpo_ip4_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int input_src_addr, + int table_from_interface) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const ip4_address_t *input_addr; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const ip4_header_t * ip0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip4 lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + + /* + * choose between a lookup using the fib index in the DPO + * or getting the FIB index from the interface. 
+ */ + if (table_from_interface) + { + fib_index0 = + ip4_fib_table_get_index_for_sw_if_index( + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { + fib_index0 = lkd0->lkd_fib_index; + } + + /* + * choose between a source or destination address lookup in the table + */ + if (input_src_addr) + { + input_addr = &ip0->src_address; + } + else + { + input_addr = &ip0->dst_address; + } + + /* do lookup */ + ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->addr.ip4 = *input_addr; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lookup_trace_t * t = va_arg (*args, lookup_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U fib-index:%d addr:%U load-balance:%d", + format_white_space, indent, + t->fib_index, + format_ip46_address, &t->addr, IP46_TYPE_ANY, + t->lbi); + return s; +} + +always_inline uword +lookup_ip4_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 0)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_node) = { + .function = lookup_ip4_dst, + .name = "lookup-ip4-dst", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + 
.format_trace = format_lookup_trace, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_node, lookup_ip4_dst) + +always_inline uword +lookup_ip4_dst_itf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 0, 1)); +} + +VLIB_REGISTER_NODE (lookup_ip4_dst_itf_node) = { + .function = lookup_ip4_dst_itf, + .name = "lookup-ip4-dst-itf", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = format_lookup_trace, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_dst_itf_node, lookup_ip4_dst_itf) + +always_inline uword +lookup_ip4_src (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip4_inline(vm, node, from_frame, 1, 0)); +} + +VLIB_REGISTER_NODE (lookup_ip4_src_node) = { + .function = lookup_ip4_src, + .name = "lookup-ip4-src", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip4-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip4_src_node, lookup_ip4_src) + +always_inline uword +lookup_dpo_ip6_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int input_src_addr) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const ip6_address_t *input_addr0; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const ip6_header_t * ip0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + 
to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by ip6 lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + fib_index0 = lkd0->lkd_fib_index; + + /* + * choose between a source or destination address lookup in the table + */ + if (input_src_addr) + { + input_addr0 = &ip0->src_address; + } + else + { + input_addr0 = &ip0->dst_address; + } + + /* do src lookup */ + lbi0 = ip6_fib_table_fwding_lookup(&ip6_main, + fib_index0, + input_addr0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->addr.ip6 = *input_addr0; + } + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +always_inline uword +lookup_ip6_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip6_inline(vm, node, from_frame, 0 /*use src*/)); +} + +VLIB_REGISTER_NODE (lookup_ip6_dst_node) = { + .function = lookup_ip6_dst, + .name = "lookup-ip6-dst", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip6-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_dst_node, lookup_ip6_dst) + +always_inline uword +lookup_ip6_src (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_ip6_inline(vm, node, from_frame, 1 /*use src*/)); +} + 
+VLIB_REGISTER_NODE (lookup_ip6_src_node) = { + .function = lookup_ip6_src, + .name = "lookup-ip6-src", + .vector_size = sizeof (u32), + .format_trace = format_lookup_trace, + .sibling_of = "ip6-lookup", +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_ip6_src_node, lookup_ip6_src) + +always_inline uword +lookup_dpo_mpls_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, + int table_from_interface) +{ + u32 n_left_from, next_index, * from, * to_next; + u32 cpu_index = os_get_cpu_number(); + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, lkdi0, lbi0, fib_index0, next0; + const mpls_unicast_header_t * hdr0; + const load_balance_t *lb0; + const lookup_dpo_t * lkd0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + hdr0 = vlib_buffer_get_current (b0); + + /* dst lookup was done by mpls lookup */ + lkdi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + lkd0 = lookup_dpo_get(lkdi0); + + /* + * choose between a lookup using the fib index in the DPO + * or getting the FIB index from the interface. 
+ */ + if (table_from_interface) + { + fib_index0 = + mpls_fib_table_get_index_for_sw_if_index( + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + } + else + { + fib_index0 = lkd0->lkd_fib_index; + } + + /* do lookup */ + lbi0 = mpls_fib_table_forwarding_lookup (fib_index0, hdr0); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); + + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->fib_index = fib_index0; + tr->lbi = lbi0; + tr->hdr = *hdr0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_lookup_mpls_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + lookup_trace_t * t = va_arg (*args, lookup_trace_t *); + uword indent = format_get_indent (s); + mpls_unicast_header_t hdr; + + hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); + + s = format (s, "%U fib-index:%d hdr:%U load-balance:%d", + format_white_space, indent, + t->fib_index, + format_mpls_header, hdr, + t->lbi); + return s; +} + +always_inline uword +lookup_mpls_dst (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_mpls_inline(vm, node, from_frame, 0)); +} + +VLIB_REGISTER_NODE (lookup_mpls_dst_node) = { + .function = lookup_mpls_dst, + .name = "lookup-mpls-dst", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + .format_trace = format_lookup_mpls_trace, + .n_next_nodes = 0, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_node, 
lookup_mpls_dst) + +always_inline uword +lookup_mpls_dst_itf (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (lookup_dpo_mpls_inline(vm, node, from_frame, 1)); +} + +VLIB_REGISTER_NODE (lookup_mpls_dst_itf_node) = { + .function = lookup_mpls_dst_itf, + .name = "lookup-mpls-dst-itf", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + .format_trace = format_lookup_mpls_trace, + .n_next_nodes = 0, +}; +VLIB_NODE_FUNCTION_MULTIARCH (lookup_mpls_dst_itf_node, lookup_mpls_dst_itf) + +const static dpo_vft_t lkd_vft = { + .dv_lock = lookup_dpo_lock, + .dv_unlock = lookup_dpo_unlock, + .dv_format = format_lookup_dpo, +}; + +const static char* const lookup_src_ip4_nodes[] = +{ + "lookup-ip4-src", + NULL, +}; +const static char* const lookup_src_ip6_nodes[] = +{ + "lookup-ip6-src", + NULL, +}; +const static char* const * const lookup_src_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_src_ip4_nodes, + [DPO_PROTO_IP6] = lookup_src_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +const static char* const lookup_dst_ip4_nodes[] = +{ + "lookup-ip4-dst", + NULL, +}; +const static char* const lookup_dst_ip6_nodes[] = +{ + "lookup-ip6-dst", + NULL, +}; +const static char* const lookup_dst_mpls_nodes[] = +{ + "lookup-mpls-dst", + NULL, +}; +const static char* const * const lookup_dst_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_ip4_nodes, + [DPO_PROTO_IP6] = lookup_dst_ip6_nodes, + [DPO_PROTO_MPLS] = lookup_dst_mpls_nodes, +}; + +const static char* const lookup_dst_from_interface_ip4_nodes[] = +{ + "lookup-ip4-dst-itf", + NULL, +}; +const static char* const lookup_dst_from_interface_ip6_nodes[] = +{ + "lookup-ip6-dst-itf", + NULL, +}; +const static char* const lookup_dst_from_interface_mpls_nodes[] = +{ + "lookup-mpls-dst-itf", + NULL, +}; +const static char* const * const lookup_dst_from_interface_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lookup_dst_from_interface_ip4_nodes, + [DPO_PROTO_IP6] = 
lookup_dst_from_interface_ip6_nodes, + [DPO_PROTO_MPLS] = lookup_dst_from_interface_mpls_nodes, +}; + + +void +lookup_dpo_module_init (void) +{ + dpo_register(DPO_LOOKUP, &lkd_vft, NULL); + + /* + * There are various sorts of lookup; src or dst addr v4 /v6 etc. + * there isn't an object type for each (there is only the lookup_dpo_t), + * but, for performance reasons, there is a data plane function, and hence + * VLIB node for each. VLIB graph node construction is based on DPO types + * so we create sub-types. + */ + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_SRC] = + dpo_register_new_type(&lkd_vft, lookup_src_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST] = + dpo_register_new_type(&lkd_vft, lookup_dst_nodes); + lookup_dpo_sub_types[LOOKUP_SUB_TYPE_DST_TABLE_FROM_INTERFACE] = + dpo_register_new_type(&lkd_vft, lookup_dst_nodes); +} diff --git a/vnet/vnet/dpo/lookup_dpo.h b/vnet/vnet/dpo/lookup_dpo.h new file mode 100644 index 00000000000..ff283388868 --- /dev/null +++ b/vnet/vnet/dpo/lookup_dpo.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __LOOKUP_DPO_H__ +#define __LOOKUP_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/dpo.h> + +/** + * Switch to use the packet's source or destination address for lookup + */ +typedef enum lookup_input_t_ { + LOOKUP_INPUT_SRC_ADDR, + LOOKUP_INPUT_DST_ADDR, +} __attribute__ ((packed)) lookup_input_t; + +#define LOOKUP_INPUTS { \ + [LOOKUP_INPUT_SRC_ADDR] = "src-address", \ + [LOOKUP_INPUT_DST_ADDR] = "dst-address", \ +} + +/** + * Switch to use the configured table or the table of the input interface + */ +typedef enum lookup_table_t_ { + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + LOOKUP_TABLE_FROM_CONFIG, +} __attribute__ ((packed)) lookup_table_t; + +#define LOOKUP_TABLES { \ + [LOOKUP_INPUT_SRC_ADDR] = "table-input-interface", \ + [LOOKUP_INPUT_DST_ADDR] = "table-configured", \ +} + +/** + * A representation of a FIB table lookup to be performed in the data-path + */ +typedef struct lookup_dpo_t +{ + /** + * The FIB, or interface from which to get a FIB, in which to perform + * the next lookup; + */ + fib_node_index_t lkd_fib_index; + + /** + * The protocol of the FIB for the lookup, and hence + * the protocol of the packet + */ + dpo_proto_t lkd_proto; + + /** + * Switch to use src or dst address + */ + lookup_input_t lkd_input; + + /** + * Switch to use the table index passed, or the table of the input interface + */ + lookup_table_t lkd_table; + + /** + * Number of locks + */ + u16 lkd_locks; +} lookup_dpo_t; + +extern void lookup_dpo_add_or_lock_w_fib_index(fib_node_index_t fib_index, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table, + dpo_id_t *dpo); +extern void lookup_dpo_add_or_lock_w_table_id(u32 table_id, + dpo_proto_t proto, + lookup_input_t input, + lookup_table_t table, + dpo_id_t *dpo); + +extern u8* format_lookup_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern lookup_dpo_t *lookup_dpo_pool; + +static inline lookup_dpo_t * +lookup_dpo_get
(index_t index) +{ + return (pool_elt_at_index(lookup_dpo_pool, index)); +} + +extern void lookup_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/mpls_label_dpo.c b/vnet/vnet/dpo/mpls_label_dpo.c new file mode 100644 index 00000000000..0ec840ecfbd --- /dev/null +++ b/vnet/vnet/dpo/mpls_label_dpo.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/mpls/mpls.h> + +/* + * pool of all MPLS Label DPOs + */ +mpls_label_dpo_t *mpls_label_dpo_pool; + +static mpls_label_dpo_t * +mpls_label_dpo_alloc (void) +{ + mpls_label_dpo_t *mld; + + pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES); + memset(mld, 0, sizeof(*mld)); + + dpo_reset(&mld->mld_dpo); + + return (mld); +} + +static index_t +mpls_label_dpo_get_index (mpls_label_dpo_t *mld) +{ + return (mld - mpls_label_dpo_pool); +} + +index_t +mpls_label_dpo_create (mpls_label_t label, + mpls_eos_bit_t eos, + u8 ttl, + u8 exp, + const dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_alloc(); + + vnet_mpls_uc_set_label(&mld->mld_hdr.label_exp_s_ttl, label); + vnet_mpls_uc_set_ttl(&mld->mld_hdr.label_exp_s_ttl, ttl); + vnet_mpls_uc_set_exp(&mld->mld_hdr.label_exp_s_ttl, exp); + vnet_mpls_uc_set_s(&mld->mld_hdr.label_exp_s_ttl, eos); + + /* + * get the header in network byte order since we will paint it + * on a packet in 
the data-plane + */ + mld->mld_hdr.label_exp_s_ttl = + clib_host_to_net_u32(mld->mld_hdr.label_exp_s_ttl); + + dpo_stack(DPO_MPLS_LABEL, DPO_PROTO_MPLS, &mld->mld_dpo, dpo); + + return (mpls_label_dpo_get_index(mld)); +} + +u8* +format_mpls_label_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + u32 indent = va_arg (*args, u32); + mpls_unicast_header_t hdr; + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(index); + + hdr.label_exp_s_ttl = + clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl); + + return (format(s, "mpls-label:[%d]:%U\n%U%U", + index, + format_mpls_header, hdr, + format_white_space, indent, + format_dpo_id, &mld->mld_dpo, indent+2)); +} + +static void +mpls_label_dpo_lock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks++; +} + +static void +mpls_label_dpo_unlock (dpo_id_t *dpo) +{ + mpls_label_dpo_t *mld; + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + mld->mld_locks--; + + if (0 == mld->mld_locks) + { + dpo_reset(&mld->mld_dpo); + pool_put(mpls_label_dpo_pool, mld); + } +} + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. 
+ */ +typedef struct mpls_label_imposition_trace_t_ +{ + /** + * The MPLS header imposed + */ + mpls_unicast_header_t hdr; +} mpls_label_imposition_trace_t; + +always_inline uword +mpls_label_imposition (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + mpls_unicast_header_t *hdr0; + mpls_label_dpo_t *mld0; + vlib_buffer_t * b0; + u32 bi0, mldi0; + u32 next0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* dst lookup was done by ip4 lookup */ + mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + mld0 = mpls_label_dpo_get(mldi0); + + /* Paint the MPLS header */ + vlib_buffer_advance(b0, -sizeof(*hdr0)); + hdr0 = vlib_buffer_get_current(b0); + + // FIXME. + // need to copy the TTL from the correct place. + // for IPvX imposition from the IP header + // so we need a deidcated ipx-to-mpls-label-imp-node + // for mpls switch and stack another solution is required. 
+ *hdr0 = mld0->mld_hdr; + + next0 = mld0->mld_dpo.dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index; + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_label_imposition_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->hdr = *hdr0; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static u8 * +format_mpls_label_imposition_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_label_imposition_trace_t * t; + mpls_unicast_header_t hdr; + uword indent; + + t = va_arg (*args, mpls_label_imposition_trace_t *); + indent = format_get_indent (s); + hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl); + + s = format (s, "%Umpls-header:%U", + format_white_space, indent, + format_mpls_header, hdr); + return (s); +} + +VLIB_REGISTER_NODE (mpls_label_imposition_node) = { + .function = mpls_label_imposition, + .name = "mpls-label-imposition", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_label_imposition_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + } +}; +VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node, mpls_label_imposition) + +const static dpo_vft_t mld_vft = { + .dv_lock = mpls_label_dpo_lock, + .dv_unlock = mpls_label_dpo_unlock, + .dv_format = format_mpls_label_dpo, +}; + +const static char* const mpls_label_imp_ip4_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_ip6_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const mpls_label_imp_mpls_nodes[] = +{ + "mpls-label-imposition", + NULL, +}; +const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = 
mpls_label_imp_ip4_nodes, + [DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes, + [DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes, +}; + + +void +mpls_label_dpo_module_init (void) +{ + dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes); +} diff --git a/vnet/vnet/dpo/mpls_label_dpo.h b/vnet/vnet/dpo/mpls_label_dpo.h new file mode 100644 index 00000000000..47ee344933f --- /dev/null +++ b/vnet/vnet/dpo/mpls_label_dpo.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MPLS_LABEL_DPO_H__ +#define __MPLS_LABEL_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of an MPLS label for imposition in the data-path + */ +typedef struct mpls_label_dpo_t +{ + /** + * The MPLS label header to impose + */ + mpls_unicast_header_t mld_hdr; + + /** + * Next DPO in the graph + */ + dpo_id_t mld_dpo; + + /** + * Number of locks/users of the label + */ + u16 mld_locks; +} mpls_label_dpo_t; + +extern index_t mpls_label_dpo_create(mpls_label_t label, + mpls_eos_bit_t eos, + u8 ttl, + u8 exp, + const dpo_id_t *dpo); + +extern u8* format_mpls_label_dpo(u8 *s, va_list *args); + + +/* + * Encapsulation violation for fast data-path access + */ +extern mpls_label_dpo_t *mpls_label_dpo_pool; + +static inline mpls_label_dpo_t * +mpls_label_dpo_get (index_t index) +{ + return (pool_elt_at_index(mpls_label_dpo_pool, index)); +} + +extern void mpls_label_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/punt_dpo.c b/vnet/vnet/dpo/punt_dpo.c new file mode 100644 index 00000000000..e27a8ff3018 --- /dev/null +++ b/vnet/vnet/dpo/punt_dpo.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @brief + * The data-path object representing puntping the packet + */ + +#include <vnet/dpo/dpo.h> + +static dpo_id_t punt_dpos[DPO_PROTO_NUM]; + +const dpo_id_t * +punt_dpo_get (dpo_proto_t proto) +{ + dpo_set(&punt_dpos[proto], DPO_PUNT, proto, 1); + + return (&punt_dpos[proto]); +} + +int +dpo_is_punt (const dpo_id_t *dpo) +{ + return (dpo->dpoi_type == DPO_PUNT); +} + +static void +punt_dpo_lock (dpo_id_t *dpo) +{ + /* + * not maintaining a lock count on the punt + * more trouble than it's worth. + * There always needs to be one around. no point it managaing its lifetime + */ +} +static void +punt_dpo_unlock (dpo_id_t *dpo) +{ +} + +static u8* +format_punt_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + + return (format(s, "dpo-punt")); +} + +const static dpo_vft_t punt_vft = { + .dv_lock = punt_dpo_lock, + .dv_unlock = punt_dpo_unlock, + .dv_format = format_punt_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a punt + * object. + * + * this means that these graph nodes are ones from which a punt is the + * parent object in the DPO-graph. + */ +const static char* const punt_ip4_nodes[] = +{ + "ip4-punt", + NULL, +}; +const static char* const punt_ip6_nodes[] = +{ + "ip6-punt", + NULL, +}; +const static char* const punt_mpls_nodes[] = +{ + "mpls-punt", + NULL, +}; +const static char* const * const punt_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = punt_ip4_nodes, + [DPO_PROTO_IP6] = punt_ip6_nodes, + [DPO_PROTO_MPLS] = punt_mpls_nodes, +}; + +void +punt_dpo_module_init (void) +{ + dpo_register(DPO_PUNT, &punt_vft, punt_nodes); +} diff --git a/vnet/vnet/dpo/punt_dpo.h b/vnet/vnet/dpo/punt_dpo.h new file mode 100644 index 00000000000..370547c1596 --- /dev/null +++ b/vnet/vnet/dpo/punt_dpo.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief A DPO to punt packets to the Control-plane + */ + +#ifndef __PUNT_DPO_H__ +#define __PUNT_DPO_H__ + +#include <vnet/dpo/dpo.h> + +extern int dpo_is_punt(const dpo_id_t *dpo); + +extern const dpo_id_t *punt_dpo_get(dpo_proto_t proto); + +extern void punt_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/dpo/receive_dpo.c b/vnet/vnet/dpo/receive_dpo.c new file mode 100644 index 00000000000..ee7d82b0980 --- /dev/null +++ b/vnet/vnet/dpo/receive_dpo.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing receiveing the packet, i.e. 
it's for-us + */ +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/receive_dpo.h> + +/** + * @brief pool of all receive DPOs + */ +receive_dpo_t *receive_dpo_pool; + +static receive_dpo_t * +receive_dpo_alloc (void) +{ + receive_dpo_t *rd; + + pool_get_aligned(receive_dpo_pool, rd, CLIB_CACHE_LINE_BYTES); + memset(rd, 0, sizeof(*rd)); + + return (rd); +} + +static receive_dpo_t * +receive_dpo_get_from_dpo (const dpo_id_t *dpo) +{ + ASSERT(DPO_RECEIVE == dpo->dpoi_type); + + return (receive_dpo_get(dpo->dpoi_index)); +} + + +/* + * receive_dpo_add_or_lock + * + * The next_hop address here is used for source address selection in the DP. + * The local adj is added to an interface's receive prefix, the next-hop + * passed here is the local prefix on the same interface. + */ +void +receive_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr, + dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_alloc(); + + rd->rd_sw_if_index = sw_if_index; + if (NULL != nh_addr) + { + rd->rd_addr = *nh_addr; + } + + dpo_set(dpo, DPO_RECEIVE, proto, (rd - receive_dpo_pool)); +} + +static void +receive_dpo_lock (dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_get_from_dpo(dpo); + rd->rd_locks++; +} + +static void +receive_dpo_unlock (dpo_id_t *dpo) +{ + receive_dpo_t *rd; + + rd = receive_dpo_get_from_dpo(dpo); + rd->rd_locks--; + + if (0 == rd->rd_locks) + { + pool_put(receive_dpo_pool, rd); + } +} + +static u8* +format_receive_dpo (u8 *s, va_list *ap) +{ + CLIB_UNUSED(index_t index) = va_arg(ap, index_t); + CLIB_UNUSED(u32 indent) = va_arg(ap, u32); + vnet_main_t * vnm = vnet_get_main(); + receive_dpo_t *rd; + + rd = receive_dpo_get(index); + + if (~0 != rd->rd_sw_if_index) + { + return (format(s, "dpo-receive: %U on %U", + format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY, + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface(vnm, rd->rd_sw_if_index))); + } + else + { + return (format(s, "dpo-receive")); 
+ } +} + +const static dpo_vft_t receive_vft = { + .dv_lock = receive_dpo_lock, + .dv_unlock = receive_dpo_unlock, + .dv_format = format_receive_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a receive + * object. + * + * this means that these graph nodes are ones from which a receive is the + * parent object in the DPO-graph. + */ +const static char* const receive_ip4_nodes[] = +{ + "ip4-local", + NULL, +}; +const static char* const receive_ip6_nodes[] = +{ + "ip6-local", + NULL, +}; + +const static char* const * const receive_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = receive_ip4_nodes, + [DPO_PROTO_IP6] = receive_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +receive_dpo_module_init (void) +{ + dpo_register(DPO_RECEIVE, &receive_vft, receive_nodes); +} diff --git a/vnet/vnet/dpo/receive_dpo.h b/vnet/vnet/dpo/receive_dpo.h new file mode 100644 index 00000000000..2420fd7843c --- /dev/null +++ b/vnet/vnet/dpo/receive_dpo.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief + * The data-path object representing receiveing the packet, i.e. 
it's for-us + */ + +#ifndef __RECEIVE_DPO_H__ +#define __RECEIVE_DPO_H__ + +#include <vnet/dpo/dpo.h> +#include <vnet/ip/ip6.h> + +typedef struct receive_dpo_t_ +{ + /** + * The Software interface index on which traffic is received + */ + u32 rd_sw_if_index; + + /** + * The address on the receive interface. packet are destined to this address + */ + ip46_address_t rd_addr; + + /** + * number oflocks. + */ + u16 rd_locks; +} receive_dpo_t; + +extern void receive_dpo_add_or_lock (dpo_proto_t proto, + u32 sw_if_index, + const ip46_address_t *nh_addr, + dpo_id_t *dpo); + +extern void receive_dpo_module_init(void); + +/** + * @brief pool of all receive DPOs + */ +receive_dpo_t *receive_dpo_pool; + +static inline receive_dpo_t * +receive_dpo_get (index_t index) +{ + return (pool_elt_at_index(receive_dpo_pool, index)); +} + +#endif diff --git a/vnet/vnet/ethernet/arp.c b/vnet/vnet/ethernet/arp.c index 56df480ee6f..d08764a329f 100644 --- a/vnet/vnet/ethernet/arp.c +++ b/vnet/vnet/ethernet/arp.c @@ -21,6 +21,9 @@ #include <vnet/ethernet/arp_packet.h> #include <vnet/l2/l2_input.h> #include <vppinfra/mhash.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj.h> +#include <vnet/mpls/mpls.h> /** * @file @@ -36,24 +39,44 @@ void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); typedef struct { u32 sw_if_index; - u32 fib_index; ip4_address_t ip4_address; -} ethernet_arp_ip4_key_t; -typedef struct -{ - ethernet_arp_ip4_key_t key; u8 ethernet_address[6]; u16 flags; -#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0) -#define ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN (2 << 0) +#define ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC (1 << 0) +#define ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC (1 << 1) u64 cpu_time_last_updated; - - u32 *adjacencies; + adj_index_t adj_index[FIB_LINK_NUM]; } ethernet_arp_ip4_entry_t; +/** + * @brief administrative and operational state falgs on an interface + */ +typedef enum ethernet_arp_interface_flags_t_ +{ + ETHERNET_ARP_INTERFACE_UP = (0 << 1), + 
ETHERNET_ARP_INTERFACE_MPLS_ENABLE = (1 << 0), +} ethernet_arp_interface_flags_t; + +/** + * @brief Per-interface ARP configuration and state + */ +typedef struct ethernet_arp_interface_t_ +{ + /** + * Hash table of ARP entries. + * Since this hash table is per-interface, the key is only the IPv4 address. + */ + uword *arp_entries; + + /** + * Flags for administrative and operational state + */ + ethernet_arp_interface_flags_t flags; +} ethernet_arp_interface_t; + typedef struct { u32 lo_addr; @@ -87,18 +110,43 @@ typedef struct ethernet_arp_ip4_entry_t *ip4_entry_pool; - mhash_t ip4_entry_by_key; - /* ARP attack mitigation */ u32 arp_delete_rotor; u32 limit_arp_cache_size; + /** Per interface state */ + ethernet_arp_interface_t *ethernet_arp_by_sw_if_index; + /* Proxy arp vector */ ethernet_proxy_arp_t *proxy_arps; } ethernet_arp_main_t; static ethernet_arp_main_t ethernet_arp_main; + +typedef enum arp_ether_type_t_ +{ + ARP_ETHER_TYPE_IP4 = (1 << 0), + ARP_ETHER_TYPE_MPLS = (1 << 1), +} arp_ether_type_t; +#define ARP_ETHER_TYPE_BOTH (ARP_ETHER_TYPE_MPLS | ARP_ETHER_TYPE_IP4) + +typedef struct +{ + u32 sw_if_index; + ethernet_arp_ip4_over_ethernet_address_t a; + int is_static; + int flags; +#define ETHERNET_ARP_ARGS_REMOVE (1<<0) +#define ETHERNET_ARP_ARGS_FLUSH (1<<1) +#define ETHERNET_ARP_ARGS_POPULATE (1<<2) + arp_ether_type_t ether_type; +} vnet_arp_set_ip4_over_ethernet_rpc_args_t; + +static void +set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t + * a); + static u8 * format_ethernet_arp_hardware_type (u8 * s, va_list * va) { @@ -229,27 +277,23 @@ format_ethernet_arp_ip4_entry (u8 * s, va_list * va) vnet_main_t *vnm = va_arg (*va, vnet_main_t *); ethernet_arp_ip4_entry_t *e = va_arg (*va, ethernet_arp_ip4_entry_t *); vnet_sw_interface_t *si; - ip4_fib_t *fib; u8 *flags = 0; if (!e) - return format (s, "%=12s%=6s%=16s%=6s%=20s%=24s", "Time", "FIB", "IP4", + return format (s, "%=12s%=16s%=6s%=20s%=24s", "Time", "IP4", "Flags", 
"Ethernet", "Interface"); - fib = find_ip4_fib_by_table_index_or_id (&ip4_main, e->key.fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); - si = vnet_get_sw_interface (vnm, e->key.sw_if_index); - - if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN) - flags = format (flags, "G"); + si = vnet_get_sw_interface (vnm, e->sw_if_index); if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) flags = format (flags, "S"); - s = format (s, "%=12U%=6u%=16U%=6s%=20U%=24U", + if (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) + flags = format (flags, "D"); + + s = format (s, "%=12U%=16U%=6s%=20U%=24U", format_vlib_cpu_time, vnm->vlib_main, e->cpu_time_last_updated, - fib->table_id, - format_ip4_address, &e->key.ip4_address, + format_ip4_address, &e->ip4_address, flags ? (char *) flags : "", format_ethernet_address, e->ethernet_address, format_vnet_sw_interface_name, vnm, si); @@ -294,207 +338,126 @@ format_arp_term_input_trace (u8 * s, va_list * va) return s; } -clib_error_t * -ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, - u32 sw_if_index, u32 flags) +static void +arp_mk_complete (ethernet_arp_interface_t * eai, + ethernet_arp_ip4_entry_t * e, arp_ether_type_t et) { - ethernet_arp_main_t *am = ðernet_arp_main; - ethernet_arp_ip4_entry_t *e; - u32 i; - u32 *to_add_del = 0; + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = e->ip4_address, + }, + }; + u32 fib_index; - /* *INDENT-OFF* */ - pool_foreach (e, am->ip4_entry_pool, ({ - if (e->key.sw_if_index == sw_if_index) - vec_add1 (to_add_del, e - am->ip4_entry_pool); - })); - /* *INDENT-ON* */ + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - for (i = 0; i < vec_len (to_add_del); i++) + if (et & ARP_ETHER_TYPE_IP4) { - ethernet_arp_ip4_over_ethernet_address_t arp_add; - e = pool_elt_at_index (am->ip4_entry_pool, to_add_del[i]); - - clib_memcpy (&arp_add.ethernet, e->ethernet_address, 6); - arp_add.ip4.as_u32 = e->key.ip4_address.as_u32; - - if (flags & 
VNET_SW_INTERFACE_FLAG_ADMIN_UP) + if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_IP4]) { - vnet_arp_set_ip4_over_ethernet (vnm, - e->key.sw_if_index, - e->key.fib_index, &arp_add, - e->flags & - ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC); + e->adj_index[FIB_LINK_IP4] = + adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &pfx.fp_addr, + e->sw_if_index, + e->ethernet_address); + ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4]); + + fib_table_entry_update_one_path (fib_index, + &pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx.fp_addr, + e->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } - else if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) == 0) + else { - vnet_arp_unset_ip4_over_ethernet (vnm, - e->key.sw_if_index, - e->key.fib_index, &arp_add); + adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], + e->ethernet_address); + } + } + if ((et & ARP_ETHER_TYPE_MPLS) && + eai->flags & ETHERNET_ARP_INTERFACE_MPLS_ENABLE) + { + if (ADJ_INDEX_INVALID == e->adj_index[FIB_LINK_MPLS]) + { + e->adj_index[FIB_LINK_MPLS] = + adj_nbr_add_or_lock_w_rewrite (FIB_PROTOCOL_IP4, + FIB_LINK_MPLS, + &pfx.fp_addr, + e->sw_if_index, + e->ethernet_address); + ASSERT (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS]); + } + else + { + adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], + e->ethernet_address); } } - - vec_free (to_add_del); - return 0; -} - -VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down); - -static int -vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, - void *a_arg, int is_static); - -static int -vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, void *a_arg); - -typedef struct -{ - u32 sw_if_index; - u32 fib_index; - ethernet_arp_ip4_over_ethernet_address_t a; - int is_static; - int is_remove; /* set is_remove=1 to clear arp entry */ -} 
vnet_arp_set_ip4_over_ethernet_rpc_args_t; - -static void set_ip4_over_ethernet_rpc_callback - (vnet_arp_set_ip4_over_ethernet_rpc_args_t * a) -{ - vnet_main_t *vm = vnet_get_main (); - ASSERT (os_get_cpu_number () == 0); - - if (a->is_remove) - vnet_arp_unset_ip4_over_ethernet_internal (vm, - a->sw_if_index, - a->fib_index, &(a->a)); - else - vnet_arp_set_ip4_over_ethernet_internal (vm, - a->sw_if_index, - a->fib_index, - &(a->a), a->is_static); -} - -int -vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, void *a_arg, int is_static) -{ - ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; - vnet_arp_set_ip4_over_ethernet_rpc_args_t args; - - args.sw_if_index = sw_if_index; - args.fib_index = fib_index; - args.is_static = is_static; - args.is_remove = 0; - clib_memcpy (&args.a, a, sizeof (*a)); - - vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, - (u8 *) & args, sizeof (args)); - return 0; } int vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, - void *a_arg, int is_static) + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) { - ethernet_arp_ip4_key_t k; ethernet_arp_ip4_entry_t *e = 0; ethernet_arp_main_t *am = ðernet_arp_main; - ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + ethernet_arp_ip4_over_ethernet_address_t *a = &args->a; vlib_main_t *vm = vlib_get_main (); - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; int make_new_arp_cache_entry = 1; uword *p; - ip4_add_del_route_args_t args; - ip_adjacency_t adj, *existing_adj; pending_resolution_t *pr, *mc; + ethernet_arp_interface_t *arp_int; + fib_link_t link; + int is_static = args->is_static; + u32 sw_if_index = args->sw_if_index; - u32 next_index; - u32 adj_index; - - fib_index = (fib_index != (u32) ~ 0) - ? 
fib_index : im->fib_index_by_sw_if_index[sw_if_index]; + vec_validate (am->ethernet_arp_by_sw_if_index, sw_if_index); - k.sw_if_index = sw_if_index; - k.ip4_address = a->ip4; - k.fib_index = fib_index; + arp_int = &am->ethernet_arp_by_sw_if_index[sw_if_index]; - p = mhash_get (&am->ip4_entry_by_key, &k); - if (p) + if (NULL != arp_int->arp_entries) { - e = pool_elt_at_index (am->ip4_entry_pool, p[0]); + p = hash_get (arp_int->arp_entries, a->ip4.as_u32); + if (p) + { + e = pool_elt_at_index (am->ip4_entry_pool, p[0]); - /* Refuse to over-write static arp. */ - if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)) - return -2; - make_new_arp_cache_entry = 0; + /* Refuse to over-write static arp. */ + if (!is_static && (e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC)) + return -2; + make_new_arp_cache_entry = 0; + } } - /* Note: always install the route. It might have been deleted */ - memset (&adj, 0, sizeof (adj)); - adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj.n_adj = 1; /* otherwise signature compare fails */ + if (make_new_arp_cache_entry) + { + pool_get (am->ip4_entry_pool, e); - vnet_rewrite_for_sw_interface (vnm, VNET_L3_PACKET_TYPE_IP4, sw_if_index, ip4_rewrite_node.index, a->ethernet, /* destination address */ - &adj.rewrite_header, - sizeof (adj.rewrite_data)); + if (NULL == arp_int->arp_entries) + { + arp_int->arp_entries = hash_create (0, sizeof (u32)); + if (mpls_sw_interface_is_enabled (sw_if_index)) + arp_int->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE; + } - /* result of this lookup should be next-hop adjacency */ - adj_index = ip4_fib_lookup_with_table (im, fib_index, &a->ip4, 0); - existing_adj = ip_get_adjacency (lm, adj_index); + hash_set (arp_int->arp_entries, a->ip4.as_u32, e - am->ip4_entry_pool); - if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - existing_adj->arp.next_hop.ip4.as_u32 == a->ip4.as_u32) - { - u32 *ai; - u32 *adjs = vec_dup (e->adjacencies); - /* Update all adj assigned to this arp entry */ - 
vec_foreach (ai, adjs) + e->sw_if_index = sw_if_index; + e->ip4_address = a->ip4; + FOR_EACH_FIB_LINK (link) { - int i; - ip_adjacency_t *uadj = ip_get_adjacency (lm, *ai); - for (i = 0; i < uadj->n_adj; i++) - if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP && - uadj[i].arp.next_hop.ip4.as_u32 == a->ip4.as_u32) - ip_update_adjacency (lm, *ai + i, &adj); + e->adj_index[link] = ADJ_INDEX_INVALID; } - vec_free (adjs); - } - else - { - /* Check that new adjacency actually isn't exactly the same as - * what is already there. If we over-write the adjacency with - * exactly the same info, its technically a new adjacency with - * new counters, but to user it appears as counters reset. - */ - if (vnet_ip_adjacency_share_compare (&adj, existing_adj) == 0) - { - /* create new adj */ - args.table_index_or_table_id = fib_index; - args.flags = - IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | - IP4_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a->ip4; - args.dst_address_length = 32; - args.adj_index = ~0; - args.add_adj = &adj; - args.n_add_adj = 1; - ip4_add_del_route (im, &args); - } - } - - if (make_new_arp_cache_entry) - { - pool_get (am->ip4_entry_pool, e); - mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, - /* old value */ 0); - e->key = k; } /* Update time stamp and ethernet address. */ @@ -503,11 +466,16 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, e->cpu_time_last_updated = clib_cpu_time_now (); if (is_static) e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC; + else + e->flags |= ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC; + + arp_mk_complete (arp_int, e, ARP_ETHER_TYPE_BOTH); /* Customer(s) waiting for this address to be resolved? 
*/ p = hash_get (am->pending_resolutions_by_address, a->ip4.as_u32); if (p) { + u32 next_index; next_index = p[0]; while (next_index != (u32) ~ 0) @@ -526,6 +494,7 @@ vnet_arp_set_ip4_over_ethernet_internal (vnet_main_t * vnm, p = hash_get (am->mac_changes_by_address, a->ip4.as_u32); if (p) { + u32 next_index; next_index = p[0]; while (next_index != (u32) ~ 0) @@ -688,6 +657,7 @@ typedef enum _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \ _ (missing_interface_address, "ARP missing interface address") \ _ (gratuitous_arp, "ARP probe or announcement dropped") \ + _ (interface_no_table, "Interface is not mapped to an IP table") \ typedef enum { @@ -697,29 +667,6 @@ typedef enum ETHERNET_ARP_N_ERROR, } ethernet_arp_input_error_t; -/* get first interface address */ -ip4_address_t * -ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, - ip_interface_address_t ** result_ia) -{ - ip_lookup_main_t *lm = &im->lookup_main; - ip_interface_address_t *ia = 0; - ip4_address_t *result = 0; - - /* *INDENT-OFF* */ - foreach_ip_interface_address (lm, ia, sw_if_index, - 1 /* honor unnumbered */ , - ({ - ip4_address_t * a = - ip_interface_address_get_address (lm, ia); - result = a; break; - })); - /* *INDENT-ON* */ - - if (result_ia) - *result_ia = result ? 
ia : 0; - return result; -} static void unset_random_arp_entry (void) @@ -747,16 +694,14 @@ unset_random_arp_entry (void) e = pool_elt_at_index (am->ip4_entry_pool, index); clib_memcpy (&delme.ethernet, e->ethernet_address, 6); - delme.ip4.as_u32 = e->key.ip4_address.as_u32; + delme.ip4.as_u32 = e->ip4_address.as_u32; - vnet_arp_unset_ip4_over_ethernet (vnm, e->key.sw_if_index, - e->key.fib_index, &delme); + vnet_arp_unset_ip4_over_ethernet (vnm, e->sw_if_index, &delme); } static void arp_unnumbered (vlib_buffer_t * p0, - u32 pi0, - ethernet_header_t * eth0, ip_interface_address_t * ifa0) + u32 pi0, ethernet_header_t * eth0, u32 sw_if_index) { vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); @@ -777,7 +722,7 @@ arp_unnumbered (vlib_buffer_t * p0, clib_memcpy (dst_mac_address, eth0->dst_address, sizeof (dst_mac_address)); /* Figure out which sw_if_index supplied the address */ - unnum_src_sw_if_index = ifa0->sw_if_index; + unnum_src_sw_if_index = sw_if_index; /* Track down all users of the unnumbered source */ /* *INDENT-OFF* */ @@ -928,13 +873,14 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_hw_interface_t *hw_if0; ethernet_arp_header_t *arp0; ethernet_header_t *eth0; - ip_interface_address_t *ifa0; ip_adjacency_t *adj0; - ip4_address_t *if_addr0; - ip4_address_t proxy_src; - u32 pi0, error0, next0, sw_if_index0; - u8 is_request0, src_is_local0, dst_is_local0, is_unnum0; + ip4_address_t *if_addr0, proxy_src; + u32 pi0, error0, next0, sw_if_index0, conn_sw_if_index0, fib_index0; + u8 is_request0, dst_is_local0, is_unnum0; ethernet_proxy_arp_t *pa; + fib_node_index_t dst_fei, src_fei; + fib_prefix_t pfx0; + fib_entry_flag_t src_flags, dst_flags; pi0 = from[0]; to_next[0] = pi0; @@ -942,6 +888,7 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) to_next += 1; n_left_from -= 1; n_left_to_next -= 1; + pa = 0; p0 = vlib_get_buffer (vm, pi0); arp0 = vlib_buffer_get_current 
(p0); @@ -963,43 +910,56 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; if (error0) - goto drop1; + goto drop2; /* Check that IP address is local and matches incoming interface. */ - if_addr0 = - ip4_interface_address_matching_destination (im4, - &arp0-> - ip4_over_ethernet[1]. - ip4, sw_if_index0, - &ifa0); - if (!if_addr0) + fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0); + if (~0 == fib_index0) + { + error0 = ETHERNET_ARP_ERROR_interface_no_table; + goto drop2; + + } + dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), + &arp0->ip4_over_ethernet[1].ip4, + 32); + dst_flags = fib_entry_get_flags (dst_fei); + + conn_sw_if_index0 = fib_entry_get_resolving_interface (dst_fei); + + if (!(FIB_ENTRY_FLAG_CONNECTED & dst_flags)) { error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local; goto drop1; } /* Honor unnumbered interface, if any */ - is_unnum0 = sw_if_index0 != ifa0->sw_if_index; + is_unnum0 = sw_if_index0 != conn_sw_if_index0; /* Source must also be local to subnet of matching interface address. */ - if (!ip4_destination_matches_interface - (im4, &arp0->ip4_over_ethernet[0].ip4, ifa0)) + src_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0), + &arp0->ip4_over_ethernet[0].ip4, + 32); + src_flags = fib_entry_get_flags (src_fei); + + if (!((FIB_ENTRY_FLAG_ATTACHED & src_flags) || + (FIB_ENTRY_FLAG_CONNECTED & src_flags)) || + sw_if_index0 != fib_entry_get_resolving_interface (src_fei)) { error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local; - goto drop1; + goto drop2; } /* Reject requests/replies with our local interface address. 
*/ - src_is_local0 = - if_addr0->as_u32 == arp0->ip4_over_ethernet[0].ip4.as_u32; - if (src_is_local0) + if (FIB_ENTRY_FLAG_LOCAL & src_flags) { error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local; - goto drop1; + goto drop2; } - dst_is_local0 = - if_addr0->as_u32 == arp0->ip4_over_ethernet[1].ip4.as_u32; + dst_is_local0 = (FIB_ENTRY_FLAG_LOCAL & dst_flags); + fib_entry_get_prefix (dst_fei, &pfx0); + if_addr0 = &pfx0.fp_addr.ip4; /* Fill in ethernet header. */ eth0 = ethernet_buffer_get_header (p0); @@ -1023,7 +983,6 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) unset_random_arp_entry (); vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index0, - (u32) ~ 0 /* default fib */ , &arp0->ip4_over_ethernet[0], 0 /* is_static */ ); error0 = ETHERNET_ARP_ERROR_l3_src_address_learned; @@ -1064,21 +1023,25 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) clib_memcpy (eth0->src_address, hw_if0->hw_address, 6); /* Figure out how much to rewind current data from adjacency. 
*/ - if (ifa0) + /* get the adj from the destination's covering connected */ + if (NULL == pa) { - adj0 = ip_get_adjacency (&ip4_main.lookup_main, - ifa0->neighbor_probe_adj_index); - if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) + adj0 = + adj_get (fib_entry_get_adj_for_source + (ip4_fib_table_lookup + (ip4_fib_get (fib_index0), + &arp0->ip4_over_ethernet[1].ip4, 31), + FIB_SOURCE_INTERFACE)); + if (adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) { error0 = ETHERNET_ARP_ERROR_missing_interface_address; goto drop2; } if (is_unnum0) - arp_unnumbered (p0, pi0, eth0, ifa0); + arp_unnumbered (p0, pi0, eth0, conn_sw_if_index0); else vlib_buffer_advance (p0, -adj0->rewrite_header.data_bytes); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -1128,8 +1091,8 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) * $$$ is the answer ever anything other than * vlib_buffer_reset(..)? */ - ifa0 = 0; if_addr0 = &proxy_src; + is_unnum0 = 0; vlib_buffer_reset (p0); n_proxy_arp_replies_sent++; goto send_reply; @@ -1192,10 +1155,9 @@ ip4_arp_entry_sort (void *a1, void *a2) int cmp; vnet_main_t *vnm = vnet_get_main (); - cmp = vnet_sw_interface_compare - (vnm, e1->key.sw_if_index, e2->key.sw_if_index); + cmp = vnet_sw_interface_compare (vnm, e1->sw_if_index, e2->sw_if_index); if (!cmp) - cmp = ip4_address_compare (&e1->key.ip4_address, &e2->key.ip4_address); + cmp = ip4_address_compare (&e1->ip4_address, &e2->ip4_address); return cmp; } @@ -1228,7 +1190,7 @@ show_ip4_arp (vlib_main_t * vm, vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, 0); vec_foreach (e, es) { - if (sw_if_index != ~0 && e->key.sw_if_index != sw_if_index) + if (sw_if_index != ~0 && e->sw_if_index != sw_if_index) continue; vlib_cli_output (vm, "%U", format_ethernet_arp_ip4_entry, vnm, e); } @@ -1346,91 +1308,196 @@ ip4_set_arp_limit (u32 arp_limit) return 0; } +/** + * @brief Control Plane hook to remove an ARP entry + 
*/ +int +vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_REMOVE; + args.ether_type = ARP_ETHER_TYPE_IP4; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/** + * @brief Internally generated event to flush the ARP cache on an + * interface state change event. + * A flush will remove dynamic ARP entries, and for statics remove the MAC + * address from the corresponding adjacencies. + */ +static int +vnet_arp_flush_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, + arp_ether_type_t et, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_FLUSH; + args.ether_type = et; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/** + * @brief Internally generated event to populate the ARP cache on an + * interface state change event. + * For static entries this will re-source the adjacencies. + * + * @param sw_if_index The interface on which the ARP entires are acted + * @param et The ether type of those ARP entries. 
+ */ +static int +vnet_arp_populate_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, + arp_ether_type_t et, void *a_arg) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.flags = ETHERNET_ARP_ARGS_POPULATE; + args.ether_type = et; + clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +/* + * arp_add_del_interface_address + * + * callback when an interface address is added or deleted + */ static void -arp_ip4_entry_del_adj (ethernet_arp_ip4_entry_t * e, u32 adj_index) +arp_add_del_interface_address (ip4_main_t * im, + uword opaque, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_del) { - int done = 0; - int i; + /* + * Flush the ARP cache of all entries covered by the address + * that is being removed. + */ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; - while (!done) + if (vec_len (am->ethernet_arp_by_sw_if_index) < sw_if_index) + return; + + if (is_del) { - vec_foreach_index (i, e->adjacencies) - if (vec_elt (e->adjacencies, i) == adj_index) + ethernet_arp_interface_t *eai; + u32 i, *to_delete = 0; + hash_pair_t *pair; + + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + hash_foreach_pair (pair, eai->arp_entries, ( + { + e = + pool_elt_at_index + (am->ip4_entry_pool, + pair->value[0]); + if + (ip4_destination_matches_route + (im, &e->ip4_address, + address, address_length)) + { + vec_add1 (to_delete, + e - + am->ip4_entry_pool);} + } + )); + + for (i = 0; i < vec_len (to_delete); i++) { - vec_del1 (e->adjacencies, i); - continue; + ethernet_arp_ip4_over_ethernet_address_t delme; + e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); + + clib_memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->ip4_address.as_u32; + + 
vnet_arp_flush_ip4_over_ethernet (vnet_get_main (), + e->sw_if_index, + ARP_ETHER_TYPE_BOTH, &delme); } - done = 1; + + vec_free (to_delete); } } static void -arp_ip4_entry_add_adj (ethernet_arp_ip4_entry_t * e, u32 adj_index) +ethernet_arp_sw_interface_mpls_state_change (u32 sw_if_index, u32 is_enable) { - int i; - vec_foreach_index (i, e->adjacencies) - if (vec_elt (e->adjacencies, i) == adj_index) + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + u32 i, *to_update = 0; + hash_pair_t *pair; + + if (vec_len (am->ethernet_arp_by_sw_if_index) < sw_if_index) return; - vec_add1 (e->adjacencies, adj_index); -} -static void -arp_add_del_adj_cb (struct ip_lookup_main_t *lm, - u32 adj_index, ip_adjacency_t * adj, u32 is_del) -{ - ethernet_arp_main_t *am = ðernet_arp_main; - ip4_main_t *im = &ip4_main; - ethernet_arp_ip4_key_t k; - ethernet_arp_ip4_entry_t *e = 0; - uword *p; - u32 ai; + eai = &am->ethernet_arp_by_sw_if_index[sw_if_index]; + + if (is_enable) + eai->flags |= ETHERNET_ARP_INTERFACE_MPLS_ENABLE; + else + eai->flags &= ~ETHERNET_ARP_INTERFACE_MPLS_ENABLE; + + hash_foreach_pair (pair, eai->arp_entries, ( + { + vec_add1 (to_update, + pair->value[0]); + } + )); - for (ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj; ai++) + for (i = 0; i < vec_len (to_update); i++) { - adj = ip_get_adjacency (lm, ai); - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP - && adj->arp.next_hop.ip4.as_u32) + ethernet_arp_ip4_over_ethernet_address_t updateme; + e = pool_elt_at_index (am->ip4_entry_pool, to_update[i]); + + clib_memcpy (&updateme.ethernet, e->ethernet_address, 6); + updateme.ip4.as_u32 = e->ip4_address.as_u32; + + if (is_enable) { - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.ip4_address.as_u32 = adj->arp.next_hop.ip4.as_u32; - k.fib_index = - im->fib_index_by_sw_if_index[adj->rewrite_header.sw_if_index]; - p = mhash_get (&am->ip4_entry_by_key, &k); - if (p) - e = pool_elt_at_index 
(am->ip4_entry_pool, p[0]); + vnet_arp_populate_ip4_over_ethernet (vnet_get_main (), + e->sw_if_index, + ARP_ETHER_TYPE_MPLS, + &updateme); } else continue; - if (is_del) - { - if (!e) - clib_warning ("Adjacency contains unknown ARP next hop %U (del)", - format_ip46_address, &adj->arp.next_hop, - IP46_TYPE_IP4); - else - arp_ip4_entry_del_adj (e, adj->heap_handle); - } - else /* add */ - { - if (!e) - clib_warning ("Adjacency contains unknown ARP next hop %U (add)", - format_ip46_address, &adj->arp.next_hop, - IP46_TYPE_IP4); - else - arp_ip4_entry_add_adj (e, adj->heap_handle); - } } + vec_free (to_update); } static clib_error_t * ethernet_arp_init (vlib_main_t * vm) { ethernet_arp_main_t *am = ðernet_arp_main; - pg_node_t *pn; - clib_error_t *error; ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; + clib_error_t *error; + pg_node_t *pn; if ((error = vlib_call_init_function (vm, ethernet_init))) return error; @@ -1445,10 +1512,6 @@ ethernet_arp_init (vlib_main_t * vm) foreach_ethernet_arp_opcode; #undef _ - mhash_init (&am->ip4_entry_by_key, - /* value size */ sizeof (uword), - /* key size */ sizeof (ethernet_arp_ip4_key_t)); - /* $$$ configurable */ am->limit_arp_cache_size = 50000; @@ -1468,100 +1531,239 @@ ethernet_arp_init (vlib_main_t * vm) #undef _ } - ip_register_add_del_adjacency_callback (lm, arp_add_del_adj_cb); + ip4_add_del_interface_address_callback_t cb; + cb.function = arp_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im->add_del_interface_address_callbacks, cb); + + vec_add1 (mpls_main.mpls_interface_state_change_callbacks, + ethernet_arp_sw_interface_mpls_state_change); return 0; } VLIB_INIT_FUNCTION (ethernet_arp_init); -int -vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, u32 fib_index, void *a_arg) +static void +arp_mk_incomplete (ethernet_arp_interface_t * eai, + ethernet_arp_ip4_entry_t * e, arp_ether_type_t et) { - ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; - 
vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = e->ip4_address, + }, + }; + u32 fib_index; - args.sw_if_index = sw_if_index; - args.fib_index = fib_index; - args.is_remove = 1; - clib_memcpy (&args.a, a, sizeof (*a)); + fib_index = ip4_fib_table_get_index_for_sw_if_index (e->sw_if_index); - vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, - (u8 *) & args, sizeof (args)); - return 0; + if ((ARP_ETHER_TYPE_IP4 & et) && + (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_IP4])) + { + /* + * revert the adj this ARP entry sourced to incomplete + */ + adj_nbr_update_rewrite (e->adj_index[FIB_LINK_IP4], NULL); + + /* + * remove the FIB erntry the ARP entry sourced + */ + fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_ADJ); + + /* + * Unlock the adj now that the ARP entry is no longer a source + */ + adj_unlock (e->adj_index[FIB_LINK_IP4]); + e->adj_index[FIB_LINK_IP4] = ADJ_INDEX_INVALID; + } + if ((ARP_ETHER_TYPE_MPLS & et) && + (ADJ_INDEX_INVALID != e->adj_index[FIB_LINK_MPLS])) + { + /* + * revert the adj this ARP entry sourced to incomplete + */ + adj_nbr_update_rewrite (e->adj_index[FIB_LINK_MPLS], NULL); + + /* + * Unlock the adj now that the ARP entry is no longer a source + */ + adj_unlock (e->adj_index[FIB_LINK_MPLS]); + e->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; + } +} + +static void +arp_entry_free (ethernet_arp_interface_t * eai, ethernet_arp_ip4_entry_t * e) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + + hash_unset (eai->arp_entries, e->ip4_address.as_u32); + pool_put (am->ip4_entry_pool, e); +} + +static ethernet_arp_ip4_entry_t * +arp_entry_find (ethernet_arp_interface_t * eai, const ip4_address_t * addr) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e = NULL; + uword *p; + + if (NULL != eai->arp_entries) + { + p = hash_get (eai->arp_entries, addr->as_u32); + if (!p) + return (NULL); + + e = pool_elt_at_index 
(am->ip4_entry_pool, p[0]); + } + + return (e); } static inline int vnet_arp_unset_ip4_over_ethernet_internal (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, void *a_arg) + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) { - ethernet_arp_ip4_entry_t *e; ethernet_arp_main_t *am = ðernet_arp_main; - ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; - ethernet_arp_ip4_key_t k; - uword *p; - ip4_add_del_route_args_t args; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - u32 adj_index; - ip_adjacency_t *adj; - - k.sw_if_index = sw_if_index; - k.ip4_address = a->ip4; - k.fib_index = fib_index; - p = mhash_get (&am->ip4_entry_by_key, &k); - if (!p) - return -1; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; - memset (&args, 0, sizeof (args)); + eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; - /* - * Make sure that the route actually exists before we try to delete it, - * and make sure that it's a rewrite adjacency. - * - * If we point 1-N unnumbered interfaces at a loopback interface and - * shut down the loopback before shutting down 1-N unnumbered - * interfaces, the ARP cache will still have an entry, - * but the route will have disappeared. - * - * See also ip4_del_interface_routes (...) - * -> ip4_delete_matching_routes (...). - */ + e = arp_entry_find (eai, &args->a.ip4); + + if (NULL != e) + { + arp_mk_incomplete (eai, e, ARP_ETHER_TYPE_BOTH); + arp_entry_free (eai, e); + } + + return 0; +} + +static int +vnet_arp_flush_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + + eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; - adj_index = ip4_fib_lookup_with_table - (im, fib_index, &a->ip4, 1 /* disable default route */ ); + e = arp_entry_find (eai, &args->a.ip4); - /* Miss adj? Forget it... 
*/ - if (adj_index != lm->miss_adj_index) + if (NULL != e) { - adj = ip_get_adjacency (lm, adj_index); + arp_mk_incomplete (eai, e, args->ether_type); + /* - * Stupid control-plane trick: - * admin down an interface (removes arp routes from fib), - * bring the interface back up (does not reinstall them) - * then remove the arp cache entry (yuck). When that happens, - * the adj we find here will be the interface subnet ARP adj. + * The difference between flush and unset, is that an unset + * means delete for static and dynamic entries. A flush + * means delete only for dynamic. Flushing is what the DP + * does in response to interface events. unset is only done + * by the control plane. */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + if ((e->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_DYNAMIC) && + (args->ether_type & ARP_ETHER_TYPE_IP4)) { - args.table_index_or_table_id = fib_index; - args.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL - | IP4_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a->ip4; - args.dst_address_length = 32; - ip4_add_del_route (im, &args); - ip4_maybe_remap_adjacencies (im, fib_index, args.flags); + arp_entry_free (eai, e); } } + return (0); +} + +static int +vnet_arp_populate_ip4_over_ethernet_internal (vnet_main_t * vnm, + vnet_arp_set_ip4_over_ethernet_rpc_args_t + * args) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + ethernet_arp_interface_t *eai; + + eai = &am->ethernet_arp_by_sw_if_index[args->sw_if_index]; + + e = arp_entry_find (eai, &args->a.ip4); + + if (NULL != e) + { + arp_mk_complete (eai, e, args->ether_type); + } + return (0); +} + +static void +set_ip4_over_ethernet_rpc_callback (vnet_arp_set_ip4_over_ethernet_rpc_args_t + * a) +{ + vnet_main_t *vm = vnet_get_main (); + ASSERT (os_get_cpu_number () == 0); + + if (a->flags & ETHERNET_ARP_ARGS_REMOVE) + vnet_arp_unset_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_FLUSH) + 
vnet_arp_flush_ip4_over_ethernet_internal (vm, a); + else if (a->flags & ETHERNET_ARP_ARGS_POPULATE) + vnet_arp_populate_ip4_over_ethernet_internal (vm, a); + else + vnet_arp_set_ip4_over_ethernet_internal (vm, a); +} + +/** + * @brief Invoked when the interface's admin state changes + */ +static clib_error_t * +ethernet_arp_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, u32 flags) +{ + ethernet_arp_main_t *am = ðernet_arp_main; + ethernet_arp_ip4_entry_t *e; + u32 i, *to_delete = 0; + + /* *INDENT-OFF* */ + pool_foreach (e, am->ip4_entry_pool, + ({ + if (e->sw_if_index == sw_if_index) + { + vec_add1 (to_delete, e - am->ip4_entry_pool); + } + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_delete); i++) + { + ethernet_arp_ip4_over_ethernet_address_t delme; + e = pool_elt_at_index (am->ip4_entry_pool, to_delete[i]); + + clib_memcpy (&delme.ethernet, e->ethernet_address, 6); + delme.ip4.as_u32 = e->ip4_address.as_u32; + + if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + { + vnet_arp_populate_ip4_over_ethernet (vnm, e->sw_if_index, + ARP_ETHER_TYPE_BOTH, &delme); + } + else + { + vnet_arp_flush_ip4_over_ethernet (vnm, e->sw_if_index, + ARP_ETHER_TYPE_BOTH, &delme); + } + + } + vec_free (to_delete); + - e = pool_elt_at_index (am->ip4_entry_pool, p[0]); - mhash_unset (&am->ip4_entry_by_key, &e->key, 0); - pool_put (am->ip4_entry_pool, e); return 0; } +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ethernet_arp_sw_interface_up_down); + + static void increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) { @@ -1586,6 +1788,24 @@ increment_ip4_and_mac_address (ethernet_arp_ip4_over_ethernet_address_t * a) } int +vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, + u32 sw_if_index, void *a_arg, int is_static) +{ + ethernet_arp_ip4_over_ethernet_address_t *a = a_arg; + vnet_arp_set_ip4_over_ethernet_rpc_args_t args; + + args.sw_if_index = sw_if_index; + args.is_static = is_static; + args.flags = 0; + args.ether_type = ARP_ETHER_TYPE_IP4; 
+ clib_memcpy (&args.a, a, sizeof (*a)); + + vl_api_rpc_call_main_thread (set_ip4_over_ethernet_rpc_callback, + (u8 *) & args, sizeof (args)); + return 0; +} + +int vnet_proxy_arp_add_del (ip4_address_t * lo_addr, ip4_address_t * hi_addr, u32 fib_index, int is_del) { @@ -1660,57 +1880,6 @@ vnet_proxy_arp_fib_reset (u32 fib_id) return 0; } -u32 -vnet_arp_glean_add (u32 fib_index, void *next_hop_arg) -{ - ethernet_arp_main_t *am = ðernet_arp_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - ip4_address_t *next_hop = next_hop_arg; - ip_adjacency_t add_adj, *adj; - ip4_add_del_route_args_t args; - ethernet_arp_ip4_entry_t *e; - ethernet_arp_ip4_key_t k; - u32 adj_index; - - adj_index = ip4_fib_lookup_with_table (im, fib_index, next_hop, 0); - adj = ip_get_adjacency (lm, adj_index); - - if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP) - return ~0; - - if (adj->arp.next_hop.ip4.as_u32 != 0) - return adj_index; - - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.fib_index = fib_index; - k.ip4_address.as_u32 = next_hop->as_u32; - - if (mhash_get (&am->ip4_entry_by_key, &k)) - return adj_index; - - pool_get (am->ip4_entry_pool, e); - mhash_set (&am->ip4_entry_by_key, &k, e - am->ip4_entry_pool, - /* old value */ 0); - e->key = k; - e->cpu_time_last_updated = clib_cpu_time_now (); - e->flags = ETHERNET_ARP_IP4_ENTRY_FLAG_GLEAN; - - memset (&args, 0, sizeof (args)); - clib_memcpy (&add_adj, adj, sizeof (add_adj)); - ip46_address_set_ip4 (&add_adj.arp.next_hop, next_hop); /* install neighbor /32 route */ - args.table_index_or_table_id = fib_index; - args.flags = - IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_ADD | IP4_ROUTE_FLAG_NEIGHBOR; - args.dst_address.as_u32 = next_hop->as_u32; - args.dst_address_length = 32; - args.adj_index = ~0; - args.add_adj = &add_adj; - args.n_add_adj = 1; - ip4_add_del_route (im, &args); - return ip4_fib_lookup_with_table (im, fib_index, next_hop, 0); -} - static clib_error_t * ip_arp_add_del_command_fn 
(vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -1784,7 +1953,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, 1 /* type */ , 0 /* data */ ); vnet_arp_set_ip4_over_ethernet - (vnm, sw_if_index, fib_index, &addr, is_static); + (vnm, sw_if_index, &addr, is_static); vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); @@ -1793,8 +1962,7 @@ ip_arp_add_del_command_fn (vlib_main_t * vm, clib_warning ("event type %d unexpected", event_type); } else - vnet_arp_unset_ip4_over_ethernet - (vnm, sw_if_index, fib_index, &addr); + vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &addr); increment_ip4_and_mac_address (&addr); } diff --git a/vnet/vnet/ethernet/ethernet.h b/vnet/vnet/ethernet/ethernet.h index 8a1369c1093..3b2ef875290 100644 --- a/vnet/vnet/ethernet/ethernet.h +++ b/vnet/vnet/ethernet/ethernet.h @@ -398,13 +398,11 @@ void ethernet_set_rx_redirect (vnet_main_t * vnm, vnet_hw_interface_t * hi, int vnet_arp_set_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, - u32 fib_index, void *a_arg, int is_static); + u32 sw_if_index, void *a_arg, int is_static); int vnet_arp_unset_ip4_over_ethernet (vnet_main_t * vnm, - u32 sw_if_index, u32 fib_index, - void *a_arg); + u32 sw_if_index, void *a_arg); int vnet_proxy_arp_fib_reset (u32 fib_id); @@ -538,8 +536,6 @@ int vnet_add_del_ip4_arp_change_event (vnet_main_t * vnm, uword type_opaque, uword data, int is_add); -u32 vnet_arp_glean_add (u32 fib_index, void *next_hop_arg); - extern vlib_node_registration_t ethernet_input_node; #endif /* included_ethernet_h */ diff --git a/vnet/vnet/ethernet/interface.c b/vnet/vnet/ethernet/interface.c index 0b19b51d3be..f2e2ca0d7d8 100644 --- a/vnet/vnet/ethernet/interface.c +++ b/vnet/vnet/ethernet/interface.c @@ -42,6 +42,20 @@ #include <vnet/pg/pg.h> #include <vnet/ethernet/ethernet.h> #include <vnet/l2/l2_input.h> +#include <vnet/srp/srp.h> +#include <vnet/lisp-gpe/lisp_gpe.h> +#include 
<vnet/devices/af_packet/af_packet.h> + +int +vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index) +{ + // FIXME - use flags on the HW itf + vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index); + return (!(hw->hw_class_index == ethernet_hw_interface_class.index || + hw->hw_class_index == af_packet_device_class.index || + hw->hw_class_index == lisp_gpe_hw_class.index || + hw->hw_class_index == srp_hw_interface_class.index)); +} /** * @file diff --git a/vnet/vnet/fib/fib.c b/vnet/vnet/fib/fib.c new file mode 100644 index 00000000000..413f93e893c --- /dev/null +++ b/vnet/vnet/fib/fib.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_path.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_path_list.h> + +static clib_error_t * +fib_module_init (vlib_main_t * vm) +{ + clib_error_t * error; + + if ((error = vlib_call_init_function (vm, dpo_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, adj_module_init))) + return (error); + + fib_entry_module_init(); + fib_entry_src_module_init(); + fib_path_module_init(); + fib_path_list_module_init(); + fib_walk_module_init(); + + return (NULL); +} + +VLIB_INIT_FUNCTION (fib_module_init); diff --git a/vnet/vnet/fib/fib.h b/vnet/vnet/fib/fib.h new file mode 100644 index 00000000000..7cf1d136935 --- /dev/null +++ b/vnet/vnet/fib/fib.h @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * \brief + * A IP v4/6 independent FIB. + * + * The main functions provided by the FIB are as follows; + * + * - source priorities + * + * A route can be added to the FIB by more than entity or source. Sources + * include, but are not limited to, API, CLI, LISP, MAP, etc (for the full list + * see fib_entry.h). Each source provides the forwarding information (FI) that + * is has determined as required for that route. 
Since each source determines the + * FI using different best path and loop prevention algorithms, it is not + * correct for the FI of multiple sources to be combined. Instead the FIB must + * choose to use the FI from only one source. This choose is based on a static + * priority assignment. For example; + * IF a prefix is added as a result of interface configuration: + * set interface address 192.168.1.1/24 GigE0 + * and then it is also added from the CLI + * ip route 192.168.1.1/32 via 2.2.2.2/32 + * then the 'interface' source will prevail, and the route will remain as + * 'local'. + * The requirement of the FIB is to always install the FI from the winning + * source and thus to maintain the FI added by losing sources so it can be + * installed should the winning source be withdrawn. + * + * - adj-fib maintenance + * + * When ARP or ND discover a neighbour on a link an adjacency forms for the + * address of that neighbour. It is also required to insert a route in the + * appropriate FIB table, corresponding to the VRF for the link, an entry for + * that neighbour. This entry is often referred to as an adj-fib. Adj-fibs + * have a dedicated source; 'ADJ'. + * The priority of the ADJ source is lower than most. This is so the following + * config; + * set interface address 192.168.1.1/32 GigE0 + * ip arp 192.168.1.2 GigE0 dead.dead.dead + * ip route add 192.168.1.2 via 10.10.10.10 GigE1 + * will forward traffic for 192.168.1.2 via GigE1. That is the route added + * by the control plane is favoured over the adjacency discovered by ARP. + * The control plane, with its associated authentication, is considered the + * authoritative source. + * To counter the nefarious addition of adj-fib, through the nefarious injection + * of adjacencies, the FIB is also required to ensure that only adj-fibs whose + * less specific covering prefix is connected are installed in forwarding. 
This + * requires the use of 'cover tracking', where a route maintains a dependency + * relationship with the route that is its less specific cover. When this cover + * changes (i.e. there is a new covering route) or the forwarding information + * of the cover changes, then the covered route is notified. + * + * Overlapping sub-nets are not supported, so no adj-fib has multiple paths. + * The control plane is expected to remove a prefix configured for an interface + * before the interface changes VRF. + * So while the following config is accepted: + * set interface address 192.168.1.1/32 GigE0 + * ip arp 192.168.1.2 GigE0 dead.dead.dead + * set interface ip table GigE0 2 + * it does not result in the desired behaviour. + * + * - attached export. + * + * Further to adj-fib maintenance above consider the following config: + * set interface address 192.168.1.1/24 GigE0 + * ip route add table 2 192.168.1.0/24 GigE0 + * Traffic destined for 192.168.1.2 in table 2 will generate an ARP request + * on GigE0. However, since GigE0 is in table 0, all adj-fibs will be added in + * FIB 0. Hence all hosts in the sub-net are unreachable from table 2. To resolve + * this, all adj-fib and local prefixes are exported (i.e. copied) from the + * 'export' table 0, to the 'import' table 2. There can be many import tables + * for a single export table. + * + * - recursive route resolution + * + * A recursive route is of the form: + * 1.1.1.1/32 via 10.10.10.10 + * i.e. a route for which no egress interface is provided. In order to forward + * traffic to 1.1.1.1/32 the FIB must therefore first determine how to forward + * traffic to 10.10.10.10/32. This is recursive resolution. + * Recursive resolution, just like normal resolution, proceeds via a longest + * prefix match for the 'via-address' 10.10.10.10. Note it is only possible + * to add routes via an address (i.e. a /32 or /128) not via a shorter mask + * prefix. There is no use case for the latter. 
+ * Since recursive resolution proceeds via a longest prefix match, the entry + * in the FIB that will resolve the recursive route, termed the via-entry, may + * change as other routes are added to the FIB. Consider the recursive + * route shown above, and this non-recursive route: + * 10.10.10.0/24 via 192.168.16.1 GigE0 + * The entry for 10.10.10.0/24 is thus the resolving via-entry. If this entry is + * modified, to say; + * 10.10.10.0/24 via 192.16.1.3 GigE0 + * Then packet for 1.1.1.1/32 must also be sent to the new next-hop. + * Now consider the addition of; + * 10.10.10.0/28 via 192.168.16.2 GigE0 + * The more specific /28 is a better longest prefix match and thus becomes the + * via-entry. Removal of the /28 means the resolution will revert to the /24. + * The tracking to the changes in recursive resolution is the requirement of + * the FIB. When the forwarding information of the via-entry changes a back-walk + * is used to update dependent recursive routes. When new routes are added to + * the table the cover tracking feature provides the necessary notifications to + * the via-entry routes. + * The adjacency constructed for 1.1.1.1/32 will be a recursive adjacency + * whose next adjacency will be contributed from the via-entry. Maintaining + * the validity of this recursive adjacency is a requirement of the FIB. + * + * - recursive loop avoidance + * + * Consider this set of routes: + * 1.1.1.1/32 via 2.2.2.2 + * 2.2.2.2/32 via 3.3.3.3 + * 3.3.3.3/32 via 1.1.1.1 + * this is termed a recursion loop - all of the routes in the loop are + * unresolved in so far as they do not have a resolving adjacency, but each + * is resolved because the via-entry is known. It is important here to note + * the distinction between the control-plane objects and the data-plane objects + * (more details in the implementation section). The control plane objects must + * allow the loop to form (i.e. 
the graph becomes cyclic), however, the + * data-plane absolutely must not allow the loop to form, otherwise the packet + * would loop indefinitely and never egress the device - meltdown would follow. + * The control plane must allow the loop to form, because when the loop breaks, + * all members of the loop need to be updated. Forming the loop allows the + * dependencies to be correctly setup to allow this to happen. + * There is no limit to the depth of recursion supported by VPP so: + * 9.9.9.100/32 via 9.9.9.99 + * 9.9.9.99/32 via 9.9.9.98 + * 9.9.9.98/32 via 9.9.9.97 + * ... turtles, turtles, turtles ... + * 9.9.9.1/32 via 10.10.10.10 Gig0 + * is supported to as many layers of turtles as is desired, however, when + * back-walking a graph (in this case from 9.9.9.1/32 up toward 9.9.9.100/32) + * a FIB needs to differentiate the case where the recursion is deep versus + * the case where the recursion is looped. A simple method, employed by VPP FIB, + * is to limit the number of steps. VPP FIB limit is 16. Typical BGP scenarios + * in the wild do not exceed 3 (BGP Inter-AS option C). + * + * - Fast Convergence + * + * After a network topology change, the 'convergence' time, is the time taken + * for the router to complete a transition to forward traffic using the new + * topology. The convergence time is therefore a summation of the time to; + * - detect the failure. + * - calculate the new 'best path' information + * - download the new best paths to the data-plane. + * - install those best paths in data-plane forwarding. + * The last two points are of relevance to VPP architecture. The download API is + * binary and batch, details are not discussed here. There is no HW component to + * programme, installation time is bounded by the memory allocation and table + * lookup and insert access times. 
+ * + * 'Fast' convergence refers to a set of technologies that a FIB can employ to + * completely or partially restore forwarding whilst the convergence actions + * listed above are ongoing. Fast convergence technologies are further + * sub-divided into Prefix Independent Convergence (PIC) and Loop Free + * Alternate path Fast re-route (LFA-FRR or sometimes called IP-FRR) which + * affect recursive and non-recursive routes respectively. + * + * LFA-FRR + * + * Consider the network topology below: + * + * C + * / \ + * X -- A --- B - Y + * | | + * D F + * \ / + * E + * + * all links are equal cost, traffic is passing from X to Y. the best path is + * X-A-B-Y. There are two alternative paths, one via C and one via E. An + * alternate path is considered to be loop free if no other router on that path + * would forward the traffic back to the sender. Consider router C, its best + * path to Y is via B, so if A were to send traffic destined to Y to C, then C + * would forward that traffic to B - this is a loop-free alternate path. In + * contrast consider router D. D's shortest path to Y is via A, so if A were to + * send traffic destined to Y via D, then D would send it back to A; this is + * not a loop-free alternate path. There are several points of note; + * - we are considering the pre-failure routing topology + * - any equal-cost multi-path between A and B is also a LFA path. + * - in order for A to calculate LFA paths it must be aware of the best-path + * to Y from the perspective of D. These calculations are thus limited to + * routing protocols that have a full view of the network topology, i.e. + * link-state DB protocols like OSPF or an SDN controller. LFA protected + * prefixes are thus non-recursive. + * + * LFA is specified as a 1 to 1 redundancy; a primary path has only one LFA + * (a.k.a. backup) path. To my knowledge this limitation is one of complexity + * in the calculation of and capacity planning using a 1-n redundancy. 
+ * + * In the event that the link A-B fails, the alternate path via C can be used. + * In order to provide 'fast' failover in the event of a failure, the control + * plane will download both the primary and the backup path to the FIB. It is + * then a requirement of the FIB to perform the failover (a.k.a cutover) from + * the primary to the backup path as quickly as possible, and particularly + * without any other control-plane intervention. The expectation is cutover is + * less than 50 milli-seconds - a value allegedly from the VOIP QoS. Note that + * cutover time still includes the fault detection time, which in a virtualised + * environment could be the dominant factor. Failure detection can be either a + * link down, which will affect multiple paths on a multi-access interface, or + * via a specific path heartbeat (i.e. BFD). + * At this time VPP does not support LFA, that is it does not support the + * installation of a primary and backup path[s] for a route. However, it does + * support ECMP, and VPP FIB is designed to quickly remove failed paths from + * the ECMP set, however, it does not insert shared objects specific to the + * protected resource into the forwarding object graph, since this would incur + * a forwarding/performance cost. Failover time is thus route number dependent. + * Details are provided in the implementation section below. + * + * PIC + * + * PIC refers to the concept that the convergence time should be independent of + * the number of prefixes/routes that are affected by the failure. PIC is + * therefore most appropriate when considering networks with large numbers of + * prefixes, i.e. BGP networks and thus recursive prefixes. There are several + * flavours of PIC covering different locations of protection and failure + * scenarios. 
An outline is given below, see the literature for more details: + * + * Y/16 - CE1 -- PE1---\ + * | \ P1---\ + * | \ PE3 -- CE3 - X/16 + * | - P2---/ + * Y/16 - CE2 -- PE2---/ + * + * CE = customer edge, PE = provider edge. external-BGP runs between customer + * and provider, internal-BGP runs between provider and provider. + * + * 1) iBGP PIC-core: consider traffic from CE1 to X/16 via CE3. On PE1 there is + * are routes; + * X/16 (and hundreds of thousands of others like it) + * via PE3 + * and + * PE3/32 (its loopback address) + * via 10.0.0.1 Link0 (this is P1) + * via 10.1.1.1 Link1 (this is P2) + * the failure is the loss of link0 or link1 + * As in all PIC scenarios, in order to provide prefix independent convergence + * it must be that the route for X/16 (and all other routes via PE3) do not + * need to be updated in the FIB. The FIB therefore needs to update a single + * object that is shared by all routes - once this shared object is updated, + * then all routes using it will be instantly updated to use the new forwarding + * information. In this case the shared object is the resolving route via PE3. + * Once the route via PE3 is updated via IGP (OSPF) convergence, then all + * recursive routes that resolve through it are also updated. VPP FIB + * implements this scenario via a recursive-adjacency. the X/16 and it sibling + * routes share a recursive-adjacency that links to/points at/stacks on the + * normal adjacency contributed by the route for PE3. Once this shared + * recursive adj is re-linked then all routes are switched to using the new + * forwarding information. This is shown below; + * + * pre-failure; + * X/16 --> R-ADJ-1 --> ADJ-1-PE3 (multi-path via P1 and P2) + * + * post-failure: + * X/16 --> R-ADJ-1 --> ADJ-2-PE3 (single path via P1) + * + * note that R-ADJ-1 (the recursive adj) remains in the forwarding graph, + * therefore X/16 (and all its siblings) is not updated. 
+ * X/16 and its siblings share the recursive adj since they share the same + * path-list. It is the path-list object that contributes the recursive-adj + * (see next section for more details) + * + * + * 2) iBGP PIC-edge; Traffic from CE3 to Y/16. On PE3 there is are routes; + * Y/16 (and hundreds of thousands of others like it) + * via PE1 + * via PE2 + * and + * PE1/32 (PE1's loopback address) + * via 10.0.2.2 Link0 (this is P1) + * PE2/32 (PE2's loopback address) + * via 10.0.3.3 Link1 (this is P2) + * + * the failure is the loss of reachability to PE2. this could be either the + * loss of the link P2-PE2 or the loss of the node PE2. This is detected either + * by the withdrawal of the PE2's loopback route or by some form of failure + * detection (i.e. BFD). + * VPP FIB again provides PIC via the use of the shared recursive-adj. Y/16 and + * its siblings will again share a path-list for the list {PE1,PE2}, this + * path-list will contribute a multi-path-recursive-adj, i.e. a multi-path-adj + * with each choice therein being another adj; + * + * Y/16 -> RM-ADJ --> ADJ1 (for PE1) + * --> ADJ2 (for PE2) + * + * when the route for PE1 is withdrawn then the multi-path-recursive-adjacency + * is updated to be; + * + * Y/16 --> RM-ADJ --> ADJ1 (for PE1) + * --> ADJ1 (for PE1) + * + * that is both choices in the ECMP set are the same and thus all traffic is + * forwarded to PE1. Eventually the control plane will download a route update + * for Y/16 to be via PE1 only. At that time the situation will be: + * + * Y/16 -> R-ADJ --> ADJ1 (for PE1) + * + * In the scenario above we assumed that PE1 and PE2 are ECMP for Y/16. eBGP + * PIC core is also specified for the case were one PE is primary and the other + * backup - VPP FIB does not support that case at this time. + * + * 3) eBGP PIC Edge; Traffic from CE3 to Y/16. 
On PE1 there is are routes; + * Y/16 (and hundreds of thousands of others like it) + * via CE1 (primary) + * via PE2 (backup) + * and + * CE1 (this is an adj-fib) + * via 11.0.0.1 Link0 (this is CE1) << this is an adj-fib + * PE2 (PE2's loopback address) + * via 10.0.5.5 Link1 (this is link PE1-PE2) + * the failure is the loss of link0 to CE1. The failure can be detected by FIB + * either as a link down event or by the control plane withdrawing the connected + * prefix on the link0 (say 10.0.5.4/30). The latter works because the resolving + * entry is an adj-fib, so removing the connected will withdraw the adj-fib, and + * hence the recursive path becomes unresolved. The former is faster, + * particularly in the case of Inter-AS option A where there are many VLAN + * sub-interfaces on the PE-CE link, one for each VRF, and so the control plane + * must remove the connected prefix for each sub-interface to trigger PIC in + * each VRF. Note though that total PIC cutover time will depend on VRF scale + * with either trigger. + * Primary and backup paths in this eBGP PIC-edge scenario are calculated by + * BGP. Each peer is configured to always advertise its best external path to + * its iBGP peers. Backup paths therefore send traffic from the PE back into the + * core to an alternate PE. A PE may have multiple external paths, i.e. multiple + * directly connected CEs, it may also have multiple backup PEs, however there + * is no correlation between the two, so unlike LFA-FRR, the redundancy model is + * N-M; N primary paths are backed-up by M backup paths - only when all primary + * paths fail, then the cutover is performed onto the M backup paths. Note that + * PE2 must be suitably configured to forward traffic on its external path that + * was received from PE1. VPP FIB does not support external-internal-BGP (eiBGP) + * load-balancing. 
+ * + * As with LFA-FRR the use of primary and backup paths is not currently + * supported, however, the use of a recursive-multi-path-adj, and a suitably + * constrained hashing algorithm to choose from the primary or backup path sets, + * would again provide the necessary shared object and hence the prefix scale + * independent cutover. + * + * Astute readers will recognise that both of the eBGP PIC scenarios refer only + * to a BGP free core. + * + * Fast convergence implementation options come in two flavours: + * 1) Insert switches into the data-path. The switch represents the protected + * resource. If the switch is 'on' the primary path is taken, otherwise + * the backup path is taken. Testing the switch in the data-path comes with + * an associated performance cost. A given packet may encounter more than + * one protected resource as it is forwarded. This approach minimises + * cutover times as packets will be forwarded on the backup path as soon + * as the protected resource is detected to be down and the single switch + * is tripped. However, it comes at a performance cost, which increases + * with each shared resource a packet encounters in the data-path. + * This approach is thus best suited to LFA-FRR where the protected routes + * are non-recursive (i.e. encounter few shared resources) and the + * expectation on cutover times is more stringent (<50msecs). + * 2) Update shared objects. Identify objects in the data-path, that are + * required to be present whether or not fast convergence is required (i.e. + * adjacencies) that can be shared by multiple routes. Create a dependency + * between these objects at the protected resource. When the protected + * resource fails, each of the shared objects is updated in a way that all + * users of it see a consistent change. This approach incurs no performance + * penalty as the data-path structure is unchanged, however, the cutover + * times are longer as more work is required when the resource fails. 
This + * scheme is thus more appropriate to recursive prefixes (where the packet + * will encounter multiple protected resources) and to fast-convergence + * technologies where the cutover times are less stringent (i.e. PIC). + * + * Implementation: + * --------------- + * + * Due to the requirements outlined above, not all routes known to FIB + * (e.g. adj-fibs) are installed in forwarding. However, should circumstances + * change, those routes will need to be added. This adds the requirement that + * a FIB maintains two tables per-VRF, per-AF (where a 'table' is indexed by + * prefix); the forwarding and non-forwarding tables. + * + * For DP speed in VPP we want the lookup in the forwarding table to directly + * result in the ADJ. So the two tables; one contains all the routes (a + * lookup therein yields a fib_entry_t), the other contains only the forwarding + * routes (a lookup therein yields an ip_adjacency_t). The latter is used by the + * DP. + * This trades memory for forwarding performance. A good trade-off in VPP's + * expected operating environments. + * + * Note these tables are keyed only by the prefix (and since there 2 two + * per-VRF, implicitly by the VRF too). The key for an adjacency is the + * tuple:{next-hop, address (and it's AF), interface, link/ether-type}. + * consider this curious, but allowed, config; + * + * set int ip addr 10.0.0.1/24 Gig0 + * set ip arp Gig0 10.0.0.2 dead.dead.dead + * # a host in that sub-net is routed via a better next hop (say it avoids a + * # big L2 domain) + * ip route add 10.0.0.2 Gig1 192.168.1.1 + * # this recursive should go via Gig1 + * ip route add 1.1.1.1/32 via 10.0.0.2 + * # this non-recursive should go via Gig0 + * ip route add 2.2.2.2/32 via Gig0 10.0.0.2 + * + * for the last route, the lookup for the path (via {Gig0, 10.0.0.2}) in the + * prefix table would not yield the correct result. To fix this we need a + * separate table for the adjacencies. 
+ * + * - FIB data structures; + * + * fib_entry_t: + * - a representation of a route. + * - has a prefix. + * - it maintains an array of path-lists that have been contributed by the + * different sources + * - install an adjacency in the forwarding table contributed by the best + * source's path-list. + * + * fib_path_list_t: + * - a list of paths + * - path-lists may be shared between FIB entries. The path-lists are thus + * kept in a DB. The key is the combined description of the paths. We share + * path-lists when it will aid convergence to do so. Adding path-lists to + * this DB that are never shared, or are not shared by prefixes that are + * not subject to PIC, will increase the size of the DB unnecessarily and + * may lead to increased search times due to hash collisions. + * - the path-list contributes the appropriate adj for the entry in the + * forwarding table. The adj can be 'normal', multi-path or recursive, + * depending on the number of paths and their types. + * - since path-lists are shared there is only one instance of the multi-path + * adj that they [may] create. As such multi-path adjacencies do not need a + * separate DB. + * The path-list with recursive paths and the recursive adjacency that it + * contributes forms the backbone of the fast convergence architecture (as + * described previously). + * + * fib_path_t: + * - a description of how to forward the traffic (i.e. via {Gig1, K}). + * - the path describes the intent on how to forward. This differs from how + * the path resolves. I.e. it might not be resolved at all (since the + * interface is deleted or down). + * - paths have different types, most notably recursive or non-recursive. + * - a fib_path_t will contribute the appropriate adjacency object. It is from + * these contributions that the DP graph/chain for the route is built. + * - if the path is recursive and a recursion loop is detected, then the path + * will contribute the special DROP adjacency. 
This way, whilst the control + * plane graph is looped, the data-plane graph does not. + * + * we build a graph of these objects; + * + * fib_entry_t -> fib_path_list_t -> fib_path_t -> ... + * + * for recursive paths: + * + * fib_path_t -> fib_entry_t -> .... + * + * for non-recursive paths + * + * fib_path_t -> ip_adjacency_t -> interface + * + * These objects, which constitute the 'control plane' part of the FIB are used + * to represent the resolution of a route. As a whole this is referred to as the + * control plane graph. There is a separate DP graph to represent the forwarding + * of a packet. In the DP graph each object represents an action that is applied + * to a packet as it traverses the graph. For example, a lookup of a IP address + * in the forwarding table could result in the following graph: + * + * recursive-adj --> multi-path-adj --> interface_A + * --> interface_B + * + * A packet traversing this FIB DP graph would thus also traverse a VPP node + * graph of: + * + * ipX_recursive --> ipX_rewrite --> interface_A_tx --> etc + * + * The taxonomy of objects in a FIB graph is as follows, consider; + * + * A --> + * B --> D + * C --> + * + * Where A,B and C are (for example) routes that resolve through D. + * parent; D is the parent of A, B, and C. + * children: A, B, and C are children of D. + * sibling: A, B and C are siblings of one another. + * + * All shared objects in the FIB are reference counted. Users of these objects + * are thus expected to use the add_lock/unlock semantics (as one would + * normally use malloc/free). + * + * WALKS + * + * It is necessary to walk/traverse the graph forwards (entry to interface) to + * perform a collapse or build a recursive adj and backwards (interface + * to entry) to perform updates, i.e. when interface state changes or when + * recursive route resolution updates occur. + * A forward walk follows simply by navigating an object's parent pointer to + * access its parent object. 
For objects with multiple parents (e.g. a + * path-list), each parent is walked in turn. + * To support back-walks direct dependencies are maintained between objects, + * i.e. in the relationship, {A, B, C} --> D, then object D will maintain a list + * of 'pointers' to its children {A, B, C}. Bare C-language pointers are not + * allowed, so a pointer is described in terms of an object type (i.e. entry, + * path-list, etc) and index - this allows the object to be retrieved from the + * appropriate pool. A list is maintained to achieve fast convergence at scale. + * When there are millions or recursive prefixes, it is very inefficient to + * blindly walk the tables looking for entries that were affected by a given + * topology change. The lowest hanging fruit when optimising is to remove + * actions that are not required, so all back-walks only traverse objects that + * are directly affected by the change. + * + * PIC Core and fast-reroute rely on FIB reacting quickly to an interface + * state change to update the multi-path-adjacencies that use this interface. + * An example graph is shown below: + * + * E_a --> + * E_b --> PL_2 --> P_a --> Interface_A + * ... --> P_c -\ + * E_k --> \ + * Interface_K + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ... --> P_f --> Interface_F + * E_z --> + * + * E = fib_entry_t + * PL = fib_path_list_t + * P = fib_path_t + * The subscripts are arbitrary and serve only to distinguish object instances. + * This CP graph result in the following DP graph: + * + * M-ADJ-2 --> Interface_A + * \ + * -> Interface_K + * / + * M-ADJ-1 --> Interface_F + * + * M-ADJ = multi-path-adjacency. + * + * When interface K goes down a back-walk is started over its dependants in the + * control plane graph. This back-walk will reach PL_1 and PL_2 and result in + * the calculation of new adjacencies that have interface K removed. 
The walk + * will continue to the entry objects and thus the forwarding table is updated + * for each prefix with the new adjacency. The DP graph then becomes: + * + * ADJ-3 --> Interface_A + * + * ADJ-4 --> Interface_F + * + * The eBGP PIC scenarios described above relied on the update of a path-list's + * recursive-adjacency to provide the shared point of cutover. This is shown + * below + * + * E_a --> + * E_b --> PL_2 --> P_a --> E_44 --> PL_a --> P_b --> Interface_A + * ... --> P_c -\ + * E_k --> \ + * \ + * E_1 --> PL_k -> P_k --> Interface_K + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ... --> P_f --> E_55 --> PL_e --> P_e --> Interface_E + * E_z --> + * + * The failure scenario is the removal of entry E_1 and thus the paths P_c and + * P_d become unresolved. To achieve PIC the two shared recursive path-lists, + * PL_1 and PL_2 must be updated to remove E_1 from the recursive-multi-path- + * adjacencies that they contribute, before any entry E_a to E_z is updated. + * This means that as the update propagates backwards (right to left) in the + * graph it must do so breadth first not depth first. Note this approach leads + * to convergence times that are dependent on the number of path-list and so + * the number of combinations of egress PEs - this is desirable as this + * scale is considerably lower than the number of prefixes. + * + * If we consider another section of the graph that is similar to the one + * shown above where there is another prefix E_2 in a similar position to E_1 + * and so also has many dependent children. It is reasonable to expect that a + * particular network failure may simultaneously render E_1 and E_2 unreachable. + * This means that the update to withdraw E_2 is download immediately after the + * update to withdraw E_1. It is a requirement on the FIB to not spend large + * amounts of time in a back-walk whilst processing the update for E_1, i.e. the + * back-walk must not reach as far as E_a and its siblings. 
Therefore, after the + * back-walk has traversed one generation (breadth first) to update all the + * path-lists it should be suspended/back-ground and further updates allowed + * to be handled. Once the update queue is empty, the suspended walks can be + * resumed. Note that in the case that multiple updates affect the same entry + * (say E_1) then this will trigger multiple similar walks, these are merged, + * so each child is updated only once. + * In the presence of more layers of recursion PIC is still a desirable + * feature. Consider an extension to the diagram above, where more recursive + * routes (E_100 -> E_200) are added as children of E_a: + * + * E_100 --> + * E_101 --> PL_3 --> P_j-\ + * ... \ + * E_199 --> E_a --> + * E_b --> PL_2 --> P_a --> E_44 --> ...etc.. + * ... --> P_c -\ + * E_k \ + * E_1 --> ...etc.. + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ... --> P_e --> E_55 --> ...etc.. + * E_z --> + * + * To achieve PIC for the routes E_100->E_199, PL_3 needs to be updated before + * E_b -> E_z, a breadth first traversal at each level would not achieve this. + * Instead the walk must proceed intelligently. Children on PL_2 are sorted so + * those Entry objects that themselves have children appear first in the list, + * those without later. When an entry object is walked that has children, a + * walk of its children is pushed to the front background queue. The back + * ground queue is a priority queue. As the breadth first traversal proceeds + * across the dependent entry object E_a to E_k, when the first entry that does + * not have children is reached (E_b), the walk is suspended and placed at the + * back of the queue. Following this prioritisation method shared path-list + * updates are performed before all non-resolving entry objects. + * The CPU/core/thread that handles the updates is the same thread that handles + * the back-walks. 
Handling updates has a higher priority than making walk + * progress, so a walk is required to be interruptable/suspendable when new + * updates are available. + * !!! TODO - this section describes how walks should be not how they are !!! + * + * In the diagram above E_100 is an IP route, however, VPP has no restrictions + * on the type of object that can be a dependent of a FIB entry. Children of + * a FIB entry can be (and are) GRE & VXLAN tunnels endpoints, L2VPN LSPs etc. + * By including all object types into the graph and extending the back-walk, we + * can thus deliver fast convergence to technologies that overlay on an IP + * network. + * + * If having read all the above carefully you are still thinking; 'i don't need + * all this %&$* i have a route only I know about and I just need to jam it in', + * then fib_table_entry_special_add() is your only friend. + */ + +#ifndef __FIB_H__ +#define __FIB_H__ + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> + +#endif diff --git a/vnet/vnet/fib/fib_attached_export.c b/vnet/vnet/fib/fib_attached_export.c new file mode 100644 index 00000000000..afc953a4ac5 --- /dev/null +++ b/vnet/vnet/fib/fib_attached_export.c @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> + +#include "fib_attached_export.h" +#include "fib_entry_cover.h" +#include "fib_entry_src.h" + +/** + * A description of the need to import routes from the export table + */ +typedef struct fib_ae_import_t_ +{ + /** + * The entry in the export table that this importer + * is importing covered prefixes from + */ + fib_node_index_t faei_export_entry; + + /** + * The attached entry in the import table + */ + fib_node_index_t faei_import_entry; + /** + * the sibling index on the cover + */ + u32 faei_export_sibling; + + /** + * The index of the exporter tracker. Not set if the + * export entry is not valid for export + */ + fib_node_index_t faei_exporter; + + /** + * A vector/list of imported entry indices + */ + fib_node_index_t *faei_importeds; + + /** + * The FIB index and prefix we are tracking + */ + fib_node_index_t faei_export_fib; + fib_prefix_t faei_prefix; + + /** + * The FIB index we are importing into + */ + fib_node_index_t faei_import_fib; +} fib_ae_import_t; + +/** + * A description of the need to export routes to one or more export tables + */ +typedef struct fib_ae_export_t_ { + /** + * The vector/list of import tracker indices + */ + fib_node_index_t *faee_importers; + + /** + * The connected entry this export is acting on behalf of + */ + fib_node_index_t faee_ei; + + /** + * Reference counting locks + */ + u32 faee_locks; +} fib_ae_export_t; + +/* + * memory pools for the importers and exporters + */ +static fib_ae_import_t *fib_ae_import_pool; +static fib_ae_export_t *fib_ae_export_pool; + +static fib_ae_export_t * +fib_entry_ae_add_or_lock (fib_node_index_t connected) +{ + fib_ae_export_t *export; + fib_entry_t *entry; + + entry = fib_entry_get(connected); + + if (FIB_NODE_INDEX_INVALID == entry->fe_export) + { + pool_get(fib_ae_export_pool, export); + memset(export, 0, sizeof(*export)); + + entry->fe_export = (export - fib_ae_export_pool); + export->faee_ei = connected; + } + else 
+ { + export = pool_elt_at_index(fib_ae_export_pool, entry->fe_export); + } + + export->faee_locks++; + + return (export); +} + +static void +fib_entry_import_remove (fib_ae_import_t *import, + fib_node_index_t entry_index) +{ + fib_prefix_t prefix; + u32 index; + + /* + * find the index in the vector of the entry we are removing + */ + index = vec_search(import->faei_importeds, entry_index); + + if (index < vec_len(import->faei_importeds)) + { + /* + * this is an entry that was previously imported + */ + fib_entry_get_prefix(entry_index, &prefix); + + fib_table_entry_special_remove(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE); + + fib_entry_unlock(entry_index); + vec_del1(import->faei_importeds, index); + } +} + +static void +fib_entry_import_add (fib_ae_import_t *import, + fib_node_index_t entry_index) +{ + fib_node_index_t *existing; + fib_prefix_t prefix; + + /* + * ensure we only add the exported entry once, since + * sourcing prefixes in the table is reference counted + */ + vec_foreach(existing, import->faei_importeds) + { + if (*existing == entry_index) + { + return; + } + } + + /* + * this is the first time this export entry has been imported + * Add it to the import FIB and to the list of importeds + */ + fib_entry_get_prefix(entry_index, &prefix); + + /* + * don't import entries that have the same prefix as the import entry + */ + if (0 != fib_prefix_cmp(&prefix, + &import->faei_prefix)) + { + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(entry_index); + + if (dpo_id_is_valid(dpo)) + { + fib_table_entry_special_dpo_add(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE, + FIB_ENTRY_FLAG_EXCLUSIVE, + load_balance_get_bucket(dpo->dpoi_index, 0)); + + fib_entry_lock(entry_index); + vec_add1(import->faei_importeds, entry_index); + } + /* + * else + * the entry currently has no valid forwarding. 
when it + * does it will export itself + */ + } +} + +/** + * Call back when walking a connected prefix's covered prefixes for import + */ +static int +fib_entry_covered_walk_import (fib_entry_t *cover, + fib_node_index_t covered, + void *ctx) +{ + fib_ae_import_t *import = ctx; + + fib_entry_import_add(import, covered); + + return (0); +} + +/* + * fib_entry_ae_import_add + * + * Add an importer to a connected entry + */ +static void +fib_ae_export_import_add (fib_ae_export_t *export, + fib_ae_import_t *import) +{ + fib_entry_t *entry; + + import->faei_exporter = (export - fib_ae_export_pool); + entry = fib_entry_get(export->faee_ei); + + fib_entry_cover_walk(entry, + fib_entry_covered_walk_import, + import); +} + +void +fib_attached_export_import (fib_entry_t *fib_entry, + fib_node_index_t export_fib) +{ + fib_ae_import_t *import; + + pool_get(fib_ae_import_pool, import); + + import->faei_import_fib = fib_entry->fe_fib_index; + import->faei_export_fib = export_fib; + import->faei_prefix = fib_entry->fe_prefix; + import->faei_import_entry = fib_entry_get_index(fib_entry); + import->faei_export_sibling = ~0; + + /* + * do an exact match in the export table + */ + import->faei_export_entry = + fib_table_lookup_exact_match(import->faei_export_fib, + &import->faei_prefix); + + if (FIB_NODE_INDEX_INVALID == import->faei_export_entry) + { + /* + * no exact matching entry in the export table. can't be good. + * track the next best thing + */ + import->faei_export_entry = + fib_table_lookup(import->faei_export_fib, + &import->faei_prefix); + import->faei_exporter = FIB_NODE_INDEX_INVALID; + } + else + { + /* + * found the entry in the export table. import the + * the prefixes that it covers. 
+ * only if the prefix found in the export FIB really is + * attached do we want to import its covered + */ + if (FIB_ENTRY_FLAG_ATTACHED & + fib_entry_get_flags_i(fib_entry_get(import->faei_export_entry))) + { + fib_ae_export_t *export; + + export = fib_entry_ae_add_or_lock(import->faei_export_entry); + vec_add1(export->faee_importers, (import - fib_ae_import_pool)); + fib_ae_export_import_add(export, import); + } + } + + /* + * track the entry in the export table so we can update appropriately + * when it changes + */ + import->faei_export_sibling = + fib_entry_cover_track(fib_entry_get(import->faei_export_entry), + fib_entry_get_index(fib_entry)); + + fib_entry->fe_import = (import - fib_ae_import_pool); +} + +/** + * \brief All the imported entries need to be pruged + */ +void +fib_attached_export_purge (fib_entry_t *fib_entry) +{ + if (FIB_NODE_INDEX_INVALID != fib_entry->fe_import) + { + fib_node_index_t *import_index; + fib_entry_t *export_entry; + fib_ae_import_t *import; + fib_ae_export_t *export; + + import = pool_elt_at_index(fib_ae_import_pool, + fib_entry->fe_import); + + /* + * remove each imported entry + */ + vec_foreach(import_index, import->faei_importeds) + { + fib_prefix_t prefix; + + fib_entry_get_prefix(*import_index, &prefix); + + fib_table_entry_delete(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE); + fib_entry_unlock(*import_index); + } + vec_free(import->faei_importeds); + + /* + * stop tracking the export entry + */ + if (~0 != import->faei_export_sibling) + { + fib_entry_cover_untrack(fib_entry_get(import->faei_export_entry), + import->faei_export_sibling); + } + import->faei_export_sibling = ~0; + + /* + * remove this import tracker from the export's list, + * if it is attached to one. It won't be in the case the tracked + * export entry is not an attached exact match. 
+ */ + if (FIB_NODE_INDEX_INVALID != import->faei_exporter) + { + export_entry = fib_entry_get(import->faei_export_entry); + ASSERT(FIB_NODE_INDEX_INVALID != export_entry->fe_export); + export = pool_elt_at_index(fib_ae_export_pool, export_entry->fe_export); + + u32 index = vec_search(export->faee_importers, + (import - fib_ae_import_pool)); + + ASSERT(index < vec_len(export->faee_importers)); + vec_del1(export->faee_importers, index); + + /* + * free the exporter if there are no longer importers + */ + if (0 == --export->faee_locks) + { + pool_put(fib_ae_export_pool, export); + export_entry->fe_export = FIB_NODE_INDEX_INVALID; + } + } + + /* + * free the import tracker + */ + pool_put(fib_ae_import_pool, import); + fib_entry->fe_import = FIB_NODE_INDEX_INVALID; + } +} + +void +fib_attached_export_covered_added (fib_entry_t *cover, + fib_node_index_t covered) +{ + if (FIB_NODE_INDEX_INVALID != cover->fe_export) + { + /* + * the covering prefix is exporting to other tables + */ + fib_node_index_t *import_index; + fib_ae_import_t *import; + fib_ae_export_t *export; + + export = pool_elt_at_index(fib_ae_export_pool, cover->fe_export); + + /* + * export the covered entry to each of the importers + */ + vec_foreach(import_index, export->faee_importers) + { + import = pool_elt_at_index(fib_ae_import_pool, *import_index); + + fib_entry_import_add(import, covered); + } + } +} + +void +fib_attached_export_covered_removed (fib_entry_t *cover, + fib_node_index_t covered) +{ + if (FIB_NODE_INDEX_INVALID != cover->fe_export) + { + /* + * the covering prefix is exporting to other tables + */ + fib_node_index_t *import_index; + fib_ae_import_t *import; + fib_ae_export_t *export; + + export = pool_elt_at_index(fib_ae_export_pool, cover->fe_export); + + /* + * remove the covered entry from each of the importers + */ + vec_foreach(import_index, export->faee_importers) + { + import = pool_elt_at_index(fib_ae_import_pool, *import_index); + + fib_entry_import_remove(import, covered); + 
} + } +} + +static void +fib_attached_export_cover_modified_i (fib_entry_t *fib_entry) +{ + if (FIB_NODE_INDEX_INVALID != fib_entry->fe_import) + { + fib_ae_import_t *import; + u32 export_fib; + + /* + * safe the temporaries we need from the existing import + * since it will be toast after the purge. + */ + import = pool_elt_at_index(fib_ae_import_pool, fib_entry->fe_import); + export_fib = import->faei_export_fib; + + /* + * keep it simple. purge anything that was previously imported. + * then re-evaluate the need to import. + */ + fib_attached_export_purge(fib_entry); + fib_attached_export_import(fib_entry, export_fib); + } +} + +/** + * \brief If this entry is tracking a cover (in another table) + * then that cover has changed. re-evaluate import. + */ +void +fib_attached_export_cover_change (fib_entry_t *fib_entry) +{ + fib_attached_export_cover_modified_i(fib_entry); +} + +/** + * \brief If this entry is tracking a cover (in another table) + * then that cover has been updated. re-evaluate import. 
+ */ +void +fib_attached_export_cover_update (fib_entry_t *fib_entry) +{ + fib_attached_export_cover_modified_i(fib_entry); +} + +u8* +fib_ae_import_format (fib_node_index_t import_index, + u8* s) +{ + if (FIB_NODE_INDEX_INVALID != import_index) + { + fib_node_index_t *index; + fib_ae_import_t *import; + + import = pool_elt_at_index(fib_ae_import_pool, import_index); + + s = format(s, "\n Attached-Import:%d:[", (import - fib_ae_import_pool)); + s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix); + s = format(s, "export-entry:%d ", import->faei_export_entry); + s = format(s, "export-sibling:%d ", import->faei_export_sibling); + s = format(s, "exporter:%d ", import->faei_exporter); + s = format(s, "export-fib:%d ", import->faei_export_fib); + + s = format(s, "import-entry:%d ", import->faei_import_entry); + s = format(s, "import-fib:%d ", import->faei_import_fib); + + s = format(s, "importeds:["); + vec_foreach(index, import->faei_importeds) + { + s = format(s, "%d, ", *index); + } + s = format(s, "]]"); + } + + return (s); +} + +u8* +fib_ae_export_format (fib_node_index_t export_index, u8*s) +{ + if (FIB_NODE_INDEX_INVALID != export_index) + { + fib_node_index_t *index; + fib_ae_export_t *export; + + export = pool_elt_at_index(fib_ae_export_pool, export_index); + + s = format(s, "\n Attached-Export:%d:[", (export - fib_ae_export_pool)); + s = format(s, "export-entry:%d ", export->faee_ei); + + s = format(s, "importers:["); + vec_foreach(index, export->faee_importers) + { + s = format(s, "%d, ", *index); + } + s = format(s, "]]"); + } + return (s); +} diff --git a/vnet/vnet/fib/fib_attached_export.h b/vnet/vnet/fib/fib_attached_export.h new file mode 100644 index 00000000000..ee68481187c --- /dev/null +++ b/vnet/vnet/fib/fib_attached_export.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * FIB attached export + * + * what's it all about? + * say one does this: + * set int ip table Gig0 2 + * set int ip addr Gig0 10.0.0.1/24 + * Ggi0 is in table 2 with a connected address. + * Now we add a routing matching said connected in a different table + * ip route add table 3 10.0.0.0/24 via Gig0 + * How do we expect traffic in table 3 to be forwarded? Clearly out of + * Ggi0. It's an attached route, hence we are saying that we can ARP for + * hosts in the attached subnet. and we can. but any ARP entries we send + * we be received on Gig0, but since Gig0 is in table 2, it will install + * the adj-fins in table 2. So traffic in table 3 will never hit an adj-fib + * and hence always the glean, and so thus be effectively dropped. + * How do we fix this? Attached Export !! All more specfiic entries in table 2 + * that track and are covered by the connected are automatically exported into + * table 3. Now table 3 also has adj-fibs (and the local) so traffic to hosts + * is restored. 
+ */ + +#ifndef __FIB_ATTACHED_EXPORT_H__ +#define __FIB_ATTACHED_EXPORT_H__ + +#include <vnet/fib/fib_types.h> + +extern void fib_attached_export_import(fib_entry_t *fib_entry, + fib_node_index_t export_fib); + +extern void fib_attached_export_purge(fib_entry_t *fib_entry); + +extern void fib_attached_export_covered_added(fib_entry_t *cover, + fib_node_index_t covered); +extern void fib_attached_export_covered_removed(fib_entry_t *cover, + fib_node_index_t covered); +extern void fib_attached_export_cover_change(fib_entry_t *fib_entry); +extern void fib_attached_export_cover_update(fib_entry_t *fib_entry); + +extern u8* fib_ae_import_format(fib_node_index_t import_index, u8*s); +extern u8* fib_ae_export_format(fib_node_index_t export_index, u8*s); + +#endif diff --git a/vnet/vnet/fib/fib_entry.c b/vnet/vnet/fib/fib_entry.c new file mode 100644 index 00000000000..8b63f0dc974 --- /dev/null +++ b/vnet/vnet/fib/fib_entry.c @@ -0,0 +1,1493 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/ip/lookup.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/fib_attached_export.h> +#include <vnet/fib/fib_path_ext.h> + +/* + * Array of strings/names for the FIB sources + */ +static const char *fib_source_names[] = FIB_SOURCES; +static const char *fib_attribute_names[] = FIB_ENTRY_ATTRIBUTES; + +/* + * Pool for all fib_entries + */ +static fib_entry_t *fib_entry_pool; + +fib_entry_t * +fib_entry_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_entry_pool, index)); +} + +static fib_node_t * +fib_entry_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_entry_get(index)); +} + +fib_node_index_t +fib_entry_get_index (const fib_entry_t * fib_entry) +{ + return (fib_entry - fib_entry_pool); +} + +static fib_protocol_t +fib_entry_get_proto (const fib_entry_t * fib_entry) +{ + return (fib_entry->fe_prefix.fp_proto); +} + +/** + * @brief Turn the chain type requested by the client into the one they + * really wanted + */ +static fib_forward_chain_type_t +fib_entry_chain_type_fixup (const fib_entry_t *entry, + fib_forward_chain_type_t fct) +{ + if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct) + { + /* + * The EOS chain is a tricky since one cannot know the adjacency + * to link to without knowing what the packets payload protocol + * will be once the label is popped. 
+ */ + fib_forward_chain_type_t dfct; + + dfct = fib_entry_get_default_chain_type(entry); + + if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct) + { + /* + * If the entry being asked is a eos-MPLS label entry, + * then use the payload-protocol field, that we stashed there + * for just this purpose + */ + return (fib_proto_to_forw_chain_type(entry->fe_prefix.fp_payload_proto)); + } + /* + * else give them what this entry would be by default. i.e. if it's a v6 + * entry, then the label its local labelled should be carrying v6 traffic. + * If it's a non-EOS label entry, then there are more labels and we want + * a non-eos chain. + */ + return (dfct); + } + + return (fct); +} + +fib_forward_chain_type_t +fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) +{ + switch (fib_entry->fe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_PROTOCOL_MPLS: + if (MPLS_EOS == fib_entry->fe_prefix.fp_eos) + /* + * If the entry being asked is a eos-MPLS label entry, + * then use the payload-protocol field, that we stashed there + * for just this purpose + */ + return (fib_proto_to_forw_chain_type(fib_entry->fe_prefix.fp_payload_proto)); + else + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } + + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +u8 * +format_fib_entry (u8 * s, va_list * args) +{ + fib_forward_chain_type_t fct; + fib_entry_attribute_t attr; + fib_path_ext_t *path_ext; + fib_entry_t *fib_entry; + fib_entry_src_t *src; + fib_node_index_t fei; + fib_source_t source; + u32 n_covered; + int level; + + fei = va_arg (*args, fib_node_index_t); + level = va_arg (*args, int); + fib_entry = fib_entry_get(fei); + + s = format (s, "%U", format_fib_prefix, &fib_entry->fe_prefix); + + if (level >= FIB_ENTRY_FORMAT_DETAIL) + { + s = format (s, " fib:%d", fib_entry->fe_fib_index); + s = format (s, " index:%d", fib_entry_get_index(fib_entry)); + s = format (s, " 
locks:%d", fib_entry->fe_node.fn_locks); + + FOR_EACH_SRC_ADDED(fib_entry, src, source, + ({ + s = format (s, "\n src:%s ", + fib_source_names[source]); + s = fib_entry_src_format(fib_entry, source, s); + s = format (s, " refs:%d ", src->fes_ref_count); + if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) { + s = format(s, "flags:"); + FOR_EACH_FIB_ATTRIBUTE(attr) { + if ((1<<attr) & src->fes_entry_flags) { + s = format (s, "%s,", fib_attribute_names[attr]); + } + } + } + s = format (s, "\n"); + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + s = fib_path_list_format(src->fes_pl, s); + } + if (NULL != src->fes_path_exts) + { + s = format(s, " Extensions:"); + vec_foreach(path_ext, src->fes_path_exts) + { + s = format(s, "\n %U", format_fib_path_ext, path_ext); + } + } + })); + + n_covered = fib_entry_cover_get_size(fib_entry); + if (n_covered > 0) { + s = format(s, "\n tracking %d covered: ", n_covered); + s = fib_entry_cover_list_format(fib_entry, s); + } + s = fib_ae_import_format(fib_entry->fe_import, s); + s = fib_ae_export_format(fib_entry->fe_export, s); + + s = format (s, "\n forwarding: "); + } + else + { + s = format (s, "\n"); + } + + fct = fib_entry_get_default_chain_type(fib_entry); + + if (!dpo_id_is_valid(&fib_entry->fe_lb[fct])) + { + s = format (s, " UNRESOLVED\n"); + return (s); + } + else + { + if (level >= FIB_ENTRY_FORMAT_DETAIL2) + { + + FOR_EACH_FIB_FORW_CHAIN(fct) + { + s = format(s, " %U-chain\n %U", + format_fib_forw_chain_type, fct, + format_dpo_id, + &fib_entry->fe_lb[fct], + 2); + s = format(s, "\n"); + } + } + else + { + s = format(s, " %U-chain\n %U", + format_fib_forw_chain_type, fct, + format_dpo_id, + &fib_entry->fe_lb[fct], + 2); + s = format(s, "\n"); + } + } + + if (level >= FIB_ENTRY_FORMAT_DETAIL2) + { + s = format(s, "\nchildren:"); + s = fib_node_children_format(fib_entry->fe_node.fn_children, s); + } + + /* adj = adj_get(fib_entry->fe_prefix.fp_proto, fib_entry->fe_adj_index); */ + + /* ip_multipath_next_hop_t * nhs, 
tmp_nhs[1]; */ + /* u32 i, j, n_left, n_nhs; */ + /* vlib_counter_t c, sum; */ + /* ip_lookup_main_t *lm = fib_get_lookup_main(fib_entry->fe_prefix.fp_proto); */ + + /* if (adj->n_adj == 1) */ + /* { */ + /* nhs = &tmp_nhs[0]; */ + /* nhs[0].next_hop_adj_index = ~0; /\* not used *\/ */ + /* nhs[0].weight = 1; */ + /* n_nhs = 1; */ + /* } */ + /* else */ + /* { */ + /* ip_multipath_adjacency_t * madj; */ + /* madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); */ + /* nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); */ + /* n_nhs = madj->normalized_next_hops.count; */ + /* } */ + + /* n_left = nhs[0].weight; */ + /* vlib_counter_zero (&sum); */ + /* for (i = j = 0; i < adj->n_adj; i++) */ + /* { */ + /* n_left -= 1; */ + /* vlib_get_combined_counter(&lm->adjacency_counters, */ + /* fib_entry->fe_adj_index + i, */ + /* &c); */ + /* /\* if (clear) *\/ */ + /* /\* vlib_zero_combined_counter (&lm->adjacency_counters, *\/ */ + /* /\* fib_entry->fe_adj_index + i); *\/ */ + + /* vlib_counter_add (&sum, &c); */ + /* if (n_left == 0) */ + /* { */ + /* s = format (s, "%16Ld%16Ld ", sum.packets, sum.bytes); */ + /* s = format (s, "weight %d, index %d", */ + /* nhs[j].weight, fib_entry->fe_adj_index + i); */ + + /* if (adj->n_adj > 1) */ + /* s = format (s, ", multipath"); */ + + /* s = format (s, "\n%U", */ + /* format_ip_adjacency, */ + /* vnet_get_main(), lm, fib_entry->fe_adj_index + i); */ + + /* // vlib_cli_output (vm, "%v", msg); */ + /* //vec_free (msg); */ + /* } */ + /* else */ + /* { */ + /* j++; */ + /* if (j < n_nhs) */ + /* { */ + /* n_left = nhs[j].weight; */ + /* vlib_counter_zero (&sum); */ + /* } */ + /* } */ + /* } */ + + return (s); +} + +static fib_entry_t* +fib_entry_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_ENTRY == node->fn_type); +#endif + return ((fib_entry_t*)node); +} + +static void +fib_entry_last_lock_gone (fib_node_t *node) +{ + fib_forward_chain_type_t 
fct; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_from_fib_node(node); + + FOR_EACH_FIB_FORW_CHAIN(fct) + { + dpo_reset(&fib_entry->fe_lb[fct]); + } + + FIB_ENTRY_DBG(fib_entry, "last-lock"); + + fib_node_deinit(&fib_entry->fe_node); + // FIXME -RR Backwalk + pool_put(fib_entry_pool, fib_entry); +} + +static fib_entry_src_t* +fib_entry_get_best_src_i (const fib_entry_t *fib_entry) +{ + fib_entry_src_t *bsrc; + + /* + * the enum of sources is deliberately arranged in priority order + */ + if (0 == vec_len(fib_entry->fe_srcs)) + { + bsrc = NULL; + } + else + { + bsrc = vec_elt_at_index(fib_entry->fe_srcs, 0); + } + + return (bsrc); +} + +static fib_source_t +fib_entry_src_get_source (const fib_entry_src_t *esrc) +{ + if (NULL != esrc) + { + return (esrc->fes_src); + } + return (FIB_SOURCE_MAX); +} + +static fib_entry_flag_t +fib_entry_src_get_flags (const fib_entry_src_t *esrc) +{ + if (NULL != esrc) + { + return (esrc->fes_entry_flags); + } + return (FIB_ENTRY_FLAG_NONE); +} + +fib_entry_flag_t +fib_entry_get_flags (fib_node_index_t fib_entry_index) +{ + return (fib_entry_get_flags_i(fib_entry_get(fib_entry_index))); +} + +/* + * fib_entry_back_walk_notify + * + * A back walk has reach this entry. 
+ */ +static fib_node_back_walk_rc_t +fib_entry_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_from_fib_node(node); + + if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_get_best_source( + fib_entry_get_index(fib_entry))); + } + + if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) + { + /* + * ADJ updates (complete<->incomplete) do not need to propagate to + * recursive entries. + * The only reason its needed as far back as here, is that the adj + * and the incomplete adj are a different DPO type, so the LBs need + * to re-stack. + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + else + { + /* + * all other walk types can be reclassifed to a re-evaluate to + * all recursive dependents. + * By reclassifying we ensure that should any of these walk types meet + * they can be merged. + */ + ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + + /* + * propagate the backwalk further if we haven't already reached the + * maximum depth. + */ + fib_walk_sync(FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry), + ctx); + } + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * The FIB path-list's graph node virtual function table + */ +static const fib_node_vft_t fib_entry_vft = { + .fnv_get = fib_entry_get_node, + .fnv_last_lock = fib_entry_last_lock_gone, + .fnv_back_walk = fib_entry_back_walk_notify, +}; + +/* + * fib_entry_contribute_forwarding + * + * Get an lock the forwarding information (DPO) contributed by the FIB entry. 
+ */ +void +fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + /* + * these are not the droids you are looking for... + */ + type = fib_entry_chain_type_fixup(fib_entry, type); + + if (!dpo_id_is_valid(&fib_entry->fe_lb[type])) + { + /* + * on-demand create eos/non-eos. + * There is no on-demand delete because: + * - memory versus complexity & reliability: + * leaving unrequired [n]eos LB arounds wastes memory, cleaning + * then up on the right trigger is more code. i favour the latter. + */ + fib_entry_src_mk_lb(fib_entry, + fib_entry_get_best_src_i(fib_entry), + type, + &fib_entry->fe_lb[type]); + } + + dpo_copy(dpo, &fib_entry->fe_lb[type]); +} + +const dpo_id_t * +fib_entry_contribute_ip_forwarding (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (&fib_entry->fe_lb[fib_entry_get_default_chain_type(fib_entry)]); +} + +adj_index_t +fib_entry_get_adj (fib_node_index_t fib_entry_index) +{ + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(fib_entry_index); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + + if (dpo_is_adj(dpo)) + { + return (dpo->dpoi_index); + } + return (ADJ_INDEX_INVALID); +} + +fib_node_index_t +fib_entry_get_path_list (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (fib_entry->fe_parent); +} + +u32 +fib_entry_get_fib_table_id(fib_node_index_t fib_entry_index) +{ + + + return (0); +} + +u32 +fib_entry_child_add (fib_node_index_t fib_entry_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + return (fib_node_child_add(FIB_NODE_TYPE_ENTRY, + fib_entry_index, + child_type, + child_index)); +}; + +void +fib_entry_child_remove (fib_node_index_t fib_entry_index, + u32 sibling_index) +{ + 
fib_node_child_remove(FIB_NODE_TYPE_ENTRY, + fib_entry_index, + sibling_index); +} + +static fib_entry_t * +fib_entry_alloc (u32 fib_index, + const fib_prefix_t *prefix, + fib_node_index_t *fib_entry_index) +{ + fib_forward_chain_type_t fct; + fib_entry_t *fib_entry; + + pool_get(fib_entry_pool, fib_entry); + memset(fib_entry, 0, sizeof(*fib_entry)); + + fib_node_init(&fib_entry->fe_node, + FIB_NODE_TYPE_ENTRY); + + fib_entry->fe_fib_index = fib_index; + fib_entry->fe_prefix = *prefix; + if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto) + { + fib_entry->fe_prefix.fp_len = 21; + ASSERT(DPO_PROTO_NONE != fib_entry->fe_prefix.fp_payload_proto); + } + + fib_entry->fe_export = FIB_NODE_INDEX_INVALID; + fib_entry->fe_import = FIB_NODE_INDEX_INVALID; + fib_entry->fe_covered = FIB_NODE_INDEX_INVALID; + FOR_EACH_FIB_FORW_CHAIN(fct) + { + dpo_reset(&fib_entry->fe_lb[fct]); + } + + *fib_entry_index = fib_entry_get_index(fib_entry); + + FIB_ENTRY_DBG(fib_entry, "alloc"); + + return (fib_entry); +} + +static void +fib_entry_post_flag_update_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + /* + * handle changes to attached export for import entries + */ + int is_import = (FIB_ENTRY_FLAG_IMPORT & fib_entry_get_flags_i(fib_entry)); + int was_import = (FIB_ENTRY_FLAG_IMPORT & old_flags); + + if (!was_import && is_import) + { + /* + * transition from not exported to exported + */ + + /* + * there is an assumption here that the entry resolves via only + * one interface and that it is the cross VRF interface. + */ + u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent); + + fib_attached_export_import(fib_entry, + fib_table_get_index_for_sw_if_index( + fib_entry_get_proto(fib_entry), + sw_if_index)); + } + else if (was_import && !is_import) + { + /* + * transition from exported to not exported + */ + fib_attached_export_purge(fib_entry); + } + /* + * else + * no change. nothing to do. 
+ */ + + /* + * handle changes to attached export for export entries + */ + int is_attached = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(fib_entry)); + int was_attached = (FIB_ENTRY_FLAG_ATTACHED & old_flags); + + if (!was_attached && is_attached) + { + /* + * transition to attached. time to export + */ + // FIXME + } + // else FIXME +} + +static void +fib_entry_post_install_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + fib_entry_post_flag_update_actions(fib_entry, source, old_flags); + fib_entry_src_action_installed(fib_entry, source); +} + +fib_node_index_t +fib_entry_create (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths) +{ + fib_node_index_t fib_entry_index; + fib_entry_t *fib_entry; + + ASSERT(0 < vec_len(paths)); + + fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index); + + /* + * since this is a new entry create, we don't need to check for winning + * sources - there is only one. 
+ */ + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + fib_entry_src_action_path_swap(fib_entry, + source, + flags, + paths); + /* + * handle possible realloc's by refetching the pointer + */ + fib_entry = fib_entry_get(fib_entry_index); + fib_entry_src_action_activate(fib_entry, source); + + fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE); + + return (fib_entry_index); +} + +fib_node_index_t +fib_entry_create_special (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_entry_t *fib_entry; + + /* + * create and initiliase the new enty + */ + fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index); + + /* + * create the path-list + */ + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo); + fib_entry_src_action_activate(fib_entry, source); + + fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE); + + return (fib_entry_index); +} + +static void +fib_entry_post_update_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + /* + * backwalk to children to inform then of the change to forwarding. 
+ */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_get_index(fib_entry), &bw_ctx); + + /* + * then inform any covered prefixes + */ + fib_entry_cover_update_notify(fib_entry); + + fib_entry_post_install_actions(fib_entry, source, old_flags); +} + +void +fib_entry_special_add (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source) + { + /* + * we have a new winning source. + */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, source); + } + else if (source > best_source) + { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. 
+ */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + + fib_entry_post_update_actions(fib_entry, source, bflags); +} + +void +fib_entry_path_add (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + ASSERT(1 == vec_len(rpath)); + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry = fib_entry_src_action_path_add(fib_entry, source, flags, rpath); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source) + { + /* + * we have a new winning source. + */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, source); + } + else if (source > best_source) + { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + + fib_entry_post_update_actions(fib_entry, source, bflags); +} + +/* + * fib_entry_path_remove + * + * remove a path from the entry. + * return the fib_entry's index if it is still present, INVALID otherwise. 
+ */ +fib_entry_src_flag_t +fib_entry_path_remove (fib_node_index_t fib_entry_index, + fib_source_t source, + const fib_route_path_t *rpath) +{ + fib_entry_src_flag_t sflag; + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + ASSERT(1 == vec_len(rpath)); + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + sflag = fib_entry_src_action_path_remove(fib_entry, source, rpath); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source ) + { + /* + * Que! removing a path from a source that is better than the + * one this entry is using. + */ + ASSERT(0); + } + else if (source > best_source ) + { + /* + * the source is not the best. nothing to do. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + /* + * removing a path from the path-list we were using. + */ + if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag)) + { + /* + * the last path from the source was removed. + * fallback to lower source + */ + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + + if (FIB_SOURCE_MAX == best_source) { + /* + * no more sources left. this entry is toast. 
+ */ + fib_entry_src_action_uninstall(fib_entry); + fib_entry_post_flag_update_actions(fib_entry, source, bflags); + + return (FIB_ENTRY_SRC_FLAG_NONE); + } + else + { + fib_entry_src_action_activate(fib_entry, best_source); + source = best_source; + } + } + else + { + /* + * re-install the new forwarding information + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + } + + fib_entry_post_update_actions(fib_entry, source, bflags); + + /* + * still have sources + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); +} + +/* + * fib_entry_special_remove + * + * remove a special source from the entry. + * return the fib_entry's index if it is still present, INVALID otherwise. + */ +fib_entry_src_flag_t +fib_entry_special_remove (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_src_flag_t sflag; + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + sflag = fib_entry_src_action_remove(fib_entry, source); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source ) + { + /* + * Que! removing a path from a source that is better than the + * one this entry is using. This can only mean it is a source + * this prefix does not have. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else if (source > best_source ) { + /* + * the source is not the best. nothing to do. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag)) + { + /* + * the source was removed. use the next best. 
+ */ + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + + if (FIB_SOURCE_MAX == best_source) { + /* + * no more sources left. this entry is toast. + */ + fib_entry_src_action_uninstall(fib_entry); + fib_entry_post_flag_update_actions(fib_entry, source, bflags); + + return (FIB_ENTRY_SRC_FLAG_NONE); + } + else + { + fib_entry_src_action_activate(fib_entry, best_source); + source = best_source; + } + } + else + { + /* + * re-install the new forwarding information + */ + fib_entry_src_action_reactivate(fib_entry, source); + } + } + + fib_entry_post_update_actions(fib_entry, source, bflags); + + /* + * still have sources + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); +} + +/** + * fib_entry_delete + * + * The source is withdrawing all the paths it provided + */ +fib_entry_src_flag_t +fib_entry_delete (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + return (fib_entry_special_remove(fib_entry_index, source)); +} + +/** + * fib_entry_update + * + * The source has provided a new set of paths that will replace the old. + */ +void +fib_entry_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry_src_action_path_swap(fib_entry, + source, + flags, + paths); + /* + * handle possible realloc's by refetching the pointer + */ + fib_entry = fib_entry_get(fib_entry_index); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source) + { + /* + * we have a new winning source. 
+ */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, source); + } + else if (source > best_source) { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + + fib_entry_post_update_actions(fib_entry, source, bflags); +} + + +/* + * fib_entry_cover_changed + * + * this entry is tracking its cover and that cover has changed. + */ +void +fib_entry_cover_changed (fib_node_index_t fib_entry_index) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_source_t source, best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + u32 index; + + bflags = FIB_ENTRY_FLAG_NONE; + best_source = FIB_SOURCE_FIRST; + fib_entry = fib_entry_get(fib_entry_index); + + fib_attached_export_cover_change(fib_entry); + + /* + * propagate the notificuation to each of the added sources + */ + index = 0; + FOR_EACH_SRC_ADDED(fib_entry, esrc, source, + ({ + if (0 == index) + { + /* + * only the best source gets to set the back walk flags + */ + res = fib_entry_src_action_cover_change(fib_entry, source); + bflags = fib_entry_src_get_flags(esrc); + best_source = fib_entry_src_get_source(esrc); + } + else + { + fib_entry_src_action_cover_change(fib_entry, source); + } + index++; + })); + + if (res.install) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_src_get_source( + fib_entry_get_best_src_i(fib_entry))); + fib_entry_post_install_actions(fib_entry, best_source, bflags); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } + + if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason) + { + 
/* + * time for walkies fido. + */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = res.bw_reason, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx); + } +} + +/* + * fib_entry_cover_updated + * + * this entry is tracking its cover and that cover has been updated + * (i.e. its forwarding information has changed). + */ +void +fib_entry_cover_updated (fib_node_index_t fib_entry_index) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_source_t source, best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + u32 index; + + bflags = FIB_ENTRY_FLAG_NONE; + best_source = FIB_SOURCE_FIRST; + fib_entry = fib_entry_get(fib_entry_index); + + fib_attached_export_cover_update(fib_entry); + + /* + * propagate the notificuation to each of the added sources + */ + index = 0; + FOR_EACH_SRC_ADDED(fib_entry, esrc, source, + ({ + if (0 == index) + { + /* + * only the best source gets to set the back walk flags + */ + res = fib_entry_src_action_cover_update(fib_entry, source); + bflags = fib_entry_src_get_flags(esrc); + best_source = fib_entry_src_get_source(esrc); + } + else + { + fib_entry_src_action_cover_update(fib_entry, source); + } + index++; + })); + + if (res.install) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_src_get_source( + fib_entry_get_best_src_i(fib_entry))); + fib_entry_post_install_actions(fib_entry, best_source, bflags); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } + + if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason) + { + /* + * time for walkies fido. 
+ */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = res.bw_reason, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx); + } +} + +int +fib_entry_recursive_loop_detect (fib_node_index_t entry_index, + fib_node_index_t **entry_indicies) +{ + fib_entry_t *fib_entry; + int was_looped, is_looped; + + fib_entry = fib_entry_get(entry_index); + + if (FIB_NODE_INDEX_INVALID != fib_entry->fe_parent) + { + fib_node_index_t *entries = *entry_indicies; + fib_forward_chain_type_t fct; + + vec_add1(entries, entry_index); + was_looped = fib_path_list_is_looped(fib_entry->fe_parent); + is_looped = fib_path_list_recursive_loop_detect(fib_entry->fe_parent, + &entries); + + *entry_indicies = entries; + + if (!!was_looped != !!is_looped) + { + /* + * re-evaluate all the entry's forwarding + * NOTE: this is an inplace modify + */ + FOR_EACH_FIB_FORW_CHAIN(fct) + { + if (dpo_id_is_valid(&fib_entry->fe_lb[fct])) + { + fib_entry_src_mk_lb(fib_entry, + fib_entry_get_best_src_i(fib_entry), + fct, + &fib_entry->fe_lb[fct]); + } + } + } + } + else + { + /* + * the entry is currently not linked to a path-list. this happens + * when it is this entry that is re-linking path-lists and has thus + * broken the loop + */ + is_looped = 0; + } + + return (is_looped); +} + +u32 +fib_entry_get_resolving_interface (fib_node_index_t entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(entry_index); + + return (fib_path_list_get_resolving_interface(fib_entry->fe_parent)); +} + +fib_source_t +fib_entry_get_best_source (fib_node_index_t entry_index) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(entry_index); + + bsrc = fib_entry_get_best_src_i(fib_entry); + return (fib_entry_src_get_source(bsrc)); +} + +static int +fib_ip4_address_compare (ip4_address_t * a1, + ip4_address_t * a2) +{ + /* + * IP addresses are unsiged ints. the return value here needs to be signed + * a simple subtraction won't cut it. 
+ * If the addresses are the same, the sort order is undefiend, so phoey. + */ + return ((clib_net_to_host_u32(a1->data_u32) > + clib_net_to_host_u32(a2->data_u32) ) ? + 1 : -1); +} + +static int +fib_ip6_address_compare (ip6_address_t * a1, + ip6_address_t * a2) +{ + int i; + for (i = 0; i < ARRAY_LEN (a1->as_u16); i++) + { + int cmp = (clib_net_to_host_u16 (a1->as_u16[i]) - + clib_net_to_host_u16 (a2->as_u16[i])); + if (cmp != 0) + return cmp; + } + return 0; +} + +static int +fib_entry_cmp (fib_node_index_t fib_entry_index1, + fib_node_index_t fib_entry_index2) +{ + fib_entry_t *fib_entry1, *fib_entry2; + int cmp = 0; + + fib_entry1 = fib_entry_get(fib_entry_index1); + fib_entry2 = fib_entry_get(fib_entry_index2); + + switch (fib_entry1->fe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + cmp = fib_ip4_address_compare(&fib_entry1->fe_prefix.fp_addr.ip4, + &fib_entry2->fe_prefix.fp_addr.ip4); + break; + case FIB_PROTOCOL_IP6: + cmp = fib_ip6_address_compare(&fib_entry1->fe_prefix.fp_addr.ip6, + &fib_entry2->fe_prefix.fp_addr.ip6); + break; + case FIB_PROTOCOL_MPLS: + cmp = (fib_entry1->fe_prefix.fp_label - fib_entry2->fe_prefix.fp_label); + + if (0 == cmp) + { + cmp = (fib_entry1->fe_prefix.fp_eos - fib_entry2->fe_prefix.fp_eos); + } + break; + } + + if (0 == cmp) { + cmp = (fib_entry1->fe_prefix.fp_len - fib_entry2->fe_prefix.fp_len); + } + return (cmp); +} + +int +fib_entry_cmp_for_sort (void *i1, void *i2) +{ + fib_node_index_t *fib_entry_index1 = i1, *fib_entry_index2 = i2; + + return (fib_entry_cmp(*fib_entry_index1, + *fib_entry_index2)); +} + +void +fib_entry_lock (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + fib_node_lock(&fib_entry->fe_node); +} + +void +fib_entry_unlock (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + fib_node_unlock(&fib_entry->fe_node); +} + +void +fib_entry_module_init (void) +{ + 
fib_node_register_type (FIB_NODE_TYPE_ENTRY, &fib_entry_vft); +} + +void +fib_entry_get_prefix (fib_node_index_t fib_entry_index, + fib_prefix_t *pfx) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + *pfx = fib_entry->fe_prefix; +} + +u32 +fib_entry_get_fib_index (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (fib_entry->fe_fib_index); +} + +u32 +fib_entry_pool_size (void) +{ + return (pool_elts(fib_entry_pool)); +} + +static clib_error_t * +show_fib_entry_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_node_index_t fei; + + if (unformat (input, "%d", &fei)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(fib_entry_pool, fei)) + { + vlib_cli_output (vm, "%d@%U", + fei, + format_fib_entry, fei, + FIB_ENTRY_FORMAT_DETAIL2); + } + else + { + vlib_cli_output (vm, "entry %d invalid", fei); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB Entries:"); + pool_foreach_index(fei, fib_entry_pool, + ({ + vlib_cli_output (vm, "%d@%U", + fei, + format_fib_entry, fei, + FIB_ENTRY_FORMAT_BRIEF); + })); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_entry, static) = { + .path = "show fib entry", + .function = show_fib_entry_command, + .short_help = "show fib entry", +}; diff --git a/vnet/vnet/fib/fib_entry.h b/vnet/vnet/fib/fib_entry.h new file mode 100644 index 00000000000..ac22c170d55 --- /dev/null +++ b/vnet/vnet/fib/fib_entry.h @@ -0,0 +1,514 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_ENTRY_H__ +#define __FIB_ENTRY_H__ + +#include <vnet/fib/fib_node.h> +#include <vnet/adj/adj.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/dpo.h> + +/** + * The different sources that can create a route. + * The sources are defined here the thier relative priority order. + * The lower the value the higher the priority + */ +typedef enum fib_source_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_SOURCE_FIRST, + /** + * Special sources. These are for entries that are added to all + * FIBs by default, and should never be over-ridden (hence they + * are the highest priority) + */ + FIB_SOURCE_SPECIAL = FIB_SOURCE_FIRST, + /** + * Classify. A route that links directly to a classify adj + */ + FIB_SOURCE_CLASSIFY, + /** + * Route added as a result of interface configuration. + * this will also come from the API/CLI, but the distinction is + * that is from confiiguration on an interface, not a 'ip route' command + */ + FIB_SOURCE_INTERFACE, + /** + * A high priority source a plugin can use + */ + FIB_SOURCE_PLUGIN_HI, + /** + * From the control plane API + */ + FIB_SOURCE_API, + /** + * From the CLI. + */ + FIB_SOURCE_CLI, + /** + * LISP + */ + FIB_SOURCE_LISP, + /** + * SRv6 + */ + FIB_SOURCE_SR, + /** + * IPv[46] Mapping + */ + FIB_SOURCE_MAP, + /** + * SIXRD + */ + FIB_SOURCE_SIXRD, + /** + * DHCP + */ + FIB_SOURCE_DHCP, + /** + * Adjacency source. + * routes created as a result of ARP/ND entries. This is lower priority + * then the API/CLI. This is on purpose. trust me. 
+ */ + FIB_SOURCE_ADJ, + /** + * MPLS label. The prefix has been assigned a local label. This source + * never provides forwarding information, instead it acts as a place-holder + * so the association of label to prefix can be maintained + */ + FIB_SOURCE_MPLS, + /** + * Attached Export source. + * routes created as a result of attahced export. routes thus sourced + * will be present in the export tables + */ + FIB_SOURCE_AE, + /** + * Recursive resolution source. + * Used to install an entry that is thre resolution traget of another. + */ + FIB_SOURCE_RR, + /** + * The default route source. + * The default route is always added to the FIB table (like the + * special sources) but we need to be able to over-ride it with + * 'ip route' sources when provided + */ + FIB_SOURCE_DEFAULT_ROUTE, + /** + * Marker. add new entries before this one. + */ + FIB_SOURCE_LAST = FIB_SOURCE_DEFAULT_ROUTE, +} __attribute__ ((packed)) fib_source_t; + +_Static_assert (sizeof(fib_source_t) == 1, + "FIB too many sources"); + +/** + * The maximum number of sources + */ +#define FIB_SOURCE_MAX (FIB_SOURCE_LAST+1) + +#define FIB_SOURCES { \ + [FIB_SOURCE_SPECIAL] = "special", \ + [FIB_SOURCE_INTERFACE] = "interface", \ + [FIB_SOURCE_API] = "API", \ + [FIB_SOURCE_CLI] = "CLI", \ + [FIB_SOURCE_ADJ] = "adjacency", \ + [FIB_SOURCE_MAP] = "MAP", \ + [FIB_SOURCE_SR] = "SR", \ + [FIB_SOURCE_SIXRD] = "SixRD", \ + [FIB_SOURCE_LISP] = "LISP", \ + [FIB_SOURCE_CLASSIFY] = "classify", \ + [FIB_SOURCE_DHCP] = "DHCP", \ + [FIB_SOURCE_RR] = "recursive-resolution", \ + [FIB_SOURCE_AE] = "attached_export", \ + [FIB_SOURCE_MPLS] = "mpls", \ + [FIB_SOURCE_DEFAULT_ROUTE] = "default-route", \ +} + +#define FOR_EACH_FIB_SOURCE(_item) \ + for (_item = FIB_SOURCE_FIRST; _item < FIB_SOURCE_MAX; _item++) + +/** + * The different sources that can create a route. + * The sources are defined here the thier relative priority order. 
+ * The lower the value the higher the priority + */ +typedef enum fib_entry_attribute_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_ENTRY_ATTRIBUTE_FIRST, + /** + * Connected. The prefix is configured on an interface. + */ + FIB_ENTRY_ATTRIBUTE_CONNECTED = FIB_ENTRY_ATTRIBUTE_FIRST, + /** + * Attached. The prefix is attached to an interface. + */ + FIB_ENTRY_ATTRIBUTE_ATTACHED, + /** + * The route is an explicit drop. + */ + FIB_ENTRY_ATTRIBUTE_DROP, + /** + * The route is exclusive. The client creating the route is + * providing an exclusive adjacency. + */ + FIB_ENTRY_ATTRIBUTE_EXCLUSIVE, + /** + * The route is attached cross tables and thus imports covered + * prefixes from the other table. + */ + FIB_ENTRY_ATTRIBUTE_IMPORT, + /** + * The prefix/address is local to this device + */ + FIB_ENTRY_ATTRIBUTE_LOCAL, + /** + * Marker. add new entries before this one. + */ + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_LOCAL, +} fib_entry_attribute_t; + +/** + * The maximum number of sources + */ +#define FIB_ENTRY_ATTRIBUTE_MAX (FIB_ENTRY_ATTRIBUTE_LAST+1) + +#define FIB_ENTRY_ATTRIBUTES { \ + [FIB_ENTRY_ATTRIBUTE_CONNECTED] = "connected", \ + [FIB_ENTRY_ATTRIBUTE_ATTACHED] = "attached", \ + [FIB_ENTRY_ATTRIBUTE_IMPORT] = "import", \ + [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \ + [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \ +} + +#define FOR_EACH_FIB_ATTRIBUTE(_item) \ + for (_item = FIB_ENTRY_ATTRIBUTE_FIRST; \ + _item < FIB_ENTRY_ATTRIBUTE_MAX; \ + _item++) + +typedef enum fib_entry_flag_t_ { + FIB_ENTRY_FLAG_NONE = 0, + FIB_ENTRY_FLAG_CONNECTED = (1 << FIB_ENTRY_ATTRIBUTE_CONNECTED), + FIB_ENTRY_FLAG_ATTACHED = (1 << FIB_ENTRY_ATTRIBUTE_ATTACHED), + FIB_ENTRY_FLAG_DROP = (1 << FIB_ENTRY_ATTRIBUTE_DROP), + FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE), + FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), + FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), +} 
fib_entry_flag_t; + +/** + * Flags for the source data + */ +typedef enum fib_entry_src_attribute_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_ENTRY_SRC_ATTRIBUTE_FIRST, + /** + * the source has been added to the entry + */ + FIB_ENTRY_SRC_ATTRIBUTE_ADDED = FIB_ENTRY_SRC_ATTRIBUTE_FIRST, + /** + * the source is active/best + */ + FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE, + /** + * Marker. add new entries before this one. + */ + FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE, +} fib_entry_src_attribute_t; + +#define FIB_ENTRY_SRC_ATTRIBUTE_MAX (FIB_ENTRY_SRC_ATTRIBUTE_LAST+1) + +#define FIB_ENTRY_SRC_ATTRIBUTES { \ + [FIB_ENTRY_SRC_ATTRIBUTE_ADDED] = "added", \ + [FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \ +} + +typedef enum fib_entry_src_flag_t_ { + FIB_ENTRY_SRC_FLAG_NONE = 0, + FIB_ENTRY_SRC_FLAG_ADDED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ADDED), + FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE), +} __attribute__ ((packed)) fib_entry_src_flag_t; + +/* + * Keep the size of the flags field to 2 bytes, so it + * can be placed next to the 2 bytes reference count + */ +_Static_assert (sizeof(fib_entry_src_flag_t) <= 2, + "FIB entry flags field size too big"); + +/** + * Information related to the source of a FIB entry + */ +typedef struct fib_entry_src_t_ { + /** + * The path-list created by the source + */ + fib_node_index_t fes_pl; + /** + * Which source this info block is for + */ + fib_source_t fes_src; + /** + * Flags on the source + */ + fib_entry_src_flag_t fes_flags; + /** + * Flags the source contributes to the entry + */ + fib_entry_flag_t fes_entry_flags; + + /** + * 1 bytes ref count. This is not the number of users of the Entry + * (which is itself not large, due to path-list sharing), but the number + * of times a given source has been added. 
Which is even fewer + */ + u8 fes_ref_count; + + /** + * A vector of path extensions + */ + struct fib_path_ext_t_ *fes_path_exts; + + /** + * Source specific info + */ + union { + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesr_cover; + /** + * This source's index in the cover's list + */ + u32 fesr_sibling; + } rr; + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesa_cover; + /** + * This source's index in the cover's list + */ + u32 fesa_sibling; + } adj; + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesi_cover; + /** + * This source's index in the cover's list + */ + u32 fesi_sibling; + } interface; + struct { + /** + * This MPLS local label associated with the prefix. + */ + mpls_label_t fesm_label; + + /** + * the indicies of the LFIB entries created + */ + fib_node_index_t fesm_lfes[2]; + } mpls; + struct { + /** + * The source FIB index. + */ + fib_node_index_t fesl_fib_index; + } lisp; + }; +} fib_entry_src_t; + +/** + * An entry in a FIB table. + * + * This entry represents a route added to the FIB that is stored + * in one of the FIB tables. + */ +typedef struct fib_entry_t_ { + /** + * Base class. The entry's node representation in the graph. + */ + fib_node_t fe_node; + /** + * The prefix of the route + */ + fib_prefix_t fe_prefix; + /** + * The index of the FIB table this entry is in + */ + u32 fe_fib_index; + /** + * The load-balance used for forwarding. + * + * We don't share the EOS and non-EOS even in case when they could be + * because: + * - complexity & reliability v. memory + * determining the conditions where sharing is possible is non-trivial. 
+ * - separate LBs means we can get the EOS bit right in the MPLS label DPO + * and so save a few clock cycles in the DP imposition node since we can + * paint the header straight on without the need to check the packet + * type to derive the EOS bit value. + */ + dpo_id_t fe_lb[FIB_FORW_CHAIN_NUM]; + /** + * Vector of source infos. + * Most entries will only have 1 source. So we optimise for memory usage, + * which is preferable since we have many entries. + */ + fib_entry_src_t *fe_srcs; + /** + * the path-list for which this entry is a child. This is also the path-list + * that is contributing forwarding for this entry. + */ + fib_node_index_t fe_parent; + /** + * index of this entry in the parent's child list. + * This is set when this entry is added as a child, but can also + * be changed by the parent as it manages its list. + */ + u32 fe_sibling; + /** + * Dependency list of covered entries. + * these are more specific entries that are interested in changes + * to their respective cover + */ + fib_node_list_t fe_covered; + /** + * exporter + */ + fib_node_index_t fe_export; + fib_node_index_t fe_import; +} fib_entry_t; + +#define FOR_EACH_FIB_ENTRY_FLAG(_item) \ + for (_item = FIB_ENTRY_FLAG_FIRST; _item < FIB_ENTRY_FLAG_MAX; _item++) + +#define FIB_ENTRY_FORMAT_BRIEF (0x0) +#define FIB_ENTRY_FORMAT_DETAIL (0x1) +#define FIB_ENTRY_FORMAT_DETAIL2 (0x2) + +extern u8 *format_fib_entry (u8 * s, va_list * args); + +extern fib_node_index_t fib_entry_create_special(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); + +extern fib_node_index_t fib_entry_create (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths); +extern void fib_entry_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths); + +extern void fib_entry_path_add(fib_node_index_t fib_entry_index, + 
fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath); +extern void fib_entry_special_add(fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); +extern fib_entry_src_flag_t fib_entry_special_remove(fib_node_index_t fib_entry_index, + fib_source_t source); + +extern fib_entry_src_flag_t fib_entry_path_remove(fib_node_index_t fib_entry_index, + fib_source_t source, + const fib_route_path_t *rpath); +extern fib_entry_src_flag_t fib_entry_delete(fib_node_index_t fib_entry_index, + fib_source_t source); + +extern void fib_entry_contribute_forwarding( + fib_node_index_t fib_entry_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo); +extern const dpo_id_t * fib_entry_contribute_ip_forwarding( + fib_node_index_t fib_entry_index); +extern adj_index_t fib_entry_get_adj_for_source( + fib_node_index_t fib_entry_index, + fib_source_t source); +extern const int fib_entry_get_dpo_for_source ( + fib_node_index_t fib_entry_index, + fib_source_t source, + dpo_id_t *dpo); + +extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index); + +extern int fib_entry_cmp_for_sort(void *i1, void *i2); + +extern void fib_entry_cover_changed(fib_node_index_t fib_entry); +extern void fib_entry_cover_updated(fib_node_index_t fib_entry); +extern int fib_entry_recursive_loop_detect(fib_node_index_t entry_index, + fib_node_index_t **entry_indicies); + +extern void fib_entry_lock(fib_node_index_t fib_entry_index); +extern void fib_entry_unlock(fib_node_index_t fib_entry_index); + +extern u32 fib_entry_child_add(fib_node_index_t fib_entry_index, + fib_node_type_t type, + fib_node_index_t child_index); +extern void fib_entry_child_remove(fib_node_index_t fib_entry_index, + u32 sibling_index); +extern u32 fib_entry_get_resolving_interface(fib_node_index_t fib_entry_index); + +extern void fib_entry_get_prefix(fib_node_index_t fib_entry_index, + fib_prefix_t *pfx); +extern u32 
fib_entry_get_fib_index(fib_node_index_t fib_entry_index); +extern void fib_entry_set_source_data(fib_node_index_t fib_entry_index, + fib_source_t source, + const void *data); +extern const void* fib_entry_get_source_data(fib_node_index_t fib_entry_index, + fib_source_t source); + +extern fib_entry_flag_t fib_entry_get_flags(fib_node_index_t fib_entry_index); +extern fib_source_t fib_entry_get_best_source(fib_node_index_t fib_entry_index); +extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index, + fib_source_t source); + +extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index); +extern u32 fib_entry_get_fib_table_id(fib_node_index_t fib_entry_index); + +extern void fib_entry_module_init(void); + +/* + * unsafe... beware the raw pointer. + */ +extern fib_node_index_t fib_entry_get_index(const fib_entry_t * fib_entry); +extern fib_entry_t * fib_entry_get(fib_node_index_t fib_entry_index); + +/* + * for testing purposes. + */ +extern u32 fib_entry_pool_size(void); + +#endif diff --git a/vnet/vnet/fib/fib_entry_cover.c b/vnet/vnet/fib/fib_entry_cover.c new file mode 100644 index 00000000000..06b5b918abc --- /dev/null +++ b/vnet/vnet/fib/fib_entry_cover.c @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_node_list.h> + +u32 +fib_entry_cover_track (fib_entry_t* cover, + fib_node_index_t covered) +{ + FIB_ENTRY_DBG(cover, "cover-track %d", covered); + + ASSERT(fib_entry_get_index(cover) != covered); + + if (FIB_NODE_INDEX_INVALID == cover->fe_covered) + { + cover->fe_covered = fib_node_list_create(); + } + + return (fib_node_list_push_front(cover->fe_covered, + 0, FIB_NODE_TYPE_ENTRY, + covered)); +} + +void +fib_entry_cover_untrack (fib_entry_t* cover, + u32 tracked_index) +{ + FIB_ENTRY_DBG(cover, "cover-untrack @ %d", tracked_index); + + if (FIB_NODE_INDEX_INVALID == cover->fe_covered) + return; + + fib_node_list_remove(cover->fe_covered, tracked_index); + + if (0 == fib_node_list_get_size(cover->fe_covered)) + { + fib_node_list_destroy(&cover->fe_covered); + } +} + +/** + * Internal struct to hold user supplied paraneters for the cover walk + */ +typedef struct fib_enty_cover_walk_ctx_t_ { + fib_entry_t *cover; + fib_entry_covered_walk_t walk; + void *ctx; +} fib_enty_cover_walk_ctx_t; + +static int +fib_entry_cover_walk_node_ptr (fib_node_ptr_t *depend, + void *args) +{ + fib_enty_cover_walk_ctx_t *ctx = args; + + ctx->walk(ctx->cover, depend->fnp_index, ctx->ctx); + + /* continue */ + return (1); +} + +void +fib_entry_cover_walk (fib_entry_t *cover, + fib_entry_covered_walk_t walk, + void *args) +{ + if (FIB_NODE_INDEX_INVALID != cover->fe_covered) + { + fib_enty_cover_walk_ctx_t ctx = { + .cover = cover, + .walk = walk, + .ctx = args, + }; + + fib_node_list_walk(cover->fe_covered, + fib_entry_cover_walk_node_ptr, + &ctx); + } +} + +u32 +fib_entry_cover_get_size (fib_entry_t *cover) +{ + if (FIB_NODE_INDEX_INVALID != cover->fe_covered) + return (fib_node_list_get_size(cover->fe_covered)); + return (0); +} + +typedef struct fib_entry_cover_list_format_ctx_t_ { + u8 *s; +} fib_entry_cover_list_format_ctx_t; + +static int +fib_entry_covered_list_format_one 
(fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_entry_cover_list_format_ctx_t * ctx = args; + + ctx->s = format(ctx->s, "%d, ", covered); + + /* continue */ + return (1); +} + +u8* +fib_entry_cover_list_format (fib_entry_t *fib_entry, + u8 *s) +{ + fib_entry_cover_list_format_ctx_t ctx = { + .s = s, + }; + + fib_entry_cover_walk(fib_entry, + fib_entry_covered_list_format_one, + &ctx); + + return (ctx.s); +} + +static int +fib_entry_cover_change_one (fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_node_index_t new_cover; + + /* + * The 3 entries involved here are: + * cover - the least specific. It will cover both the others + * new_cover - the enty just inserted below the cover + * covered - the entry that was tracking the cover. + * + * The checks below are to determine if new_cover is a cover for covered. + */ + new_cover = pointer_to_uword(args); + + if (FIB_NODE_INDEX_INVALID == new_cover) + { + /* + * nothing has been inserted, which implies the cover was removed. + * 'cover' is thus the new cover. 
+ */ + fib_entry_cover_changed(covered); + } + else if (new_cover != covered) + { + fib_prefix_t pfx_covered, pfx_new_cover; + + fib_entry_get_prefix(covered, &pfx_covered); + fib_entry_get_prefix(new_cover, &pfx_new_cover); + + if (fib_prefix_is_cover(&pfx_new_cover, &pfx_covered)) + { + fib_entry_cover_changed(covered); + } + } + /* continue */ + return (1); +} + +void +fib_entry_cover_change_notify (fib_node_index_t cover_index, + fib_node_index_t covered) +{ + fib_entry_t *cover; + + cover = fib_entry_get(cover_index); + + fib_entry_cover_walk(cover, + fib_entry_cover_change_one, + uword_to_pointer(covered, void*)); +} + +static int +fib_entry_cover_update_one (fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_entry_cover_updated(covered); + + /* continue */ + return (1); +} + +void +fib_entry_cover_update_notify (fib_entry_t *fib_entry) +{ + fib_entry_cover_walk(fib_entry, + fib_entry_cover_update_one, + NULL); +} diff --git a/vnet/vnet/fib/fib_entry_cover.h b/vnet/vnet/fib/fib_entry_cover.h new file mode 100644 index 00000000000..fbbbc211dc9 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_cover.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __FIB_ENTRY_COVER_H__ +#define __FIB_ENTRY_COVER_H__ + +#include "fib_entry.h" + +/** + * callback function used when walking the covered entries + */ +typedef int (*fib_entry_covered_walk_t)(fib_entry_t *cover, + fib_node_index_t covered, + void *ctx); + +extern u32 fib_entry_cover_track(fib_entry_t *cover, + fib_node_index_t covered); + +extern void fib_entry_cover_untrack(fib_entry_t *cover, + u32 tracked_index); + +extern void fib_entry_cover_walk(fib_entry_t *cover, + fib_entry_covered_walk_t walk, + void *ctx); + +extern void fib_entry_cover_change_notify(fib_node_index_t cover_index, + fib_node_index_t covered_index); +extern void fib_entry_cover_update_notify(fib_entry_t *cover); + +extern u32 fib_entry_cover_get_size(fib_entry_t *cover); + +extern u8* fib_entry_cover_list_format(fib_entry_t *fib_entry, + u8 *s); + +#endif diff --git a/vnet/vnet/fib/fib_entry_src.c b/vnet/vnet/fib/fib_entry_src.c new file mode 100644 index 00000000000..f7d84e5ea34 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src.c @@ -0,0 +1,1278 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/drop_dpo.h> + +#include "fib_entry_src.h" +#include "fib_table.h" +#include "fib_path_ext.h" + +/* + * per-source type vft + */ +static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX]; + +static fib_protocol_t +fib_entry_get_proto (const fib_entry_t * fib_entry) +{ + return (fib_entry->fe_prefix.fp_proto); +} + +void +fib_entry_src_register (fib_source_t source, + const fib_entry_src_vft_t *vft) +{ + fib_entry_src_vft[source] = *vft; +} + +static int +fib_entry_src_cmp_for_sort (void * v1, + void * v2) +{ + fib_entry_src_t *esrc1 = v1, *esrc2 = v2; + + return (esrc1->fes_src - esrc2->fes_src); +} + +void +fib_entry_src_action_init (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_entry_src_t esrc = { + .fes_pl = FIB_NODE_INDEX_INVALID, + .fes_flags = FIB_ENTRY_SRC_FLAG_NONE, + .fes_src = source, + }; + + if (NULL != fib_entry_src_vft[source].fesv_init) + { + fib_entry_src_vft[source].fesv_init(&esrc); + } + + vec_add1(fib_entry->fe_srcs, esrc); + vec_sort_with_function(fib_entry->fe_srcs, + fib_entry_src_cmp_for_sort); +} + +static fib_entry_src_t * +fib_entry_src_find (const fib_entry_t *fib_entry, + fib_source_t source, + u32 *index) + +{ + fib_entry_src_t *esrc; + int ii; + + ii = 0; + vec_foreach(esrc, fib_entry->fe_srcs) + { + if (esrc->fes_src == source) + { + if (NULL != index) + { + *index = ii; + } + return (esrc); + } + else + { + ii++; + } + } + + return (NULL); +} + +int +fib_entry_is_sourced (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (NULL != fib_entry_src_find(fib_entry, source, NULL)); +} + +static fib_entry_src_t * +fib_entry_src_find_or_create (fib_entry_t *fib_entry, + fib_source_t source, + u32 *index) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL == 
esrc) + { + fib_entry_src_action_init(fib_entry, source); + } + + return (fib_entry_src_find(fib_entry, source, NULL)); +} + +void +fib_entry_src_action_deinit (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_entry_src_t *esrc; + u32 index = ~0; + + esrc = fib_entry_src_find(fib_entry, source, &index); + + ASSERT(NULL != esrc); + + if (NULL != fib_entry_src_vft[source].fesv_deinit) + { + fib_entry_src_vft[source].fesv_deinit(esrc); + } + + vec_free(esrc->fes_path_exts); + vec_del1(fib_entry->fe_srcs, index); +} + +fib_entry_src_cover_res_t +fib_entry_src_action_cover_change (fib_entry_t *fib_entry, + fib_source_t source) +{ + if (NULL != fib_entry_src_vft[source].fesv_cover_change) + { + return (fib_entry_src_vft[source].fesv_cover_change( + fib_entry_src_find(fib_entry, source, NULL), + fib_entry)); + } + + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + return (res); +} + +fib_entry_src_cover_res_t +fib_entry_src_action_cover_update (fib_entry_t *fib_entry, + fib_source_t source) +{ + if (NULL != fib_entry_src_vft[source].fesv_cover_update) + { + return (fib_entry_src_vft[source].fesv_cover_update( + fib_entry_src_find(fib_entry, source, NULL), + fib_entry)); + } + + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + return (res); +} + +typedef struct fib_entry_src_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const fib_entry_t *fib_entry; + const fib_entry_src_t *esrc; + fib_forward_chain_type_t fct; + int is_recursive; +} fib_entry_src_collect_forwarding_ctx_t; + +/** + * @brief Determine whether this FIB entry should use a load-balance MAP + * to support PIC edge fast convergence + */ +load_balance_flags_t +fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx) +{ + /** + * We'll use a LB map is the path-list has recursive paths. + * recursive paths implies BGP, and hence scale. 
+ */ + if (ctx->is_recursive) + { + return (LOAD_BALANCE_FLAG_USES_MAP); + } + return (LOAD_BALANCE_FLAG_NONE); +} + +static int +fib_entry_src_valid_out_label (mpls_label_t label) +{ + return ((MPLS_LABEL_IS_REAL(label) || + MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL == label || + MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL == label || + MPLS_IETF_IMPLICIT_NULL_LABEL == label)); +} + +static int +fib_entry_src_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + fib_entry_src_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + + ctx = arg; + + /* + * if the path is not resolved, don't include it. + */ + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } + + if (fib_path_is_recursive(path_index)) + { + ctx->is_recursive = 1; + } + + /* + * get the matching path-extension for the path being visited. + */ + vec_foreach(path_ext, ctx->esrc->fes_path_exts) + { + if (path_ext->fpe_path_index == path_index) + break; + } + + if (NULL != path_ext && + path_ext->fpe_path_index == path_index && + fib_entry_src_valid_out_label(path_ext->fpe_label)) + { + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fct, ctx->next_hops); + } + else + { + load_balance_path_t *nh; + + /* + * no extension => no out-going label for this path. 
that's OK + * in the case of an IP or EOS chain, but not for non-EOS + */ + switch (ctx->fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + /* + * EOS traffic with no label to stack, we need the IP Adj + */ + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo); + + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + if (fib_path_is_exclusive(path_index) || + fib_path_is_deag(path_index)) + { + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &nh->path_dpo); + } + break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + ASSERT(0); + break; + } + } + + return (!0); +} + +void +fib_entry_src_mk_lb (fib_entry_t *fib_entry, + const fib_entry_src_t *esrc, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. + * Otherwise we use the load-balance of the path-list + */ + fib_entry_src_collect_forwarding_ctx_t ctx = { + .esrc = esrc, + .fib_entry = fib_entry, + .next_hops = NULL, + .is_recursive = 0, + .fct = fct, + }; + + lb_proto = fib_proto_to_dpo(fib_entry_get_proto(fib_entry)); + + fib_path_list_walk(esrc->fes_pl, + fib_entry_src_collect_forwarding, + &ctx); + + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + /* + * the client provided the DPO that the entry should link to. + * all entries must link to a LB, so if it is an LB already + * then we can use it. 
+ */ + if ((1 == vec_len(ctx.next_hops)) && + (DPO_LOAD_BALANCE == ctx.next_hops[0].path_dpo.dpoi_type)) + { + dpo_copy(dpo_lb, &ctx.next_hops[0].path_dpo); + dpo_reset(&ctx.next_hops[0].path_dpo); + return; + } + } + + if (!dpo_id_is_valid(dpo_lb)) + { + /* + * first time create + */ + flow_hash_config_t fhc; + + fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, + dpo_proto_to_fib(lb_proto)); + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } + + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + fib_entry_calc_lb_flags(&ctx)); +} + +void +fib_entry_src_action_install (fib_entry_t *fib_entry, + fib_source_t source) +{ + /* + * Install the forwarding chain for the given source into the forwarding + * tables + */ + fib_forward_chain_type_t fct; + fib_entry_src_t *esrc; + + fct = fib_entry_get_default_chain_type(fib_entry); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + fib_entry_src_mk_lb(fib_entry, esrc, fct, &fib_entry->fe_lb[fct]); + + FIB_ENTRY_DBG(fib_entry, "install: %d", + fib_entry->fe_lb[fct]); + + /* + * insert the adj into the data-plane forwarding trie + */ + fib_table_fwding_dpo_update(fib_entry->fe_fib_index, + &fib_entry->fe_prefix, + &fib_entry->fe_lb[fct]); + + if (FIB_FORW_CHAIN_TYPE_UNICAST_IP4 == fct || + FIB_FORW_CHAIN_TYPE_UNICAST_IP6 == fct) + { + for (fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + fct <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; + fct++) + { + /* + * if any of the other chain types are already created they will need + * updating too + */ + if (dpo_id_is_valid(&fib_entry->fe_lb[fct])) + { + fib_entry_src_mk_lb(fib_entry, + esrc, + fct, + &fib_entry->fe_lb[fct]); + } + } + } +} + +void +fib_entry_src_action_uninstall (fib_entry_t *fib_entry) +{ + fib_forward_chain_type_t fct; + + fct = fib_entry_get_default_chain_type(fib_entry); + /* + * uninstall the forwarding chain for the given source from the + * forwarding tables + */ + FIB_ENTRY_DBG(fib_entry, "uninstall: %d", + 
fib_entry->fe_adj_index); + + if (dpo_id_is_valid(&fib_entry->fe_lb[fct])) + { + /* fib_forward_chain_type_t fct; */ + /* fib_path_ext_t *path_ext; */ + + fib_table_fwding_dpo_remove( + fib_entry->fe_fib_index, + &fib_entry->fe_prefix, + &fib_entry->fe_lb[fct]); + + dpo_reset(&fib_entry->fe_lb[fct]); + } +} + +static void +fib_entry_recursive_loop_detect_i (fib_node_index_t path_list_index) +{ + fib_node_index_t *entries = NULL; + + fib_path_list_recursive_loop_detect(path_list_index, &entries); + + vec_free(entries); +} + +void +fib_entry_src_action_activate (fib_entry_t *fib_entry, + fib_source_t source) + +{ + int houston_we_are_go_for_install; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(!(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE)); + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED); + + esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ACTIVE; + + if (NULL != fib_entry_src_vft[source].fesv_activate) + { + houston_we_are_go_for_install = + fib_entry_src_vft[source].fesv_activate(esrc, fib_entry); + } + else + { + /* + * the source is not providing an activate function, we'll assume + * therefore it has no objection to installing the entry + */ + houston_we_are_go_for_install = !0; + } + + /* + * link to the path-list provided by the source, and go check + * if that forms any loops in the graph. 
+ */ + fib_entry->fe_parent = esrc->fes_pl; + fib_entry->fe_sibling = + fib_path_list_child_add(fib_entry->fe_parent, + FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry)); + + fib_entry_recursive_loop_detect_i(fib_entry->fe_parent); + + FIB_ENTRY_DBG(fib_entry, "activate: %d", + fib_entry->fe_parent); + + if (0 != houston_we_are_go_for_install) + { + fib_entry_src_action_install(fib_entry, source); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } +} + +void +fib_entry_src_action_deactivate (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_node_index_t path_list_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE); + + if (NULL != fib_entry_src_vft[source].fesv_deactivate) + { + fib_entry_src_vft[source].fesv_deactivate(esrc, fib_entry); + } + + esrc->fes_flags &= ~FIB_ENTRY_SRC_FLAG_ACTIVE; + + FIB_ENTRY_DBG(fib_entry, "deactivate: %d", fib_entry->fe_parent); + + /* + * un-link from an old path-list. Check for any loops this will clear + */ + path_list_index = fib_entry->fe_parent; + fib_entry->fe_parent = FIB_NODE_INDEX_INVALID; + + fib_entry_recursive_loop_detect_i(path_list_index); + + /* + * this will unlock the path-list, so it may be invalid thereafter. 
+ */ + fib_path_list_child_remove(path_list_index, fib_entry->fe_sibling); + fib_entry->fe_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_action_fwd_update (const fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_entry_src_t *esrc; + + vec_foreach(esrc, fib_entry->fe_srcs) + { + if (NULL != fib_entry_src_vft[esrc->fes_src].fesv_fwd_update) + { + fib_entry_src_vft[esrc->fes_src].fesv_fwd_update(esrc, + fib_entry, + source); + } + } +} + +void +fib_entry_src_action_reactivate (fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_node_index_t path_list_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE); + + FIB_ENTRY_DBG(fib_entry, "reactivate: %d to %d", + fib_entry->fe_parent, + esrc->fes_pl); + + if (fib_entry->fe_parent != esrc->fes_pl) + { + /* + * un-link from an old path-list. Check for any loops this will clear + */ + path_list_index = fib_entry->fe_parent; + fib_entry->fe_parent = FIB_NODE_INDEX_INVALID; + + /* + * temporary lock so it doesn't get deleted when this entry is no + * longer a child. + */ + fib_path_list_lock(path_list_index); + + /* + * this entry is no longer a child. after unlinking check if any loops + * were broken + */ + fib_path_list_child_remove(path_list_index, + fib_entry->fe_sibling); + + fib_entry_recursive_loop_detect_i(path_list_index); + + /* + * link to the path-list provided by the source, and go check + * if that forms any loops in the graph. 
+ */ + fib_entry->fe_parent = esrc->fes_pl; + fib_entry->fe_sibling = + fib_path_list_child_add(fib_entry->fe_parent, + FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry)); + + fib_entry_recursive_loop_detect_i(fib_entry->fe_parent); + fib_path_list_unlock(path_list_index); + } + fib_entry_src_action_install(fib_entry, source); + fib_entry_src_action_fwd_update(fib_entry, source); +} + +void +fib_entry_src_action_installed (const fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != fib_entry_src_vft[source].fesv_installed) + { + fib_entry_src_vft[source].fesv_installed(esrc, + fib_entry); + } + + fib_entry_src_action_fwd_update(fib_entry, source); +} + +/* + * fib_entry_src_action_add + * + * Adding a source can result in a new fib_entry being created, which + * can inturn mean the pool is realloc'd and thus the entry passed as + * an argument it also realloc'd + * @return the original entry + */ +fib_entry_t * +fib_entry_src_action_add (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find_or_create(fib_entry, source, NULL); + + esrc->fes_ref_count++; + + if (1 != esrc->fes_ref_count) + { + /* + * we only want to add the source on the 0->1 transition + */ + return (fib_entry); + } + + esrc->fes_entry_flags = flags; + + /* + * save variable so we can recover from a fib_entry realloc. 
+ */ + fib_entry_index = fib_entry_get_index(fib_entry); + + if (NULL != fib_entry_src_vft[source].fesv_add) + { + fib_entry_src_vft[source].fesv_add(esrc, + fib_entry, + flags, + fib_entry_get_proto(fib_entry), + dpo); + } + + fib_entry = fib_entry_get(fib_entry_index); + + esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED; + + fib_path_list_lock(esrc->fes_pl); + + /* + * the source owns a lock on the entry + */ + fib_entry_lock(fib_entry_get_index(fib_entry)); + + return (fib_entry); +} + +fib_entry_src_flag_t +fib_entry_src_action_remove (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_node_index_t old_path_list; + fib_entry_src_flag_t sflags; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL == esrc) + return (FIB_ENTRY_SRC_FLAG_ACTIVE); + + esrc->fes_ref_count--; + sflags = esrc->fes_flags; + + if (0 != esrc->fes_ref_count) + { + /* + * only remove the source on the 1->0 transisition + */ + return (sflags); + } + + if (esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE) + { + fib_entry_src_action_deactivate(fib_entry, source); + } + + old_path_list = esrc->fes_pl; + + if (NULL != fib_entry_src_vft[source].fesv_remove) + { + fib_entry_src_vft[source].fesv_remove(esrc); + } + + fib_path_list_unlock(old_path_list); + fib_entry_unlock(fib_entry_get_index(fib_entry)); + + sflags &= ~FIB_ENTRY_SRC_FLAG_ADDED; + fib_entry_src_action_deinit(fib_entry, source); + + return (sflags); +} + +static inline int +fib_route_recurses_via_self (const fib_prefix_t *prefix, + const fib_route_path_t *rpath) +{ + /* + * not all zeros next hop && + * is recursive path && + * nexthop is same as the route's address + */ + return ((!ip46_address_is_zero(&rpath->frp_addr)) && + (~0 == rpath->frp_sw_if_index) && + (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr))); + +} + +/* + * fib_route_attached_cross_table + * + * Return true the the route is attached via an interface that + * is not in the same table as the route + */ +static inline 
int +fib_route_attached_cross_table (const fib_entry_t *fib_entry, + const fib_route_path_t *rpath) +{ + /* + * - All zeros next-hop + * - a valid interface + * - entry's fib index not equeal to interface's index + */ + if (ip46_address_is_zero(&rpath->frp_addr) && + (~0 != rpath->frp_sw_if_index) && + (fib_entry->fe_fib_index != + fib_table_get_index_for_sw_if_index(fib_entry_get_proto(fib_entry), + rpath->frp_sw_if_index))) + { + return (!0); + } + return (0); +} + +/* + * fib_route_attached_cross_table + * + * Return true the the route is attached via an interface that + * is not in the same table as the route + */ +static inline int +fib_path_is_attached (const fib_route_path_t *rpath) +{ + /* + * - All zeros next-hop + * - a valid interface + */ + if (ip46_address_is_zero(&rpath->frp_addr) && + (~0 != rpath->frp_sw_if_index)) + { + return (!0); + } + return (0); +} + +fib_path_list_flags_t +fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags) +{ + fib_path_list_flags_t plf = FIB_PATH_LIST_FLAG_NONE; + + if (eflags & FIB_ENTRY_FLAG_DROP) + { + plf |= FIB_PATH_LIST_FLAG_DROP; + } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + plf |= FIB_PATH_LIST_FLAG_LOCAL; + } + if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE; + } + + return (plf); +} + +static void +fib_entry_flags_update (const fib_entry_t *fib_entry, + const fib_route_path_t *rpath, + fib_path_list_flags_t *pl_flags, + fib_entry_src_t *esrc) +{ + /* + * don't allow the addition of a recursive looped path for prefix + * via itself. + */ + if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath)) + { + /* + * force the install of a drop path-list. + * we want the entry to have some path-list, mainly so + * the dodgy path can be rmeoved when the source stops playing + * silly buggers. 
+ */ + *pl_flags |= FIB_PATH_LIST_FLAG_DROP; + } + else + { + *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP; + } + + if ((esrc->fes_src == FIB_SOURCE_API) || + (esrc->fes_src == FIB_SOURCE_CLI)) + { + if (fib_path_is_attached(rpath)) + { + esrc->fes_entry_flags |= FIB_ENTRY_FLAG_ATTACHED; + } + else + { + esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_ATTACHED; + } + } + if (fib_route_attached_cross_table(fib_entry, rpath)) + { + esrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT; + } + else + { + esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT; + } +} + +/* + * fib_entry_src_path_ext_add + * + * append a path extension to the entry's list + */ +static void +fib_entry_src_path_ext_append (fib_entry_src_t *esrc, + const fib_route_path_t *rpath) +{ + if (MPLS_LABEL_INVALID != rpath->frp_label) + { + fib_path_ext_t *path_ext; + + vec_add2(esrc->fes_path_exts, path_ext, 1); + + fib_path_ext_init(path_ext, esrc->fes_pl, rpath); + } +} + +/* + * fib_entry_src_path_ext_insert + * + * insert, sorted, a path extension to the entry's list. + * It's not strictly necessary in sort the path extensions, since each + * extension has the path index to which it resolves. However, by being + * sorted the load-balance produced has a deterministic order, not an order + * based on the sequence of extension additions. this is a considerable benefit. 
+ */ +static void +fib_entry_src_path_ext_insert (fib_entry_src_t *esrc, + const fib_route_path_t *rpath) +{ + if (0 == vec_len(esrc->fes_path_exts)) + return (fib_entry_src_path_ext_append(esrc, rpath)); + + if (MPLS_LABEL_INVALID != rpath->frp_label) + { + fib_path_ext_t path_ext; + int i = 0; + + fib_path_ext_init(&path_ext, esrc->fes_pl, rpath); + + while (i < vec_len(esrc->fes_path_exts) && + (fib_path_ext_cmp(&esrc->fes_path_exts[i], rpath) < 0)) + { + i++; + } + + vec_insert_elts(esrc->fes_path_exts, &path_ext, 1, i); + } +} + +/* + * fib_entry_src_action_add + * + * Adding a source can result in a new fib_entry being created, which + * can inturn mean the pool is realloc'd and thus the entry passed as + * an argument it also realloc'd + * @return the entry + */ +fib_entry_t* +fib_entry_src_action_path_add (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath) +{ + fib_node_index_t old_path_list, fib_entry_index; + fib_path_list_flags_t pl_flags; + fib_path_ext_t *path_ext; + fib_entry_src_t *esrc; + + /* + * save variable so we can recover from a fib_entry realloc. + */ + fib_entry_index = fib_entry_get_index(fib_entry); + + esrc = fib_entry_src_find(fib_entry, source, NULL); + if (NULL == esrc) + { + fib_entry = + fib_entry_src_action_add(fib_entry, + source, + flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + esrc = fib_entry_src_find(fib_entry, source, NULL); + } + + /* + * we are no doubt modifying a path-list. If the path-list + * is shared, and hence not modifiable, then the index returned + * will be for a different path-list. This FIB entry to needs + * to maintain its lock appropriately. 
+ */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_add); + + pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry)); + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + + fib_entry_src_vft[source].fesv_path_add(esrc, fib_entry, pl_flags, rpath); + fib_entry = fib_entry_get(fib_entry_index); + + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + fib_path_ext_resolve(path_ext, esrc->fes_pl); + } + /* + * if the path has a label we need to add a path extension + */ + fib_entry_src_path_ext_insert(esrc, rpath); + + fib_path_list_lock(esrc->fes_pl); + fib_path_list_unlock(old_path_list); + + return (fib_entry); +} + +/* + * fib_entry_src_action_swap + * + * The source is providing new paths to replace the old ones. + * Adding a source can result in a new fib_entry being created, which + * can inturn mean the pool is realloc'd and thus the entry passed as + * an argument it also realloc'd + * @return the entry + */ +fib_entry_t* +fib_entry_src_action_path_swap (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t old_path_list, fib_entry_index; + fib_path_list_flags_t pl_flags; + const fib_route_path_t *rpath; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + /* + * save variable so we can recover from a fib_entry realloc. + */ + fib_entry_index = fib_entry_get_index(fib_entry); + + if (NULL == esrc) + { + fib_entry = fib_entry_src_action_add(fib_entry, + source, + flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + esrc = fib_entry_src_find(fib_entry, source, NULL); + } + + /* + * swapping paths may create a new path-list (or may use an existing shared) + * but we are certainly getting a different one. This FIB entry to needs + * to maintain its lock appropriately. 
+ */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_swap); + + pl_flags = fib_entry_src_flags_2_path_list_flags( + fib_entry_get_flags_i(fib_entry)); + vec_foreach(rpath, rpaths) + { + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + } + + fib_entry_src_vft[source].fesv_path_swap(esrc, + fib_entry, + pl_flags, + rpaths); + + vec_free(esrc->fes_path_exts); + vec_foreach(rpath, rpaths) + { + fib_entry_src_path_ext_append(esrc, rpath); + } + + fib_entry = fib_entry_get(fib_entry_index); + + fib_path_list_lock(esrc->fes_pl); + fib_path_list_unlock(old_path_list); + + return (fib_entry); +} + +fib_entry_src_flag_t +fib_entry_src_action_path_remove (fib_entry_t *fib_entry, + fib_source_t source, + const fib_route_path_t *rpath) +{ + fib_path_list_flags_t pl_flags; + fib_node_index_t old_path_list; + fib_path_ext_t *path_ext; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(NULL != esrc); + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED); + + /* + * we no doubt modifying a path-list. If the path-list + * is shared, and hence not modifiable, then the index returned + * will be for a different path-list. This FIB entry to needs + * to maintain its lock appropriately. + */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_remove); + + pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry)); + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + + fib_entry_src_vft[source].fesv_path_remove(esrc, pl_flags, rpath); + /* + * find the matching path extension and remove it + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + if (!fib_path_ext_cmp(path_ext, rpath)) + { + /* + * delete the element moving the remaining elements down 1 position. + * this preserves the sorted order. 
+ */ + vec_delete(esrc->fes_path_exts, 1, (path_ext - esrc->fes_path_exts)); + break; + } + } + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + fib_path_ext_resolve(path_ext, esrc->fes_pl); + } + + /* + * lock the new path-list, unlock the old if it had one + */ + fib_path_list_unlock(old_path_list); + + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) { + fib_path_list_lock(esrc->fes_pl); + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + /* + * no more paths left from this source + */ + fib_entry_src_action_remove(fib_entry, source); + return (FIB_ENTRY_SRC_FLAG_NONE); + } +} + +u8* +fib_entry_src_format (fib_entry_t *fib_entry, + fib_source_t source, + u8* s) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != fib_entry_src_vft[source].fesv_format) + { + return (fib_entry_src_vft[source].fesv_format(esrc, s)); + } + return (s); +} + +adj_index_t +fib_entry_get_adj_for_source (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return (ADJ_INDEX_INVALID); + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) + { + return (fib_path_list_get_adj( + esrc->fes_pl, + fib_entry_get_default_chain_type(fib_entry))); + } + } + return (ADJ_INDEX_INVALID); +} + +const int +fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index, + fib_source_t source, + dpo_id_t *dpo) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return (0); + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) + { + fib_path_list_contribute_forwarding( + esrc->fes_pl, + 
fib_entry_get_default_chain_type(fib_entry), + dpo); + + return (dpo_id_is_valid(dpo)); + } + } + return (0); +} + +fib_entry_flag_t +fib_entry_get_flags_i (const fib_entry_t *fib_entry) +{ + fib_entry_flag_t flags; + + /* + * the vector of sources is deliberately arranged in priority order + */ + if (0 == vec_len(fib_entry->fe_srcs)) + { + flags = FIB_ENTRY_FLAG_NONE; + } + else + { + fib_entry_src_t *esrc; + + esrc = vec_elt_at_index(fib_entry->fe_srcs, 0); + flags = esrc->fes_entry_flags; + } + + return (flags); +} + +void +fib_entry_set_source_data (fib_node_index_t fib_entry_index, + fib_source_t source, + const void *data) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc && + NULL != fib_entry_src_vft[source].fesv_set_data) + { + fib_entry_src_vft[source].fesv_set_data(esrc, fib_entry, data); + } +} + +const void* +fib_entry_get_source_data (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc && + NULL != fib_entry_src_vft[source].fesv_get_data) + { + return (fib_entry_src_vft[source].fesv_get_data(esrc, fib_entry)); + } + return (NULL); +} + +void +fib_entry_src_module_init (void) +{ + fib_entry_src_rr_register(); + fib_entry_src_interface_register(); + fib_entry_src_default_route_register(); + fib_entry_src_special_register(); + fib_entry_src_api_register(); + fib_entry_src_adj_register(); + fib_entry_src_mpls_register(); + fib_entry_src_lisp_register(); +} diff --git a/vnet/vnet/fib/fib_entry_src.h b/vnet/vnet/fib/fib_entry_src.h new file mode 100644 index 00000000000..d70aabc9c00 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src.h @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_ENTRY_SRC_H__ +#define __FIB_ENTRY_SRC_H__ + +#include "fib_entry.h" +#include "fib_path_list.h" +#include "fib_internal.h" + +/** + * Debug macro + */ +#ifdef FIB_DEBUG +#define FIB_ENTRY_DBG(_e, _fmt, _args...) \ +{ \ + u8*__tmp = NULL; \ + __tmp = format(__tmp, "e:[%d:%U", \ + fib_entry_get_index(_e), \ + format_ip46_address, \ + &_e->fe_prefix.fp_addr, \ + IP46_TYPE_ANY); \ + __tmp = format(__tmp, "/%d]:", \ + _e->fe_prefix.fp_len); \ + __tmp = format(__tmp, _fmt, ##_args); \ + clib_warning("%s", __tmp); \ + vec_free(__tmp); \ +} +#else +#define FIB_ENTRY_DBG(_e, _fmt, _args...) +#endif + +/** + * Source initialisation Function + */ +typedef void (*fib_entry_src_init_t)(fib_entry_src_t *src); + +/** + * Source deinitialisation Function + */ +typedef void (*fib_entry_src_deinit_t)(fib_entry_src_t *src); + +/** + * Source activation. Called when the source is the new best source on the entry. + * Return non-zero if the entry can now install, 0 otherwise + */ +typedef int (*fib_entry_src_activate_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Source Add. 
+ * Called when the source is added to the entry + */ +typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo); + +/** + * Source Remove. + */ +typedef void (*fib_entry_src_remove_t)(fib_entry_src_t *src); + +/** + * Result from a cover update/change + */ +typedef struct fib_entry_src_cover_res_t_ { + u16 install; + fib_node_bw_reason_flag_t bw_reason; +} fib_entry_src_cover_res_t; + +/** + * Cover changed. the source should re-evaluate its cover. + */ +typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_change_t)( + fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Cover updated. The cover the source has, has updated (i.e. its forwarding) + * the source may need to re-evaluate. + */ +typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_update_t)( + fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Forwarding updated. Notification that the forwarding information for the + * entry has been updated. This notification is sent to all sources, not just + * the active best. + */ +typedef void (*fib_entry_src_fwd_update_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_source_t best_source); + +/** + * Installed. Notification that the source is now installed as + * the entry's forwarding source. + */ +typedef void (*fib_entry_src_installed_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * format. 
+ */ +typedef u8* (*fib_entry_src_format_t)(fib_entry_src_t *src, + u8* s); + +/** + * Source path add + * the source is adding a new path + */ +typedef void (*fib_entry_src_path_add_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Source path remove + * the source is remoinvg a path + */ +typedef void (*fib_entry_src_path_remove_t)(fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Source path replace/swap + * the source is providing a new set of paths + */ +typedef void (*fib_entry_src_path_swap_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Set source specific opaque data + */ +typedef void (*fib_entry_src_set_data_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + const void *data); + +/** + * Get source specific opaque data + */ +typedef const void* (*fib_entry_src_get_data_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Virtual function table each FIB entry source will register + */ +typedef struct fib_entry_src_vft_t_ { + fib_entry_src_init_t fesv_init; + fib_entry_src_deinit_t fesv_deinit; + fib_entry_src_activate_t fesv_activate; + fib_entry_src_deactivate_t fesv_deactivate; + fib_entry_src_add_t fesv_add; + fib_entry_src_remove_t fesv_remove; + fib_entry_src_path_swap_t fesv_path_swap; + fib_entry_src_path_add_t fesv_path_add; + fib_entry_src_path_remove_t fesv_path_remove; + fib_entry_src_cover_change_t fesv_cover_change; + fib_entry_src_cover_update_t fesv_cover_update; + fib_entry_src_format_t fesv_format; + fib_entry_src_installed_t fesv_installed; + fib_entry_src_fwd_update_t fesv_fwd_update; + fib_entry_src_get_data_t fesv_get_data; + fib_entry_src_set_data_t fesv_set_data; +} fib_entry_src_vft_t; + +#define FOR_EACH_SRC_ADDED(_entry, _src, _source, action) \ +{ \ + vec_foreach(_src, _entry->fe_srcs) \ 
+ { \ + if (_src->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED) { \ + _source = _src->fes_src; \ + do { \ + action; \ + } while(0); \ + } \ + } \ +} + +extern u8* fib_entry_src_format(fib_entry_t *entry, + fib_source_t source, + u8* s); + +extern void fib_entry_src_register(fib_source_t source, + const fib_entry_src_vft_t *vft); + +extern void fib_entry_src_action_init(fib_entry_t *entry, + fib_source_t source); + +extern void fib_entry_src_action_deinit(fib_entry_t *fib_entry, + fib_source_t source); + +extern fib_entry_src_cover_res_t fib_entry_src_action_cover_change( + fib_entry_t *entry, + fib_source_t source); + +extern fib_entry_src_cover_res_t fib_entry_src_action_cover_update( + fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_activate(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_deactivate(fib_entry_t *fib_entry, + fib_source_t source); +extern void fib_entry_src_action_reactivate(fib_entry_t *fib_entry, + fib_source_t source); + +extern fib_entry_t* fib_entry_src_action_add(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); + +extern fib_entry_src_flag_t fib_entry_src_action_remove(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_install(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_uninstall(fib_entry_t *fib_entry); + +extern fib_entry_t* fib_entry_src_action_path_add(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *path); + +extern fib_entry_t* fib_entry_src_action_path_swap(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *path); + +extern fib_entry_src_flag_t fib_entry_src_action_path_remove(fib_entry_t *fib_entry, + fib_source_t source, + const fib_route_path_t *path); + +extern void fib_entry_src_action_installed(const fib_entry_t *fib_entry, + fib_source_t source); + 
+extern fib_forward_chain_type_t fib_entry_get_default_chain_type( + const fib_entry_t *fib_entry); +extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry); +extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags( + fib_entry_flag_t eflags); + +extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry, + const fib_entry_src_t *esrc, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb); + + +/* + * Per-source registration. declared here so we save a separate .h file for each + */ +extern void fib_entry_src_default_register(void); +extern void fib_entry_src_rr_register(void); +extern void fib_entry_src_interface_register(void); +extern void fib_entry_src_default_route_register(void); +extern void fib_entry_src_special_register(void); +extern void fib_entry_src_api_register(void); +extern void fib_entry_src_adj_register(void); +extern void fib_entry_src_mpls_register(void); +extern void fib_entry_src_lisp_register(void); + +extern void fib_entry_src_module_init(void); + +#endif diff --git a/vnet/vnet/fib/fib_entry_src_adj.c b/vnet/vnet/fib/fib_entry_src_adj.c new file mode 100644 index 00000000000..64f82a73e07 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_adj.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" +#include "fib_table.h" +#include "fib_entry_cover.h" +#include "fib_attached_export.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_adj_init (fib_entry_src_t *src) +{ + src->adj.fesa_cover = FIB_NODE_INDEX_INVALID; + src->adj.fesa_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_adj_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create(pl_flags, paths); +} + +static void +fib_entry_src_adj_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + + +/* + * Source activate. + * Called when the source is teh new longer best source on the entry + */ +static int +fib_entry_src_adj_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * find the covering prefix. become a dependent thereof. + * there should always be a cover, though it may be the default route. + */ + src->adj.fesa_cover = fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix); + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + ASSERT(fib_entry_get_index(fib_entry) != src->adj.fesa_cover); + + cover = fib_entry_get(src->adj.fesa_cover); + + ASSERT(cover != fib_entry); + + src->adj.fesa_sibling = + fib_entry_cover_track(cover, + fib_entry_get_index(fib_entry)); + + /* + * if the ocver is attached then this adj source entry can install, + * via the adj. otherwise install a drop. + * This prevents ARP/ND entries that on interface X that do not belong + * on X's subnet from being added to the FIB. To do so would allow + * nefarious gratuitous ARP requests from attracting traffic to the sender. + * + * and yes, I really do mean attached and not connected. + * this abomination; + * ip route add 10.0.0.0/24 Eth0 + * is attached. and we want adj-fibs to install on Eth0. 
+ */ + return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)); +} + +/* + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_adj_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * remove the depednecy on the covering entry + */ + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + cover = fib_entry_get(src->adj.fesa_cover); + + fib_entry_cover_untrack(cover, src->adj.fesa_sibling); + + /* + * tell the cover this entry no longer needs exporting + */ + fib_attached_export_covered_removed(cover, fib_entry_get_index(fib_entry)); + + src->adj.fesa_cover = FIB_NODE_INDEX_INVALID; +} + +static u8* +fib_entry_src_adj_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->adj.fesa_cover)); +} + +static void +fib_entry_src_adj_installed (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * The adj source now rules! poke our cover to get exported + */ + fib_entry_t *cover; + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + cover = fib_entry_get(src->adj.fesa_cover); + + fib_attached_export_covered_added(cover, + fib_entry_get_index(fib_entry)); +} + +static fib_entry_src_cover_res_t +fib_entry_src_adj_cover_change (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + + fib_entry_src_adj_deactivate(src, fib_entry); + + res.install = fib_entry_src_adj_activate(src, fib_entry); + + if (res.install) { + /* + * ADJ fib can install + */ + res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + } + + return (res); +} + +/* + * fib_entry_src_adj_cover_update + */ +static fib_entry_src_cover_res_t +fib_entry_src_adj_cover_update (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * the cover has updated, i.e. its forwarding or flags + * have changed. 
do'nt decativate/activate here, since this + * prefix is updated during the covers walk. + */ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_entry_t *cover; + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + + cover = fib_entry_get(src->adj.fesa_cover); + + res.install = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)); + + return (res); +} + +const static fib_entry_src_vft_t adj_src_vft = { + .fesv_init = fib_entry_src_adj_init, + .fesv_path_swap = fib_entry_src_adj_path_swap, + .fesv_remove = fib_entry_src_adj_remove, + .fesv_activate = fib_entry_src_adj_activate, + .fesv_deactivate = fib_entry_src_adj_deactivate, + .fesv_format = fib_entry_src_adj_format, + .fesv_installed = fib_entry_src_adj_installed, + .fesv_cover_change = fib_entry_src_adj_cover_change, + .fesv_cover_update = fib_entry_src_adj_cover_update, +}; + +void +fib_entry_src_adj_register (void) +{ + fib_entry_src_register(FIB_SOURCE_ADJ, &adj_src_vft); +} diff --git a/vnet/vnet/fib/fib_entry_src_api.c b/vnet/vnet/fib/fib_entry_src_api.c new file mode 100644 index 00000000000..edc8a47bc17 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_api.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_api_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_api_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_api_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); +} + +static void +fib_entry_src_api_path_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID == src->fes_pl) + { + src->fes_pl = + fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths); + } + else + { + src->fes_pl = + fib_path_list_copy_and_path_add(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_api_path_remove (fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + src->fes_pl = + fib_path_list_copy_and_path_remove(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_api_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + if (FIB_ENTRY_FLAG_NONE != flags) + { + src->fes_pl = fib_path_list_create_special( + proto, + fib_entry_src_flags_2_path_list_flags(flags), + dpo); + } +} + +static void +fib_entry_src_api_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +const static fib_entry_src_vft_t api_src_vft = { + .fesv_init = fib_entry_src_api_init, + .fesv_deinit = fib_entry_src_api_deinit, + .fesv_add = fib_entry_src_api_add, + .fesv_remove = fib_entry_src_api_remove, + .fesv_path_add = 
fib_entry_src_api_path_add, + .fesv_path_swap = fib_entry_src_api_path_swap, + .fesv_path_remove = fib_entry_src_api_path_remove, +}; + +void +fib_entry_src_api_register (void) +{ + fib_entry_src_register(FIB_SOURCE_PLUGIN_HI, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_API, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_CLI, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_DHCP, &api_src_vft); +} diff --git a/vnet/vnet/fib/fib_entry_src_default.c b/vnet/vnet/fib/fib_entry_src_default.c new file mode 100644 index 00000000000..9846cf56e64 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_default.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_default_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_default_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_cover_change (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_default_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_default_path_add (fib_entry_src_t *src, + fib_protocol_t proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight) +{ +} + +static void +fib_entry_src_default_path_remove (fib_entry_src_t *src, + fib_protocol_t proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight) +{ +} + + +/* + * Source activate. + * Called when the source is teh new longer best source on the entry + */ +static void +fib_entry_src_default_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ +} + +/* + * Source Deactivate. 
+ * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_default_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ +} + +static void +fib_entry_src_default_add (fib_entry_src_t *src, + fib_entry_flag_t flags, + fib_protocol_t proto) +{ +} + +static void +fib_entry_src_default_remove (fib_entry_src_t *src) +{ +} + +const static fib_entry_src_vft_t default_src_vft = { + .fesv_init = fib_entry_src_default_init, + .fesv_deinit = fib_entry_src_default_deinit, + .fesv_add = fib_entry_src_default_add, + .fesv_remove = fib_entry_src_default_remove, + .fesv_path_add = fib_entry_src_default_path_add, + .fesv_path_remove = fib_entry_src_default_path_remove, + .fesv_activate = fib_entry_src_default_activate, + .fesv_deactivate = fib_entry_src_default_deactivate, +}; + +void +fib_entry_src_default_register (void) +{ + fib_source_t source; + + FOR_EACH_FIB_SOURCE(source) { + fib_entry_src_register(source, &default_src_vft); + } +} diff --git a/vnet/vnet/fib/fib_entry_src_default_route.c b/vnet/vnet/fib/fib_entry_src_default_route.c new file mode 100644 index 00000000000..8615f72dc46 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_default_route.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_default_route_init (fib_entry_src_t *src) +{ + src->fes_flags = FIB_ENTRY_FLAG_NONE; +} + +static void +fib_entry_src_default_route_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_default_route_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + dpo); +} + +const static fib_entry_src_vft_t interface_src_vft = { + .fesv_init = fib_entry_src_default_route_init, + .fesv_add = fib_entry_src_default_route_add, + .fesv_remove = fib_entry_src_default_route_remove, +}; + +void +fib_entry_src_default_route_register (void) +{ + fib_entry_src_register(FIB_SOURCE_DEFAULT_ROUTE, &interface_src_vft); +} + + diff --git a/vnet/vnet/fib/fib_entry_src_interface.c b/vnet/vnet/fib/fib_entry_src_interface.c new file mode 100644 index 00000000000..2fb61677568 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_interface.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" +#include "fib_internal.h" +#include "fib_table.h" +#include "fib_entry_cover.h" +#include "fib_attached_export.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_interface_init (fib_entry_src_t *src) +{ + src->interface.fesi_cover = FIB_NODE_INDEX_INVALID; + src->interface.fesi_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_interface_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + ip_adjacency_t *adj; + + src->fes_pl = fib_path_list_create(pl_flags, paths); + + /* + * this is a hack to get the entry's prefix into the glean adjacnecy + * so that it is available for fast retreival in the switch path. + */ + if (!(FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)) + { + adj = adj_get(fib_path_list_get_adj( + src->fes_pl, + fib_entry_get_default_chain_type(entry))); + + if (IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index); + { + /* + * the connected prefix will link to a glean on a non-p2p + * interface. + */ + adj->sub_type.glean.receive_addr = entry->fe_prefix.fp_addr; + } + } +} + +/* + * Source activate. + * Called when the source is teh new longer best source on the entry + */ +static int +fib_entry_src_interface_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags) + { + /* + * Track the covering attached/connected cover. 
This is so that + * during an attached export of the cover, this local prefix is + * also exported + */ + src->interface.fesi_cover = + fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix); + + ASSERT(FIB_NODE_INDEX_INVALID != src->interface.fesi_cover); + + cover = fib_entry_get(src->interface.fesi_cover); + + src->interface.fesi_sibling = + fib_entry_cover_track(cover, fib_entry_get_index(fib_entry)); + } + + return (!0); +} + + +/* + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_interface_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * remove the depednecy on the covering entry + */ + if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover) + { + cover = fib_entry_get(src->interface.fesi_cover); + + fib_entry_cover_untrack(cover, src->interface.fesi_sibling); + + src->interface.fesi_cover = FIB_NODE_INDEX_INVALID; + } +} + +static fib_entry_src_cover_res_t +fib_entry_src_interface_cover_change (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + + if (FIB_NODE_INDEX_INVALID == src->interface.fesi_cover) + { + /* + * not tracking the cover. surprised we got poked? + */ + return (res); + } + + /* + * this function is called when this entry's cover has a more specific + * entry inserted benaeth it. That does not necessarily mean that this + * entry is covered by the new prefix. check that + */ + if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix)) + { + fib_entry_src_interface_deactivate(src, fib_entry); + fib_entry_src_interface_activate(src, fib_entry); + } + return (res); +} + +static void +fib_entry_src_interface_installed (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * The interface source now rules! 
poke our cover to get exported + */ + fib_entry_t *cover; + + if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover) + { + cover = fib_entry_get(src->interface.fesi_cover); + + fib_attached_export_covered_added(cover, + fib_entry_get_index(fib_entry)); + } +} + +static u8* +fib_entry_src_interface_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->interface.fesi_cover)); +} + +const static fib_entry_src_vft_t interface_src_vft = { + .fesv_init = fib_entry_src_interface_init, + .fesv_path_swap = fib_entry_src_interface_path_swap, + .fesv_activate = fib_entry_src_interface_activate, + .fesv_deactivate = fib_entry_src_interface_deactivate, + .fesv_format = fib_entry_src_interface_format, + .fesv_installed = fib_entry_src_interface_installed, + .fesv_cover_change = fib_entry_src_interface_cover_change, + /* + * not concerned about updates to the cover. the cover will + * decide to export or not + */ +}; + +void +fib_entry_src_interface_register (void) +{ + fib_entry_src_register(FIB_SOURCE_INTERFACE, &interface_src_vft); +} diff --git a/vnet/vnet/fib/fib_entry_src_lisp.c b/vnet/vnet/fib/fib_entry_src_lisp.c new file mode 100644 index 00000000000..116c492994b --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_lisp.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_lisp_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_lisp_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_lisp_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); +} + +static void +fib_entry_src_lisp_path_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID == src->fes_pl) + { + src->fes_pl = + fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths); + } + else + { + src->fes_pl = + fib_path_list_copy_and_path_add(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_lisp_path_remove (fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + src->fes_pl = + fib_path_list_copy_and_path_remove(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_lisp_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + if (FIB_ENTRY_FLAG_NONE != flags) + { + src->fes_pl = fib_path_list_create_special(proto, flags, dpo); + } +} + +static void +fib_entry_src_lisp_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_lisp_set_data (fib_entry_src_t *src, + const fib_entry_t *entry, + const void *data) +{ + src->lisp.fesl_fib_index = *(u32*)data; +} + +static const void* +fib_entry_src_lisp_get_data (fib_entry_src_t *src, + const fib_entry_t *entry) +{ + return 
(&(src->lisp.fesl_fib_index)); +} + +const static fib_entry_src_vft_t api_src_vft = { + .fesv_init = fib_entry_src_lisp_init, + .fesv_deinit = fib_entry_src_lisp_deinit, + .fesv_add = fib_entry_src_lisp_add, + .fesv_remove = fib_entry_src_lisp_remove, + .fesv_path_add = fib_entry_src_lisp_path_add, + .fesv_path_swap = fib_entry_src_lisp_path_swap, + .fesv_path_remove = fib_entry_src_lisp_path_remove, + .fesv_set_data = fib_entry_src_lisp_set_data, + .fesv_get_data = fib_entry_src_lisp_get_data, +}; + +void +fib_entry_src_lisp_register (void) +{ + fib_entry_src_register(FIB_SOURCE_LISP, &api_src_vft); +} diff --git a/vnet/vnet/fib/fib_entry_src_mpls.c b/vnet/vnet/fib/fib_entry_src_mpls.c new file mode 100644 index 00000000000..5145c10977f --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_mpls.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/mpls/mpls_types.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/mpls_fib.h> + +/** + * Source initialisation Function + */ +static void +fib_entry_src_mpls_init (fib_entry_src_t *src) +{ + mpls_eos_bit_t eos; + + src->fes_flags = FIB_ENTRY_FLAG_NONE; + src->mpls.fesm_label = MPLS_LABEL_INVALID; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + src->mpls.fesm_lfes[eos] = FIB_NODE_INDEX_INVALID; + } +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_mpls_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_mpls_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; + src->mpls.fesm_label = MPLS_LABEL_INVALID; +} + +static void +fib_entry_src_mpls_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = + fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + drop_dpo_get(fib_proto_to_dpo(proto))); +} + +static void +fib_entry_src_mpls_fwd_update (fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_source_t best_source) +{ + dpo_id_t dpo = DPO_NULL; + mpls_eos_bit_t eos; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + fib_entry_contribute_forwarding(fib_entry_get_index(fib_entry), + (eos ? + FIB_FORW_CHAIN_TYPE_MPLS_EOS : + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS), + &dpo); + + fib_table_entry_special_dpo_update(src->mpls.fesm_lfes[eos], + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + } + dpo_reset(&dpo); +} + +static void +fib_entry_src_mpls_set_data (fib_entry_src_t *src, + const fib_entry_t *entry, + const void *data) +{ + dpo_proto_t payload_proto; + fib_node_index_t fei; + mpls_label_t label; + mpls_eos_bit_t eos; + + /* + * post MPLS table alloc and the possible rea-alloc of fib entrys + * the entry pointer will no longer be valid. 
so save its index + */ + payload_proto = entry->fe_prefix.fp_proto; + fei = fib_entry_get_index(entry); + label = *(mpls_label_t*)data; + + if (MPLS_LABEL_INVALID == label) + { + /* + * removing the local label + */ + FOR_EACH_MPLS_EOS_BIT(eos) + { + fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], + FIB_SOURCE_SPECIAL); + } + fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS); + src->mpls.fesm_label = label; + } + else + { + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = label, + }; + fib_node_index_t fib_index; + dpo_id_t dpo = DPO_NULL; + + /* + * adding a new local label. make sure the MPLS fib exists. + */ + if (MPLS_LABEL_INVALID == src->mpls.fesm_label) + { + fib_index = + fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + } + else + { + fib_index = mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID); + } + + src->mpls.fesm_label = label; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_eos = eos; + prefix.fp_payload_proto = fib_proto_to_dpo(payload_proto); + + fib_entry_contribute_forwarding(fei, + (eos ? 
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS : + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS), + &dpo); + src->mpls.fesm_lfes[eos] = + fib_table_entry_special_dpo_add(fib_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); + } + } +} + +static const void * +fib_entry_src_mpls_get_data (fib_entry_src_t *src, + const fib_entry_t *entry) +{ + return (&(src->mpls.fesm_label)); +} + +static u8* +fib_entry_src_mpls_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "MPLS local-label:%d", src->mpls.fesm_label)); +} + +const static fib_entry_src_vft_t mpls_src_vft = { + .fesv_init = fib_entry_src_mpls_init, + .fesv_deinit = fib_entry_src_mpls_deinit, + .fesv_add = fib_entry_src_mpls_add, + .fesv_remove = fib_entry_src_mpls_remove, + .fesv_format = fib_entry_src_mpls_format, + .fesv_fwd_update = fib_entry_src_mpls_fwd_update, + .fesv_set_data = fib_entry_src_mpls_set_data, + .fesv_get_data = fib_entry_src_mpls_get_data, +}; + +void +fib_entry_src_mpls_register (void) +{ + fib_entry_src_register(FIB_SOURCE_MPLS, &mpls_src_vft); +} + + diff --git a/vnet/vnet/fib/fib_entry_src_rr.c b/vnet/vnet/fib/fib_entry_src_rr.c new file mode 100644 index 00000000000..f6b89603165 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_rr.c @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/ip/lookup.h> +#include <vnet/adj/adj.h> + +#include "fib_entry_src.h" +#include "fib_entry_cover.h" +#include "fib_entry.h" +#include "fib_table.h" + +/* + * fib_entry_src_rr_resolve_via_connected + * + * Resolve via a connected cover. + */ +static void +fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src, + const fib_entry_t *fib_entry, + const fib_entry_t *cover) +{ + const fib_route_path_t path = { + .frp_proto = fib_entry->fe_prefix.fp_proto, + .frp_addr = fib_entry->fe_prefix.fp_addr, + .frp_sw_if_index = fib_entry_get_resolving_interface( + fib_entry_get_index(cover)), + .frp_fib_index = ~0, + .frp_weight = 1, + }; + fib_route_path_t *paths = NULL; + vec_add1(paths, path); + + /* + * since the cover is connected, the address this entry corresponds + * to is a peer (ARP-able for) on the interface to which the cover is + * connected. The fact we resolve via the cover, just means this RR + * source is the first SRC to use said peer. The ARP source will be along + * shortly to over-rule this RR source. + */ + src->fes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NONE, paths); + src->fes_entry_flags = fib_entry_get_flags(fib_entry_get_index(cover)); + + vec_free(paths); +} + +/** + * Source initialisation Function + */ +static void +fib_entry_src_rr_init (fib_entry_src_t *src) +{ + src->rr.fesr_cover = FIB_NODE_INDEX_INVALID; + src->rr.fesr_sibling = FIB_NODE_INDEX_INVALID; +} + +/* + * Source activation. Called when the source is the new best source on the entry + */ +static int +fib_entry_src_rr_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * find the covering prefix. become a dependent thereof. + * there should always be a cover, though it may be the default route. 
 + */
+ src->rr.fesr_cover = fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix);
+
+ ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover);
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+
+ src->rr.fesr_sibling =
+ fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib). Otherwise inherit the forwarding from the cover
+ */
+ if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover))
+ {
+ fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover);
+ }
+ else
+ {
+ src->fes_pl = cover->fe_parent;
+ }
+ fib_path_list_lock(src->fes_pl);
+
+ /*
+ * return go for install
+ */
+ return (!0);
+}
+
+/**
+ * Source Deactivate.
+ * Called when the source is no longer best source on the entry
+ */
+static void
+fib_entry_src_rr_deactivate (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_t *cover;
+
+ /*
+ * remove the dependency on the covering entry
+ */
+ ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover);
+ cover = fib_entry_get(src->rr.fesr_cover);
+
+ fib_entry_cover_untrack(cover, src->rr.fesr_sibling);
+
+ src->rr.fesr_cover = FIB_NODE_INDEX_INVALID;
+
+ fib_path_list_unlock(src->fes_pl);
+ src->fes_pl = FIB_NODE_INDEX_INVALID;
+ src->fes_entry_flags = FIB_ENTRY_FLAG_NONE;
+}
+
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_change (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ /*
+ * this function is called when this entry's cover has a more specific
+ * entry inserted beneath it. That does not necessarily mean that this
+ * entry is covered by the new prefix.
check that
+ */
+ if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index,
+ &fib_entry->fe_prefix))
+ {
+ fib_entry_src_rr_deactivate(src, fib_entry);
+ fib_entry_src_rr_activate(src, fib_entry);
+
+ /*
+ * dependent children need to re-resolve to the new forwarding info
+ */
+ res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+ }
+ return (res);
+}
+
+/*
+ * fib_entry_src_rr_cover_update
+ *
+ * This entry's cover has updated its forwarding info. This entry
+ * will need to re-inherit.
+ */
+static fib_entry_src_cover_res_t
+fib_entry_src_rr_cover_update (fib_entry_src_t *src,
+ const fib_entry_t *fib_entry)
+{
+ fib_entry_src_cover_res_t res = {
+ .install = !0,
+ .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE,
+ };
+ fib_node_index_t old_path_list;
+ fib_entry_t *cover;
+
+ if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover)
+ {
+ /*
+ * the source may be added, but it is not active
+ * if it is not tracking the cover.
+ */
+ return (res);
+ }
+
+ cover = fib_entry_get(src->rr.fesr_cover);
+ old_path_list = src->fes_pl;
+
+ /*
+ * if the cover is attached then install an attached-host path
+ * (like an adj-fib).
Otherwise inherit the forwarding from the cover + */ + if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)) + { + fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover); + } + else + { + src->fes_pl = cover->fe_parent; + } + fib_path_list_lock(src->fes_pl); + fib_path_list_unlock(old_path_list); + + /* + * dependent children need to re-resolve to the new forwarding info + */ + res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + + return (res); +} + +static u8* +fib_entry_src_rr_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->rr.fesr_cover)); +} + +const static fib_entry_src_vft_t rr_src_vft = { + .fesv_init = fib_entry_src_rr_init, + .fesv_activate = fib_entry_src_rr_activate, + .fesv_deactivate = fib_entry_src_rr_deactivate, + .fesv_cover_change = fib_entry_src_rr_cover_change, + .fesv_cover_update = fib_entry_src_rr_cover_update, + .fesv_format = fib_entry_src_rr_format, +}; + +void +fib_entry_src_rr_register (void) +{ + fib_entry_src_register(FIB_SOURCE_RR, &rr_src_vft); +} diff --git a/vnet/vnet/fib/fib_entry_src_special.c b/vnet/vnet/fib/fib_entry_src_special.c new file mode 100644 index 00000000000..f73e280f1c8 --- /dev/null +++ b/vnet/vnet/fib/fib_entry_src_special.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_special_init (fib_entry_src_t *src) +{ + src->fes_flags = FIB_ENTRY_FLAG_NONE; +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_special_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_special_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_special_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = + fib_path_list_create_special(proto, + fib_entry_src_flags_2_path_list_flags(flags), + dpo); +} + +const static fib_entry_src_vft_t special_src_vft = { + .fesv_init = fib_entry_src_special_init, + .fesv_deinit = fib_entry_src_special_deinit, + .fesv_add = fib_entry_src_special_add, + .fesv_remove = fib_entry_src_special_remove, +}; + +void +fib_entry_src_special_register (void) +{ + fib_entry_src_register(FIB_SOURCE_SPECIAL, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_MAP, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_SR, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft); +} diff --git a/vnet/vnet/fib/fib_internal.h b/vnet/vnet/fib/fib_internal.h new file mode 100644 index 00000000000..26b349eee5e --- /dev/null +++ b/vnet/vnet/fib/fib_internal.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_INTERNAL_H__ +#define __FIB_INTERNAL_H__ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/dpo.h> + +/** + * Big train switch; FIB debugs on or off + */ +#undef FIB_DEBUG + +extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr, + fib_prefix_t *prf); + +extern int fib_route_path_cmp(const fib_route_path_t *rpath1, + const fib_route_path_t *rpath2); + +/** + * @brief + * Add or update an entry in the FIB's forwarding table. + * This is called from the fib_entry code. It is not meant to be used + * by the client/source. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add/update + * + * @param dpo + * The data-path object to use for forwarding + */ +extern void fib_table_fwding_dpo_update(u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo); +/** + * @brief + * remove an entry in the FIB's forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add/update + * + * @param dpo + * The data-path object to use for forwarding + */ +extern void fib_table_fwding_dpo_remove(u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo); + + +#endif diff --git a/vnet/vnet/fib/fib_node.c b/vnet/vnet/fib/fib_node.c new file mode 100644 index 00000000000..8ac67d2ef92 --- /dev/null +++ b/vnet/vnet/fib/fib_node.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_node.h> +#include <vnet/fib/fib_node_list.h> + +/* + * The per-type vector of virtual function tables + */ +static fib_node_vft_t *fn_vfts; + +/** + * The last registered new type + */ +static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST; + +/* + * the node type names + */ +static const char *fn_type_names[] = FIB_NODE_TYPES; + +const char* +fib_node_type_get_name (fib_node_type_t type) +{ + if (type < FIB_NODE_TYPE_LAST) + return (fn_type_names[type]); + else + { + if (NULL != fn_vfts[type].fnv_format) + { + return ("fixme"); + } + else + { + return ("unknown"); + } + } +} + +/** + * fib_node_register_type + * + * Register the function table for a given type + */ +void +fib_node_register_type (fib_node_type_t type, + const fib_node_vft_t *vft) +{ + /* + * assert that one only registration is made per-node type + */ + if (vec_len(fn_vfts) > type) + ASSERT(NULL == fn_vfts[type].fnv_get); + + /* + * Assert that we are getting each of the required functions + */ + ASSERT(NULL != vft->fnv_get); + ASSERT(NULL != vft->fnv_last_lock); + + vec_validate(fn_vfts, type); + fn_vfts[type] = *vft; +} + +fib_node_type_t +fib_node_register_new_type (const fib_node_vft_t *vft) +{ + fib_node_type_t new_type; + + new_type = ++last_new_type; + + fib_node_register_type(new_type, vft); + + return (new_type); +} + +static u8* +fib_node_format (fib_node_ptr_t *fnp, u8*s) +{ + return (format(s, 
"{%s:%d}", fn_type_names[fnp->fnp_type], fnp->fnp_index)); +} + +u32 +fib_node_child_add (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_t *parent; + + parent = fn_vfts[parent_type].fnv_get(parent_index); + + /* + * return the index of the sibling in the child list + */ + fib_node_lock(parent); + + if (FIB_NODE_INDEX_INVALID == parent->fn_children) + { + parent->fn_children = fib_node_list_create(); + } + + return (fib_node_list_push_front(parent->fn_children, + 0, type, + index)); +} + +void +fib_node_child_remove (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_index_t sibling_index) +{ + fib_node_t *parent; + + parent = fn_vfts[parent_type].fnv_get(parent_index); + + fib_node_list_remove(parent->fn_children, sibling_index); + + if (0 == fib_node_list_get_size(parent->fn_children)) + { + fib_node_list_destroy(&parent->fn_children); + } + + fib_node_unlock(parent); +} + + +fib_node_back_walk_rc_t +fib_node_back_walk_one (fib_node_ptr_t *ptr, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_t *node; + + node = fn_vfts[ptr->fnp_type].fnv_get(ptr->fnp_index); + + return (fn_vfts[ptr->fnp_type].fnv_back_walk(node, ctx)); +} + +static int +fib_node_ptr_format_one_child (fib_node_ptr_t *ptr, + void *arg) +{ + u8 **s = (u8**) arg; + + *s = fib_node_format(ptr, *s); + + return (1); +} + +u8* +fib_node_children_format (fib_node_list_t list, + u8 *s) +{ + fib_node_list_walk(list, fib_node_ptr_format_one_child, (void*)&s); + + return (s); +} + +void +fib_node_init (fib_node_t *node, + fib_node_type_t type) +{ +#if CLIB_DEBUG > 0 + /** + * The node's type. 
make sure we are dynamic/down casting correctly + */ + node->fn_type = type; +#endif + node->fn_locks = 0; + node->fn_vft = &fn_vfts[type]; + node->fn_children = FIB_NODE_INDEX_INVALID; +} + +void +fib_node_deinit (fib_node_t *node) +{ + fib_node_list_destroy(&node->fn_children); +} + +void +fib_node_lock (fib_node_t *node) +{ + node->fn_locks++; +} + +void +fib_node_unlock (fib_node_t *node) +{ + node->fn_locks--; + + if (0 == node->fn_locks) + { + node->fn_vft->fnv_last_lock(node); + } +} diff --git a/vnet/vnet/fib/fib_node.h b/vnet/vnet/fib/fib_node.h new file mode 100644 index 00000000000..a05b6f1b61a --- /dev/null +++ b/vnet/vnet/fib/fib_node.h @@ -0,0 +1,317 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_NODE_H__ +#define __FIB_NODE_H__ + +#include <vnet/fib/fib_types.h> + +/** + * The types of nodes in a FIB graph + */ +typedef enum fib_node_type_t_ { + /** + * Marker. New types after this one. + */ + FIB_NODE_TYPE_FIRST = 0, + /** + * See the respective fib_*.h files for descriptions of these objects. + */ + FIB_NODE_TYPE_WALK, + FIB_NODE_TYPE_ENTRY, + FIB_NODE_TYPE_PATH_LIST, + FIB_NODE_TYPE_PATH, + FIB_NODE_TYPE_ADJ, + FIB_NODE_TYPE_MPLS_ENTRY, + FIB_NODE_TYPE_LISP_GPE_TUNNEL, + FIB_NODE_TYPE_LISP_ADJ, + FIB_NODE_TYPE_MPLS_GRE_TUNNEL, + FIB_NODE_TYPE_GRE_TUNNEL, + /** + * Marker. New types before this one. leave the test last. 
+ */ + FIB_NODE_TYPE_TEST, + FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST, +} fib_node_type_t; + +#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1) + +#define FIB_NODE_TYPES { \ + [FIB_NODE_TYPE_ENTRY] = "entry", \ + [FIB_NODE_TYPE_WALK] = "walk", \ + [FIB_NODE_TYPE_PATH_LIST] = "path-list", \ + [FIB_NODE_TYPE_PATH] = "path", \ + [FIB_NODE_TYPE_MPLS_ENTRY] = "mpls-entry", \ + [FIB_NODE_TYPE_ADJ] = "adj", \ + [FIB_NODE_TYPE_LISP_GPE_TUNNEL] = "lisp-gpe-tunnel", \ + [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \ + [FIB_NODE_TYPE_MPLS_GRE_TUNNEL] = "mpls-gre-tunnel", \ + [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \ +} + +/** + * Reasons for backwalking the FIB object graph + */ +typedef enum fib_node_back_walk_reason_t_ { + /** + * Marker. Add new ones after. + */ + FIB_NODE_BW_REASON_FIRST = 0, + /** + * Walk to re-resolve the child. + * Used when the parent is no longer a valid resolution target + */ + FIB_NODE_BW_REASON_RESOLVE = FIB_NODE_BW_REASON_FIRST, + /** + * Walk to re-evaluate the forwarding contributed by the parent. + * Used when a parent's forwarding changes and the child needs to + * incorporate this change in its forwarding. + */ + FIB_NODE_BW_REASON_EVALUATE, + /** + * A resolving interface has come up + */ + FIB_NODE_BW_REASON_INTERFACE_UP, + /** + * A resolving interface has gone down + */ + FIB_NODE_BW_REASON_INTERFACE_DOWN, + /** + * A resolving interface has been deleted. + */ + FIB_NODE_BW_REASON_INTERFACE_DELETE, + /** + * Walk to re-collapse the multipath adjs when the rewrite of + * a unipath adjacency changes + */ + FIB_NODE_BW_REASON_ADJ_UPDATE, + /** + * Marker. 
Add new before and update + */ + FIB_NODE_BW_REASON_LAST = FIB_NODE_BW_REASON_EVALUATE, +} fib_node_back_walk_reason_t; + +#define FIB_NODE_BW_REASONS { \ + [FIB_NODE_BW_REASON_RESOLVE] = "resolve" \ + [FIB_NODE_BW_REASON_EVALUATE] = "evaluate" \ + [FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up" \ + [FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down" \ + [FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete" \ + [FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update" \ +} + +/** + * Flags enum constructed from the reaons + */ +typedef enum fib_node_bw_reason_flag_t_ { + FIB_NODE_BW_REASON_FLAG_NONE = 0, + FIB_NODE_BW_REASON_FLAG_RESOLVE = (1 << FIB_NODE_BW_REASON_RESOLVE), + FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE), + FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP), + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN), + FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE), + FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE), +} __attribute__ ((packed)) fib_node_bw_reason_flag_t; + +_Static_assert(sizeof(fib_node_bw_reason_flag_t) < 2, + "BW Reason enum < 2 byte. Consequences for cover_upd_res_t"); + +/** + * Forward eclarations + */ +struct fib_node_t_; + +/** + * A representation of one pointer to another node. + * To fully qualify a node, one must know its type and its index so it + * can be retrieved from the appropriate pool. Direct pointers to nodes + * are forbidden, since all nodes are allocated from pools, which are vectors, + * and thus subject to realloc at any time. + */ +typedef struct fib_node_ptr_t_ { + /** + * node type + */ + fib_node_type_t fnp_type; + /** + * node's index + */ + fib_node_index_t fnp_index; +} fib_node_ptr_t; + +/** + * @brief A list of FIB nodes. + */ +typedef u32 fib_node_list_t; + +/** + * Context passed between object during a back walk. 
+ */ +typedef struct fib_node_back_walk_ctx_t_ { + /** + * The reason/trigger for the backwalk + */ + fib_node_bw_reason_flag_t fnbw_reason; + + /** + * the number of levels the walk has already traversed. + * this value is maintained by the walk infra, tp limit the depth of + * a walk so it does not run indefinately the presence of a loop/cycle + * in the graph. + */ + u32 fnbw_depth; +} fib_node_back_walk_ctx_t; + +/** + * We consider a depth of 32 to be sufficient to cover all sane + * network topologies. Anything more is then an indication that + * there is a loop/cycle in the FIB graph. + * Note that all object types contribute to 1 to the depth. + */ +#define FIB_NODE_GRAPH_MAX_DEPTH ((u32)32) + +/** + * A callback function for walking a node dependency list + */ +typedef int (*fib_node_ptr_walk_t)(fib_node_ptr_t *depend, + void *ctx); + +/** + * A list of dependent nodes. + * This is currently implemented as a hash_table of fib_node_ptr_t + */ +typedef fib_node_ptr_t fib_node_ptr_list_t; + +/** + * Return code from a back walk function + */ +typedef enum fib_node_back_walk_rc_t_ { + FIB_NODE_BACK_WALK_MERGE, + FIB_NODE_BACK_WALK_CONTINUE, +} fib_node_back_walk_rc_t; + +/** + * Function definition to backwalk a FIB node + */ +typedef fib_node_back_walk_rc_t (*fib_node_back_walk_t)( + struct fib_node_t_ *node, + fib_node_back_walk_ctx_t *ctx); + +/** + * Function definition to get a FIB node from its index + */ +typedef struct fib_node_t_* (*fib_node_get_t)(fib_node_index_t index); + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +typedef void (*fib_node_last_lock_gone_t)(struct fib_node_t_ *node); + +/** + * A FIB graph nodes virtual function table + */ +typedef struct fib_node_vft_t_ { + fib_node_get_t fnv_get; + fib_node_last_lock_gone_t fnv_last_lock; + fib_node_back_walk_t fnv_back_walk; + format_function_t *fnv_format; +} fib_node_vft_t; + +/** + * An node in the FIB graph + * + * Objects in the FIB form a graph. 
+ */ +typedef struct fib_node_t_ { +#if CLIB_DEBUG > 0 + /** + * The node's type. make sure we are dynamic/down casting correctly + */ + fib_node_type_t fn_type; +#endif + /** + * The node's VFT. + * we could store the type here instead, and lookup the VFT using that. But + * I like this better, + */ + const fib_node_vft_t *fn_vft; + + /** + * Vector of nodes that depend upon/use/share this node + */ + fib_node_list_t fn_children; + + /** + * Number of dependents on this node. This number includes the number + * of children + */ + u32 fn_locks; +} fib_node_t; + +/** + * @brief + * Register the function table for a given type + * + * @param ft + * FIB node type + * + * @param vft + * virtual function table + */ +extern void fib_node_register_type (fib_node_type_t ft, + const fib_node_vft_t *vft); + +/** + * @brief + * Create a new FIB node type and Register the function table for it. + * + * @param vft + * virtual function table + * + * @return new FIB node type + */ +extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft); + +extern void fib_node_init(fib_node_t *node, + fib_node_type_t ft); +extern void fib_node_deinit(fib_node_t *node); + +extern void fib_node_lock(fib_node_t *node); +extern void fib_node_unlock(fib_node_t *node); + +extern u32 fib_node_child_add(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_type_t child_type, + fib_node_index_t child_index); +extern void fib_node_child_remove(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_index_t sibling_index); + +extern fib_node_back_walk_rc_t fib_node_back_walk_one(fib_node_ptr_t *ptr, + fib_node_back_walk_ctx_t *ctx); + +extern u8* fib_node_children_format(fib_node_list_t list, + u8 *s); + +extern const char* fib_node_type_get_name(fib_node_type_t type); + +static inline int +fib_node_index_is_valid (fib_node_index_t ni) +{ + return (FIB_NODE_INDEX_INVALID != ni); +} + +#endif + diff --git a/vnet/vnet/fib/fib_node_list.c 
b/vnet/vnet/fib/fib_node_list.c new file mode 100644 index 00000000000..1d2e75ecec2 --- /dev/null +++ b/vnet/vnet/fib/fib_node_list.c @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief a hetrogeneous w.r.t. FIB node type, of FIB nodes. + * Since we cannot use C pointers, due to memeory reallocs, the next/prev + * are described as key:{type,index}. + */ + +#include <vnet/fib/fib_node_list.h> + +/** + * @brief An element in the list + */ +typedef struct fib_node_list_elt_t_ +{ + /** + * An opaque indentifier set by the FIB node owning this element + * that will allow the owner to identify which element it is. 
+ */ + int fnle_owner_id; + + /** + * The index of the list this element is in + */ + fib_node_list_t fnle_list; + + /** + * The owner of this element + */ + fib_node_ptr_t fnle_owner; + + /** + * The next element in the list + */ + u32 fnle_next; + + /** + * The previous element in the list + */ + u32 fnle_prev; +} fib_node_list_elt_t; + +/** + * @brief A list of FIB nodes + */ +typedef struct fib_node_list_head_t_ +{ + /** + * The head element + */ + u32 fnlh_head; + + /** + * Number of elements in the list + */ + u32 fnlh_n_elts; +} fib_node_list_head_t; + +/** + * Pools of list elements and heads + */ +static fib_node_list_elt_t *fib_node_list_elt_pool; +static fib_node_list_head_t *fib_node_list_head_pool; + +static index_t +fib_node_list_elt_get_index (fib_node_list_elt_t *elt) +{ + return (elt - fib_node_list_elt_pool); +} + +static fib_node_list_elt_t * +fib_node_list_elt_get (index_t fi) +{ + return (pool_elt_at_index(fib_node_list_elt_pool, fi)); +} + +static index_t +fib_node_list_head_get_index (fib_node_list_head_t *head) +{ + return (head - fib_node_list_head_pool); +} +static fib_node_list_head_t * +fib_node_list_head_get (fib_node_list_t fi) +{ + return (pool_elt_at_index(fib_node_list_head_pool, fi)); +} + +static fib_node_list_elt_t * +fib_node_list_elt_create (fib_node_list_head_t *head, + int id, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_list_elt_t *elt; + + pool_get(fib_node_list_elt_pool, elt); + + elt->fnle_list = fib_node_list_head_get_index(head); + elt->fnle_owner_id = id; + elt->fnle_owner.fnp_type = type; + elt->fnle_owner.fnp_index = index; + + elt->fnle_next = FIB_NODE_INDEX_INVALID; + elt->fnle_prev = FIB_NODE_INDEX_INVALID; + + return (elt); +} + +static void +fib_node_list_head_init (fib_node_list_head_t *head) +{ + head->fnlh_n_elts = 0; + head->fnlh_head = FIB_NODE_INDEX_INVALID; +} + +/** + * @brief Create a new node list. 
The expectation is that these are few in number + * so straight from the memory subsystem + */ +fib_node_list_t +fib_node_list_create (void) +{ + fib_node_list_head_t *head; + + pool_get(fib_node_list_head_pool, head); + + fib_node_list_head_init(head); + + return (fib_node_list_head_get_index(head)); +} + +void +fib_node_list_destroy (fib_node_list_t *list) +{ + fib_node_list_head_t *head; + + if (FIB_NODE_INDEX_INVALID == *list) + return; + + head = fib_node_list_head_get(*list); + ASSERT(0 == head->fnlh_n_elts); + + pool_put(fib_node_list_head_pool, head); + *list = FIB_NODE_INDEX_INVALID; +} + + +/** + * @brief Insert an element at the from of the list. + */ +u32 +fib_node_list_push_front (fib_node_list_t list, + int owner_id, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_list_elt_t *elt, *next; + fib_node_list_head_t *head; + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_create(head, owner_id, type, index); + + elt->fnle_prev = FIB_NODE_INDEX_INVALID; + elt->fnle_next = head->fnlh_head; + + if (FIB_NODE_INDEX_INVALID != head->fnlh_head) + { + next = fib_node_list_elt_get(head->fnlh_head); + next->fnle_prev = fib_node_list_elt_get_index(elt); + } + head->fnlh_head = fib_node_list_elt_get_index(elt); + + head->fnlh_n_elts++; + + return (fib_node_list_elt_get_index(elt)); +} + +u32 +fib_node_list_push_back (fib_node_list_t list, + int owner_id, + fib_node_type_t type, + fib_node_index_t index) +{ + ASSERT(0); + return (FIB_NODE_INDEX_INVALID); +} + +static void +fib_node_list_extract (fib_node_list_head_t *head, + fib_node_list_elt_t *elt) +{ + fib_node_list_elt_t *next, *prev; + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + next = fib_node_list_elt_get(elt->fnle_next); + next->fnle_prev = elt->fnle_prev; + } + + if (FIB_NODE_INDEX_INVALID != elt->fnle_prev) + { + prev = fib_node_list_elt_get(elt->fnle_prev); + prev->fnle_next = elt->fnle_next; + } + else + { + ASSERT (fib_node_list_elt_get_index(elt) == 
head->fnlh_head); + head->fnlh_head = elt->fnle_next; + } +} + +static void +fib_node_list_insert_after (fib_node_list_head_t *head, + fib_node_list_elt_t *prev, + fib_node_list_elt_t *elt) +{ + fib_node_list_elt_t *next; + + elt->fnle_next = prev->fnle_next; + if (FIB_NODE_INDEX_INVALID != prev->fnle_next) + { + next = fib_node_list_elt_get(prev->fnle_next); + next->fnle_prev = fib_node_list_elt_get_index(elt); + } + prev->fnle_next = fib_node_list_elt_get_index(elt); + elt->fnle_prev = fib_node_list_elt_get_index(prev); +} + +void +fib_node_list_remove (fib_node_list_t list, + u32 sibling) +{ + fib_node_list_head_t *head; + fib_node_list_elt_t *elt; + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_get(sibling); + + fib_node_list_extract(head, elt); + + head->fnlh_n_elts--; + pool_put(fib_node_list_elt_pool, elt); +} + +void +fib_node_list_elt_remove (u32 sibling) +{ + fib_node_list_elt_t *elt; + + elt = fib_node_list_elt_get(sibling); + + fib_node_list_remove(elt->fnle_list, sibling); +} + +/** + * @brief Advance the sibling one step (toward the tail) in the list. + * return 0 if at the end of the list, 1 otherwise. 
+ */ +int +fib_node_list_advance (u32 sibling) +{ + fib_node_list_elt_t *elt, *next; + fib_node_list_head_t *head; + + elt = fib_node_list_elt_get(sibling); + head = fib_node_list_head_get(elt->fnle_list); + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + /* + * not at the end of the list + */ + next = fib_node_list_elt_get(elt->fnle_next); + + fib_node_list_extract(head, elt); + fib_node_list_insert_after(head, next, elt); + + return (1); + } + else + { + return (0); + } +} + +int +fib_node_list_elt_get_next (u32 sibling, + fib_node_ptr_t *ptr) +{ + fib_node_list_elt_t *elt, *next; + + elt = fib_node_list_elt_get(sibling); + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + next = fib_node_list_elt_get(elt->fnle_next); + + *ptr = next->fnle_owner; + return (1); + } + else + { + ptr->fnp_index = FIB_NODE_INDEX_INVALID; + return (0); + } +} + +u32 +fib_node_list_get_size (fib_node_list_t list) +{ + fib_node_list_head_t *head; + + if (FIB_NODE_INDEX_INVALID == list) + { + return (0); + } + + head = fib_node_list_head_get(list); + + return (head->fnlh_n_elts); +} + +int +fib_node_list_get_front (fib_node_list_t list, + fib_node_ptr_t *ptr) +{ + fib_node_list_head_t *head; + fib_node_list_elt_t *elt; + + + if (0 == fib_node_list_get_size(list)) + { + ptr->fnp_index = FIB_NODE_INDEX_INVALID; + return (0); + } + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_get(head->fnlh_head); + + *ptr = elt->fnle_owner; + + return (1); +} + +/** + * @brief Walk the list of node. This must be safe w.r.t. the removal + * of nodes during the walk. 
+ */ +void +fib_node_list_walk (fib_node_list_t list, + fib_node_list_walk_cb_t fn, + void *args) +{ + fib_node_list_elt_t *elt; + fib_node_list_head_t *head; + u32 sibling; + + if (FIB_NODE_INDEX_INVALID == list) + { + return; + } + + head = fib_node_list_head_get(list); + sibling = head->fnlh_head; + + while (FIB_NODE_INDEX_INVALID != sibling) + { + elt = fib_node_list_elt_get(sibling); + sibling = elt->fnle_next; + + fn(&elt->fnle_owner, args); + } +} diff --git a/vnet/vnet/fib/fib_node_list.h b/vnet/vnet/fib/fib_node_list.h new file mode 100644 index 00000000000..afee3c6152c --- /dev/null +++ b/vnet/vnet/fib/fib_node_list.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief a hetrogeneous w.r.t. FIB node type, list of FIB nodes. + * Since we cannot use C pointers, due to memeory reallocs, the next/prev + * are described as an index to an element. Each element contains a pointer + * (key:{type, index}) to a FIB node. 
+ */ + +#ifndef __FIB_NODE_LIST_H__ +#define __FIB_NODE_LIST_H__ + +#include <vnet/fib/fib_node.h> + +extern fib_node_list_t fib_node_list_create(void); +extern void fib_node_list_destroy(fib_node_list_t *list); + +extern u32 fib_node_list_push_front(fib_node_list_t head, + int owner_id, + fib_node_type_t type, + fib_node_index_t index); +extern u32 fib_node_list_push_back(fib_node_list_t head, + int owner_id, + fib_node_type_t type, + fib_node_index_t index); +extern void fib_node_list_remove(fib_node_list_t head, + u32 sibling); +extern void fib_node_list_elt_remove(u32 sibling); + +extern int fib_node_list_advance(u32 sibling); + +extern int fib_node_list_get_front(fib_node_list_t head, + fib_node_ptr_t *ptr); + +extern int fib_node_list_elt_get_next(u32 elt, + fib_node_ptr_t *ptr); + +extern u32 fib_node_list_get_size(fib_node_list_t head); + +/** + * @brief Callback function invoked during a list walk + */ +typedef int (*fib_node_list_walk_cb_t)(fib_node_ptr_t *owner, + void *args); + +extern void fib_node_list_walk(fib_node_list_t head, + fib_node_list_walk_cb_t fn, + void *args); +#endif diff --git a/vnet/vnet/fib/fib_path.c b/vnet/vnet/fib/fib_path.c new file mode 100644 index 00000000000..d2e5e319afd --- /dev/null +++ b/vnet/vnet/fib/fib_path.c @@ -0,0 +1,1744 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/ip/format.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/lookup_dpo.h> + +#include <vnet/adj/adj.h> + +#include "fib_path.h" +#include "fib_node.h" +#include "fib_table.h" +#include "fib_entry.h" +#include "fib_path_list.h" +#include "fib_internal.h" + +/** + * Enurmeration of path types + */ +typedef enum fib_path_type_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_PATH_TYPE_FIRST = 0, + /** + * Attached-nexthop. An interface and a nexthop are known. + */ + FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST, + /** + * attached. Only the interface is known. + */ + FIB_PATH_TYPE_ATTACHED, + /** + * recursive. Only the next-hop is known. + */ + FIB_PATH_TYPE_RECURSIVE, + /** + * special. nothing is known. so we drop. + */ + FIB_PATH_TYPE_SPECIAL, + /** + * exclusive. user provided adj. + */ + FIB_PATH_TYPE_EXCLUSIVE, + /** + * deag. Link to a lookup adj in the next table + */ + FIB_PATH_TYPE_DEAG, + /** + * receive. it's for-us. + */ + FIB_PATH_TYPE_RECEIVE, + /** + * Marker. Add new types before this one, then update it. + */ + FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE, +} __attribute__ ((packed)) fib_path_type_t; + +/** + * The maximum number of path_types + */ +#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1) + +#define FIB_PATH_TYPES { \ + [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop", \ + [FIB_PATH_TYPE_ATTACHED] = "attached", \ + [FIB_PATH_TYPE_RECURSIVE] = "recursive", \ + [FIB_PATH_TYPE_SPECIAL] = "special", \ + [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive", \ + [FIB_PATH_TYPE_DEAG] = "deag", \ + [FIB_PATH_TYPE_RECEIVE] = "receive", \ +} + +#define FOR_EACH_FIB_PATH_TYPE(_item) \ + for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++) + +/** + * Enurmeration of path operational (i.e. 
derived) attributes + */ +typedef enum fib_path_oper_attribute_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_PATH_OPER_ATTRIBUTE_FIRST = 0, + /** + * The path forms part of a recursive loop. + */ + FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST, + /** + * The path is resolved + */ + FIB_PATH_OPER_ATTRIBUTE_RESOLVED, + /** + * The path has become a permanent drop. + */ + FIB_PATH_OPER_ATTRIBUTE_DROP, + /** + * Marker. Add new types before this one, then update it. + */ + FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP, +} __attribute__ ((packed)) fib_path_oper_attribute_t; + +/** + * The maximum number of path operational attributes + */ +#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1) + +#define FIB_PATH_OPER_ATTRIBUTES { \ + [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop", \ + [FIB_PATH_OPER_ATTRIBUTE_RESOLVED] = "resolved", \ + [FIB_PATH_OPER_ATTRIBUTE_DROP] = "drop", \ +} + +#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \ + for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \ + _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \ + _item++) + +/** + * Path flags from the attributes + */ +typedef enum fib_path_oper_flags_t_ { + FIB_PATH_OPER_FLAG_NONE = 0, + FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP), + FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP), + FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED), +} __attribute__ ((packed)) fib_path_oper_flags_t; + +/** + * A FIB path + */ +typedef struct fib_path_t_ { + /** + * A path is a node in the FIB graph. + */ + fib_node_t fp_node; + + /** + * The index of the path-list to which this path belongs + */ + u32 fp_pl_index; + + /** + * This marks the start of the memory area used to hash + * the path + */ + STRUCT_MARK(path_hash_start); + + /** + * Configuration Flags + */ + fib_path_cfg_flags_t fp_cfg_flags; + + /** + * The type of the path. 
This is the selector for the union + */ + fib_path_type_t fp_type; + + /** + * The protocol of the next-hop, i.e. the address family of the + * next-hop's address. We can't derive this from the address itself + * since the address can be all zeros + */ + fib_protocol_t fp_nh_proto; + + /** + * UCMP [unnormalised] weigt + */ + u32 fp_weight; + + /** + * per-type union of the data required to resolve the path + */ + union { + struct { + /** + * The next-hop + */ + ip46_address_t fp_nh; + /** + * The interface + */ + u32 fp_interface; + } attached_next_hop; + struct { + /** + * The interface + */ + u32 fp_interface; + } attached; + struct { + /** + * The next-hop + */ + ip46_address_t fp_nh; + /** + * The FIB table index in which to find the next-hop. + * This needs to be fixed. We should lookup the adjacencies in + * a separate table of adjacencies, rather than from the FIB. + * Two reasons I can think of: + * - consider: + * int ip addr Gig0 10.0.0.1/24 + * ip route 10.0.0.2/32 via Gig1 192.168.1.2 + * ip route 1.1.1.1/32 via Gig0 10.0.0.2 + * this is perfectly valid. + * Packets addressed to 10.0.0.2 should be sent via Gig1. + * Packets address to 1.1.1.1 should be sent via Gig0. + * when we perform the adj resolution from the FIB for the path + * "via Gig0 10.0.0.2" the lookup will result in the route via Gig1 + * and so we will pick up the adj via Gig1 - which was not what the + * operator wanted. + * - we can only return link-type IPv4 and so not the link-type MPLS. + * more on this in a later commit. + * + * The table ID should only belong to a recursive path and indicate + * which FIB should be used to resolve the next-hop. 
+ */ + fib_node_index_t fp_tbl_id; + } recursive; + struct { + /** + * The FIN index in which to perfom the next lookup + */ + fib_node_index_t fp_tbl_id; + } deag; + struct { + } special; + struct { + /** + * The user provided 'exclusive' DPO + */ + dpo_id_t fp_ex_dpo; + } exclusive; + struct { + /** + * The interface on which the local address is configured + */ + u32 fp_interface; + /** + * The next-hop + */ + ip46_address_t fp_addr; + } receive; + }; + STRUCT_MARK(path_hash_end); + + /** + * Memebers in this last section represent information that is + * dervied during resolution. It should not be copied to new paths + * nor compared. + */ + + /** + * Operational Flags + */ + fib_path_oper_flags_t fp_oper_flags; + + /** + * the resolving via fib. not part of the union, since it it not part + * of the path's hash. + */ + fib_node_index_t fp_via_fib; + + /** + * The Data-path objects through which this path resolves for IP. + */ + dpo_id_t fp_dpo; + + /** + * the index of this path in the parent's child list. + */ + u32 fp_sibling; +} fib_path_t; + +/* + * Array of strings/names for the path types and attributes + */ +static const char *fib_path_type_names[] = FIB_PATH_TYPES; +static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES; +static const char *fib_path_cfg_attribute_names[] = FIB_PATH_CFG_ATTRIBUTES; + +/* + * The memory pool from which we allocate all the paths + */ +static fib_path_t *fib_path_pool; + +/* + * Debug macro + */ +#ifdef FIB_DEBUG +#define FIB_PATH_DBG(_p, _fmt, _args...) \ +{ \ + u8 *_tmp = NULL; \ + _tmp = fib_path_format(fib_path_get_index(_p), _tmp); \ + clib_warning("path:[%d:%s]:" _fmt, \ + fib_path_get_index(_p), _tmp, \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define FIB_PATH_DBG(_p, _fmt, _args...) 
+#endif + +static fib_path_t * +fib_path_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_path_pool, index)); +} + +static fib_node_index_t +fib_path_get_index (fib_path_t *path) +{ + return (path - fib_path_pool); +} + +static fib_node_t * +fib_path_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_path_get(index)); +} + +static fib_path_t* +fib_path_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_PATH == node->fn_type); +#endif + return ((fib_path_t*)node); +} + +u8 * +format_fib_path (u8 * s, va_list * args) +{ + fib_path_t *path = va_arg (*args, fib_path_t *); + vnet_main_t * vnm = vnet_get_main(); + fib_path_oper_attribute_t oattr; + fib_path_cfg_attribute_t cattr; + + s = format (s, " index:%d ", fib_path_get_index(path)); + s = format (s, "pl-index:%d ", path->fp_pl_index); + s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto); + s = format (s, "weight=%d ", path->fp_weight); + s = format (s, "%s: ", fib_path_type_names[path->fp_type]); + if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) { + s = format(s, " oper-flags:"); + FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) { + if ((1<<oattr) & path->fp_oper_flags) { + s = format (s, "%s,", fib_path_oper_attribute_names[oattr]); + } + } + } + if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) { + s = format(s, " cfg-flags:"); + FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) { + if ((1<<cattr) & path->fp_cfg_flags) { + s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]); + } + } + } + s = format(s, "\n "); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + s = format (s, "%U", format_ip46_address, + &path->attached_next_hop.fp_nh, + IP46_TYPE_ANY); + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP) + { + s = format (s, " if_index:%d", path->attached_next_hop.fp_interface); + } + else + { + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface( + vnm, + path->attached_next_hop.fp_interface)); + if 
(vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + s = format (s, " (p2p)"); + } + } + if (!dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, "\n unresolved"); + } + else + { + s = format(s, "\n %U", + format_dpo_id, + &path->fp_dpo, 13); + } + break; + case FIB_PATH_TYPE_ATTACHED: + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP) + { + s = format (s, " if_index:%d", path->attached_next_hop.fp_interface); + } + else + { + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface( + vnm, + path->attached.fp_interface)); + } + break; + case FIB_PATH_TYPE_RECURSIVE: + s = format (s, "via %U", + format_ip46_address, + &path->recursive.fp_nh, + IP46_TYPE_ANY); + s = format (s, " in fib:%d", path->recursive.fp_tbl_id, path->fp_via_fib); + s = format (s, " via-fib:%d", path->fp_via_fib); + s = format (s, " via-dpo:[%U:%d]", + format_dpo_type, path->fp_dpo.dpoi_type, + path->fp_dpo.dpoi_index); + + break; + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_EXCLUSIVE: + if (dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, "%U", format_dpo_id, + &path->fp_dpo, 2); + } + break; + } + return (s); +} + +u8 * +fib_path_format (fib_node_index_t pi, u8 *s) +{ + fib_path_t *path; + + path = fib_path_get(pi); + ASSERT(NULL != path); + + return (format (s, "%U", format_fib_path, path)); +} + +u8 * +fib_path_adj_format (fib_node_index_t pi, + u32 indent, + u8 *s) +{ + fib_path_t *path; + + path = fib_path_get(pi); + ASSERT(NULL != path); + + if (!dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, " unresolved"); + } + else + { + s = format(s, "%U", format_dpo_id, + &path->fp_dpo, 2); + } + + return (s); +} + +/* + * fib_path_last_lock_gone + * + * We don't share paths, we share path lists, so the [un]lock functions + * are no-ops + */ +static void +fib_path_last_lock_gone (fib_node_t *node) +{ + ASSERT(0); +} + +static const adj_index_t 
+fib_path_attached_next_hop_get_adj (fib_path_t *path, + fib_link_t link) +{ + if (vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + /* + * if the interface is p2p then the adj for the specific + * neighbour on that link will never exist. on p2p links + * the subnet address (the attached route) links to the + * auto-adj (see below), we want that adj here too. + */ + return (adj_nbr_add_or_lock(path->fp_nh_proto, + link, + &zero_addr, + path->attached_next_hop.fp_interface)); + } + else + { + return (adj_nbr_add_or_lock(path->fp_nh_proto, + link, + &path->attached_next_hop.fp_nh, + path->attached_next_hop.fp_interface)); + } +} + +static void +fib_path_attached_next_hop_set (fib_path_t *path) +{ + /* + * resolve directly via the adjacnecy discribed by the + * interface and next-hop + */ + if (!vnet_sw_interface_is_admin_up(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + + dpo_set(&path->fp_dpo, + DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + fib_path_attached_next_hop_get_adj( + path, + fib_proto_to_link(path->fp_nh_proto))); + + /* + * become a child of the adjacency so we receive updates + * when its rewrite changes + */ + path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); +} + +/* + * create of update the paths recursive adj + */ +static void +fib_path_recursive_adj_update (fib_path_t *path, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + dpo_id_t via_dpo = DPO_NULL; + + /* + * get the DPO to resolve through from the via-entry + */ + fib_entry_contribute_forwarding(path->fp_via_fib, + fct, + &via_dpo); + + + /* + * hope for the best - clear if restrictions apply. 
+ */ + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + + /* + * Validate any recursion constraints and over-ride the via + * adj if not met + */ + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST) + { + /* + * the via FIB must be a host route. + * note the via FIB just added will always be a host route + * since it is an RR source added host route. So what we need to + * check is whether the route has other sources. If it does then + * some other source has added it as a host route. If it doesn't + * then it was added only here and inherits forwarding from a cover. + * the cover is not a host route. + * The RR source is the lowest priority source, so we check if it + * is the best. if it is there are no other sources. + */ + if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + + /* + * PIC edge trigger. let the load-balance maps know + */ + load_balance_map_path_state_change(fib_path_get_index(path)); + } + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) + { + /* + * RR source entries inherit the flags from the cover, so + * we can check the via directly + */ + if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib))) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + + /* + * PIC edge trigger. 
let the load-balance maps know + */ + load_balance_map_path_state_change(fib_path_get_index(path)); + } + } + + /* + * update the path's contributed DPO + */ + dpo_copy(dpo, &via_dpo); + + FIB_PATH_DBG(path, "recursive update: %U", + fib_get_lookup_main(path->fp_nh_proto), + &path->fp_dpo, 2); + + dpo_reset(&via_dpo); +} + +/* + * fib_path_is_permanent_drop + * + * Return !0 if the path is configured to permanently drop, + * despite other attributes. + */ +static int +fib_path_is_permanent_drop (fib_path_t *path) +{ + return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) || + (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)); +} + +/* + * fib_path_unresolve + * + * Remove our dependency on the resolution target + */ +static void +fib_path_unresolve (fib_path_t *path) +{ + /* + * the forced drop path does not need unresolving + */ + if (fib_path_is_permanent_drop(path)) + { + return; + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_NODE_INDEX_INVALID != path->fp_via_fib) + { + fib_prefix_t pfx; + + fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx); + fib_entry_child_remove(path->fp_via_fib, + path->fp_sibling); + fib_table_entry_special_remove(path->recursive.fp_tbl_id, + &pfx, + FIB_SOURCE_RR); + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + } + break; + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_ATTACHED: + adj_child_remove(path->fp_dpo.dpoi_index, + path->fp_sibling); + adj_unlock(path->fp_dpo.dpoi_index); + break; + case FIB_PATH_TYPE_EXCLUSIVE: + dpo_reset(&path->exclusive.fp_ex_dpo); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_DEAG: + /* + * these hold only the path's DPO, which is reset below. + */ + break; + } + + /* + * release the adj we were holding and pick up the + * drop just in case. 
+ */ + dpo_reset(&path->fp_dpo); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + + return; +} + +static fib_forward_chain_type_t +fib_path_proto_to_chain_type (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_PROTOCOL_MPLS: + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +/* + * fib_path_back_walk_notify + * + * A back walk has reach this path. + */ +static fib_node_back_walk_rc_t +fib_path_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_path_t *path; + + path = fib_path_from_fib_node(node); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason) + { + /* + * modify the recursive adjacency to use the new forwarding + * of the via-fib. + * this update is visible to packets in flight in the DP. + */ + fib_path_recursive_adj_update( + path, + fib_path_proto_to_chain_type(path->fp_nh_proto), + &path->fp_dpo); + } + break; + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + /* +FIXME comment + * ADJ_UPDATE backwalk pass silently through here and up to + * the path-list when the multipath adj collapse occurs. + * The reason we do this is that the assumtption is that VPP + * runs in an environment where the Control-Plane is remote + * and hence reacts slowly to link up down. In order to remove + * this down link from the ECMP set quickly, we back-walk. + * VPP also has dedicated CPUs, so we are not stealing resources + * from the CP to do so. 
+ */ + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason) + { + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + /* + * The interface this path resolves through has been deleted. + * This will leave the path in a permanent drop state. The route + * needs to be removed and readded (and hence the path-list deleted) + * before it can forward again. + */ + fib_path_unresolve(path); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; + } + if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) + { + /* + * restack the DPO to pick up the correct DPO sub-type + */ + adj_index_t ai; + + ai = fib_path_attached_next_hop_get_adj( + path, + fib_proto_to_link(path->fp_nh_proto)); + + dpo_set(&path->fp_dpo, DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + ai); + adj_unlock(ai); + } + break; + case FIB_PATH_TYPE_ATTACHED: + /* + * FIXME; this could schedule a lower priority walk, since attached + * routes are not usually in ECMP configurations so the backwalk to + * the FIB entry does not need to be high priority + */ + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason) + { + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + fib_path_unresolve(path); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; + } + break; + case FIB_PATH_TYPE_DEAG: + /* + * FIXME When VRF delete is allowed this will need a poke. + */ + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * these path types have no parents. so to be + * walked from one is unexpected. 
+ */ + ASSERT(0); + break; + } + + /* + * propagate the backwalk further to the path-list + */ + fib_path_list_back_walk(path->fp_pl_index, ctx); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * The FIB path's graph node virtual function table + */ +static const fib_node_vft_t fib_path_vft = { + .fnv_get = fib_path_get_node, + .fnv_last_lock = fib_path_last_lock_gone, + .fnv_back_walk = fib_path_back_walk_notify, +}; + +static fib_path_cfg_flags_t +fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) +{ + fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_ATTRIBUTE_FIRST; + + if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST) + cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST; + if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED) + cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED; + + return (cfg_flags); +} + +/* + * fib_path_create + * + * Create and initialise a new path object. + * return the index of the path. + */ +fib_node_index_t +fib_path_create (fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const fib_route_path_t *rpath) +{ + fib_path_t *path; + + pool_get(fib_path_pool, path); + memset(path, 0, sizeof(*path)); + + fib_node_init(&path->fp_node, + FIB_NODE_TYPE_PATH); + + dpo_reset(&path->fp_dpo); + path->fp_pl_index = pl_index; + path->fp_nh_proto = nh_proto; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + path->fp_weight = rpath->frp_weight; + path->fp_cfg_flags = flags; + path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath); + + /* + * deduce the path's tpye from the parementers and save what is needed. 
+ */ + if (~0 != rpath->frp_sw_if_index) + { + if (flags & FIB_PATH_CFG_FLAG_LOCAL) + { + path->fp_type = FIB_PATH_TYPE_RECEIVE; + path->receive.fp_interface = rpath->frp_sw_if_index; + path->receive.fp_addr = rpath->frp_addr; + } + else + { + if (ip46_address_is_zero(&rpath->frp_addr)) + { + path->fp_type = FIB_PATH_TYPE_ATTACHED; + path->attached.fp_interface = rpath->frp_sw_if_index; + } + else + { + path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP; + path->attached_next_hop.fp_interface = rpath->frp_sw_if_index; + path->attached_next_hop.fp_nh = rpath->frp_addr; + } + } + } + else + { + if (ip46_address_is_zero(&rpath->frp_addr)) + { + if (~0 == rpath->frp_fib_index) + { + path->fp_type = FIB_PATH_TYPE_SPECIAL; + } + else + { + path->fp_type = FIB_PATH_TYPE_DEAG; + path->deag.fp_tbl_id = rpath->frp_fib_index; + } + } + else + { + path->fp_type = FIB_PATH_TYPE_RECURSIVE; + path->recursive.fp_nh = rpath->frp_addr; + path->recursive.fp_tbl_id = rpath->frp_fib_index; + } + } + + FIB_PATH_DBG(path, "create"); + + return (fib_path_get_index(path)); +} + +/* + * fib_path_create_special + * + * Create and initialise a new path object. + * return the index of the path. 
+ */ +fib_node_index_t +fib_path_create_special (fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const dpo_id_t *dpo) +{ + fib_path_t *path; + + pool_get(fib_path_pool, path); + memset(path, 0, sizeof(*path)); + + fib_node_init(&path->fp_node, + FIB_NODE_TYPE_PATH); + dpo_reset(&path->fp_dpo); + + path->fp_pl_index = pl_index; + path->fp_weight = 1; + path->fp_nh_proto = nh_proto; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + path->fp_cfg_flags = flags; + + if (FIB_PATH_CFG_FLAG_DROP & flags) + { + path->fp_type = FIB_PATH_TYPE_SPECIAL; + } + else if (FIB_PATH_CFG_FLAG_LOCAL & flags) + { + path->fp_type = FIB_PATH_TYPE_RECEIVE; + path->attached.fp_interface = FIB_NODE_INDEX_INVALID; + } + else + { + path->fp_type = FIB_PATH_TYPE_EXCLUSIVE; + ASSERT(NULL != dpo); + dpo_copy(&path->exclusive.fp_ex_dpo, dpo); + } + + return (fib_path_get_index(path)); +} + +/* + * fib_path_copy + * + * Copy a path. return index of new path. + */ +fib_node_index_t +fib_path_copy (fib_node_index_t path_index, + fib_node_index_t path_list_index) +{ + fib_path_t *path, *orig_path; + + pool_get(fib_path_pool, path); + + orig_path = fib_path_get(path_index); + ASSERT(NULL != orig_path); + + memcpy(path, orig_path, sizeof(*path)); + + FIB_PATH_DBG(path, "create-copy:%d", path_index); + + /* + * reset the dynamic section + */ + fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH); + path->fp_oper_flags = FIB_PATH_OPER_FLAG_NONE; + path->fp_pl_index = path_list_index; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + memset(&path->fp_dpo, 0, sizeof(path->fp_dpo)); + dpo_reset(&path->fp_dpo); + + return (fib_path_get_index(path)); +} + +/* + * fib_path_destroy + * + * destroy a path that is no longer required + */ +void +fib_path_destroy (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(NULL != path); + FIB_PATH_DBG(path, "destroy"); + + fib_path_unresolve(path); + + fib_node_deinit(&path->fp_node); + 
pool_put(fib_path_pool, path); +} + +/* + * fib_path_destroy + * + * destroy a path that is no longer required + */ +uword +fib_path_hash (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start), + (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) - + STRUCT_OFFSET_OF(fib_path_t, path_hash_start)), + 0)); +} + +/* + * fib_path_cmp_i + * + * Compare two paths for equivalence. + */ +static int +fib_path_cmp_i (const fib_path_t *path1, + const fib_path_t *path2) +{ + int res; + + res = 1; + + /* + * paths of different types and protocol are not equal. + * different weights only are the same path. + */ + if (path1->fp_type != path2->fp_type) + { + res = (path1->fp_type - path2->fp_type); + } + if (path1->fp_nh_proto != path2->fp_nh_proto) + { + res = (path1->fp_nh_proto - path2->fp_nh_proto); + } + else + { + /* + * both paths are of the same type. + * consider each type and its attributes in turn. + */ + switch (path1->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + res = ip46_address_cmp(&path1->attached_next_hop.fp_nh, + &path2->attached_next_hop.fp_nh); + if (0 == res) { + res = vnet_sw_interface_compare( + vnet_get_main(), + path1->attached_next_hop.fp_interface, + path2->attached_next_hop.fp_interface); + } + break; + case FIB_PATH_TYPE_ATTACHED: + res = vnet_sw_interface_compare( + vnet_get_main(), + path1->attached.fp_interface, + path2->attached.fp_interface); + break; + case FIB_PATH_TYPE_RECURSIVE: + res = ip46_address_cmp(&path1->recursive.fp_nh, + &path2->recursive.fp_nh); + + if (0 == res) + { + res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id); + } + break; + case FIB_PATH_TYPE_DEAG: + res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + res = 0; + break; + } + } + return (res); +} + +/* + * fib_path_cmp_for_sort + * + * Compare two paths 
for equivalence. Used during path sorting. + * As usual 0 means equal. + */ +int +fib_path_cmp_for_sort (void * v1, + void * v2) +{ + fib_node_index_t *pi1 = v1, *pi2 = v2; + fib_path_t *path1, *path2; + + path1 = fib_path_get(*pi1); + path2 = fib_path_get(*pi2); + + return (fib_path_cmp_i(path1, path2)); +} + +/* + * fib_path_cmp + * + * Compare two paths for equivalence. + */ +int +fib_path_cmp (fib_node_index_t pi1, + fib_node_index_t pi2) +{ + fib_path_t *path1, *path2; + + path1 = fib_path_get(pi1); + path2 = fib_path_get(pi2); + + return (fib_path_cmp_i(path1, path2)); +} + +int +fib_path_cmp_w_route_path (fib_node_index_t path_index, + const fib_route_path_t *rpath) +{ + fib_path_t *path; + int res; + + path = fib_path_get(path_index); + + res = 1; + + if (path->fp_weight != rpath->frp_weight) + { + res = (path->fp_weight - rpath->frp_weight); + } + else + { + /* + * both paths are of the same type. + * consider each type and its attributes in turn. + */ + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + res = ip46_address_cmp(&path->attached_next_hop.fp_nh, + &rpath->frp_addr); + if (0 == res) + { + res = vnet_sw_interface_compare( + vnet_get_main(), + path->attached_next_hop.fp_interface, + rpath->frp_sw_if_index); + } + break; + case FIB_PATH_TYPE_ATTACHED: + res = vnet_sw_interface_compare( + vnet_get_main(), + path->attached.fp_interface, + rpath->frp_sw_if_index); + break; + case FIB_PATH_TYPE_RECURSIVE: + res = ip46_address_cmp(&path->recursive.fp_nh, + &rpath->frp_addr); + + if (0 == res) + { + res = (path->recursive.fp_tbl_id - rpath->frp_fib_index); + } + break; + case FIB_PATH_TYPE_DEAG: + res = (path->deag.fp_tbl_id - rpath->frp_fib_index); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + res = 0; + break; + } + } + return (res); +} + +/* + * fib_path_recursive_loop_detect + * + * A forward walk of the FIB object graph to detect for a cycle/loop. 
This + * walk is initiated when an entry is linking to a new path list or from an old. + * The entry vector passed contains all the FIB entrys that are children of this + * path (it is all the entries encountered on the walk so far). If this vector + * contains the entry this path resolve via, then a loop is about to form. + * The loop must be allowed to form, since we need the dependencies in place + * so that we can track when the loop breaks. + * However, we MUST not produce a loop in the forwarding graph (else packets + * would loop around the switch path until the loop breaks), so we mark recursive + * paths as looped so that they do not contribute forwarding information. + * By marking the path as looped, an etry such as; + * X/Y + * via a.a.a.a (looped) + * via b.b.b.b (not looped) + * can still forward using the info provided by b.b.b.b only + */ +int +fib_path_recursive_loop_detect (fib_node_index_t path_index, + fib_node_index_t **entry_indicies) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + /* + * the forced drop path is never looped, cos it is never resolved. + */ + if (fib_path_is_permanent_drop(path)) + { + return (0); + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + { + fib_node_index_t *entry_index, *entries; + int looped = 0; + entries = *entry_indicies; + + vec_foreach(entry_index, entries) { + if (*entry_index == path->fp_via_fib) + { + /* + * the entry that is about to link to this path-list (or + * one of this path-list's children) is the same entry that + * this recursive path resolves through. this is a cycle. + * abort the walk. + */ + looped = 1; + break; + } + } + + if (looped) + { + FIB_PATH_DBG(path, "recursive loop formed"); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + } + else + { + /* + * no loop here yet. keep forward walking the graph. 
+ */ + if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies)) + { + FIB_PATH_DBG(path, "recursive loop formed"); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + } + else + { + FIB_PATH_DBG(path, "recursive loop cleared"); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + } + } + break; + } + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * these path types cannot be part of a loop, since they are the leaves + * of the graph. + */ + break; + } + + return (fib_path_is_looped(path_index)); +} + +int +fib_path_resolve (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + /* + * hope for the best. + */ + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + + /* + * the forced drop path resolves via the drop adj + */ + if (fib_path_is_permanent_drop(path)) + { + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + return (fib_path_is_resolved(path_index)); + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + fib_path_attached_next_hop_set(path); + break; + case FIB_PATH_TYPE_ATTACHED: + /* + * path->attached.fp_interface + */ + if (!vnet_sw_interface_is_admin_up(vnet_get_main(), + path->attached.fp_interface)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached.fp_interface)) + { + /* + * point-2-point interfaces do not require a glean, since + * there is nothing to ARP. 
Install a rewrite/nbr adj instead + */ + dpo_set(&path->fp_dpo, + DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + adj_nbr_add_or_lock( + path->fp_nh_proto, + fib_proto_to_link(path->fp_nh_proto), + &zero_addr, + path->attached.fp_interface)); + } + else + { + dpo_set(&path->fp_dpo, + DPO_ADJACENCY_GLEAN, + fib_proto_to_dpo(path->fp_nh_proto), + adj_glean_add_or_lock(path->fp_nh_proto, + path->attached.fp_interface, + NULL)); + } + /* + * become a child of the adjacency so we receive updates + * when the interface state changes + */ + path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); + + break; + case FIB_PATH_TYPE_RECURSIVE: + { + /* + * Create a RR source entry in the table for the address + * that this path recurses through. + * This resolve action is recursive, hence we may create + * more paths in the process. more creates mean maybe realloc + * of this path. + */ + fib_node_index_t fei; + fib_prefix_t pfx; + + ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib); + + fib_prefix_from_ip46_addr(&path->recursive.fp_nh, &pfx); + + fei = fib_table_entry_special_add(path->recursive.fp_tbl_id, + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + + path = fib_path_get(path_index); + path->fp_via_fib = fei; + + /* + * become a dependent child of the entry so the path is + * informed when the forwarding for the entry changes. + */ + path->fp_sibling = fib_entry_child_add(path->fp_via_fib, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); + + /* + * create and configure the IP DPO + */ + fib_path_recursive_adj_update( + path, + fib_path_proto_to_chain_type(path->fp_nh_proto), + &path->fp_dpo); + + break; + } + case FIB_PATH_TYPE_SPECIAL: + /* + * Resolve via the drop + */ + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + break; + case FIB_PATH_TYPE_DEAG: + /* + * Resolve via a lookup DPO. + * FIXME. 
control plane should add routes with a table ID + */ + lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, + fib_proto_to_dpo(path->fp_nh_proto), + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &path->fp_dpo); + break; + case FIB_PATH_TYPE_RECEIVE: + /* + * Resolve via a receive DPO. + */ + receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + path->receive.fp_interface, + &path->receive.fp_addr, + &path->fp_dpo); + break; + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * Resolve via the user provided DPO + */ + dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo); + break; + } + + return (fib_path_is_resolved(path_index)); +} + +u32 +fib_path_get_resolving_interface (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + return (path->attached_next_hop.fp_interface); + case FIB_PATH_TYPE_ATTACHED: + return (path->attached.fp_interface); + case FIB_PATH_TYPE_RECEIVE: + return (path->receive.fp_interface); + case FIB_PATH_TYPE_RECURSIVE: + return (fib_entry_get_resolving_interface(path->fp_via_fib)); + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_EXCLUSIVE: + break; + } + return (~0); +} + +adj_index_t +fib_path_get_adj (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(dpo_is_adj(&path->fp_dpo)); + if (dpo_is_adj(&path->fp_dpo)) + { + return (path->fp_dpo.dpoi_index); + } + return (ADJ_INDEX_INVALID); +} + +int +fib_path_get_weight (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + return (path->fp_weight); +} + +void +fib_path_contribute_forwarding (fib_node_index_t path_index, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct); + + FIB_PATH_DBG(path, "contribute"); + + /* + * The DPO 
stored in the path was created when the path was resolved. + * This then represents the path's 'native' protocol; IP. + * For all others will need to go find something else. + */ + if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct) + { + dpo_copy(dpo, &path->fp_dpo); + } + else { + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + { + adj_index_t ai; + + /* + * get a MPLS link type adj. + */ + ai = fib_path_attached_next_hop_get_adj( + path, + fib_forw_chain_type_to_link_type(fct)); + dpo_set(dpo, DPO_ADJACENCY, + fib_forw_chain_type_to_dpo_proto(fct), ai); + adj_unlock(ai); + + break; + } + } + break; + case FIB_PATH_TYPE_RECURSIVE: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + /* + * Assume that EOS and IP forwarding is the same. 
+ * revisit for ieBGP + */ + dpo_copy(dpo, &path->fp_dpo); + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + fib_path_recursive_adj_update(path, fct, dpo); + break; + } + break; + case FIB_PATH_TYPE_DEAG: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID, + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + dpo); + break; + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + dpo_copy(dpo, &path->fp_dpo); + break; + } + break; + case FIB_PATH_TYPE_EXCLUSIVE: + dpo_copy(dpo, &path->exclusive.fp_ex_dpo); + break; + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_SPECIAL: + ASSERT(0); + break; + } + + } +} + +load_balance_path_t * +fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index, + fib_forward_chain_type_t fct, + load_balance_path_t *hash_key) +{ + load_balance_path_t *mnh; + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + if (fib_path_is_resolved(path_index)) + { + vec_add2(hash_key, mnh, 1); + + mnh->path_weight = path->fp_weight; + mnh->path_index = path_index; + dpo_copy(&mnh->path_dpo, &path->fp_dpo); + } + + return (hash_key); +} + +int +fib_path_is_recursive (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_RECURSIVE == path->fp_type); +} + +int +fib_path_is_exclusive (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type); +} + +int +fib_path_is_deag (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_DEAG == path->fp_type); +} + +int +fib_path_is_resolved (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (dpo_id_is_valid(&path->fp_dpo) && + (path->fp_oper_flags 
& FIB_PATH_OPER_FLAG_RESOLVED) && + !fib_path_is_looped(path_index) && + !fib_path_is_permanent_drop(path)); +} + +int +fib_path_is_looped (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP); +} + +void +fib_path_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft); +} + +static clib_error_t * +show_fib_path_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_path_t *path; + + vlib_cli_output (vm, "FIB Path Lists"); + pool_foreach(path, fib_path_pool, + ({ + vlib_cli_output (vm, "%U", format_fib_path, path); + })); + + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_path, static) = { + .path = "show fib paths", + .function = show_fib_path_command, + .short_help = "show fib paths", +}; diff --git a/vnet/vnet/fib/fib_path.h b/vnet/vnet/fib/fib_path.h new file mode 100644 index 00000000000..16ca358c04a --- /dev/null +++ b/vnet/vnet/fib/fib_path.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Given a route of the form; + * q.r.s.t/Y + * via <interface> <next-hop> + * + * The prefix is: q.r.s.t./Y + * the path is: 'via <interface> <next-hop> + * + * The path is the description of where to send the traffic, and the + * the prefix is a description of which traffic to send. 
+ * It is the aim of the FIB to resolve the path, i.e. to find the corresponding + * adjacency to match the path's description. + */ + +#ifndef __FIB_PATH_H__ +#define __FIB_PATH_H__ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/load_balance.h> + +#include <vnet/fib/fib_types.h> +#include <vnet/adj/adj_types.h> + +/** + * Enurmeration of path configuration attributes + */ +typedef enum fib_path_cfg_attribute_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_PATH_CFG_ATTRIBUTE_FIRST = 0, + /** + * The path is forced to a drop, whatever the next-hop info says. + * something somewhere knows better... + */ + FIB_PATH_CFG_ATTRIBUTE_DROP = FIB_PATH_CFG_ATTRIBUTE_FIRST, + /** + * The path uses an adj that is exclusive. I.e. it is known only by + * the source of the route. + */ + FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE, + /** + * Recursion constraint via host + */ + FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST, + /** + * Recursion constraint via attached + */ + FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED, + /** + * The path is a for-us path + */ + FIB_PATH_CFG_ATTRIBUTE_LOCAL, + /** + * Marker. Add new types before this one, then update it. 
+ */ + FIB_PATH_CFG_ATTRIBUTE_LAST = FIB_PATH_CFG_ATTRIBUTE_LOCAL, +} __attribute__ ((packed)) fib_path_cfg_attribute_t; + +/** + * The maximum number of path attributes + */ +#define FIB_PATH_CFG_ATTRIBUTE_MAX (FIB_PATH_CFG_ATTRIBUTE_LAST + 1) + +#define FIB_PATH_CFG_ATTRIBUTES { \ + [FIB_PATH_CFG_ATTRIBUTE_DROP] = "drop", \ + [FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST] = "resolve-host", \ + [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \ + [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \ +} + +#define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ + for (_item = FIB_PATH_CFG_ATTRIBUTE_FIRST; \ + _item <= FIB_PATH_CFG_ATTRIBUTE_LAST; \ + _item++) + +/** + * Path config flags from the attributes + */ +typedef enum fib_path_cfg_flags_t_ { + FIB_PATH_CFG_FLAG_NONE = 0, + FIB_PATH_CFG_FLAG_DROP = (1 << FIB_PATH_CFG_ATTRIBUTE_DROP), + FIB_PATH_CFG_FLAG_EXCLUSIVE = (1 << FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE), + FIB_PATH_CFG_FLAG_RESOLVE_HOST = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST), + FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED), + FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL), +} __attribute__ ((packed)) fib_path_cfg_flags_t; + + +extern u8 *fib_path_format(fib_node_index_t pi, u8 *s); +extern u8 *fib_path_adj_format(fib_node_index_t pi, + u32 indent, + u8 *s); + +extern u8 * format_fib_path(u8 * s, va_list * args); + +extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const fib_route_path_t *path); +extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const dpo_id_t *dpo); + +extern int fib_path_cmp(fib_node_index_t path_index1, + fib_node_index_t path_index2); +extern int fib_path_cmp_for_sort(void * a1, void * a2); +extern int fib_path_cmp_w_route_path(fib_node_index_t path_index, + const fib_route_path_t 
*rpath); +extern fib_node_index_t fib_path_copy(fib_node_index_t path_index, + fib_node_index_t path_list_index); +extern int fib_path_resolve(fib_node_index_t path_index); +extern int fib_path_is_resolved(fib_node_index_t path_index); +extern int fib_path_is_recursive(fib_node_index_t path_index); +extern int fib_path_is_exclusive(fib_node_index_t path_index); +extern int fib_path_is_deag(fib_node_index_t path_index); +extern int fib_path_is_looped(fib_node_index_t path_index); +extern void fib_path_destroy(fib_node_index_t path_index); +extern uword fib_path_hash(fib_node_index_t path_index); +extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( + fib_node_index_t path_index, + fib_forward_chain_type_t fct, + load_balance_path_t *hash_key); +extern void fib_path_contribute_forwarding(fib_node_index_t path_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo); +extern adj_index_t fib_path_get_adj(fib_node_index_t path_index); +extern int fib_path_recursive_loop_detect(fib_node_index_t path_index, + fib_node_index_t **entry_indicies); +extern u32 fib_path_get_resolving_interface(fib_node_index_t fib_entry_index); +extern int fib_path_get_weight(fib_node_index_t path_index); + +extern void fib_path_module_init(void); + +extern void fib_path_module_init(void); + +#endif diff --git a/vnet/vnet/fib/fib_path_ext.c b/vnet/vnet/fib/fib_path_ext.c new file mode 100644 index 00000000000..f40c749e981 --- /dev/null +++ b/vnet/vnet/fib/fib_path_ext.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/mpls/mpls.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> + +#include "fib_path_ext.h" +#include "fib_path.h" +#include "fib_path_list.h" +#include "fib_internal.h" + +u8 * +format_fib_path_ext (u8 * s, va_list * args) +{ + fib_path_ext_t *path_ext; + + path_ext = va_arg (*args, fib_path_ext_t *); + + s = format(s, "path:%d label:%U", + path_ext->fpe_path_index, + format_mpls_unicast_label, + path_ext->fpe_path.frp_label); + + return (s); +} + +int +fib_path_ext_cmp (fib_path_ext_t *path_ext, + const fib_route_path_t *rpath) +{ + return (fib_route_path_cmp(&path_ext->fpe_path, rpath)); +} + +static int +fib_path_ext_match (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *ctx) +{ + fib_path_ext_t *path_ext = ctx; + + if (!fib_path_cmp_w_route_path(path_index, + &path_ext->fpe_path)) + { + path_ext->fpe_path_index = path_index; + return (0); + } + // keep going + return (1); +} + +void +fib_path_ext_resolve (fib_path_ext_t *path_ext, + fib_node_index_t path_list_index) +{ + /* + * Find the path on the path list that this is an extension for + */ + path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID; + fib_path_list_walk(path_list_index, + fib_path_ext_match, + path_ext); +} + +void +fib_path_ext_init (fib_path_ext_t *path_ext, + fib_node_index_t path_list_index, + const fib_route_path_t *rpath) +{ + path_ext->fpe_path = *rpath; + path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID; + + fib_path_ext_resolve(path_ext, path_list_index); +} + 
+load_balance_path_t * +fib_path_ext_stack (fib_path_ext_t *path_ext, + fib_forward_chain_type_t parent_fct, + load_balance_path_t *nhs) +{ + fib_forward_chain_type_t child_fct; + load_balance_path_t *nh; + + if (!fib_path_is_resolved(path_ext->fpe_path_index)) + return (nhs); + + /* + * Since we are stacking this path-extension, it must have a valid out + * label. From the chain type request by the child, determine what + * chain type we will request from the parent. + */ + switch (parent_fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + ASSERT(0); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + if (MPLS_IETF_IMPLICIT_NULL_LABEL == path_ext->fpe_label) + { + /* + * implicit-null label for the eos or IP chain, need to pick up + * the IP adj + */ + child_fct = parent_fct; + } + else + { + /* + * we have a label to stack. packets will thus be labelled when + * they encounter th child, ergo, non-eos. + */ + child_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + } + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + child_fct = parent_fct; + break; + default: + return (nhs); + break; + } + + dpo_id_t via_dpo = DPO_NULL; + + /* + * The next object in the graph after the imposition of the label + * will be the DPO contributed by the path through which the packets + * are to be sent. We stack the MPLS Label DPO on this path DPO + */ + fib_path_contribute_forwarding(path_ext->fpe_path_index, + child_fct, + &via_dpo); + + if (dpo_is_drop(&via_dpo) || + load_balance_is_drop(&via_dpo)) + { + /* + * don't stack a path extension on a drop. doing so will create + * a LB bucket entry on drop, and we will lose a percentage of traffic. 
+ */ + } + else + { + vec_add2(nhs, nh, 1); + nh->path_weight = fib_path_get_weight(path_ext->fpe_path_index); + nh->path_index = path_ext->fpe_path_index; + dpo_copy(&nh->path_dpo, &via_dpo); + + /* + * The label is stackable for this chain type + * construct the mpls header that will be imposed in the data-path + */ + if (MPLS_IETF_IMPLICIT_NULL_LABEL != path_ext->fpe_label) + { + dpo_set(&nh->path_dpo, + DPO_MPLS_LABEL, + DPO_PROTO_MPLS, + mpls_label_dpo_create(path_ext->fpe_label, + (parent_fct == FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS ? + MPLS_NON_EOS : + MPLS_EOS), + 255, 0, + &nh->path_dpo)); + } + } + dpo_reset(&via_dpo); + + return (nhs); +} diff --git a/vnet/vnet/fib/fib_path_ext.h b/vnet/vnet/fib/fib_path_ext.h new file mode 100644 index 00000000000..949b1e2b64f --- /dev/null +++ b/vnet/vnet/fib/fib_path_ext.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_PATH_EXT_H__ +#define __FIB_PATH_EXT_H__ + +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_types.h> + +/** + * A path extension is a per-entry addition to the forwarigind information + * when packets are sent for that entry over that path. + * + * For example: + * ip route add 1.1.1.1/32 via 10.10.10.10 mpls-label 100 + * + * The out-going MPLS label value 100 is a path-extension. It is a value sepcific + * to the entry 1.1.1.1/32 and valid only went packets are sent via 10.10.10.10. 
+ */ +typedef struct fib_path_ext_t_ +{ + /** + * A description of the path that is being extended. + * This description is used to match this extension with the [changing] + * instance of a fib_path_t that is extended + */ + fib_route_path_t fpe_path; +#define fpe_label fpe_path.frp_label + + /** + * The index of the path. This is the global index, not the path's + * position in the path-list. + */ + fib_node_index_t fpe_path_index; +} fib_path_ext_t; + + +extern u8 * format_fib_path_ext(u8 * s, va_list * args); + +extern void fib_path_ext_init(fib_path_ext_t *path_ext, + fib_node_index_t path_list_index, + const fib_route_path_t *rpath); + +extern int fib_path_ext_cmp(fib_path_ext_t *path_ext, + const fib_route_path_t *rpath); + +extern void fib_path_ext_resolve(fib_path_ext_t *path_ext, + fib_node_index_t path_list_index); + +extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext, + fib_forward_chain_type_t fct, + load_balance_path_t *nhs); + +#endif + diff --git a/vnet/vnet/fib/fib_path_list.c b/vnet/vnet/fib/fib_path_list.c new file mode 100644 index 00000000000..1df73968614 --- /dev/null +++ b/vnet/vnet/fib/fib_path_list.c @@ -0,0 +1,1100 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/mhash.h> +#include <vnet/ip/ip.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> + +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/fib_node_list.h> +#include <vnet/fib/fib_walk.h> + +/** + * FIB path-list + * A representation of the list/set of path trough which a prefix is reachable + */ +typedef struct fib_path_list_t_ { + /** + * A path-list is a node in the FIB graph. + */ + fib_node_t fpl_node; + + /** + * Flags on the path-list + */ + fib_path_list_flags_t fpl_flags; + + /** + * The next-hop protocol for the paths in this path list. + * Note that fixing the proto here means we don't support a mix of + * v4 and v6 paths. ho hum. + */ + fib_protocol_t fpl_nh_proto; + + /** + * Vector of paths indecies for all configured paths. + * For shareable path-lists this list MUST not change. + */ + fib_node_index_t *fpl_paths; +} fib_path_list_t; + +/* + * Array of strings/names for the FIB sources + */ +static const char *fib_path_list_attr_names[] = FIB_PATH_LIST_ATTRIBUTES; + +/* + * The memory pool from which we allocate all the path-lists + */ +static fib_path_list_t * fib_path_list_pool; + +/* + * The data-base of shared path-lists + */ +static uword *fib_path_list_db; + +/* + * Debug macro + */ +#ifdef FIB_DEBUG +#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) \ +{ \ + u8 *_tmp = 0; \ + _tmp = fib_path_list_format( \ + fib_path_list_get_index(_pl), _tmp); \ + clib_warning("pl:[%d:%p:%p:%s]:" _fmt, \ + fib_path_list_get_index(_pl), \ + _pl, _pl->fpl_paths, _tmp, \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) 
+#endif + +static fib_path_list_t * +fib_path_list_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_path_list_pool, index)); +} + +static fib_node_t * +fib_path_list_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_path_list_get(index)); +} + +static fib_path_list_t* +fib_path_list_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_PATH_LIST == node->fn_type); +#endif + return ((fib_path_list_t*)node); +} + +static fib_node_index_t +fib_path_list_get_index (fib_path_list_t *path_list) +{ + return (path_list - fib_path_list_pool); +} + +static u8 * +format_fib_path_list (u8 * s, va_list * args) +{ + fib_path_list_attribute_t attr; + fib_node_index_t *path_index; + fib_path_list_t *path_list; + + path_list = va_arg (*args, fib_path_list_t *); + + s = format (s, " index:%u", fib_path_list_get_index(path_list)); + s = format (s, " locks:%u", path_list->fpl_node.fn_locks); + s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto); + + if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags) + { + s = format (s, " flags:"); + FOR_EACH_PATH_LIST_ATTRIBUTE(attr) + { + if ((1<<attr) & path_list->fpl_flags) + { + s = format (s, "%s,", fib_path_list_attr_names[attr]); + } + } + } + vec_foreach (path_index, path_list->fpl_paths) + { + s = fib_path_format(*path_index, s); + s = format(s, "\n"); + } + + return (s); +} + +u8 * +fib_path_list_adjs_format (fib_node_index_t path_list_index, + u32 indent, + u8 * s) +{ + fib_path_list_t *path_list; + u32 i; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach_index (i, path_list->fpl_paths) + { + s = fib_path_adj_format(path_list->fpl_paths[i], + indent, s); + } + + return (s); +} + + +u8 * +fib_path_list_format (fib_node_index_t path_list_index, + u8 * s) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (format(s, "%U", format_fib_path_list, path_list)); +} + +static uword +fib_path_list_hash 
(fib_path_list_t *path_list) +{ + uword old_path_list_hash, new_path_list_hash, path_hash; + fib_node_index_t *path_index; + + ASSERT(path_list); + + new_path_list_hash = old_path_list_hash = vec_len(path_list->fpl_paths); + + vec_foreach (path_index, path_list->fpl_paths) + { + path_hash = fib_path_hash(*path_index); +#if uword_bits == 64 + hash_mix64(path_hash, old_path_list_hash, new_path_list_hash); +#else + hash_mix32(path_hash, old_path_list_hash, new_path_list_hash); +#endif + } + + return (new_path_list_hash); +} + +always_inline uword +fib_path_list_db_hash_key_from_index (uword index) +{ + return 1 + 2*index; +} + +always_inline uword +fib_path_list_db_hash_key_is_index (uword key) +{ + return key & 1; +} + +always_inline uword +fib_path_list_db_hash_key_2_index (uword key) +{ + ASSERT (fib_path_list_db_hash_key_is_index (key)); + return key / 2; +} + +static fib_path_list_t* +fib_path_list_db_get_from_hash_key (uword key) +{ + fib_path_list_t *path_list; + + if (fib_path_list_db_hash_key_is_index (key)) + { + fib_node_index_t path_list_index; + + path_list_index = fib_path_list_db_hash_key_2_index(key); + path_list = fib_path_list_get(path_list_index); + } + else + { + path_list = uword_to_pointer (key, fib_path_list_t *); + } + + return (path_list); +} + +static uword +fib_path_list_db_hash_key_sum (hash_t * h, + uword key) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_db_get_from_hash_key(key); + + return (fib_path_list_hash(path_list)); +} + +static uword +fib_path_list_db_hash_key_equal (hash_t * h, + uword key1, + uword key2) +{ + fib_path_list_t *path_list1, *path_list2; + + path_list1 = fib_path_list_db_get_from_hash_key(key1); + path_list2 = fib_path_list_db_get_from_hash_key(key2); + + return (fib_path_list_hash(path_list1) == + fib_path_list_hash(path_list2)); +} + +static fib_node_index_t +fib_path_list_db_find (fib_path_list_t *path_list) +{ + uword *p; + + p = hash_get(fib_path_list_db, path_list); + + if (NULL != p) + { + 
return p[0]; + } + + return (FIB_NODE_INDEX_INVALID); +} + +static void +fib_path_list_db_insert (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(FIB_NODE_INDEX_INVALID == fib_path_list_db_find(path_list)); + + hash_set (fib_path_list_db, + fib_path_list_db_hash_key_from_index(path_list_index), + path_list_index); + + FIB_PATH_LIST_DBG(path_list, "DB-inserted"); +} + +static void +fib_path_list_db_remove (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(FIB_NODE_INDEX_INVALID != fib_path_list_db_find(path_list)); + + hash_unset(fib_path_list_db, + fib_path_list_db_hash_key_from_index(path_list_index)); + + FIB_PATH_LIST_DBG(path_list, "DB-removed"); +} + +static void +fib_path_list_destroy (fib_path_list_t *path_list) +{ + fib_node_index_t *path_index; + + FIB_PATH_LIST_DBG(path_list, "destroy"); + + vec_foreach (path_index, path_list->fpl_paths) + { + fib_path_destroy(*path_index); + } + + vec_free(path_list->fpl_paths); + + fib_node_deinit(&path_list->fpl_node); + pool_put(fib_path_list_pool, path_list); +} + +static void +fib_path_list_last_lock_gone (fib_node_t *node) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_from_fib_node(node); + + FIB_PATH_LIST_DBG(path_list, "last-lock"); + + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_path_list_db_remove(fib_path_list_get_index(path_list)); + } + fib_path_list_destroy(path_list); +} + +/* + * fib_path_mk_lb + * + * update the multipath adj this path-list will contribute to its + * children's forwarding. + */ +static void +fib_path_list_mk_lb (fib_path_list_t *path_list, + fib_forward_chain_type_t type, + dpo_id_t *dpo) +{ + load_balance_path_t *hash_key; + fib_node_index_t *path_index; + + hash_key = NULL; + + /* + * We gather the DPOs from resolved paths. 
+ */ + vec_foreach (path_index, path_list->fpl_paths) + { + hash_key = fib_path_append_nh_for_multipath_hash( + *path_index, + type, + hash_key); + } + + /* + * Path-list load-balances, which if used, would be shared and hence + * never need a load-balance map. + */ + load_balance_multipath_update(dpo, hash_key, LOAD_BALANCE_FLAG_NONE); + + FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index); + + vec_free(hash_key); +} + +/* + * fib_path_list_back_walk + * + * Called from one of this path-list's paths to progate + * a back walk + */ +void +fib_path_list_back_walk (fib_node_index_t path_list_index, + fib_node_back_walk_ctx_t *ctx) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + /* + * propagate the backwalk further + */ + if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children)) + { + /* + * only a few children. continue the walk synchronously + */ + fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx); + } + else + { + /* + * many children. schedule a async walk + */ + fib_walk_async(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + FIB_WALK_PRIORITY_LOW, + ctx); + } +} + +/* + * fib_path_list_back_walk_notify + * + * A back walk has reach this path-list. + */ +static fib_node_back_walk_rc_t +fib_path_list_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + /* + * the path-list is not a direct child of any other node type + * paths, which do not change thier to-list-mapping, save the + * list they are a member of, and invoke the BW function directly. 
+ */ + ASSERT(0); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * The FIB path-list's graph node virtual function table + */ +static const fib_node_vft_t fib_path_list_vft = { + .fnv_get = fib_path_list_get_node, + .fnv_last_lock = fib_path_list_last_lock_gone, + .fnv_back_walk = fib_path_list_back_walk_notify, +}; + +static fib_path_list_t * +fib_path_list_alloc (fib_node_index_t *path_list_index) +{ + fib_path_list_t *path_list; + + pool_get(fib_path_list_pool, path_list); + memset(path_list, 0, sizeof(*path_list)); + + fib_node_init(&path_list->fpl_node, + FIB_NODE_TYPE_PATH_LIST); + + if (NULL != path_list_index) + { + *path_list_index = fib_path_list_get_index(path_list); + } + + FIB_PATH_LIST_DBG(path_list, "alloc"); + + return (path_list); +} + +static fib_path_list_t * +fib_path_list_resolve (fib_path_list_t *path_list) +{ + fib_node_index_t *path_index, *paths, path_list_index; + + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_RESOLVED)); + + /* + * resolving a path-list is a recursive action. this means more path + * lists can be created during this call, and hence this path-list + * can be realloc'd. so we work with copies. + * this function is called only once per-path list, so its no great overhead. 
+ */ + path_list_index = fib_path_list_get_index(path_list); + paths = vec_dup(path_list->fpl_paths); + + vec_foreach (path_index, paths) + { + fib_path_resolve(*path_index); + } + + vec_free(paths); + path_list = fib_path_list_get(path_list_index); + + FIB_PATH_LIST_DBG(path_list, "resovled"); + + return (path_list); +} + +u32 +fib_path_list_get_resolving_interface (fib_node_index_t path_list_index) +{ + fib_node_index_t *path_index; + fib_path_list_t *path_list; + u32 sw_if_index; + + path_list = fib_path_list_get(path_list_index); + + sw_if_index = ~0; + vec_foreach (path_index, path_list->fpl_paths) + { + sw_if_index = fib_path_get_resolving_interface(*path_index); + if (~0 != sw_if_index) + { + return (sw_if_index); + } + } + + return (sw_if_index); +} + +int +fib_path_list_is_looped (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED); +} + +static fib_path_cfg_flags_t +fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) +{ + fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; + + if (plf & FIB_PATH_LIST_FLAG_LOCAL) + { + pf |= FIB_PATH_CFG_FLAG_LOCAL; + } + if (plf & FIB_PATH_LIST_FLAG_DROP) + { + pf |= FIB_PATH_CFG_FLAG_DROP; + } + if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + } + + return (pf); +} + +static fib_path_list_flags_t +fib_path_list_flags_fixup (fib_path_list_flags_t flags) +{ + /* + * we do no share drop nor exclusive path-lists + */ + if (flags & FIB_PATH_LIST_FLAG_DROP || + flags & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + flags &= ~FIB_PATH_LIST_FLAG_SHARED; + } + + return (flags); +} + +fib_node_index_t +fib_path_list_create (fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_list_index, old_path_list_index; + fib_path_list_t *path_list; + int i; + + flags = fib_path_list_flags_fixup(flags); + path_list = 
fib_path_list_alloc(&path_list_index); + path_list->fpl_flags = flags; + /* + * we'll assume for now all paths are the same next-hop protocol + */ + path_list->fpl_nh_proto = rpaths[0].frp_proto; + + vec_foreach_index(i, rpaths) + { + vec_add1(path_list->fpl_paths, + fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + &rpaths[i])); + } + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (flags & FIB_PATH_LIST_FLAG_SHARED) + { + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + old_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != old_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = old_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. + */ + path_list = fib_path_list_resolve(path_list); + } + + return (path_list_index); +} + +fib_node_index_t +fib_path_list_create_special (fib_protocol_t nh_proto, + fib_path_list_flags_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t path_index, path_list_index; + fib_path_list_t *path_list; + + path_list = fib_path_list_alloc(&path_list_index); + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = nh_proto; + + path_index = + fib_path_create_special(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + dpo); + vec_add1(path_list->fpl_paths, path_index); + + /* + * we don't share path-lists. we can do PIC on them so why bother. 
+ */ + path_list = fib_path_list_resolve(path_list); + + return (path_list_index); +} + +/* + * fib_path_list_copy_and_path_add + * + * Create a copy of a path-list and append one more path to it. + * The path-list returned could either have been newly created, or + * can be a shared path-list from the data-base. + */ +fib_node_index_t +fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_index, path_list_index, *orig_path_index; + fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t pi; + + ASSERT(1 == vec_len(rpaths)); + + /* + * alloc the new list before we retrieve the old one, lest + * the alloc result in a realloc + */ + path_list = fib_path_list_alloc(&path_list_index); + + orig_path_list = fib_path_list_get(orig_path_list_index); + + FIB_PATH_LIST_DBG(orig_path_list, "copy-add"); + + flags = fib_path_list_flags_fixup(flags); + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths)); + pi = 0; + + vec_foreach (orig_path_index, orig_path_list->fpl_paths) + { + path_index = fib_path_copy(*orig_path_index, path_list_index); + path_list->fpl_paths[pi++] = path_index; + } + path_index = fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + rpaths); + path_list->fpl_paths[pi] = path_index; + + /* + * we sort the paths since the key for the path-list is + * the description of the paths it contains. The paths need to + * be sorted else this description will differ. 
+ */ + vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort); + + FIB_PATH_LIST_DBG(path_list, "path-added"); + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_node_index_t exist_path_list_index; + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = exist_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. + */ + path_list = fib_path_list_resolve(path_list); + } + + return (path_list_index); +} + +/* + * fib_path_list_copy_and_path_remove + * + * Copy the path-list excluding the path passed. + * If the path is the last one, then the index reurned will be invalid. + * i.e. the path-list is toast. 
+ */ +fib_node_index_t +fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_index, *orig_path_index, path_list_index, tmp_path_index; + fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t pi; + + ASSERT(1 == vec_len(rpaths)); + + path_list = fib_path_list_alloc(&path_list_index); + + flags = fib_path_list_flags_fixup(flags); + orig_path_list = fib_path_list_get(orig_path_list_index); + + FIB_PATH_LIST_DBG(orig_path_list, "copy-remove"); + + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + /* + * allocate as many paths as we might need in one go, rather than + * using vec_add to do a few at a time. + */ + if (vec_len(orig_path_list->fpl_paths) > 1) + { + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths) - 2); + } + pi = 0; + + /* + * create a representation of the path to be removed, so it + * can be used as a comparison object during the copy. + */ + tmp_path_index = fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + rpaths); + + vec_foreach (orig_path_index, orig_path_list->fpl_paths) + { + if (0 != fib_path_cmp(tmp_path_index, *orig_path_index)) { + path_index = fib_path_copy(*orig_path_index, path_list_index); + if (pi < vec_len(path_list->fpl_paths)) + { + path_list->fpl_paths[pi++] = path_index; + } + else + { + /* + * this is the unlikely case that the path being + * removed does not match one in the path-list, so + * we end up with as many paths as we started with. + * the paths vector was sized above with the expectation + * that we would have 1 less. 
+ */ + vec_add1(path_list->fpl_paths, path_index); + } + } + } + + /* + * done with the temporary now + */ + fib_path_destroy(tmp_path_index); + + /* + * if there are no paths, then the new path-list is aborted + */ + if (0 == vec_len(path_list->fpl_paths)) { + FIB_PATH_LIST_DBG(path_list, "last-path-removed"); + + fib_path_list_destroy(path_list); + + path_list_index = FIB_NODE_INDEX_INVALID; + } else { + /* + * we sort the paths since the key for the path-list is + * the description of the paths it contains. The paths need to + * be sorted else this description will differ. + */ + vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort); + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_node_index_t exist_path_list_index; + + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = exist_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. 
+ */ + path_list = fib_path_list_resolve(path_list); + } + } + + return (path_list_index); +} + +/* + * fib_path_list_contribute_forwarding + * + * Return the index of a load-balance that user of this path-list should + * use for forwarding + */ +void +fib_path_list_contribute_forwarding (fib_node_index_t path_list_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + fib_path_list_mk_lb(path_list, type, dpo); +} + +/* + * fib_path_list_get_adj + * + * Return the index of a adjacency for the first path that user of this + * path-list should use for forwarding + */ +adj_index_t +fib_path_list_get_adj (fib_node_index_t path_list_index, + fib_forward_chain_type_t type) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + return (fib_path_get_adj(path_list->fpl_paths[0])); +} + +int +fib_path_list_recursive_loop_detect (fib_node_index_t path_list_index, + fib_node_index_t **entry_indicies) +{ + fib_node_index_t *path_index; + int is_looped, list_looped; + fib_path_list_t *path_list; + + list_looped = 0; + path_list = fib_path_list_get(path_list_index); + + vec_foreach (path_index, path_list->fpl_paths) + { + fib_node_index_t *copy, **copy_ptr; + + /* + * we need a copy of the nodes visited so that when we add entries + * we explore on the nth path and a looped is detected, those entries + * are not again searched for n+1 path and so finding a loop that does + * not exist. 
+ */ + copy = vec_dup(*entry_indicies); + copy_ptr = © + + is_looped = fib_path_recursive_loop_detect(*path_index, copy_ptr); + list_looped += is_looped; + } + + FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", eval); + + if (list_looped) + { + path_list->fpl_flags |= FIB_PATH_LIST_FLAG_LOOPED; + } + else + { + path_list->fpl_flags &= ~FIB_PATH_LIST_FLAG_LOOPED; + } + + return (list_looped); +} + +u32 +fib_path_list_child_add (fib_node_index_t path_list_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + child_type, + child_index)); +} + +void +fib_path_list_child_remove (fib_node_index_t path_list_index, + u32 si) +{ + fib_node_child_remove(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + si); +} + +void +fib_path_list_lock(fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + if (FIB_NODE_INDEX_INVALID != path_list_index) + { + path_list = fib_path_list_get(path_list_index); + + fib_node_lock(&path_list->fpl_node); + FIB_PATH_LIST_DBG(path_list, "lock"); + } +} + +void +fib_path_list_unlock (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + if (FIB_NODE_INDEX_INVALID != path_list_index) + { + path_list = fib_path_list_get(path_list_index); + FIB_PATH_LIST_DBG(path_list, "unlock"); + + fib_node_unlock(&path_list->fpl_node); + } +} + +u32 +fib_path_list_pool_size (void) +{ + return (pool_elts(fib_path_list_pool)); +} + +u32 +fib_path_list_db_size (void) +{ + return (hash_elts(fib_path_list_db)); +} + +void +fib_path_list_walk (fib_node_index_t path_list_index, + fib_path_list_walk_fn_t func, + void *ctx) +{ + fib_node_index_t *path_index; + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach(path_index, path_list->fpl_paths) + { + if (!func(path_list_index, *path_index, ctx)) + break; + } +} + + +void +fib_path_list_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_PATH_LIST, 
&fib_path_list_vft); + + fib_path_list_db = hash_create2 (/* elts */ 0, + /* user */ 0, + /* value_bytes */ sizeof (fib_node_index_t), + fib_path_list_db_hash_key_sum, + fib_path_list_db_hash_key_equal, + /* format pair/arg */ + 0, 0); +} + +static clib_error_t * +show_fib_path_list_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_path_list_t *path_list; + fib_node_index_t pli; + + if (unformat (input, "%d", &pli)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(fib_path_list_pool, pli)) + { + path_list = fib_path_list_get(pli); + u8 *s = fib_path_list_format(pli, NULL); + s = format(s, "children:"); + s = fib_node_children_format(path_list->fpl_node.fn_children, s); + vlib_cli_output (vm, "%s", s); + vec_free(s); + } + else + { + vlib_cli_output (vm, "path list %d invalid", pli); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB Path Lists"); + pool_foreach(path_list, fib_path_list_pool, + ({ + vlib_cli_output (vm, "%U", format_fib_path_list, path_list); + })); + } + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_path_list, static) = { + .path = "show fib path list", + .function = show_fib_path_list_command, + .short_help = "show fib path list", +}; diff --git a/vnet/vnet/fib/fib_path_list.h b/vnet/vnet/fib/fib_path_list.h new file mode 100644 index 00000000000..42e07abdd4b --- /dev/null +++ b/vnet/vnet/fib/fib_path_list.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_PATH_LIST_H__ +#define __FIB_PATH_LIST_H__ + +#include <vlib/vlib.h> +#include <vnet/adj/adj.h> + +#include "fib_node.h" +#include "fib_path.h" + +/** + * Enumeration of path-list flags. + */ +typedef enum fib_path_list_attribute_t_ { + /** + * Marker. Add new flags after this one. + */ + FIB_PATH_LIST_ATTRIBUTE_FIRST = 0, + /** + * This path list is shareable. Shareable path-lists + * are inserted into the path-list data-base. + * All path-list are inherently shareable, the reason we share some and + * not others is to limit the size of the path-list database. This DB must + * be searched for each route update. + */ + FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST, + /** + * explicit drop path-list. Used when the entry source needs to + * force a drop, despite the fact the path info is present. + */ + FIB_PATH_LIST_ATTRIBUTE_DROP, + /** + * explicit local path-list. + */ + FIB_PATH_LIST_ATTRIBUTE_LOCAL, + /** + * exclusive path-list. Exclusive means the path will resolve via the + * exclusive (user provided) adj. + */ + FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE, + /** + * resolved path-list + */ + FIB_PATH_LIST_ATTRIBUTE_RESOLVED, + /** + * looped path-list. one path looped implies the whole list is + */ + FIB_PATH_LIST_ATTRIBUTE_LOOPED, + /** + * Marher. Add new flags before this one, and then update it. 
+ */ + FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED, +} fib_path_list_attribute_t; + +typedef enum fib_path_list_flags_t_ { + FIB_PATH_LIST_FLAG_NONE = 0, + FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED), + FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP), + FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL), + FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), + FIB_PATH_LIST_FLAG_RESOLVED = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED), + FIB_PATH_LIST_FLAG_LOOPED = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED), +} fib_path_list_flags_t; + +#define FIB_PATH_LIST_ATTRIBUTES { \ + [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \ + [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \ + [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \ + [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ + [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ +} + +#define FOR_EACH_PATH_LIST_ATTRIBUTE(_item) \ + for (_item = FIB_PATH_LIST_ATTRIBUTE_FIRST; \ + _item <= FIB_PATH_LIST_ATTRIBUTE_LAST; \ + _item++) + +extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags, + const fib_route_path_t *paths); +extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto, + fib_path_list_flags_t flags, + const dpo_id_t *dpo); + +extern fib_node_index_t fib_path_list_copy_and_path_add( + fib_node_index_t pl_index, + fib_path_list_flags_t flags, + const fib_route_path_t *path); +extern fib_node_index_t fib_path_list_copy_and_path_remove( + fib_node_index_t pl_index, + fib_path_list_flags_t flags, + const fib_route_path_t *path); +extern void fib_path_list_contribute_forwarding (fib_node_index_t path_list_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo); +extern index_t fib_path_list_get_adj(fib_node_index_t path_list_index, + fib_forward_chain_type_t type); + +extern u32 fib_path_list_child_add(fib_node_index_t pl_index, + fib_node_type_t type, + 
fib_node_index_t child_index); +extern void fib_path_list_child_remove(fib_node_index_t pl_index, + fib_node_index_t sibling_index); +extern void fib_path_list_back_walk(fib_node_index_t pl_index, + fib_node_back_walk_ctx_t *ctx); +extern void fib_path_list_lock(fib_node_index_t pl_index); +extern void fib_path_list_unlock(fib_node_index_t pl_index); +extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index, + fib_node_index_t **entry_indicies); +extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index); +extern int fib_path_list_is_looped(fib_node_index_t path_list_index); +extern u8 * fib_path_list_format(fib_node_index_t pl_index, + u8 * s); +extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index, + u32 indent, + u8 * s); +extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, + const fib_node_index_t *pis); +/** + * A callback function type for walking a path-list's paths + */ +typedef int (*fib_path_list_walk_fn_t)(fib_node_index_t pl_index, + fib_node_index_t path_index, + void *ctx); + +extern void fib_path_list_walk(fib_node_index_t pl_index, + fib_path_list_walk_fn_t func, + void *ctx); + +extern void fib_path_list_module_init(void); + +extern void fib_path_list_module_init(void); + +/* + * functions for testing. + */ +u32 fib_path_list_pool_size(void); +u32 fib_path_list_db_size(void); + +#endif diff --git a/vnet/vnet/fib/fib_table.c b/vnet/vnet/fib/fib_table.c new file mode 100644 index 00000000000..84c8708851c --- /dev/null +++ b/vnet/vnet/fib/fib_table.c @@ -0,0 +1,1052 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/mpls_fib.h> + +fib_table_t * +fib_table_get (fib_node_index_t index, + fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (pool_elt_at_index(ip4_main.fibs, index)); + case FIB_PROTOCOL_IP6: + return (pool_elt_at_index(ip6_main.fibs, index)); + case FIB_PROTOCOL_MPLS: + return (pool_elt_at_index(mpls_main.fibs, index)); + } + ASSERT(0); + return (NULL); +} + +static inline fib_node_index_t +fib_table_lookup_i (fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_lookup(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_lookup(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_lookup(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos)); + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +fib_table_lookup (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_lookup_i(fib_table_get(fib_index, prefix->fp_proto), prefix)); +} + +static inline fib_node_index_t +fib_table_lookup_exact_match_i (const fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return 
(ip4_fib_table_lookup_exact_match(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_lookup_exact_match(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_lookup(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos)); + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +fib_table_lookup_exact_match (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_lookup_exact_match_i(fib_table_get(fib_index, + prefix->fp_proto), + prefix)); +} + +static fib_node_index_t +fib_table_get_less_specific_i (fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + fib_prefix_t pfx; + + pfx = *prefix; + + if (FIB_PROTOCOL_MPLS == pfx.fp_proto) + { + return (FIB_NODE_INDEX_INVALID); + } + + /* + * in the absence of a tree structure for the table that allows for an O(1) + * parent get, a cheeky way to find the cover is to LPM for the prefix with + * mask-1. + * there should always be a cover, though it may be the default route. the + * default route's cover is the default route. 
+ */ + if (pfx.fp_len != 0) { + pfx.fp_len -= 1; + } + + return (fib_table_lookup_i(fib_table, &pfx)); +} + +fib_node_index_t +fib_table_get_less_specific (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_get_less_specific_i(fib_table_get(fib_index, + prefix->fp_proto), + prefix)); +} + +static void +fib_table_entry_remove (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + vlib_smp_unsafe_warning(); + + fib_table->ft_total_route_counts--; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_entry_remove(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_entry_remove(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_entry_remove(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos); + break; + } + + fib_entry_unlock(fib_entry_index); +} + +static void +fib_table_post_insert_actions (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + fib_node_index_t fib_entry_cover_index; + + /* + * no cover relationships in the MPLS FIB + */ + if (FIB_PROTOCOL_MPLS == prefix->fp_proto) + return; + + /* + * find and inform the covering entry that a new more specific + * has been inserted beneath it + */ + fib_entry_cover_index = fib_table_get_less_specific_i(fib_table, prefix); + /* + * the indicies are the same when the default route is first added + */ + if (fib_entry_cover_index != fib_entry_index) + { + fib_entry_cover_change_notify(fib_entry_cover_index, + fib_entry_index); + } +} + +static void +fib_table_entry_insert (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + vlib_smp_unsafe_warning(); + + fib_entry_lock(fib_entry_index); + fib_table->ft_total_route_counts++; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_entry_insert(&fib_table->v4, + 
&prefix->fp_addr.ip4, + prefix->fp_len, + fib_entry_index); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_entry_insert(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + fib_entry_index); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_entry_insert(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos, + fib_entry_index); + break; + } + + fib_table_post_insert_actions(fib_table, prefix, fib_entry_index); +} + +void +fib_table_fwding_dpo_update (u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo) +{ + vlib_smp_unsafe_warning(); + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_fwding_dpo_update(ip4_fib_get(fib_index), + &prefix->fp_addr.ip4, + prefix->fp_len, + dpo)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_fwding_dpo_update(fib_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + dpo)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_forwarding_table_update(mpls_fib_get(fib_index), + prefix->fp_label, + prefix->fp_eos, + dpo)); + } +} + +void +fib_table_fwding_dpo_remove (u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo) +{ + vlib_smp_unsafe_warning(); + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index), + &prefix->fp_addr.ip4, + prefix->fp_len, + dpo)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_fwding_dpo_remove(fib_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + dpo)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_forwarding_table_reset(mpls_fib_get(fib_index), + prefix->fp_label, + prefix->fp_eos)); + } +} + + +fib_node_index_t +fib_table_entry_special_dpo_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if 
(FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create_special(fib_index, prefix, + source, flags, + dpo); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_special_add(fib_entry_index, source, flags, dpo); + + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + + return (fib_entry_index); +} + +fib_node_index_t +fib_table_entry_special_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + adj_index_t adj_index) +{ + fib_node_index_t fib_entry_index; + dpo_id_t tmp_dpo = DPO_NULL; + + if (ADJ_INDEX_INVALID != adj_index) + { + dpo_set(&tmp_dpo, + DPO_ADJACENCY, + FIB_PROTOCOL_MAX, + adj_index); + } + else + { + dpo_copy(&tmp_dpo, drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto))); + } + + fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, source, + flags, &tmp_dpo); + + dpo_unlock(&tmp_dpo); + + return (fib_entry_index); +} + +void +fib_table_entry_special_dpo_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_prefix_t prefix; + u32 fib_index; + + fib_entry_get_prefix(fib_entry_index, &prefix); + fib_index = fib_entry_get_fib_index(fib_entry_index); + + fib_table_entry_special_dpo_add(fib_index, &prefix, source, flags, dpo); + fib_table_entry_special_remove(fib_index, &prefix, source); +} + +void +fib_table_entry_special_remove (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still sourced? 
+ * no => cover walk + */ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an etry that does not exist. i'll allow it. + */ + } + else + { + fib_entry_src_flag_t src_flag; + int was_sourced; + + /* + * don't nobody go nowhere + */ + fib_entry_lock(fib_entry_index); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + src_flag = fib_entry_special_remove(fib_entry_index, source); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); + } +} + +/** + * fib_table_route_path_fixup + * + * Convert attached hosts to attached next-hops. + * + * This special case is required because an attached path will link to a + * glean, and the FIB entry will have the interface or API/CLI source. When + * the ARP/ND process is completes then that source (which will provide a + * complete adjacency) will be lower priority and so the FIB entry will + * remain linked to a glean and traffic will never reach the hosts. For + * an ATTAHCED_HOST path we can link the path directly to the [incomplete] + * adjacency. 
+ */ +static void +fib_table_route_path_fixup (const fib_prefix_t *prefix, + fib_route_path_t *path) +{ + if (fib_prefix_is_host(prefix) && + ip46_address_is_zero(&path->frp_addr) && + path->frp_sw_if_index != ~0) + { + path->frp_addr = prefix->fp_addr; + } +} + +fib_node_index_t +fib_table_entry_path_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t next_hop_label, + fib_route_path_flags_t path_flags) +{ + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + .frp_label = next_hop_label, + }; + fib_node_index_t fib_entry_index; + fib_route_path_t *paths = NULL; + + fib_table_route_path_fixup(prefix, &path); + vec_add1(paths, path); + + fib_entry_index = fib_table_entry_path_add2(fib_index, prefix, + source, flags, paths); + + vec_free(paths); + return (fib_entry_index); +} + +fib_node_index_t +fib_table_entry_path_add2 (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create(fib_index, prefix, + source, flags, + rpath); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_path_add(fib_entry_index, source, flags, rpath);; + + if (was_sourced != 
fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + return (fib_entry_index); +} + +void +fib_table_entry_path_remove2 (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + const fib_route_path_t *rpath) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still sourced? + * no => cover walk + */ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an etry that does not exist. i'll allow it. + */ + } + else + { + fib_entry_src_flag_t src_flag; + int was_sourced; + + /* + * don't nobody go nowhere + */ + fib_entry_lock(fib_entry_index); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + src_flag = fib_entry_path_remove(fib_entry_index, source, rpath); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); + } +} + +void +fib_table_entry_path_remove (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + fib_route_path_flags_t path_flags) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still sourced? 
+ * no => cover walk + */ + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + }; + fib_route_path_t *paths = NULL; + + fib_table_route_path_fixup(prefix, &path); + vec_add1(paths, path); + + fib_table_entry_path_remove2(fib_index, prefix, source, paths); + + vec_free(paths); +} + +static int +fib_route_path_cmp_for_sort (void * v1, + void * v2) +{ + return (fib_route_path_cmp(v1, v2)); +} + +fib_node_index_t +fib_table_entry_update (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + /* + * sort the paths provided by the control plane. this means + * the paths and the extension on the entry will be sorted. 
+ */ + vec_sort_with_function(((fib_route_path_t*)paths), // const cast + fib_route_path_cmp_for_sort); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create(fib_index, prefix, + source, flags, + paths); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_update(fib_entry_index, source, flags, paths); + + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + return (fib_entry_index); +} + +fib_node_index_t +fib_table_entry_update_one_path (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t next_hop_label, + fib_route_path_flags_t path_flags) +{ + fib_node_index_t fib_entry_index; + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? 
zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + .frp_label = next_hop_label, + }; + fib_route_path_t *paths = NULL; + + fib_table_route_path_fixup(prefix, &path); + vec_add1(paths, path); + + fib_entry_index = + fib_table_entry_update(fib_index, prefix, source, flags, paths); + + vec_free(paths); + + return (fib_entry_index); +} + +static void +fib_table_entry_delete_i (u32 fib_index, + fib_node_index_t fib_entry_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + fib_entry_src_flag_t src_flag; + fib_table_t *fib_table; + int was_sourced; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + /* + * don't nobody go nowhere + */ + fib_entry_lock(fib_entry_index); + + src_flag = fib_entry_delete(fib_entry_index, source); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); +} + +void +fib_table_entry_delete (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + fib_node_index_t fib_entry_index; + + fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an etry that does not exist. + * i'll allow it, but i won't like it. 
+ */ + clib_warning("%U not in FIB", format_fib_prefix, prefix); + } + else + { + fib_table_entry_delete_i(fib_index, fib_entry_index, prefix, source); + } +} + +void +fib_table_entry_delete_index (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_prefix_t prefix; + + fib_entry_get_prefix(fib_entry_index, &prefix); + + fib_table_entry_delete_i(fib_entry_get_fib_index(fib_entry_index), + fib_entry_index, &prefix, source); +} + +fib_node_index_t +fib_table_entry_local_label_add (u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label) +{ + fib_node_index_t fib_entry_index; + + fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, + FIB_SOURCE_MPLS, + FIB_ENTRY_FLAG_NONE, + NULL); + fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &label); + + return (fib_entry_index); +} + +void +fib_table_entry_local_label_remove (u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label) +{ + fib_node_index_t fib_entry_index; + const void *data; + mpls_label_t pl; + + fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return; + + data = fib_entry_get_source_data(fib_entry_index, FIB_SOURCE_MPLS); + + if (NULL == data) + return; + + pl = *(mpls_label_t*)data; + + if (pl != label) + return; + + pl = MPLS_LABEL_INVALID; + + fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &pl); + fib_table_entry_special_remove(fib_index, + prefix, + FIB_SOURCE_MPLS); +} + +u32 +fib_table_get_index_for_sw_if_index (fib_protocol_t proto, + u32 sw_if_index) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_get_index_for_sw_if_index(sw_if_index)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_get_index_for_sw_if_index(sw_if_index)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_get_index_for_sw_if_index(sw_if_index)); + } + return (~0); +} + +flow_hash_config_t +fib_table_get_flow_hash_config (u32 fib_index, + fib_protocol_t proto) +{ + 
switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_get_flow_hash_config(fib_index)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_get_flow_hash_config(fib_index)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_get_flow_hash_config(fib_index)); + } + return (0); +} + + +u32 +fib_table_get_table_id_for_sw_if_index (fib_protocol_t proto, + u32 sw_if_index) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_table_get_index_for_sw_if_index( + proto, sw_if_index), + proto); + + return ((NULL != fib_table ? fib_table->ft_table_id : ~0)); +} + +u32 +fib_table_find (fib_protocol_t proto, + u32 table_id) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_index_from_table_id(table_id)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_index_from_table_id(table_id)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_index_from_table_id(table_id)); + } + return (~0); +} + +u32 +fib_table_find_or_create_and_lock (fib_protocol_t proto, + u32 table_id) +{ + fib_table_t *fib_table; + fib_node_index_t fi; + + switch (proto) + { + case FIB_PROTOCOL_IP4: + fi = ip4_fib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_IP6: + fi = ip6_fib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_MPLS: + fi = mpls_fib_table_find_or_create_and_lock(table_id); + break; + default: + return (~0); + } + + fib_table = fib_table_get(fi, proto); + + fib_table->ft_desc = format(NULL, "%U-VRF:%d", + format_fib_protocol, proto, + table_id); + + return (fi); +} + +u32 +fib_table_create_and_lock (fib_protocol_t proto, + const char *const fmt, + ...) 
+{ + fib_table_t *fib_table; + fib_node_index_t fi; + va_list ap; + + va_start(ap, fmt); + + switch (proto) + { + case FIB_PROTOCOL_IP4: + fi = ip4_fib_table_create_and_lock(); + break; + case FIB_PROTOCOL_IP6: + fi = ip6_fib_table_create_and_lock(); + break; + case FIB_PROTOCOL_MPLS: + fi = mpls_fib_table_create_and_lock(); + break; + default: + return (~0); + } + + fib_table = fib_table_get(fi, proto); + + fib_table->ft_desc = va_format(fib_table->ft_desc, fmt, &ap); + + va_end(ap); + return (fi); +} + +static void +fib_table_destroy (fib_table_t *fib_table) +{ + vec_free(fib_table->ft_desc); + + switch (fib_table->ft_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_destroy(&fib_table->v4); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_destroy(fib_table->ft_index); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_destroy(&fib_table->mpls); + break; + } +} + +void +fib_table_unlock (u32 fib_index, + fib_protocol_t proto) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + fib_table->ft_locks--; + + if (0 == fib_table->ft_locks) + { + fib_table_destroy(fib_table); + } +} +void +fib_table_lock (u32 fib_index, + fib_protocol_t proto) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + fib_table->ft_locks++; +} + +u32 +fib_table_get_num_entries (u32 fib_index, + fib_protocol_t proto, + fib_source_t source) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + + return (fib_table->ft_src_route_counts[source]); +} + +u8* +format_fib_table_name (u8* s, va_list ap) +{ + fib_node_index_t fib_index = va_arg(ap, fib_node_index_t); + fib_protocol_t proto = va_arg(ap, int); // int promotion + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + + s = format(s, "%v", fib_table->ft_desc); + + return (s); +} + +void +fib_table_flush (u32 fib_index, + fib_protocol_t proto, + fib_source_t source) +{ + // FIXME + ASSERT(0); +} diff --git a/vnet/vnet/fib/fib_table.h 
b/vnet/vnet/fib/fib_table.h new file mode 100644 index 00000000000..d7c604f9de9 --- /dev/null +++ b/vnet/vnet/fib/fib_table.h @@ -0,0 +1,732 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_TABLE_H__ +#define __FIB_TABLE_H__ + +#include <vnet/ip/ip.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/mpls/mpls.h> +#include <vnet/mpls/packet.h> + +/** + * @brief + * A protocol Independent FIB table + */ +typedef struct fib_table_t_ +{ + /** + * A union of the protocol specific FIBs that provide the + * underlying LPM mechanism. + * This element is first in the struct so that it is in the + * first cache line. + */ + union { + ip4_fib_t v4; + ip6_fib_t v6; + mpls_fib_t mpls; + }; + + /** + * Which protocol this table serves. Used to switch on the union above. + */ + fib_protocol_t ft_proto; + + /** + * number of locks on the table + */ + u16 ft_locks; + + /** + * Table ID (hash key) for this FIB. + */ + u32 ft_table_id; + + /** + * Index into FIB vector. 
+ */ + fib_node_index_t ft_index; + + /** + * flow hash configuration + */ + u32 ft_flow_hash_config; + + /** + * Per-source route counters + */ + u32 ft_src_route_counts[FIB_SOURCE_MAX]; + + /** + * Total route counters + */ + u32 ft_total_route_counts; + + /** + * Table description + */ + u8* ft_desc; +} fib_table_t; + +/** + * @brief + * Format the description/name of the table + */ +extern u8* format_fib_table_name(u8* s, va_list ap); + +/** + * @brief + * Perfom a longest prefix match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the best match, which may be the default route + */ +extern fib_node_index_t fib_table_lookup(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Perfom an exact match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the exact match, or INVALID + * is there is no match. + */ +extern fib_node_index_t fib_table_lookup_exact_match(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Get the less specific (covering) prefix + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the less specific fib_entry_t. + */ +extern fib_node_index_t fib_table_get_less_specific(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Add a 'special' entry to the FIB that links to the adj passed + * A special entry is an entry that the FIB is not expect to resolve + * via the usual mechanisms (i.e. recurisve or neighbour adj DB lookup). + * Instead the client/source provides the adj to link to. + * This add is reference counting per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. 
+ * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param adj_index + * The adjacency to link to. + * + * @return + * the index of the fib_entry_t that is created (or exists already). + */ +extern fib_node_index_t fib_table_entry_special_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + adj_index_t adj_index); + +/** + * @brief + * Add a 'special' entry to the FIB that links to the DPO passed + * A special entry is an entry that the FIB is not expect to resolve + * via the usual mechanisms (i.e. recurisve or neighbour adj DB lookup). + * Instead the client/source provides the DPO to link to. + * This add is reference counting per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param dpo + * The DPO to link to. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t stype, + const dpo_id_t *dpo); + +/** + * @brief + * Update a 'special' entry to the FIB that links to the DPO passed + * A special entry is an entry that the FIB is not expect to resolve + * via the usual mechanisms (i.e. recurisve or neighbour adj DB lookup). + * Instead the client/source provides the DPO to link to. + * Special entries are add/remove reference counted per-source. So n + * 'removes' are required for n 'adds', if the entry is no longer required. 
+ * An 'update' can only be used after an 'add' and is therefore assumed to act + * on the reference instance of that add (an update is implemented as add/remove + * pair). + * + * @param fib_entry_index + * The index of the FIB entry to update + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param dpo + * The DPO to link to. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern void fib_table_entry_special_dpo_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t stype, + const dpo_id_t *dpo); + +/** + * @brief + * Remove a 'special' entry from the FIB. + * This add is reference counting per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to remove + * + * @param source + * The ID of the client/source adding the entry. + * + */ +extern void fib_table_entry_special_remove(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source); + +/** + * @brief + * Add one path to an entry (aka route) in the FIB. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @paran next_hop_proto + * The protocol of the next hop. This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param sw_if_index + * The index of the interface. 
+ * + * @param next_hop_fib_index, + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param next_hop_label + * The path's out-going label. INVALID is there is none. + * + * @param pf + * Flags for the path + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_path_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t next_hop_label, + fib_route_path_flags_t pf); +/** + * @brief + * Add n paths to an entry (aka route) in the FIB. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param rpaths + * A vector of paths. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath); + +/** + * @brief + * remove one path to an entry (aka route) in the FIB. If this is the entry's + * last path, then the entry will be removed, unless it has other sources. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @paran next_hop_proto + * The protocol of the next hop. 
This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param sw_if_index + * The index of the interface. + * + * @param next_hop_fib_index, + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param pf + * Flags for the path + */ +extern void fib_table_entry_path_remove(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + fib_route_path_flags_t pf); + +/** + * @brief + * Remove n paths to an entry (aka route) in the FIB. If this is the entry's + * last path, then the entry will be removed, unless it has other sources. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param rpaths + * A vector of paths. + */ +extern void fib_table_entry_path_remove2(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + const fib_route_path_t *paths); + +/** + * @brief + * Update an entry to have a new set of paths. If the entry does not + * exist, it will be created. + * The difference between an 'path-add' and an update, is that path-add is + * an incremental addition of paths, whereas an update is a wholesale swap. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param rpaths + * A vector of paths. + * + * @return + * the index of the fib_entry_t that is created (or existed already). 
+ */ +extern fib_node_index_t fib_table_entry_update(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths); + +/** + * @brief + * Update the entry to have just one path. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @paran next_hop_proto + * The protocol of the next hop. This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param sw_if_index + * The index of the interface. + * + * @param next_hop_fib_index, + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param next_hop_label + * The path's out-going label. INVALID is there is none. + * + * @param pf + * Flags for the path + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t next_hop_label, + fib_route_path_flags_t pf); + +/** + * @brief + * Add a MPLS local label for the prefix/route. If the entry does not + * exist, it will be created. In theory more than one local label can be + * added, but this is not yet supported. 
+ * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to which to add the label + * + * @param label + * The MPLS label to add + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_local_label_add(u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label); +/** + * @brief + * remove a MPLS local label for the prefix/route. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to which to add the label + * + * @param label + * The MPLS label to add + */ +extern void fib_table_entry_local_label_remove(u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label); + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to remove + * + * @param source + * The ID of the client/source adding the entry. + */ +extern void fib_table_entry_delete(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source); + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param entry_index + * The index of the FIB entry + * + * @param source + * The ID of the client/source adding the entry. 
+ */ +extern void fib_table_entry_delete_index(fib_node_index_t entry_index, + fib_source_t source); + +/** + * @brief + * Flush all entries from a table for the source + * + * @param fib_index + * The index of the FIB + * + * @paran proto + * The protocol of the entries in the table + * + * @param source + * the source to flush + */ +extern void fib_table_flush(u32 fib_index, + fib_protocol_t proto, + fib_source_t source); + +/** + * @brief + * Get the index of the FIB bound to the interface + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param sw_if_index + * The interface index + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, + u32 sw_if_index); + +/** + * @brief + * Get the Table-ID of the FIB bound to the interface + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param sw_if_index + * The interface index + * + * @return fib_index + * The tableID of the FIB + */ +extern u32 fib_table_get_table_id_for_sw_if_index(fib_protocol_t proto, + u32 sw_if_index); + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES NOT create the + * FIB if it does not exist. + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table-id + * The Table-ID + * + * @return fib_index + * The index of the FIB, which may be INVALID. + */ +extern u32 fib_table_find(fib_protocol_t proto, u32 table_id); + + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES create the + * FIB if it does not exist. + * + * @paran proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table-id + * The Table-ID + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, + u32 table_id); + +/** + * @brief + * Create a new table with no table ID. 
This means it does not get + * added to the hash-table and so can only be found by using the index returned. + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param fmt + * A string to describe the table + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_create_and_lock(fib_protocol_t proto, + const char *const fmt, + ...); + +/** + * @brief + * Get the flow hash configuration used by the table + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @return The flow hash config + */ +extern flow_hash_config_t fib_table_get_flow_hash_config(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Release a reference counting lock on the table. When the last lock + * has gone, the FIB is deleted. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void fib_table_unlock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Take a reference counting lock on the table + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void fib_table_lock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Return the number of entries in the FIB added by a given source. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @return number of sourced entries. 
+ */ +extern u32 fib_table_get_num_entries(u32 fib_index, + fib_protocol_t proto, + fib_source_t source); + +/** + * @brief + * Get a pointer to a FIB table + */ +extern fib_table_t *fib_table_get(fib_node_index_t index, + fib_protocol_t proto); + +#endif diff --git a/vnet/vnet/fib/fib_test.c b/vnet/vnet/fib/fib_test.c new file mode 100644 index 00000000000..898005e57fb --- /dev/null +++ b/vnet/vnet/fib/fib_test.c @@ -0,0 +1,6330 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> + +#include <vnet/mpls/mpls.h> + +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_node_list.h> + +#define FIB_TEST_I(_cond, _comment, _args...) \ +({ \ + int _evald = (_cond); \ + if (!(_evald)) { \ + fformat(stderr, "FAIL:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } else { \ + fformat(stderr, "PASS:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } \ + _evald; \ +}) +#define FIB_TEST(_cond, _comment, _args...) 
\ +{ \ + if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ + return;\ + ASSERT(!("FAIL: " _comment)); \ + } \ +} + +/** + * A 'i'm not fussed is this is not efficient' store of test data + */ +typedef struct test_main_t_ { + /** + * HW if indicies + */ + u32 hw_if_indicies[4]; + /** + * HW interfaces + */ + vnet_hw_interface_t * hw[4]; + +} test_main_t; +static test_main_t test_main; + +/* fake ethernet device class, distinct from "fake-ethX" */ +static u8 * format_test_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "test-eth%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +VNET_DEVICE_CLASS (test_interface_device_class,static) = { + .name = "Test interface", + .format_device_name = format_test_interface_name, + .tx_function = dummy_interface_tx, +}; + +static u8 *hw_address; + +static void +fib_test_mk_intf (u32 ninterfaces) +{ + clib_error_t * error = NULL; + test_main_t *tm = &test_main; + u8 byte; + u32 i; + + ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies)); + + for (i=0; i<6; i++) + { + byte = 0xd0+i; + vec_add1(hw_address, byte); + } + + for (i = 0; i < ninterfaces; i++) + { + hw_address[5] = i; + + error = ethernet_register_interface(vnet_get_main(), + ethernet_hw_interface_class.index, + i /* instance */, + hw_address, + &tm->hw_if_indicies[i], + /* flag change */ 0); + + FIB_TEST((NULL == error), "ADD interface %d", i); + + tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + vec_validate (ip4_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index); + vec_validate (ip6_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index); + ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + error = 
vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[i]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "UP interface %d", i); + } + /* + * re-eval after the inevitable realloc + */ + for (i = 0; i < ninterfaces; i++) + { + tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + } +} + +#define FIB_TEST_REC_FORW(_rec_prefix, _via_prefix) \ +{ \ + const dpo_id_t *_rec_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup_exact_match(fib_index, (_rec_prefix))); \ + const dpo_id_t *_via_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup(fib_index, (_via_prefix))); \ + FIB_TEST(!dpo_cmp(_via_dpo, \ + load_balance_get_bucket(_rec_dpo->dpoi_index, 0)), \ + "%U is recursive via %U", \ + format_fib_prefix, (_rec_prefix), \ + format_fib_prefix, _via_prefix); \ +} + +#define FIB_TEST_LB_BUCKET_VIA_ADJ(_prefix, _bucket, _ai) \ +{ \ + const dpo_id_t *_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup_exact_match(fib_index, (_prefix))); \ + const dpo_id_t *_dpo1 = \ + load_balance_get_bucket(_dpo->dpoi_index, _bucket); \ + FIB_TEST(DPO_ADJACENCY == _dpo1->dpoi_type, "type is %U", \ + format_dpo_type, _dpo1->dpoi_type); \ + FIB_TEST((_ai == _dpo1->dpoi_index), \ + "%U bucket %d resolves via %U", \ + format_fib_prefix, (_prefix), \ + _bucket, \ + format_dpo_id, _dpo1, 0); \ +} + +static void +fib_test_v4 (void) +{ + /* + * In the default table check for the presence and correct forwarding + * of the special entries + */ + fib_node_index_t dfrt, fei, ai, ai2, locked_ai, ai_01, ai_02, ai_03; + const dpo_id_t *dpo, *dpo1, *dpo2, *dpo_drop; + const ip_adjacency_t *adj; + const load_balance_t *lb; + test_main_t *tm; + u32 fib_index; + int ii; + + /* via 10.10.10.1 */ + ip46_address_t nh_10_10_10_1 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }; + /* via 10.10.10.2 */ + ip46_address_t nh_10_10_10_2 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }; + + tm = &test_main; + + /* Find 
or create FIB table 11 */ + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + + for (ii = 0; ii < 4; ii++) + { + ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index; + } + + fib_prefix_t pfx_0_0_0_0_s_0 = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + {0} + }, + }, + }; + + fib_prefix_t pfx = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + {0} + }, + }, + }; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP4); + + dfrt = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0); + FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + pfx.fp_len = 32; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all zeros route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all 0s route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xffffffff); + pfx.fp_len = 32; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all ones route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all 1s route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xe0000000); + pfx.fp_len = 8; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all-mcast route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all-mcast route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xf0000000); + pfx.fp_len = 8; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "class-e route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "class-e route is DROP"); + + /* + * at this stage there are 5 entries in the test FIB (plus 5 in the default), + * all of which are special sourced and so 
none of which share path-lists. + * There are also 6 entries, and 6 non-shared path-lists, in the v6 default + * table + */ +#define NBR (5+5+6) + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add interface routes. + * validate presence of /24 attached and /32 recieve. + * test for the presence of the receive address in the glean and local adj + */ + fib_prefix_t local_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on attached interface"); + + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &adj->sub_type.glean.receive_addr)), + "attached interface adj is receive ok"); + + local_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = 
fib_table_lookup(fib_index, &local_pfx); + FIB_TEST(((FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on local interface"); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + receive_dpo_t *rd = receive_dpo_get(dpo->dpoi_index); + + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + FIB_TEST((2 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_INTERFACE)), + "2 Interface Source'd prefixes"); + + /* + * +2 interface routes +2 non-shared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Modify the default route to be via an adj not yet known. 
+ * this sources the defalut route with the API source, which is + * a higher preference to the DEFAULT_ROUTE source + */ + pfx.fp_addr.ip4.as_u32 = 0; + pfx.fp_len = 0; + fib_table_entry_path_add(fib_index, &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)), + "Flags set on API route"); + + FIB_TEST((fei == dfrt), "default route same index"); + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&nh_10_10_10_1, &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((1 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_API)), + "1 API Source'd prefixes"); + + /* + * find the adj in the shared db + */ + locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + FIB_TEST((locked_ai == ai), "ADJ NBR DB find"); + adj_unlock(locked_ai); + + /* + * +1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+3 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove the API source from the default route. 
We expected + * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP + */ + pfx.fp_addr.ip4.as_u32 = 0; + pfx.fp_len = 0; + fib_table_entry_path_remove(fib_index, &pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // non-recursive path, so no FIB index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx); + + FIB_TEST((fei == dfrt), "default route same index"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "Default route is DROP"); + + /* + * -1 shared-path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. + */ + fib_prefix_t pfx_10_10_10_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.1 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + fib_prefix_t pfx_10_10_10_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.2 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }, + }; + fib_prefix_t pfx_11_11_11_11_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 11.11.11.11 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0b0b0b0b), + }, + }; + u8 eth_addr[] = { + 0xde, 0xde, 0xde, 0xba, 0xba, 0xba, + }; + + /* + * Add a route via an incomplete ADJ. then complete the ADJ + * Expect the route LB is updated to use complete adj type. 
+ */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_11_11_11_11_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY_INCOMPLETE == dpo1->dpoi_type, + "11.11.11.11/32 via incomplete adj"); + + ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created"); + adj = adj_get(ai_01); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_01, eth_addr); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, + "11.11.11.11/32 via complete adj"); + + /* + * add the adj fib + */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)), + "Flags set on adj-fib"); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + fib_table_entry_path_remove(fib_index, + &pfx_11_11_11_11_s_32, + 
FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + eth_addr[5] = 0xb2; + + ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created"); + adj = adj_get(ai_02); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_02, eth_addr); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((ai_01 != ai_02), "ADJs are different"); + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_10_10_10_2_s_32); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + + /* + * +2 adj-fibs, and their non-shared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add a 2 routes via the first ADJ. 
ensure path-list sharing + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.1/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.1 resolves via 10.10.10.1"); + + /* + * +1 entry and a shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* 1.1.2.0/24 */ + fib_prefix_t pfx_1_1_2_0_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010200), + } + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1"); + + /* + * +1 entry only + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * modify 1.1.2.0/24 to use multipath. 
+ */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + dpo = fib_entry_contribute_ip_forwarding(fei); + + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type); + FIB_TEST((ai_01 == dpo1->dpoi_index), + "1.1.2.0/24 bucket 0 resolves via 10.10.10.1 (%d=%d)", + ai_01, dpo1->dpoi_index); + + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 1); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type); + FIB_TEST((ai_02 == dpo1->dpoi_index), + "1.1.2.0/24 bucket 1 resolves via 10.10.10.2"); + + /* + * +1 shared-pathlist + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * revert the modify + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1"); + + /* + * +1 shared-pathlist + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is %d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add 2 recursive routes: + * 100.100.100.100/32 via 1.1.1.1/32 => the via entry is installed. 
+ * 100.100.100.101/32 via 1.1.1.1/32 => the via entry is installed. + */ + fib_prefix_t bgp_100_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.100/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646464), + }, + }; + /* via 1.1.1.1 */ + ip46_address_t nh_1_1_1_1 = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }; + + fib_table_entry_path_add(fib_index, + &bgp_100_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_100_pfx, &pfx_1_1_1_1_s_32); + + /* + * +1 entry and +1 shared-path-list + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fib_prefix_t bgp_101_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.101/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646465), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_101_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_101_pfx, &pfx_1_1_1_1_s_32); + + /* + * +1 entry, but the recursive path-list is shared. 
+ */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * An EXCLUSIVE route; one where the user (me) provides the exclusive + * adjacency through which the route will resovle + */ + fib_prefix_t ex_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.4.4.4/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04040404), + }, + }; + + fib_table_entry_special_add(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + locked_ai); + fei = fib_table_lookup_exact_match(fib_index, &ex_pfx); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Exclusive route links to user adj"); + + fib_table_entry_special_remove(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &ex_pfx), + "Exclusive reoute removed"); + + /* + * An EXCLUSIVE route; one where the user (me) provides the exclusive + * adjacency through which the route will resovle + */ + dpo_id_t ex_dpo = DPO_NULL; + + lookup_dpo_add_or_lock_w_fib_index(fib_index, + DPO_PROTO_IP4, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &ex_dpo); + + fib_table_entry_special_dpo_add(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &ex_dpo); + fei = fib_table_lookup_exact_match(fib_index, &ex_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)), + "exclusive remote uses lookup DPO"); + + fib_table_entry_special_remove(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &ex_pfx), + "Exclusive reoute removed"); + dpo_reset(&ex_dpo); + + /* + * Add a recursive route: + * 200.200.200.200/32 via 
1.1.1.2/32 => the via entry is NOT installed. + */ + fib_prefix_t bgp_200_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 200.200.200.200/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c8), + }, + }; + /* via 1.1.1.2 */ + fib_prefix_t pfx_1_1_1_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010102), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * the adj should be recursive via drop, since the route resolves via + * the default route, which is itself a DROP + */ + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(load_balance_is_drop(dpo1), "1.1.1.2/32 is drop"); + + /* + * +2 entry and +1 shared-path-list + */ + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Unequal Cost load-balance. 3:1 ratio. 
fits in a 4 bucket LB + */ + fib_prefix_t pfx_1_2_3_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01020304), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 3, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.4/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 4), + "1.2.3.4/32 LB has %d bucket", + lb->lb_n_buckets); + + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 0, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 1, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 2, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 3, ai_02); + + fib_table_entry_delete(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API); + + /* + * Unequal Cost load-balance. 4:1 ratio. 
+ * fits in a 16 bucket LB with ratio 13:3 + */ + fib_prefix_t pfx_1_2_3_5_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01020305), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 4, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.5/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 16), + "1.2.3.5/32 LB has %d bucket", + lb->lb_n_buckets); + + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 0, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 1, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 2, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 3, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 4, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 5, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 6, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 7, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 8, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 9, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 10, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 11, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 12, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 13, ai_02); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 14, ai_02); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 15, ai_02); + + fib_table_entry_delete(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API); + + /* + * Add a recursive route: + * 
200.200.200.201/32 via 1.1.1.200/32 => the via entry is NOT installed. + */ + fib_prefix_t bgp_201_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 200.200.200.201/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c9), + }, + }; + /* via 1.1.1.200 */ + fib_prefix_t pfx_1_1_1_200_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x010101c8), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_200_s_32.fp_addr, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32); + FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)), + "Flags set on RR via non-attached"); + + /* + * +2 entry (BGP & RR) and +1 shared-path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * insert a route that covers the missing 1.1.1.2/32. we epxect + * 200.200.200.200/32 and 200.200.200.201/32 to resolve through it. 
+ */ + fib_prefix_t pfx_1_1_1_0_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.0/24 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010100), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.0/24 resolves via 10.10.10.1"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.2/32 resolves via 10.10.10.1"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.200/24 resolves via 10.10.10.1"); + + /* + * +1 entry. 1.1.1.1/32 already uses 10.10.10.1 so no new pah-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * the recursive adj for 200.200.200.200 should be updated. + */ + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * insert a more specific route than 1.1.1.0/24 that also covers the + * missing 1.1.1.2/32, but not 1.1.1.200/32. we epxect + * 200.200.200.200 to resolve through it. 
+ */ + fib_prefix_t pfx_1_1_1_0_s_28 = { + .fp_len = 28, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.0/24 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010100), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "1.1.1.0/24 resolves via 10.10.10.2"); + + /* + * +1 entry. +1 shared path-list + */ + FIB_TEST((5 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+9 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+14 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * the recursive adj for 200.200.200.200 should be updated. + * 200.200.200.201 remains unchanged. + */ + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * remove this /28. 200.200.200.200/32 should revert back to via 1.1.1.0/24 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28) == + FIB_NODE_INDEX_INVALID), + "1.1.1.0/28 removed"); + FIB_TEST((fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28) == + fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24)), + "1.1.1.0/28 lookup via /24"); + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * -1 entry. 
-1 shared path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 1.1.1.0/24. 200.200.200.200/32 should revert back to via 0.0.0.0/0 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_24) == + FIB_NODE_INDEX_INVALID), + "1.1.1.0/24 removed"); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1.1.1.2/32 route is DROP"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1.1.1.200/32 route is DROP"); + + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * -1 entry + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * insert the missing 1.1.1.2/32 + */ + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai = ai_01), "1.1.1.2/32 resolves via 10.10.10.1"); + + FIB_TEST_REC_FORW(&bgp_201_pfx, 
&pfx_1_1_1_200_s_32); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32); + + /* + * no change. 1.1.1.2/32 was already there RR sourced. + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 200.200.200.201/32 which does not have a valid via FIB + */ + fib_table_entry_path_remove(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_200_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * -2 entries (BGP and RR). -1 shared path-list; + */ + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.201/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32) == + FIB_NODE_INDEX_INVALID), + "1.1.1.200/32 removed"); + + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 200.200.200.200/32 which does have a valid via FIB + */ + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.200/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32) != + FIB_NODE_INDEX_INVALID), + "1.1.1.2/32 still present"); + + /* + * -1 entry (BGP, the RR source is also API sourced). 
-1 shared path-list; + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+9 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A recursive prefix that has a 2 path load-balance. + * It also shares a next-hop with other BGP prefixes and hence + * test the ref counting of RR sourced prefixes and 2 level LB. + */ + const fib_prefix_t bgp_102 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.101/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646466), + }, + }; + fib_table_entry_path_add(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &bgp_102); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "100.100.100.102/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 2), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "First via 10.10.10.1"); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 1)), + "Second via 10.10.10.1"); + + fib_table_entry_path_remove(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &bgp_102); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "100.100.100.102/32 removed"); + + /* + * remove the remaining recursives + */ + fib_table_entry_path_remove(fib_index, + &bgp_100_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_101_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_100_pfx) == + FIB_NODE_INDEX_INVALID), + "100.100.100.100/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_101_pfx) == + FIB_NODE_INDEX_INVALID), + "100.100.100.101/32 removed"); + + /* + * -2 entry (2*BGP, the RR source is also API sourced). -1 shared path-list; + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add a recursive route via a connected cover, using an adj-fib that does exist + */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + ~0, // no index provided. + fib_index, // Same as route's FIB + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * +1 entry. 
+1 shared path-list (recursive via 10.10.10.1) + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "200.200.200.200/32 is recursive via adj for 10.10.10.1"); + + FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)), + "Flags set on RR via existing attached"); + + /* + * Add a recursive route via a connected cover, using and adj-fib that does + * not exist + */ + ip46_address_t nh_10_10_10_3 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03), + }; + fib_prefix_t pfx_10_10_10_3 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = nh_10_10_10_3, + }; + + fib_table_entry_path_add(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + ~0, // no index provided. + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * +2 entries (BGP and RR). 
+1 shared path-list (recursive via 10.10.10.3) and + * one unshared non-recursive via 10.10.10.3 + */ + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &nh_10_10_10_3, + tm->hw[0]->sw_if_index); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + ai = fib_entry_get_adj(fei); + FIB_TEST((ai == ai_03), "adj for 10.10.10.3/32 is via adj for 10.10.10.3"); + FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on RR via non-existing attached"); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 10.10.10.3"); + + adj_unlock(ai_03); + + /* + * remove the recursives + */ + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.201/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.200/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3) == + FIB_NODE_INDEX_INVALID), + "10.10.10.3/32 removed"); + + /* + * -3 entries (2*BGP and RR). -2 shared path-list (recursive via 10.10.10.3 & + * 10.10.10.1) and one unshared non-recursive via 10.10.10.3 + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + + /* + * RECURSION LOOPS + * Add 5.5.5.5/32 -> 5.5.5.6/32 -> 5.5.5.7/32 -> 5.5.5.5/32 + */ + fib_prefix_t pfx_5_5_5_5_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050505), + }, + }; + fib_prefix_t pfx_5_5_5_6_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050506), + }, + }; + fib_prefix_t pfx_5_5_5_7_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050507), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_7_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_7_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_5_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + /* + * +3 entries, +3 shared path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * All the entries have only looped paths, so they are all drop + */ + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * provide 5.5.5.6/32 with alternate path. + * this will allow only 5.5.5.6/32 to forward with this path, the others + * are still drop since the loop is still present. 
+ */ + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.6 LB has %d bucket", lb->lb_n_buckets); + + dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type); + FIB_TEST((ai_01 == dpo2->dpoi_index), + "5.5.5.6 bucket 0 resolves via 10.10.10.2"); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + + /* + * remove the alternate path for 5.5.5.6/32 + * back to all drop + */ + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * break the loop by giving 5.5.5.5/32 a new set of paths + * expect all to forward via this new path. 
+ */ + fib_table_entry_update_one_path(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.5 LB has %d bucket", lb->lb_n_buckets); + + dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type); + FIB_TEST((ai_01 == dpo2->dpoi_index), + "5.5.5.5 bucket 0 resolves via 10.10.10.2"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_7_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo2->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo2->dpoi_index, 0)), + "5.5.5.5.7 via 5.5.5.5"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "5.5.5.5.6 via 5.5.5.7"); + + /* + * revert back to the loop. so we can remove the prefixes with + * the loop intact + */ + fib_table_entry_update_one_path(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * remove all the 5.5.5.x/32 prefixes + */ + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_7_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_7_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_5_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * -3 entries, -3 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Single level loop 5.5.5.5/32 via 5.5.5.5/32 + */ + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1-level 5.5.5.6/32 loop is via adj for DROP"); + + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32), + "1-level 5.5.5.6/32 loop is removed"); + + /* + * add-remove test. no change. + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A recursive route with recursion constraints. 
+ * 200.200.200.200/32 via 1.1.1.1 is recurse via host constrained + */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + + /* + * save the load-balance. we expect it to be inplace modified + */ + lb = load_balance_get(dpo1->dpoi_index); + + /* + * add a covering prefix for the via fib that would otherwise serve + * as the resolving route when the host is removed + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai == ai_01), + "adj for 1.1.1.0/28 is via adj for 1.1.1.1"); + + /* + * remove the host via FIB - expect the BGP prefix to be drop + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for DROP"); + + /* + * add the via-entry host reoute back. 
expect to resolve again + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + + /* + * add another path for the recursive. it will then have 2. + */ + fib_prefix_t pfx_1_1_1_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010103), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_3_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_3_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 1)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.3"); + + /* + * expect the lb-map used by the recursive's load-balance is using both buckets + */ + load_balance_map_t *lbm; + index_t lbmi; + + lb = load_balance_get(dpo->dpoi_index); + lbmi = lb->lb_map; + load_balance_map_lock(lbmi); + lbm = 
load_balance_map_get(lbmi); + + FIB_TEST(lbm->lbm_buckets[0] == 0, + "LB maps's bucket 0 is %d", + lbm->lbm_buckets[0]); + FIB_TEST(lbm->lbm_buckets[1] == 1, + "LB maps's bucket 1 is %d", + lbm->lbm_buckets[1]); + + /* + * withdraw one of the /32 via-entrys. + * that ECMP path will be unresolved and forwarding should continue on the + * other available path. this is an iBGP PIC edge failover. + * Test the forwarding changes without re-fetching the adj from the + * recursive entry. this ensures its the same one that is updated; i.e. an + * inplace-modify. + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)), + "post PIC 200.200.200.200/32 was inplace modified"); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 0)), + "post PIC adj for 200.200.200.200/32 is recursive" + " via adj for 1.1.1.3"); + + /* + * the LB maps that was locked above should have been modified to remove + * the path that was down, and thus its bucket points to a path that is + * still up. + */ + FIB_TEST(lbm->lbm_buckets[0] == 1, + "LB maps's bucket 0 is %d", + lbm->lbm_buckets[0]); + FIB_TEST(lbm->lbm_buckets[1] == 1, + "LB maps's bucket 1 is %d", + lbm->lbm_buckets[1]); + + load_balance_map_unlock(lb->lb_map); + + /* + * add it back. 
again + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)), + "post PIC recovery adj for 200.200.200.200/32 is recursive " + "via adj for 1.1.1.1"); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 1)), + "post PIC recovery adj for 200.200.200.200/32 is recursive " + "via adj for 1.1.1.3"); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "post PIC 200.200.200.200/32 was inplace modified"); + + /* + * add a 3rd path. this makes the LB 16 buckets. + */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "200.200.200.200/32 was inplace modified for 3rd path"); + FIB_TEST(16 == lb->lb_n_buckets, + "200.200.200.200/32 was inplace modified for 3rd path to 16 buckets"); + + lbmi = lb->lb_map; + load_balance_map_lock(lbmi); + lbm = load_balance_map_get(lbmi); + + for (ii = 0; ii < 16; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + + /* + * trigger PIC by removing the first via-entry + * the first 6 buckets of the map should map to the next 6 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = 
fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "200.200.200.200/32 was inplace modified for 3rd path"); + FIB_TEST(2 == lb->lb_n_buckets, + "200.200.200.200/32 was inplace modified for 3rd path remove to 2 buckets"); + + for (ii = 0; ii < 6; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii+6, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + for (ii = 6; ii < 16; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + + + /* + * tidy up + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID); + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, + fib_index, + 1, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_3_s_32.fp_addr, + ~0, + fib_index, + 1, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + fib_table_entry_delete(fib_index, + &pfx_1_1_1_3_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)), + "1.1.1.1/28 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32)), + "1.1.1.3/32 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &bgp_200_pfx)), + "200.200.200.200/32 removed"); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route whose paths are built up iteratively and then removed + * all at once + */ + fib_prefix_t pfx_4_4_4_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.4.4.4/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04040404), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(FIB_NODE_INDEX_INVALID != + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 present"); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route with multiple paths at once + */ + fib_route_path_t *r_paths = NULL; + + for (ii = 0; ii < 4; ii++) + { + fib_route_path_t r_path = { + .frp_proto = FIB_PROTOCOL_IP4, + .frp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii), + }, + .frp_sw_if_index = tm->hw[0]->sw_if_index, + .frp_weight = 1, + .frp_fib_index = ~0, + }; + vec_add1(r_paths, r_path); + } + + fib_table_entry_update(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + r_paths); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + vec_free(r_paths); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route deag route + */ + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + lookup_dpo_t *lkd = lookup_dpo_get(dpo->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "4.4.4.4/32 is deag in %d %U", + lkd->lkd_fib_index, + format_dpo_id, dpo, 0); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + vec_free(r_paths); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * CLEANUP + * remove: 1.1.1.2/32, 1.1.2.0/24 and 1.1.1.1/32 + * all of which are via 10.10.10.1, Itf1 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32), + "1.1.1.1/32 removed"); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32), + "1.1.1.2/32 removed"); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_2_0_s_24), + "1.1.2.0/24 removed"); + + /* + * -3 entries and -1 shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * An attached-host route. 
Expect to link to the incomplete adj + */ + fib_prefix_t pfx_4_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.1.1.1/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04010101), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_4_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + tm->hw[0]->sw_if_index, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_1_1_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.1.1.1/32 present"); + ai = fib_entry_get_adj(fei); + + ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &pfx_4_1_1_1_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((ai == ai2), "Attached-host link to incomplete ADJ"); + adj_unlock(ai2); + + /* + * +1 entry and +1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fib_table_entry_delete(fib_index, + &pfx_4_1_1_1_s_32, + FIB_SOURCE_API); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add a v6 prefix via v4 next-hops + */ + fib_prefix_t pfx_2001_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000), + }, + }; + fei = fib_table_entry_path_add(0, //default v6 table + &pfx_2001_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + fib_index, + 1, + MPLS_LABEL_INVALID, + 
FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(0, &pfx_2001_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "2001::/64 present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP), + "2001::/64 via ARP-adj"); + FIB_TEST((adj->ia_link == FIB_LINK_IP6), + "2001::/64 is link type v6"); + FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP4), + "2001::/64 ADJ-adj is NH proto v4"); + fib_table_entry_delete(0, &pfx_2001_s_64, FIB_SOURCE_API); + + + /* + * CLEANUP + * remove adj-fibs: + */ + fib_table_entry_delete(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32), + "10.10.10.1/32 adj-fib removed"); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32), + "10.10.10.2/32 adj-fib removed"); + + /* + * -2 entries and -2 non-shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * unlock the 2 adjacencies for which this test provided a rewrite. + * These are the last locks on these adjs. they should thus go away. 
+ */ + adj_unlock(ai_02); + adj_unlock(ai_01); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * CLEANUP + * remove the interface prefixes + */ + local_pfx.fp_len = 32; + fib_table_entry_special_remove(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + fei = fib_table_lookup(fib_index, &local_pfx); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx), + "10.10.10.10/32 adj-fib removed"); + + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx), + "10.10.10.10/24 adj-fib removed"); + + /* + * -2 entries and -2 non-shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Last but not least, remove the VRF + */ + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_API)), + "NO API Source'd prefixes"); + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_RR)), + "NO RR Source'd prefixes"); + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_INTERFACE)), + "NO INterface Source'd prefixes"); + + fib_table_unlock(fib_index, FIB_PROTOCOL_IP4); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR-5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR-5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + return; +} + +static void +fib_test_v6 (void) +{ + /* + * In the default table check for the presence and correct forwarding + * of the 
special entries + */ + fib_node_index_t dfrt, fei, ai, locked_ai, ai_01, ai_02; + const dpo_id_t *dpo, *dpo_drop; + const ip_adjacency_t *adj; + const receive_dpo_t *rd; + test_main_t *tm; + u32 fib_index; + int ii; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* via 2001:0:0:1::2 */ + ip46_address_t nh_2001_2 = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000002), + }, + }, + }; + + tm = &test_main; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP6); + + /* Find or create FIB table 11 */ + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11); + + for (ii = 0; ii < 4; ii++) + { + ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index; + } + + fib_prefix_t pfx_0_0 = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + {0, 0}, + }, + }, + }; + + dfrt = fib_table_lookup(fib_index, &pfx_0_0); + FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + dpo = fib_entry_contribute_ip_forwarding(dfrt); + FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &pfx_0_0.fp_addr.ip6)), + "default-route; fwd and non-fwd tables match"); + + // FIXME - check specials. + + /* + * At this stage there is one v4 FIB with 5 routes and two v6 FIBs + * each with 6 entries. All entries are special so no path-list sharing. + */ +#define NPS (5+6+6) + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add interface routes. + * validate presence of /64 attached and /128 recieve. 
+ * test for the presence of the receive address in the glean and local adj + * + * receive on 2001:0:0:1::1/128 + */ + fib_prefix_t local_pfx = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000001), + }, + }, + } + }; + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &adj->sub_type.glean.receive_addr)), + "attached interface adj is receive ok"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &local_pfx.fp_addr.ip6)), + "attached-route; fwd and non-fwd tables match"); + + local_pfx.fp_len = 128; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + rd = receive_dpo_get(dpo->dpoi_index); + + 
FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &local_pfx.fp_addr.ip6)), + "local-route; fwd and non-fwd tables match"); + + /* + * +2 entries. +2 unshared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Modify the default route to be via an adj not yet known. + * this sources the defalut route with the API source, which is + * a higher preference to the DEFAULT_ROUTE source + */ + fib_table_entry_path_add(fib_index, &pfx_0_0, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_0_0); + + FIB_TEST((fei == dfrt), "default route same index"); + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&nh_2001_2, &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + /* + * find the adj in the shared db + */ + locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index); + FIB_TEST((locked_ai == ai), "ADJ NBR DB find"); + adj_unlock(locked_ai); + + /* + * no more entires. 
+1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+3 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove the API source from the default route. We expected + * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP + */ + fib_table_entry_path_remove(fib_index, &pfx_0_0, + FIB_SOURCE_API, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_0_0); + + FIB_TEST((fei == dfrt), "default route same index"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + /* + * no more entires. -1 shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. 
+ */ + fib_prefix_t pfx_2001_1_2_s_128 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000002), + }, + }, + } + }; + fib_prefix_t pfx_2001_1_3_s_128 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000003), + }, + }, + } + }; + u8 eth_addr[] = { + 0xde, 0xde, 0xde, 0xba, 0xba, 0xba, + }; + + ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + &pfx_2001_1_2_s_128.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created"); + adj = adj_get(ai_01); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_01, eth_addr); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + fib_table_entry_update_one_path(fib_index, + &pfx_2001_1_2_s_128, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &pfx_2001_1_2_s_128.fp_addr, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2001_1_2_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + eth_addr[5] = 0xb2; + + ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + &pfx_2001_1_3_s_128.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created"); + adj = adj_get(ai_02); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == 
ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_02, eth_addr); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((ai_01 != ai_02), "ADJs are different"); + + fib_table_entry_update_one_path(fib_index, + &pfx_2001_1_3_s_128, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &pfx_2001_1_3_s_128.fp_addr, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2001_1_3_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + + /* + * +2 entries, +2 unshread path-lists. + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+4 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add a 2 routes via the first ADJ. 
ensure path-list sharing + */ + fib_prefix_t pfx_2001_a_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000a), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + fib_prefix_t pfx_2001_b_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000b), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + + fib_table_entry_path_add(fib_index, + &pfx_2001_a_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_2001_a_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1"); + fib_table_entry_path_add(fib_index, + &pfx_2001_b_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_2001_b_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1"); + + /* + * +2 entries, +1 shared path-list. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add a v4 prefix via a v6 next-hop + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = 0x01010101, + }, + }; + fei = fib_table_entry_path_add(0, // default table + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(fei == fib_table_lookup_exact_match(0, &pfx_1_1_1_1_s_32), + "1.1.1.1/32 o v6 route present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP), + "1.1.1.1/32 via ARP-adj"); + FIB_TEST((adj->ia_link == FIB_LINK_IP4), + "1.1.1.1/32 ADJ-adj is link type v4"); + FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP6), + "1.1.1.1/32 ADJ-adj is NH proto v6"); + fib_table_entry_delete(0, &pfx_1_1_1_1_s_32, FIB_SOURCE_API); + + /* + * An attached route + */ + fib_prefix_t pfx_2001_c_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000c), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + fib_table_entry_path_add(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached route present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_GLEAN), + "2001:0:0:c/64 attached resolves 
via glean"); + + fib_table_entry_path_remove(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_CLI, + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached route removed"); + + /* + * Shutdown the interface on which we have a connected and through + * which the routes are reachable. + * This will result in the connected, adj-fibs, and routes linking to drop + * The local/for-us prefix continues to receive. + */ + clib_error_t * error; + + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + ~VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface shutdown OK"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::b/64 resolves via drop"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::a/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::1/128 not drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); 
+ dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1/64 resolves via drop"); + + /* + * no change + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * shutdown one of the other interfaces, then add a connected. + * and swap one of the routes to it. + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[1]->sw_if_index, + ~VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface 1 shutdown OK"); + + fib_prefix_t connected_pfx = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + /* 2001:0:0:2::1/64 */ + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000002), + [1] = clib_host_to_net_u64(0x0000000000000001), + }, + }, + } + }; + fib_table_entry_update_one_path(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[1]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &connected_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(!dpo_cmp(dpo, dpo_drop), + "2001:0:0:2/64 not resolves via drop"); + + connected_pfx.fp_len = 128; + fib_table_entry_update_one_path(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &connected_pfx); + + 
FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + rd = receive_dpo_get(dpo->dpoi_index); + FIB_TEST((0 == ip46_address_cmp(&connected_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + /* + * +2 entries, +2 unshared path-lists + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + + /* + * bring the interface back up. we expected the routes to return + * to normal forwarding. + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface bring-up OK"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + + /* + * Delete the interface that the routes reolve through. 
+ * Again no routes are removed. They all point to drop. + * + * This is considered an error case. The control plane should + * not remove interfaces through which routes resolve, but + * such things can happen. ALL affected routes will drop. + */ + vnet_delete_hw_interface(vnet_get_main(), tm->hw_if_indicies[0]); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::1/128 is drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1/64 resolves via drop"); + + /* + * no change + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add the interface back. routes stay unresolved. 
+ */ + error = ethernet_register_interface(vnet_get_main(), + test_interface_device_class.index, + 0 /* instance */, + hw_address, + &tm->hw_if_indicies[0], + /* flag change */ 0); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::1/128 is drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1/64 resolves via drop"); + + /* + * CLEANUP ALL the routes + */ + fib_table_entry_delete(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2001_a_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2001_b_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2001_1_3_s_128, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_2001_1_2_s_128, + FIB_SOURCE_ADJ); + local_pfx.fp_len = 64; + fib_table_entry_delete(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + connected_pfx.fp_len = 64; + fib_table_entry_delete(fib_index, &connected_pfx, + 
FIB_SOURCE_INTERFACE); + connected_pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE); + + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64)), + "2001::a/64 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64)), + "2001::b/64 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128)), + "2001:0:0:1::3/128 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128)), + "2001:0:0:1::3/128 removed"); + local_pfx.fp_len = 64; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx)), + "2001:0:0:1/64 removed"); + local_pfx.fp_len = 128; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx)), + "2001:0:0:1::1/128 removed"); + connected_pfx.fp_len = 64; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &connected_pfx)), + "2001:0:0:2/64 removed"); + connected_pfx.fp_len = 128; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &connected_pfx)), + "2001:0:0:2::1/128 removed"); + + /* + * -8 entries. -7 path-lists (1 was shared). 
+ */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * now remove the VRF + */ + fib_table_unlock(fib_index, FIB_PROTOCOL_IP6); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS-6 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS-6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + adj_unlock(ai_02); + adj_unlock(ai_01); + + /* + * return the interfaces to up state + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[1]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); +} + +/* + * Test the recursive route route handling for GRE tunnels + */ +static void +fib_test_gre (void) +{ + /* fib_node_index_t fei; */ + /* u32 fib_index = 0; */ + /* test_main_t *tm; */ + /* u32 ii; */ + + /* tm = &test_main; */ + + /* for (ii = 0; ii < 4; ii++) */ + /* { */ + /* ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = 0; */ + /* } */ + + /* /\* */ + /* * add interface routes. We'll assume this works. It's more rigorously */ + /* * tested elsewhere. 
*/ + /* *\/ */ + /* fib_prefix_t local_pfx = { */ + /* .fp_len = 24, */ + /* .fp_proto = FIB_PROTOCOL_IP4, */ + /* .fp_addr = { */ + /* .ip4 = { */ + /* /\* 10.10.10.10 *\/ */ + /* .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), */ + /* }, */ + /* }, */ + /* }; */ + + /* fib_table_entry_update_one_path(fib_index, &local_pfx, */ + /* FIB_SOURCE_INTERFACE, */ + /* (FIB_ENTRY_FLAG_CONNECTED | */ + /* FIB_ENTRY_FLAG_ATTACHED), */ + /* NULL, */ + /* tm->hw[0]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */ + /* "attached interface route present"); */ + + /* local_pfx.fp_len = 32; */ + /* fib_table_entry_update_one_path(fib_index, &local_pfx, */ + /* FIB_SOURCE_INTERFACE, */ + /* (FIB_ENTRY_FLAG_CONNECTED | */ + /* FIB_ENTRY_FLAG_LOCAL), */ + /* NULL, */ + /* tm->hw[0]->sw_if_index, */ + /* ~0, // invalid fib index */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */ + + /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */ + /* "local interface route present"); */ + + /* fib_prefix_t local2_pfx = { */ + /* .fp_len = 24, */ + /* .fp_proto = FIB_PROTOCOL_IP4, */ + /* .fp_addr = { */ + /* .ip4 = { */ + /* /\* 10.10.11.11 *\/ */ + /* .as_u32 = clib_host_to_net_u32(0x0a0a0b0b), */ + /* }, */ + /* }, */ + /* }; */ + + /* fib_table_entry_update_one_path(fib_index, &local2_pfx, */ + /* FIB_SOURCE_INTERFACE, */ + /* (FIB_ENTRY_FLAG_CONNECTED | */ + /* FIB_ENTRY_FLAG_ATTACHED), */ + /* NULL, */ + /* tm->hw[1]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */ + /* "attached interface route present"); */ + + /* local2_pfx.fp_len = 32; */ + /* fib_table_entry_update_one_path(fib_index, &local2_pfx, */ + /* FIB_SOURCE_INTERFACE, */ + /* 
(FIB_ENTRY_FLAG_CONNECTED | */ + /* FIB_ENTRY_FLAG_LOCAL), */ + /* NULL, */ + /* tm->hw[0]->sw_if_index, */ + /* ~0, // invalid fib index */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */ + + /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), */ + /* "local interface route present"); */ + + /* /\* */ + /* * Add the route that will be used to resolve the tunnel's destination */ + /* *\/ */ + /* fib_prefix_t route_pfx = { */ + /* .fp_len = 24, */ + /* .fp_proto = FIB_PROTOCOL_IP4, */ + /* .fp_addr = { */ + /* .ip4 = { */ + /* /\* 1.1.1.0/24 *\/ */ + /* .as_u32 = clib_host_to_net_u32(0x01010100), */ + /* }, */ + /* }, */ + /* }; */ + /* /\* 10.10.10.2 *\/ */ + /* ip46_address_t nh_10_10_10_2 = { */ + /* .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), */ + /* }; */ + + /* fib_table_entry_path_add(fib_index, &route_pfx, */ + /* FIB_SOURCE_API, */ + /* FIB_ENTRY_FLAG_NONE, */ + /* &nh_10_10_10_2, */ + /* tm->hw[0]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID != */ + /* fib_table_lookup_exact_match(fib_index, &local_pfx)), */ + /* "route present"); */ + + /* /\* */ + /* * Add a tunnel */ + /* *\/ */ + /* /\* 1.1.1.1 *\/ */ + /* fib_prefix_t tun_dst_pfx = { */ + /* .fp_len = 32, */ + /* .fp_proto = FIB_PROTOCOL_IP4, */ + /* .fp_addr = { */ + /* .ip4.as_u32 = clib_host_to_net_u32(0x01010101), */ + /* }, */ + /* }; */ + /* /\* 10.10.10.10 *\/ */ + /* ip4_address_t tun_src = { */ + /* .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), */ + /* }; */ + /* /\* 172.16.0.1 *\/ */ + /* ip4_address_t tun_itf = { */ + /* .as_u32 = clib_host_to_net_u32(0xac100001), */ + /* }; */ + /* fib_prefix_t tun_itf_pfx = { */ + /* .fp_len = 30, */ + /* .fp_proto = FIB_PROTOCOL_IP4, */ + /* .fp_addr = { */ + /* .ip4 = tun_itf, */ + /* }, */ + /* }; */ + /* u32 *encap_labels = NULL; */ + /* u32 label = 0xbaba; */ + /* u32 encap_index; */ + /* u32 tunnel_sw_if_index; */ + + /* 
int rv; */ + + /* /\* */ + /* * First we need the MPLS Encap present */ + /* * */ + /* * Pretty sure this is broken. the wiki say the 1st aparamter address */ + /* * should be the tunnel's interface address, which makes some sense. But */ + /* * the code for tunnel creation checks for the tunnel's destination */ + /* * address. curious... */ + /* *\/ */ + /* vec_add1(encap_labels, label); */ + /* rv = vnet_mpls_add_del_encap(&tun_dst_pfx.fp_addr.ip4, */ + /* 0, // inner VRF */ + /* encap_labels, */ + /* ~0, // policy_tunnel_index, */ + /* 0, // no_dst_hash, */ + /* &encap_index, */ + /* 1); // ADD */ + /* FIB_TEST((0 == rv), "MPLS encap created"); */ + + /* /\* */ + /* * now create the tunnel */ + /* *\/ */ + /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */ + /* &tun_dst_pfx.fp_addr.ip4, */ + /* &tun_itf_pfx.fp_addr.ip4, */ + /* tun_itf_pfx.fp_len, */ + /* 0, // inner VRF */ + /* 0, // outer VRF */ + /* &tunnel_sw_if_index, */ + /* 0, // l2 only */ + /* 1); // ADD */ + /* FIB_TEST((0 == rv), "Tunnel created"); */ + + /* /\* */ + /* * add it again. just for giggles. 
*/ + /* *\/ */ + /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */ + /* &tun_dst_pfx.fp_addr.ip4, */ + /* &tun_itf_pfx.fp_addr.ip4, */ + /* tun_itf_pfx.fp_len, */ + /* 0, // inner VRF */ + /* 0, // outer VRF */ + /* &tunnel_sw_if_index, */ + /* 0, // l2 only */ + /* 1); // ADD */ + /* FIB_TEST((0 != rv), "Duplicate Tunnel not created"); */ + + /* /\* */ + /* * Find the route added for the tunnel subnet and check that */ + /* * it has a midchin adj that is stacked on the adj used to reach the */ + /* * tunnel destination */ + /* *\/ */ + /* ip_adjacency_t *midchain_adj, *route_adj, *adjfib_adj; */ + /* adj_index_t midchain_ai, route_ai, adjfib_ai1, adjfib_ai2; */ + /* ip_lookup_main_t *lm; */ + + /* lm = &ip4_main.lookup_main; */ + + /* fei = fib_table_lookup_exact_match(fib_index, &tun_itf_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "tun itf route present"); */ + /* midchain_ai = fib_entry_contribute_forwarding(fei); */ + /* midchain_adj = adj_get(midchain_ai); */ + + /* FIB_TEST((IP_LOOKUP_NEXT_MIDCHAIN == midchain_adj->lookup_next_index), */ + /* "Tunnel interface links to midchain"); */ + + /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */ + /* route_ai = fib_entry_contribute_forwarding(fei); */ + /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == route_ai), */ + /* "tunnel midchain it stacked on route adj"); */ + + /* /\* */ + /* * update the route to the tunnel's destination to load-balance via */ + /* * interface 1. */ + /* *\/ */ + /* /\* 10.10.11.2 *\/ */ + /* ip46_address_t nh_10_10_11_2 = { */ + /* .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b02), */ + /* }; */ + + /* fib_table_entry_path_add(fib_index, &route_pfx, */ + /* FIB_SOURCE_API, */ + /* FIB_ENTRY_FLAG_NONE, */ + /* &nh_10_10_11_2, */ + /* tm->hw[1]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + + /* /\* */ + /* * the tunnels midchain should have re-stacked. 
This tests that the */ + /* * route re-resolution backwalk works to a tunnel interface. */ + /* *\/ */ + /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */ + /* FIB_TEST((route_ai != fib_entry_contribute_forwarding(fei)), "route changed"); */ + /* route_ai = fib_entry_contribute_forwarding(fei); */ + + /* midchain_adj = adj_get(midchain_ai); */ + + /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == route_ai), */ + /* "tunnel midchain has re-stacked on route adj"); */ + + /* route_adj = adj_get(route_ai); */ + + /* FIB_TEST((2 == route_adj->n_adj), "Route adj is multipath"); */ + + /* /\* */ + /* * At this stage both nieghbour adjs are incomplete, so the same should */ + /* * be true of the multipath adj */ + /* *\/ */ + /* FIB_TEST((IP_LOOKUP_NEXT_ARP == route_adj->lookup_next_index), */ + /* "Adj0 is ARP: %d", route_adj->lookup_next_index); */ + /* FIB_TEST((IP_LOOKUP_NEXT_ARP == (route_adj+1)->lookup_next_index), */ + /* "Adj1 is ARP"); */ + + /* /\* */ + /* * do the equivalent of creating an ARP entry for 10.10.10.2. */ + /* * This will complete the adj, and this */ + /* * change should be refelct in the multipath too. 
*/ + /* *\/ */ + /* u8* rewrite = NULL, byte = 0xd; */ + /* vec_add(rewrite, &byte, 6); */ + + /* adjfib_ai1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, */ + /* FIB_LINK_IP4, */ + /* &nh_10_10_10_2, */ + /* tm->hw[0]->sw_if_index); */ + /* adj_nbr_update_rewrite(FIB_PROTOCOL_IP4, */ + /* adjfib_ai1, */ + /* rewrite); */ + /* adjfib_adj = adj_get(adjfib_ai1); */ + /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adjfib_adj->lookup_next_index), */ + /* "Adj-fib10 adj is rewrite"); */ + + /* adjfib_ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, */ + /* FIB_LINK_IP4, */ + /* &nh_10_10_11_2, */ + /* tm->hw[1]->sw_if_index); */ + /* adj_nbr_update_rewrite(FIB_PROTOCOL_IP4, */ + /* adjfib_ai2, */ + /* rewrite); */ + + /* adjfib_adj = adj_get(adjfib_ai2); */ + + /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adjfib_adj->lookup_next_index), */ + /* "Adj-fib11 adj is rewrite"); */ + + /* fei = fib_table_lookup_exact_match(fib_index, &route_pfx); */ + /* FIB_TEST((route_ai != fib_entry_contribute_forwarding(fei)), "route changed"); */ + /* route_ai = fib_entry_contribute_forwarding(fei); */ + /* route_adj = adj_get(route_ai); */ + /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == route_adj->lookup_next_index), */ + /* "Adj0 is rewrite"); */ + /* FIB_TEST((IP_LOOKUP_NEXT_REWRITE == (route_adj+1)->lookup_next_index), */ + /* "Adj1 is rewrite"); */ + + /* /\* */ + /* * CLEANUP */ + /* *\/ */ + /* adj_index_t drop_ai = adj_get_special(FIB_PROTOCOL_IP4, */ + /* ADJ_SPECIAL_TYPE_DROP); */ + + /* /\* */ + /* * remove the route that the tunnel resovles via. 
expect */ + /* * it to now resolve via the default route, which is drop */ + /* *\/ */ + /* fib_table_entry_path_remove(fib_index, &route_pfx, */ + /* FIB_SOURCE_API, */ + /* &nh_10_10_10_2, */ + /* tm->hw[0]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* fib_table_entry_path_remove(fib_index, &route_pfx, */ + /* FIB_SOURCE_API, */ + /* &nh_10_10_11_2, */ + /* tm->hw[1]->sw_if_index, */ + /* ~0, */ + /* 1, */ + /* FIB_ROUTE_PATH_FLAG_NONE); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID != */ + /* fib_table_lookup_exact_match(fib_index, &local_pfx)), */ + /* "route present"); */ + /* midchain_adj = adj_get(midchain_ai); */ + /* FIB_TEST((midchain_adj->sub_type.midchain.adj_index == drop_ai), */ + /* "tunnel midchain has re-stacked on drop"); */ + + /* /\* */ + /* * remove the tunnel and its MPLS encaps */ + /* *\/ */ + /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */ + /* &tun_dst_pfx.fp_addr.ip4, */ + /* &tun_itf_pfx.fp_addr.ip4, */ + /* tun_itf_pfx.fp_len, */ + /* 0, // inner VRF */ + /* 0, // outer VRF */ + /* &tunnel_sw_if_index, */ + /* 0, // l2 only */ + /* 0); // DEL */ + /* FIB_TEST((0 == rv), "Tunnel removed"); */ + /* rv = vnet_mpls_gre_add_del_tunnel(&tun_src, */ + /* &tun_dst_pfx.fp_addr.ip4, */ + /* &tun_itf_pfx.fp_addr.ip4, */ + /* tun_itf_pfx.fp_len, */ + /* 0, // inner VRF */ + /* 0, // outer VRF */ + /* &tunnel_sw_if_index, */ + /* 0, // l2 only */ + /* 0); // DEL */ + /* FIB_TEST((0 != rv), "No existant Tunnel not removed"); */ + + /* rv = vnet_mpls_add_del_encap(&tun_dst_pfx.fp_addr.ip4, */ + /* 0, // inner VRF */ + /* encap_labels, */ + /* ~0, // policy_tunnel_index, */ + /* 0, // no_dst_hash, */ + /* NULL, */ + /* 0); // ADD */ + /* FIB_TEST((0 == rv), "MPLS encap deleted"); */ + + /* vec_free(encap_labels); */ + + /* /\* */ + /* * no more FIB entries expected */ + /* *\/ */ + /* fei = fib_table_lookup_exact_match(fib_index, &tun_itf_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "tun itf route removed"); */ 
+ /* fei = fib_table_lookup_exact_match(fib_index, &tun_dst_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "tun dst route removed"); */ + + /* /\* */ + /* * CLEANUP the connecteds */ + /* *\/ */ + /* local2_pfx.fp_len = 24; */ + /* fib_table_entry_delete(fib_index, &local2_pfx, */ + /* FIB_SOURCE_INTERFACE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */ + /* "attached interface route remove"); */ + + /* local2_pfx.fp_len = 32; */ + /* fib_table_entry_special_remove(fib_index, &local2_pfx, */ + /* FIB_SOURCE_INTERFACE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local2_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */ + /* "local interface route removed"); */ + /* local_pfx.fp_len = 24; */ + /* fib_table_entry_delete(fib_index, &local_pfx, */ + /* FIB_SOURCE_INTERFACE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */ + /* "attached interface route remove"); */ + + /* local_pfx.fp_len = 32; */ + /* fib_table_entry_special_remove(fib_index, &local_pfx, */ + /* FIB_SOURCE_INTERFACE); */ + /* fei = fib_table_lookup_exact_match(fib_index, &local_pfx); */ + /* FIB_TEST((FIB_NODE_INDEX_INVALID == fei), */ + /* "local interface route removed"); */ +} + +/* + * Test Attached Exports + */ +static void +fib_test_ae (void) +{ + const dpo_id_t *dpo, *dpo_drop; + const u32 fib_index = 0; + fib_node_index_t fei; + test_main_t *tm; + ip4_main_t *im; + + tm = &test_main; + im = &ip4_main; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * add interface routes. We'll assume this works. It's more rigorously + * tested elsewhere. 
+ */ + fib_prefix_t local_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.10.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP4); + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. 
+ */ + fib_prefix_t pfx_10_10_10_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.1 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + fib_node_index_t ai; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 created"); + ai = fib_entry_get_adj(fei); + + /* + * create another FIB table into which routes will be imported + */ + u32 import_fib_index1; + + import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + + /* + * Add an attached route in the import FIB + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created"); + + /* + * check for the presence of the adj-fibs in the import table + */ + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "adj-fib1 Import uses same adj as export"); + + /* + * check for the presence of the local in the import table + */ + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * Add another adj-fin in the export table. 
Expect this + * to get magically exported; + */ + fib_prefix_t pfx_10_10_10_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.2 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 present"); + ai = fib_entry_get_adj(fei); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + + /* + * create a 2nd FIB table into which routes will be imported + */ + u32 import_fib_index2; + + import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12); + + /* + * Add an attached route in the import FIB + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index2, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created"); + + /* + * check for the presence of all the adj-fibs and local in the import table + */ + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = 
fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * add a 3rd adj-fib. expect it to be exported to both tables. + */ + fib_prefix_t pfx_10_10_10_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.3 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_3_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_3_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 present"); + ai = fib_entry_get_adj(fei); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB1"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB2"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + + /* + * remove the 3rd adj fib. we expect it to be removed from both FIBs + */ + fib_table_entry_delete(fib_index, + &pfx_10_10_10_3_s_32, + FIB_SOURCE_ADJ); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 remved"); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB1"); + + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB2"); + + /* + * remove the attached route from the 2nd FIB. 
expect the imported + * entires to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_delete(import_fib_index2, + &local_pfx, + FIB_SOURCE_API); + fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached export removed"); + + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB2"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB2"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB2"); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 still in FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 still in FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local still in FIB1"); + + /* + * modify the route in FIB1 so it is no longer attached. 
expect the imported + * entires to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * modify it back to attached. expect the adj-fibs back + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported in FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported in FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported in FIB1"); + + /* + * add a covering attached next-hop for the interface address, so we have + * a valid adj to find when we check the forwarding tables + */ + fib_prefix_t pfx_10_0_0_0_s_8 = { + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.0.0.0 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a000000), + }, + }; + + fei = 
fib_table_entry_update_one_path(fib_index, + &pfx_10_0_0_0_s_8, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_3_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + dpo = fib_entry_contribute_ip_forwarding(fei); + + /* + * remove the route in the export fib. expect the adj-fibs to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_INTERFACE); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "Delete export: ADJ-fib1 removed from FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * the adj-fibs in the export VRF are present in the FIB table, + * but not installed in forwarding, since they have no attached cover. + * Consequently a lookup in the MTRIE gives the adj for the covering + * route 10.0.0.0/8. 
+ */ + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export"); + + index_t lbi; + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.1 forwards on \n%U not \n%U", + format_load_balance, lbi, 0, + format_dpo_id, dpo, 0); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_3_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.3 forwards on %U", format_dpo_id, dpo, 0); + + /* + * add the export prefix back, but not as attached. + * No adj-fibs in export nor import tables + */ + local_pfx.fp_len = 24; + fei = fib_table_entry_update_one_path(fib_index, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "non-attached in export: ADJ-fib1 in export"); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.1 forwards on %U", format_dpo_id, dpo, 0); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export"); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1"); + fei = 
fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * modify the export prefix so it is attached. expect all covereds to return + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(fib_index, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); 
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * modify the export prefix so connected. no change. + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * CLEANUP + */ + fib_table_entry_delete(fib_index, + &pfx_10_0_0_0_s_8, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ); + local_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local_pfx, + 
FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 24; + fib_table_entry_delete(import_fib_index1, + &local_pfx, + FIB_SOURCE_API); + + fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4); + fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); +} + +typedef enum fib_test_lb_bucket_type_t_ { + FT_LB_LABEL_O_ADJ, + FT_LB_LABEL_O_LB, + FT_LB_O_LB, + FT_LB_SPECIAL, + FT_LB_ADJ, +} fib_test_lb_bucket_type_t; + +typedef struct fib_test_lb_bucket_t_ { + fib_test_lb_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + index_t lb; + } label_o_lb; + struct + { + index_t adj; + } adj; + struct + { + index_t lb; + } lb; + struct + { + index_t adj; + } special; + }; +} fib_test_lb_bucket_t; + +#define FIB_TEST_LB(_cond, _comment, _args...) 
\ +{ \ + if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ + return (0); \ + } \ +} + +static int +fib_test_validate_lb_v (const load_balance_t *lb, + u16 n_buckets, + va_list ap) +{ + const dpo_id_t *dpo; + int bucket; + + FIB_TEST_LB((n_buckets == lb->lb_n_buckets), "n_buckets = %d", lb->lb_n_buckets); + + for (bucket = 0; bucket < n_buckets; bucket++) + { + const fib_test_lb_bucket_t *exp; + + exp = va_arg(ap, fib_test_lb_bucket_t*); + dpo = load_balance_get_bucket_i(lb, bucket); + + switch (exp->type) + { + case FT_LB_LABEL_O_ADJ: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl); + + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_adj.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_adj.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_adj.eos), + "bucket %d stacks on label %d %U", + bucket, + exp->label_o_adj.label, + format_mpls_eos_bit, exp->label_o_adj.eos); + + FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index), + "bucket %d label stacks on adj %d", + bucket, + exp->label_o_adj.adj); + } + break; + case FT_LB_LABEL_O_LB: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr.label_exp_s_ttl); + + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_lb.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_lb.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_lb.eos), + 
"bucket %d stacks on label %d %U", + bucket, + exp->label_o_lb.label, + format_mpls_eos_bit, exp->label_o_lb.eos); + + FIB_TEST_LB((DPO_LOAD_BALANCE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_lb.lb == mld->mld_dpo.dpoi_index), + "bucket %d label stacks on LB %d", + bucket, + exp->label_o_lb.lb); + } + break; + case FT_LB_ADJ: + FIB_TEST_I(((DPO_ADJACENCY == dpo->dpoi_type) || + (DPO_ADJACENCY_INCOMPLETE == dpo->dpoi_type)), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; + case FT_LB_O_LB: + FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->lb.lb == dpo->dpoi_index), + "bucket %d stacks on lb %d", + bucket, + exp->lb.lb); + break; + case FT_LB_SPECIAL: + FIB_TEST_I((DPO_DROP == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->special.adj == dpo->dpoi_index), + "bucket %d stacks on drop %d", + bucket, + exp->adj.adj); + break; + } + } + return (!0); +} + +static int +fib_test_validate_entry (fib_node_index_t fei, + fib_forward_chain_type_t fct, + u16 n_buckets, + ...) 
+{ + const load_balance_t *lb; + dpo_id_t dpo = DPO_NULL; + va_list ap; + int res; + + va_start(ap, n_buckets); + + fib_entry_contribute_forwarding(fei, fct, &dpo); + + FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), + "Entry links to %U", + format_dpo_type, dpo.dpoi_type); + lb = load_balance_get(dpo.dpoi_index); + + res = fib_test_validate_lb_v(lb, n_buckets, ap); + + dpo_reset(&dpo); + + va_end(ap); + + return (res); +} + +/* + * Test the recursive route route handling for GRE tunnels + */ +static void +fib_test_label (void) +{ + fib_node_index_t fei, ai_mpls_10_10_10_1, ai_v4_10_10_11_1, ai_v4_10_10_11_2, ai_mpls_10_10_11_2, ai_mpls_10_10_11_1; + const u32 fib_index = 0; + test_main_t *tm; + ip4_main_t *im; + int lb_count; + + lb_count = pool_elts(load_balance_pool); + tm = &test_main; + im = &ip4_main; + + /* + * add interface routes. We'll assume this works. It's more rigorously + * tested elsewhere. + */ + fib_prefix_t local0_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.10.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index; + + fib_table_entry_update_one_path(fib_index, &local0_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local0_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local0_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local0_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + 
MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local0_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + fib_prefix_t local1_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.11.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0b0a), + }, + }, + }; + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index; + + fib_table_entry_update_one_path(fib_index, &local1_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[1]->sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local1_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local1_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local1_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local1_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + ip46_address_t nh_10_10_10_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + ip46_address_t nh_10_10_11_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0b01), + }, + }; + ip46_address_t nh_10_10_11_2 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0b02), + }, + }; + + ai_v4_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index); + ai_v4_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index); + ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_MPLS, + 
&nh_10_10_10_1, + tm->hw[0]->sw_if_index); + ai_mpls_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_MPLS, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index); + ai_mpls_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_MPLS, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index); + + /* + * Add an etry with one path with a real out-going label + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }, + }; + fib_test_lb_bucket_t l99_eos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 99, + .eos = MPLS_EOS, + }, + }; + fib_test_lb_bucket_t l99_neos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 99, + .eos = MPLS_NON_EOS, + }, + }; + fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + 99, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.1.1.1/32 created"); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l99_eos_o_10_10_10_1), + "1.1.1.1/32 LB 1 bucket via label 99 over 10.10.10.1"); + + /* + * add a path with an implicit NULL label + */ + fib_test_lb_bucket_t a_o_10_10_11_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_v4_10_10_11_1, + }, + }; + fib_test_lb_bucket_t a_mpls_o_10_10_11_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_mpls_10_10_11_1, + }, + }; + + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_IETF_IMPLICIT_NULL_LABEL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + 
FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1), + "1.1.1.1/32 LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1"); + + /* + * assign the route a local label + */ + fib_table_entry_local_label_add(fib_index, + &pfx_1_1_1_1_s_32, + 24001); + + fib_prefix_t pfx_24001_eos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 24001, + .fp_eos = MPLS_EOS, + }; + fib_prefix_t pfx_24001_neos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 24001, + .fp_eos = MPLS_NON_EOS, + }; + + /* + * The EOS entry should link to both the paths, + * and use an ip adj for the imp-null + * The NON-EOS entry should link to both the paths, + * and use an mpls adj for the imp-null + */ + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1), + "24001/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1"); + + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 bucket via: " + "label 99 over 10.10.10.1 ", + "mpls-adj via 10.10.11.1"); + + /* + * add an unlabelled path, this is excluded from the neos chains, + */ + fib_test_lb_bucket_t adj_o_10_10_11_2 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_v4_10_10_11_2, + }, + }; + + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + 
&l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj over 10.10.11.2"); + + /* + * get and lock a reference to the non-eos of the via entry 1.1.1.1/32 + */ + dpo_id_t non_eos_1_1_1_1 = DPO_NULL; + fib_entry_contribute_forwarding(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_1); + + /* + * n-eos has only the 2 labelled paths + */ + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj-mpls over 10.10.11.2"); + + /* + * A labelled recursive + */ + fib_prefix_t pfx_2_2_2_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020202), + }, + }; + fib_test_lb_bucket_t l1600_eos_o_1_1_1_1 = { + .type = FT_LB_LABEL_O_LB, + .label_o_lb = { + .lb = non_eos_1_1_1_1.dpoi_index, + .label = 1600, + .eos = MPLS_EOS, + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + 1600, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + "2.2.2.2.2/32 LB 1 buckets via: " + "label 1600 over 1.1.1.1"); + + /* + * we are holding a lock on the non-eos LB of the via-entry. + * do a PIC-core failover by shutting the link of the via-entry. 
+ * + * shut down the link with the valid label + */ + vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + 0); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "adj over 10.10.11.1, ", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "adj over 10.10.11.1, ", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 buckets via: " + "adj-mpls over 10.10.11.2"); + + /* + * test that the pre-failover load-balance has been in-place + * modified + */ + dpo_id_t current = DPO_NULL; + fib_entry_contribute_forwarding(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + ¤t); + + FIB_TEST(!dpo_cmp(&non_eos_1_1_1_1, + ¤t), + "PIC-core LB inplace modified %U %U", + format_dpo_id, &non_eos_1_1_1_1, 0, + format_dpo_id, ¤t, 0); + + dpo_reset(&non_eos_1_1_1_1); + dpo_reset(¤t); + + /* + * no-shut the link with the valid label + */ + vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + 
&adj_o_10_10_11_2), + "1.1.1.1/32 LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2), + "24001/eos LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj-mpls over 10.10.11.2"); + + /* + * remove the first path with the valid label + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + + 
FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 buckets via: " + "adj-mpls over 10.10.11.2"); + + /* + * remove the other path with a valid label + */ + fib_test_lb_bucket_t bucket_drop = { + .type = FT_LB_SPECIAL, + .special = { + .adj = 1, + }, + }; + + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 1 buckets via: " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &adj_o_10_10_11_2), + "24001/eos LB 1 buckets via: " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &bucket_drop), + "24001/eos LB 1 buckets via: DROP"); + + /* + * add back the path with the valid label + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + 99, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "label 99 
over 10.10.10.1, " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &l99_neos_o_10_10_10_1), + "24001/neos LB 1 buckets via: " + "label 99 over 10.10.10.1"); + + /* + * remove the local label + */ + fib_table_entry_local_label_remove(fib_index, + &pfx_1_1_1_1_s_32, + 24001); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + FIB_TEST((FIB_NODE_INDEX_INVALID == + mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID)), + "No more MPLS FIB entries => table removed"); + + /* + * add another via-entry for the recursive + */ + fib_prefix_t pfx_1_1_1_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010102), + }, + }; + fib_test_lb_bucket_t l101_eos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 101, + .eos = MPLS_EOS, + }, + }; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + 101, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l101_eos_o_10_10_10_1), + "1.1.1.2/32 LB 1 buckets via: " + "label 101 over 10.10.10.1"); + + dpo_id_t non_eos_1_1_1_2 = DPO_NULL; + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + &pfx_1_1_1_1_s_32), + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_1); + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + &pfx_1_1_1_2_s_32), + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_2); + + fib_test_lb_bucket_t 
l1601_eos_o_1_1_1_2 = { + .type = FT_LB_LABEL_O_LB, + .label_o_lb = { + .lb = non_eos_1_1_1_2.dpoi_index, + .label = 1601, + .eos = MPLS_EOS, + }, + }; + l1600_eos_o_1_1_1_1.label_o_lb.lb = non_eos_1_1_1_1.dpoi_index; + + fei = fib_table_entry_path_add(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + 1601, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l1600_eos_o_1_1_1_1, + &l1601_eos_o_1_1_1_2), + "2.2.2.2/32 LB 2 buckets via: " + "label 1600 via 1.1,1.1, " + "label 16001 via 1.1.1.2"); + + /* + * update the via-entry so it no longer has an imp-null path. + * the LB for the recursive can use an imp-null + */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_IETF_IMPLICIT_NULL_LABEL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &a_o_10_10_11_1), + "1.1.1.2/32 LB 1 buckets via: " + "adj 10.10.11.1"); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l1600_eos_o_1_1_1_1, + &l1601_eos_o_1_1_1_2), + "2.2.2.2/32 LB 2 buckets via: " + "label 1600 via 1.1,1.1, " + "label 16001 via 1.1.1.2"); + + /* + * update the via-entry so it no longer has labelled paths. 
+ * the LB for the recursive should exclue this via form its LB + */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &a_o_10_10_11_1), + "1.1.1.2/32 LB 1 buckets via: " + "adj 10.10.11.1"); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + "2.2.2.2/32 LB 1 buckets via: " + "label 1600 via 1.1,1.1"); + + dpo_reset(&non_eos_1_1_1_1); + dpo_reset(&non_eos_1_1_1_2); + + /* + * Add a recursive with no out-labels. We expect to use the IP of the via + */ + fib_prefix_t pfx_2_2_2_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020203), + }, + }; + dpo_id_t ip_1_1_1_1 = DPO_NULL; + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_3_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + &pfx_1_1_1_1_s_32), + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + &ip_1_1_1_1); + + fib_test_lb_bucket_t ip_o_1_1_1_1 = { + .type = FT_LB_O_LB, + .lb = { + .lb = ip_1_1_1_1.dpoi_index, + }, + }; + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1_1_1_1), + "2.2.2.2.3/32 LB 1 buckets via: " + "ip 1.1.1.1"); + + /* + * Add a recursive with an imp-null out-label. 
+ * We expect to use the IP of the via + */ + fib_prefix_t pfx_2_2_2_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020204), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1_1_1_1), + "2.2.2.2.4/32 LB 1 buckets via: " + "ip 1.1.1.1"); + + dpo_reset(&ip_1_1_1_1); + + /* + * cleanup + */ + fib_table_entry_delete(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + "2.2.2.2/32 LB 1 buckets via: " + "label 1600 via 1.1,1.1"); + + fib_table_entry_delete(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &bucket_drop), + "2.2.2.2/32 LB 1 buckets via: DROP"); + + fib_table_entry_delete(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2_2_2_3_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API); + + adj_unlock(ai_mpls_10_10_10_1); + adj_unlock(ai_mpls_10_10_11_2); + adj_unlock(ai_v4_10_10_11_1); + adj_unlock(ai_v4_10_10_11_2); + adj_unlock(ai_mpls_10_10_11_1); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + local0_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local0_pfx, + FIB_SOURCE_INTERFACE); + local0_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local0_pfx, + FIB_SOURCE_INTERFACE); + local1_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local1_pfx, + 
FIB_SOURCE_INTERFACE); + local1_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local1_pfx, + FIB_SOURCE_INTERFACE); + + /* + * +1 for the drop LB in the MPLS tables. + */ + FIB_TEST(lb_count+1 == pool_elts(load_balance_pool), + "Load-balance resources freed %d of %d", + lb_count+1, pool_elts(load_balance_pool)); +} + +#define N_TEST_CHILDREN 4 +#define PARENT_INDEX 0 + +typedef struct fib_node_test_t_ +{ + fib_node_t node; + u32 sibling; + u32 index; + fib_node_back_walk_ctx_t *ctxs; + u32 destroyed; +} fib_node_test_t; + +static fib_node_test_t fib_test_nodes[N_TEST_CHILDREN+1]; + +#define PARENT() (&fib_test_nodes[PARENT_INDEX].node) + +#define FOR_EACH_TEST_CHILD(_tc) \ + for (ii = 1, (_tc) = &fib_test_nodes[1]; \ + ii < N_TEST_CHILDREN+1; \ + ii++, (_tc) = &fib_test_nodes[ii]) + +static fib_node_t * +fib_test_child_get_node (fib_node_index_t index) +{ + return (&fib_test_nodes[index].node); +} + +static int fib_test_walk_spawns_walks; + +static fib_node_back_walk_rc_t +fib_test_child_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_test_t *tc = (fib_node_test_t*) node; + + vec_add1(tc->ctxs, *ctx); + + if (1 == fib_test_walk_spawns_walks) + fib_walk_sync(FIB_NODE_TYPE_TEST, tc->index, ctx); + if (2 == fib_test_walk_spawns_walks) + fib_walk_async(FIB_NODE_TYPE_TEST, tc->index, + FIB_WALK_PRIORITY_HIGH, ctx); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +static void +fib_test_child_last_lock_gone (fib_node_t *node) +{ + fib_node_test_t *tc = (fib_node_test_t *)node; + + tc->destroyed = 1; +} + +/** + * The FIB walk's graph node virtual function table + */ +static const fib_node_vft_t fib_test_child_vft = { + .fnv_get = fib_test_child_get_node, + .fnv_last_lock = fib_test_child_last_lock_gone, + .fnv_back_walk = fib_test_child_back_walk_notify, +}; + +/* + * the function (that should have been static but isn't so I can do this) + * that processes the walk from the async queue, + */ +f64 
fib_walk_process_queues(vlib_main_t * vm, + const f64 quota); +u32 fib_walk_queue_get_size(fib_walk_priority_t prio); + +static void +fib_test_walk (void) +{ + fib_node_back_walk_ctx_t high_ctx = {}, low_ctx = {}; + fib_node_test_t *tc; + vlib_main_t *vm; + u32 ii; + + vm = vlib_get_main(); + fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft); + + /* + * init a fake node on which we will add children + */ + fib_node_init(&fib_test_nodes[PARENT_INDEX].node, + FIB_NODE_TYPE_TEST); + + FOR_EACH_TEST_CHILD(tc) + { + fib_node_init(&tc->node, FIB_NODE_TYPE_TEST); + fib_node_lock(&tc->node); + tc->ctxs = NULL; + tc->index = ii; + tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST, + PARENT_INDEX, + FIB_NODE_TYPE_TEST, ii); + } + + /* + * enqueue a walk across the parents children. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children pre-walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * give the walk a large amount of time so it gets to the end + */ + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * walk again. should be no increase in the number of visits, since + * the walk will have terminated. + */ + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times", + ii, vec_len(tc->ctxs)); + } + + /* + * schedule a low and hig priority walk. 
expect the high to be performed + * before the low. + * schedule the high prio walk first so that it is further from the head + * of the dependency list. that way it won't merge with the low one. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_LOW, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason, + "%d child visitsed by high prio walk", ii); + FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason, + "%d child visitsed by low prio walk", ii); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post prio walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post prio walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule 2 walks of the same priority that can be megred. + * expect that each child is thus visited only once. 
+ */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times during merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule 2 walks of the same priority that cannot be megred. + * expect that each child is thus visited twice and in the order + * in which the walks were scheduled. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason, + "%d child visitsed by high prio walk", ii); + FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason, + "%d child visitsed by low prio walk", ii); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post no-merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post no-merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule a walk that makes one one child progress. + * we do this by giving the queue draining process zero + * time quanta. 
it's a do..while loop, so it does something. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_process_queues(vm, 0); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii == N_TEST_CHILDREN) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in zero quanta walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times in 0 quanta walk", + ii, vec_len(tc->ctxs)); + } + } + FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post zero quanta walk"); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post zero qunta walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * another one step + */ + fib_walk_process_queues(vm, 0); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd zero quanta walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd 0 quanta walk", + ii, vec_len(tc->ctxs)); + } + } + FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post zero quanta walk"); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post zero qunta walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule another walk that will catch-up and merge. 
+ */ + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(2 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd zero quanta merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + else + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd 0 quanta merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post 2nd zero quanta merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post 2nd zero qunta merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * park a async walk in the middle of the list, then have an sync walk catch + * it. same expectations as async catches async. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 0); + fib_walk_process_queues(vm, 0); + + fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(2 == vec_len(tc->ctxs), + "%d child visitsed %d times in sync catches async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + else + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in sync catches async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post 2nd zero quanta merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post 2nd zero qunta merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * make the parent a child of one of its children, thus inducing a routing 
loop. + */ + fib_test_nodes[PARENT_INDEX].sibling = + fib_node_child_add(FIB_NODE_TYPE_TEST, + 1, // the first child + FIB_NODE_TYPE_TEST, + PARENT_INDEX); + + /* + * execute a sync walk from the parent. each child visited spawns more sync + * walks. we expect the walk to terminate. + */ + fib_test_walk_spawns_walks = 1; + + fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * child 1 - which is last in the list - has the loop. + * the other children a re thus visitsed first. the we meet + * child 1. we go round the loop again, visting the other children. + * then we meet the walk in the dep list and bail. child 1 is not visitsed + * again. + */ + if (1 == ii) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "child %d visitsed %d times during looped sync walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(2 == vec_len(tc->ctxs), + "child %d visitsed %d times during looped sync walk", + ii, vec_len(tc->ctxs)); + } + vec_free(tc->ctxs); + } + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post sync loop walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * the walk doesn't reach the max depth because the infra knows that sync + * meets sync implies a loop and bails early. + */ + FIB_TEST(high_ctx.fnbw_depth == 9, + "Walk context depth %d post sync loop walk", + high_ctx.fnbw_depth); + + /* + * execute an async walk of the graph loop, with each child spawns sync walks + */ + high_ctx.fnbw_depth = 0; + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * we don't really care how many times the children are visisted, as long as + * it is more than once. 
+ */ + FIB_TEST(1 <= vec_len(tc->ctxs), + "child %d visitsed %d times during looped aync spawns sync walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + + /* + * execute an async walk of the graph loop, with each child spawns async walks + */ + fib_test_walk_spawns_walks = 2; + high_ctx.fnbw_depth = 0; + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * we don't really care how many times the children are visisted, as long as + * it is more than once. + */ + FIB_TEST(1 <= vec_len(tc->ctxs), + "child %d visitsed %d times during looped async spawns async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + + + fib_node_child_remove(FIB_NODE_TYPE_TEST, + 1, // the first child + fib_test_nodes[PARENT_INDEX].sibling); + + /* + * cleanup + */ + FOR_EACH_TEST_CHILD(tc) + { + fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX, + tc->sibling); + fib_node_deinit(&tc->node); + fib_node_unlock(&tc->node); + } + fib_node_deinit(PARENT()); + + /* + * The parent will be destroyed when the last lock on it goes. + * this test ensures all the walk objects are unlocking it. + */ + FIB_TEST((1 == fib_test_nodes[PARENT_INDEX].destroyed), + "Parent was destroyed"); +} + +static void +lfib_test_deagg (void) +{ + const mpls_label_t deag_label = 50; + const u32 lfib_index = 0; + const u32 fib_index = 0; + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *dpo1; + fib_node_index_t lfe; + lookup_dpo_t *lkd; + test_main_t *tm; + int lb_count; + + tm = &test_main; + lb_count = pool_elts(load_balance_pool); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * MPLS enable an interface so we get the MPLS table created + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 1); + + /* + * Test the specials stack properly. 
+ */ + fib_prefix_t exp_null_v6_pfx = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = MPLS_EOS, + .fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + .fp_payload_proto = DPO_PROTO_IP6, + }; + lfe = fib_table_lookup(lfib_index, &exp_null_v6_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != lfe), + "%U/%U present", + format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + format_mpls_eos_bit, MPLS_EOS); + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table), + "%U/%U is lookup in interface's table", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((DPO_PROTO_IP6 == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + format_dpo_proto, lkd->lkd_proto); + + + /* + * A route deag route for EOS + */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = MPLS_EOS, + .fp_label = deag_label, + .fp_payload_proto = DPO_PROTO_IP4, + }; + lfe = fib_table_entry_path_add(lfib_index, + &pfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)), + "%U/%U present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = 
lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((DPO_PROTO_IP4 == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + format_dpo_proto, lkd->lkd_proto); + + fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI); + + FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index, + &pfx)), + "%U/%U not present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + /* + * A route deag route for non-EOS + */ + pfx.fp_eos = MPLS_NON_EOS; + lfe = fib_table_entry_path_add(lfib_index, + &pfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + lfib_index, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)), + "%U/%U present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS); + + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS); + + FIB_TEST((DPO_PROTO_MPLS == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS, + format_dpo_proto, lkd->lkd_proto); + + 
fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI); + + FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index, + &pfx)), + "%U/%U not present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 0); + + dpo_reset(&dpo); + /* + * +1 for the drop LB in the MPLS tables. + */ + FIB_TEST(lb_count+1 == pool_elts(load_balance_pool), + "Load-balance resources freed %d of %d", + lb_count+1, pool_elts(load_balance_pool)); +} + +static clib_error_t * +lfib_test (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + fib_test_mk_intf(4); + + lfib_test_deagg(); + + return (NULL); +} + +static clib_error_t * +fib_test (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + fib_test_mk_intf(4); + + if (unformat (input, "ip")) + { + fib_test_v4(); + fib_test_v6(); + } + else if (unformat (input, "gre")) + { + fib_test_gre(); + } + else if (unformat (input, "label")) + { + fib_test_label(); + } + else if (unformat (input, "ae")) + { + fib_test_ae(); + } + else if (unformat (input, "walk")) + { + fib_test_walk(); + } + else + { + /* + * These walk UT aren't run as part of the full suite, since the + * fib-walk process must be disabled in order for the tests to work + * + * fib_test_walk(); + */ + fib_test_v4(); + fib_test_v6(); + fib_test_gre(); + fib_test_ae(); + fib_test_label(); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (test_fib_command, static) = { + .path = "test fib", + .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM", + .function = fib_test, +}; + +VLIB_CLI_COMMAND (test_lfib_command, static) = { + .path = "test lfib", + .short_help = "mpls label fib unit tests - DO NOT RUN ON A LIVE SYSTEM", + .function = lfib_test, +}; + +clib_error_t * +fib_test_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (fib_test_init); diff --git a/vnet/vnet/fib/fib_types.c b/vnet/vnet/fib/fib_types.c 
new file mode 100644 index 00000000000..bf76c5536e6 --- /dev/null +++ b/vnet/vnet/fib/fib_types.c @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> + +#include <vnet/fib/fib_types.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/mpls/mpls.h> + +/* + * arrays of protocol and link names + */ +static const char* fib_protocol_names[] = FIB_PROTOCOLS; +static const char* fib_link_names[] = FIB_LINKS; +static const char* fib_forw_chain_names[] = FIB_FORW_CHAINS; + +u8 * +format_fib_protocol (u8 * s, va_list ap) +{ + fib_protocol_t proto = va_arg(ap, int); // fib_protocol_t promotion + + return (format (s, "%s", fib_protocol_names[proto])); +} + +u8 * +format_fib_link (u8 * s, va_list ap) +{ + fib_link_t link = va_arg(ap, int); // fib_link_t promotion + + return (format (s, "%s", fib_link_names[link])); +} + +u8 * +format_fib_forw_chain_type (u8 * s, va_list * args) +{ + fib_forward_chain_type_t fct = va_arg(*args, int); + + return (format (s, "%s", fib_forw_chain_names[fct])); +} + +void +fib_prefix_from_ip46_addr (const ip46_address_t *addr, + fib_prefix_t *pfx) +{ + ASSERT(!ip46_address_is_zero(addr)); + + pfx->fp_proto = ((ip46_address_is_ip4(addr) ? + FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6)); + pfx->fp_len = ((ip46_address_is_ip4(addr) ? 
+ 32 : 128)); + pfx->fp_addr = *addr; +} + +int +fib_prefix_cmp (const fib_prefix_t *p1, + const fib_prefix_t *p2) +{ + int res; + + res = (p1->fp_proto - p2->fp_proto); + + if (0 == res) + { + switch (p1->fp_proto) + { + case FIB_PROTOCOL_IP4: + case FIB_PROTOCOL_IP6: + res = (p1->fp_len - p2->fp_len); + + if (0 == res) + { + res = ip46_address_cmp(&p1->fp_addr, &p2->fp_addr); + } + break; + case FIB_PROTOCOL_MPLS: + res = (p1->fp_label - p2->fp_label); + + if (0 == res) + { + res = (p1->fp_eos - p2->fp_eos); + } + break; + } + } + + return (res); +} + +int +fib_prefix_is_cover (const fib_prefix_t *p1, + const fib_prefix_t *p2) +{ + switch (p1->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_destination_matches_route(&ip4_main, + &p1->fp_addr.ip4, + &p2->fp_addr.ip4, + p1->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_destination_matches_route(&ip6_main, + &p1->fp_addr.ip6, + &p2->fp_addr.ip6, + p1->fp_len)); + case FIB_PROTOCOL_MPLS: + break; + } + return (0); +} + +int +fib_prefix_is_host (const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (prefix->fp_len == 32); + case FIB_PROTOCOL_IP6: + return (prefix->fp_len == 128); + case FIB_PROTOCOL_MPLS: + return (!0); + } + return (0); +} + +u8 * +format_fib_prefix (u8 * s, va_list * args) +{ + fib_prefix_t *fp = va_arg (*args, fib_prefix_t *); + + /* + * protocol specific so it prints ::/0 correctly. 
+ */ + switch (fp->fp_proto) + { + case FIB_PROTOCOL_IP6: + { + ip6_address_t p6 = fp->fp_addr.ip6; + + ip6_address_mask(&p6, &(ip6_main.fib_masks[fp->fp_len])); + s = format (s, "%U", format_ip6_address, &p6); + break; + } + case FIB_PROTOCOL_IP4: + { + ip4_address_t p4 = fp->fp_addr.ip4; + p4.as_u32 &= ip4_main.fib_masks[fp->fp_len]; + + s = format (s, "%U", format_ip4_address, &p4); + break; + } + case FIB_PROTOCOL_MPLS: + s = format (s, "%U:%U", + format_mpls_unicast_label, fp->fp_label, + format_mpls_eos_bit, fp->fp_eos); + break; + } + s = format (s, "/%d", fp->fp_len); + + return (s); +} + +int +fib_route_path_cmp (const fib_route_path_t *rpath1, + const fib_route_path_t *rpath2) +{ + int res; + + res = ip46_address_cmp(&rpath1->frp_addr, + &rpath2->frp_addr); + + if (0 != res) return (res); + + res = vnet_sw_interface_compare(vnet_get_main(), + rpath1->frp_sw_if_index, + rpath2->frp_sw_if_index); + + if (0 != res) return (res); + + if (ip46_address_is_zero(&rpath1->frp_addr)) + { + res = rpath1->frp_fib_index - rpath2->frp_fib_index; + } + + return (res); +} + +dpo_proto_t +fib_proto_to_dpo (fib_protocol_t fib_proto) +{ + switch (fib_proto) + { + case FIB_PROTOCOL_IP6: + return (DPO_PROTO_IP6); + case FIB_PROTOCOL_IP4: + return (DPO_PROTO_IP4); + case FIB_PROTOCOL_MPLS: + return (DPO_PROTO_MPLS); + } + ASSERT(0); + return (0); +} + +fib_protocol_t +dpo_proto_to_fib (dpo_proto_t dpo_proto) +{ + switch (dpo_proto) + { + case DPO_PROTO_IP6: + return (FIB_PROTOCOL_IP6); + case DPO_PROTO_IP4: + return (FIB_PROTOCOL_IP4); + case DPO_PROTO_MPLS: + return (FIB_PROTOCOL_MPLS); + } + ASSERT(0); + return (0); +} + +fib_link_t +fib_proto_to_link (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_LINK_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_LINK_IP6); + case FIB_PROTOCOL_MPLS: + return (FIB_LINK_MPLS); + } + ASSERT(0); + return (0); +} + +fib_forward_chain_type_t +fib_proto_to_forw_chain_type (fib_protocol_t proto) +{ + switch 
(proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_PROTOCOL_MPLS: + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } + ASSERT(0); + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +fib_link_t +fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + return (FIB_LINK_IP4); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + return (FIB_LINK_IP6); + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + /* + * insufficient information to to convert + */ + ASSERT(0); + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + return (FIB_LINK_MPLS); + } + return (FIB_LINK_IP4); +} + +dpo_proto_t +fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + return (DPO_PROTO_IP4); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + return (DPO_PROTO_IP6); + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + /* + * insufficient information to to convert + */ + ASSERT(0); + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + return (DPO_PROTO_MPLS); + } + return (FIB_LINK_IP4); +} diff --git a/vnet/vnet/fib/fib_types.h b/vnet/vnet/fib/fib_types.h new file mode 100644 index 00000000000..4ebd68d1450 --- /dev/null +++ b/vnet/vnet/fib/fib_types.h @@ -0,0 +1,331 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __FIB_TYPES_H__ +#define __FIB_TYPES_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A typedef of a node index. + * we make this typedef so the code becomes easier for a human to parse. + */ +typedef u32 fib_node_index_t; +#define FIB_NODE_INDEX_INVALID ((fib_node_index_t)(~0)) + +/** + * Protocol Type. packed so it consumes a u8 only + */ +typedef enum fib_protocol_t_ { +#if CLIB_DEBUG > 0 + FIB_PROTOCOL_IP4 = 1, +#else + FIB_PROTOCOL_IP4 = 0, +#endif + FIB_PROTOCOL_IP6, + FIB_PROTOCOL_MPLS, +} __attribute__ ((packed)) fib_protocol_t; + +#define FIB_PROTOCOLS { \ + [FIB_PROTOCOL_IP4] = "ipv4", \ + [FIB_PROTOCOL_IP6] = "ipv6", \ + [FIB_PROTOCOL_MPLS] = "MPLS", \ +} + +/** + * Definition outside of enum so it does not need to be included in non-defaulted + * switch statements + */ +#define FIB_PROTOCOL_MAX (FIB_PROTOCOL_MPLS + 1) + +/** + * Not part of the enum so it does not have to be handled in switch statements + */ +#define FIB_PROTOCOL_NONE (FIB_PROTOCOL_MAX+1) + +/** + * Link Type. This maps directly into the ethertype. + */ +typedef enum fib_link_t_ { +#if CLIB_DEBUG > 0 + FIB_LINK_IP4 = 1, +#else + FIB_LINK_IP4 = 0, +#endif + FIB_LINK_IP6, + FIB_LINK_MPLS, +} __attribute__ ((packed)) fib_link_t; + +/** + * Definition outside of enum so it does not need to be included in non-defaulted + * switch statements + */ +#define FIB_LINK_NUM (FIB_LINK_MPLS+1) + +#define FIB_LINKS { \ + [FIB_LINK_IP4] = "ipv4", \ + [FIB_LINK_IP6] = "ipv6", \ + [FIB_LINK_MPLS] = "mpls", \ +} + +#define FOR_EACH_FIB_LINK(_item) \ + for (_item = FIB_LINK_IP4; \ + _item <= FIB_LINK_MPLS; \ + _item++) + +#define FOR_EACH_FIB_IP_LINK(_item) \ + for (_item = FIB_LINK_IP4; \ + _item <= FIB_LINK_IP6; \ + _item++) + +/** + * @brief Convert from a protocol to a link type + */ +fib_link_t fib_proto_to_link (fib_protocol_t proto); + +/** + * FIB output chain type. 
When a child object requests a forwarding contribution + * from a parent, it does so for a particular scenario. This enumererates those + * sceanrios + */ +typedef enum fib_forward_chain_type_t_ { + /** + * Contribute an object that is to be used to forward IP4 packets + */ + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + /** + * Contribute an object that is to be used to forward IP6 packets + */ + FIB_FORW_CHAIN_TYPE_UNICAST_IP6, + /** + * Contribute an object that is to be used to forward non-end-of-stack + * MPLS packets + */ + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + /** + * Contribute an object that is to be used to forward end-of-stack + * MPLS packets. This is a convenient ID for clients. A real EOS chain + * must be pay-load protocol specific. This + * option is converted into one of the other three internally. + */ + FIB_FORW_CHAIN_TYPE_MPLS_EOS, +} __attribute__ ((packed)) fib_forward_chain_type_t; + +#define FIB_FORW_CHAINS { \ + [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \ + [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \ + [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \ + [FIB_FORW_CHAIN_TYPE_MPLS_EOS] = "mpls-eos", \ +} + +#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1) + +#define FOR_EACH_FIB_FORW_CHAIN(_item) \ + for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \ + _item <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; \ + _item++) + +/** + * @brief Convert from a chain type to the adjacencies link type + */ +extern fib_link_t fib_forw_chain_type_to_link_type(fib_forward_chain_type_t fct); + +/** + * @brief Convert from a payload-protocol to a chain type. 
+ */ +extern fib_forward_chain_type_t fib_proto_to_forw_chain_type(fib_protocol_t proto); + +/** + * @brief Convert from a chain type to the DPO proto it will install + */ +extern dpo_proto_t fib_forw_chain_type_to_dpo_proto(fib_forward_chain_type_t fct); + +/** + * Aggregrate type for a prefix + */ +typedef struct fib_prefix_t_ { + /** + * The mask length + */ + u16 fp_len; + + /** + * protocol type + */ + fib_protocol_t fp_proto; + + /** + * Pad to keep the address 4 byte aligned + */ + u8 ___fp___pad; + + union { + /** + * The address type is not deriveable from the fp_addr member. + * If it's v4, then the first 3 u32s of the address will be 0. + * v6 addresses (even v4 mapped ones) have at least 2 u32s assigned + * to non-zero values. true. but when it's all zero, one cannot decide. + */ + ip46_address_t fp_addr; + + struct { + mpls_label_t fp_label; + mpls_eos_bit_t fp_eos; + /** + * This protocol determines the payload protocol of packets + * that will be forwarded by this entry once the label is popped. + * For a non-eos entry it will be MPLS. 
+ */ + dpo_proto_t fp_payload_proto; + }; + }; +} fib_prefix_t; + +_Static_assert(STRUCT_OFFSET_OF(fib_prefix_t, fp_addr) == 4, + "FIB Prefix's address is 4 byte aligned."); + +/** + * \brief Compare two prefixes for equality + */ +extern int fib_prefix_cmp(const fib_prefix_t *p1, + const fib_prefix_t *p2); + +/** + * \brief Compare two prefixes for covering relationship + * + * \return non-zero if the first prefix is a cover for the second + */ +extern int fib_prefix_is_cover(const fib_prefix_t *p1, + const fib_prefix_t *p2); + +/** + * \brief Return true is the prefix is a host prefix + */ +extern int fib_prefix_is_host(const fib_prefix_t *p); + +extern u8 * format_fib_prefix(u8 * s, va_list * args); +extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args); + +extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto); +extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto); + +/** + * Enurmeration of special path/entry types + */ +typedef enum fib_special_type_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_SPECIAL_TYPE_FIRST = 0, + /** + * Local/for-us paths + */ + FIB_SPECIAL_TYPE_LOCAL = FIB_SPECIAL_TYPE_FIRST, + /** + * drop paths + */ + FIB_SPECIAL_TYPE_DROP, + /** + * Marker. Add new types before this one, then update it. 
+ */ + FIB_SPECIAL_TYPE_LAST = FIB_SPECIAL_TYPE_DROP, +} __attribute__ ((packed)) fib_special_type_t; + +/** + * The maximum number of types + */ +#define FIB_SPEICAL_TYPE_MAX (FIB_SPEICAL_TYPE_LAST + 1) + +#define FOR_EACH_FIB_SPEICAL_TYPE(_item) \ + for (_item = FIB_TYPE_SPEICAL_FIRST; \ + _item <= FIB_SPEICAL_TYPE_LAST; _item++) + +extern u8 * format_fib_protocol(u8 * s, va_list ap); +extern u8 * format_fib_link(u8 *s, va_list ap); + +/** + * Path flags from the control plane + */ +typedef enum fib_route_path_flags_t_ +{ + FIB_ROUTE_PATH_FLAG_NONE = 0, + /** + * Recursion constraint of via a host prefix + */ + FIB_ROUTE_PATH_RESOLVE_VIA_HOST = (1 << 0), + /** + * Recursion constraint of via an attahced prefix + */ + FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1), +} fib_route_path_flags_t; + +/** + * @brief + * A representation of a path as described by a route producer. + * These paramenters will determine the path 'type', of which there are: + * 1) Attached-next-hop: + * a single peer on a link. + * It is 'attached' because it is in the same sub-net as the router, on a link + * directly connected to the route. + * It is 'next=hop' since the next-hop address of the peer is known. + * 2) Attached: + * the next-hop is not known. but we can ARP for it. + * 3) Recursive. + * The next-hop is known but the interface is not. So to find the adj to use + * we must recursively resolve the next-hop. + * 3) deaggregate (deag) + * A further lookup is required. + */ +typedef struct fib_route_path_t_ { + /** + * The protocol of the address below. We need this since the all + * zeros address is ambiguous. + */ + fib_protocol_t frp_proto; + /** + * The next-hop address. + * Will be NULL for attached paths. + * Will be all zeros for attached-next-hop paths on a p2p interface + * Will be all zeros for a deag path. + */ + ip46_address_t frp_addr; + /** + * The interface. + * Will be invalid for recursive paths. 
+ */ + u32 frp_sw_if_index; + /** + * The FIB index to lookup the nexthop + * Only valid for recursive paths. + */ + u32 frp_fib_index; + /** + * [un]equal cost path weight + */ + u32 frp_weight; + /** + * flags on the path + */ + fib_route_path_flags_t frp_flags; + /** + * The outgoing MPLS label. INVALID implies no label. + */ + mpls_label_t frp_label; +} fib_route_path_t; + +#endif diff --git a/vnet/vnet/fib/fib_walk.c b/vnet/vnet/fib/fib_walk.c new file mode 100644 index 00000000000..79e3ad0b242 --- /dev/null +++ b/vnet/vnet/fib/fib_walk.c @@ -0,0 +1,775 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_node_list.h> + +/** + * The flags on a walk + */ +typedef enum fib_walk_flags_t_ +{ + /** + * A synchronous walk. + * This walk will run to completion, i.e. visit ALL the children. + * It is a depth first traversal of the graph. + */ + FIB_WALK_FLAG_SYNC = (1 << 0), + /** + * An asynchronous walk. + * This walk will be scheduled to run in the background. It will thus visits + * the children at a later point in time. + * It is a depth first traversal of the graph. + */ + FIB_WALK_FLAG_ASYNC = (1 << 1), + /** + * An indication that the walk is currently executing. 
+ */ + FIB_WALK_FLAG_EXECUTING = (1 << 2), +} fib_walk_flags_t; + +/** + * A representation of a graph walk from a parent object to its children + */ +typedef struct fib_walk_t_ +{ + /** + * FIB node linkage. This object is not in the FIB object graph, + * but it is present in other node's dependency lists, so it needs to + * be pointerable to. + */ + fib_node_t fw_node; + + /** + * the walk's flags + */ + fib_walk_flags_t fw_flags; + + /** + * Sibling index in the dependency list + */ + u32 fw_dep_sibling; + + /** + * Sibling index in the list of all walks + */ + u32 fw_prio_sibling; + + /** + * Pointer to the node whose dependants this walk is walking + */ + fib_node_ptr_t fw_parent; + + /** + * Number of nodes visited by this walk. saved for debugging purposes. + */ + u32 fw_n_visits; + + /** + * The reasons this walk is occuring. + * This is a vector ordered in time. The reasons and the front were started + * first, and so should be acted first when a node is visisted. + */ + fib_node_back_walk_ctx_t *fw_ctx; +} fib_walk_t; + +/** + * @brief The pool of all walk objects + */ +static fib_walk_t *fib_walk_pool; + +/** + * @brief There's only one event type sent to the walk process + */ +#define FIB_WALK_EVENT 0 + +/** + * Statistics maintained per-walk queue + */ +typedef enum fib_walk_queue_stats_t_ +{ + FIB_WALK_SCHEDULED, + FIB_WALK_COMPLETED, +} fib_walk_queue_stats_t; +#define FIB_WALK_QUEUE_STATS_NUM (FIB_WALK_COMPLETED+1) + +#define FIB_WALK_QUEUE_STATS { \ + [FIB_WALK_SCHEDULED] = "scheduled", \ + [FIB_WALK_COMPLETED] = "completed", \ +} + +#define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \ + for ((_wqs) = FIB_WALK_SCHEDULED; \ + (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \ + (_wqs)++) + +/** + * The names of the walk stats + */ +static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS; + +/** + * A represenation of one queue of walk + */ +typedef struct fib_walk_queue_t_ +{ + /** + * Qeuee stats + */ + u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM]; + + 
/** + * The node list which acts as the queue + */ + fib_node_list_t fwq_queue; +} fib_walk_queue_t; + +/** + * A set of priority queues for outstanding walks + */ +typedef struct fib_walk_queues_t_ +{ + fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM]; +} fib_walk_queues_t; + +/** + * The global queues of outstanding walks + */ +static fib_walk_queues_t fib_walk_queues; + +/** + * The names of the walk priorities + */ +static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES; + +u8* +format_fib_walk_priority (u8 *s, va_list ap) +{ + fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t); + + ASSERT(prio < FIB_WALK_PRIORITY_NUM); + + return (format(s, "%s", fib_walk_priority_names[prio])); +} +static u8* +format_fib_walk_queue_stats (u8 *s, va_list ap) +{ + fib_walk_queue_stats_t wqs = va_arg(ap, fib_walk_queue_stats_t); + + ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM); + + return (format(s, "%s", fib_walk_queue_stats_names[wqs])); +} + +static index_t +fib_walk_get_index (fib_walk_t *fwalk) +{ + return (fwalk - fib_walk_pool); +} + +static fib_walk_t * +fib_walk_get (index_t fwi) +{ + return (pool_elt_at_index(fib_walk_pool, fwi)); +} + +/* + * not static so it can be used in the unit tests + */ +u32 +fib_walk_queue_get_size (fib_walk_priority_t prio) +{ + return (fib_node_list_get_size(fib_walk_queues.fwqs_queues[prio].fwq_queue)); +} + +static fib_node_index_t +fib_walk_queue_get_front (fib_walk_priority_t prio) +{ + fib_node_ptr_t wp; + + fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp); + + return (wp.fnp_index); +} + +static void +fib_walk_destroy (fib_walk_t *fwalk) +{ + if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling) + { + fib_node_list_elt_remove(fwalk->fw_prio_sibling); + } + fib_node_child_remove(fwalk->fw_parent.fnp_type, + fwalk->fw_parent.fnp_index, + fwalk->fw_dep_sibling); + + fib_node_deinit(&fwalk->fw_node); + pool_put(fib_walk_pool, fwalk); +} + +/** + * return code when advancing a walk + */ 
+typedef enum fib_walk_advance_rc_t_ +{ + /** + * The walk is complete + */ + FIB_WALK_ADVANCE_DONE, + /** + * the walk has more work + */ + FIB_WALK_ADVANCE_MORE, + /** + * The walk merged with the one in front + */ + FIB_WALK_ADVANCE_MERGE, +} fib_walk_advance_rc_t; + +/** + * @brief Advance the walk one element in its work list + */ +static fib_walk_advance_rc_t +fib_walk_advance (fib_node_index_t fwi) +{ + fib_node_back_walk_ctx_t *ctx; + fib_node_back_walk_rc_t wrc; + fib_node_ptr_t sibling; + fib_walk_t *fwalk; + int more_elts; + + /* + * this walk function is re-entrant - walks acan spawn walks. + * fib_walk_t objects come from a pool, so they can realloc. we need + * to retch from said pool at the appropriate times. + */ + fwalk = fib_walk_get(fwi); + + more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling); + + if (more_elts) + { + vec_foreach(ctx, fwalk->fw_ctx) + { + wrc = fib_node_back_walk_one(&sibling, ctx); + + fwalk = fib_walk_get(fwi); + fwalk->fw_n_visits++; + + if (FIB_NODE_BACK_WALK_MERGE == wrc) + { + /* + * this walk has merged with the one further along the node's + * dependecy list. + */ + return (FIB_WALK_ADVANCE_MERGE); + } + } + /* + * move foward to the next node to visit + */ + more_elts = fib_node_list_advance(fwalk->fw_dep_sibling); + } + + if (more_elts) + { + return (FIB_WALK_ADVANCE_MORE); + } + + return (FIB_WALK_ADVANCE_DONE); +} + +/** + * First guesses as to good values + */ +#define SHORT_SLEEP 1e-8 +#define LONG_SLEEP 1e-3 +#define QUOTA 1e-4 + +/** + * @brief Service the queues + * This is not declared static so that it can be unit tested - i know i know... 
+ */ +f64 +fib_walk_process_queues (vlib_main_t * vm, + const f64 quota) +{ + fib_walk_priority_t prio; + fib_walk_advance_rc_t rc; + fib_node_index_t fwi; + fib_walk_t *fwalk; + + f64 sleep_time, start_time; // , vector_rate; + + start_time = vlib_time_now(vm); + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + while (0 != fib_walk_queue_get_size(prio)) + { + fwi = fib_walk_queue_get_front(prio); + + /* + * set this walk as executing + */ + fwalk = fib_walk_get(fwi); + fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING; + + do + { + rc = fib_walk_advance(fwi); + } while (((vlib_time_now(vm) - start_time) < quota) && + (FIB_WALK_ADVANCE_MORE == rc)); + + /* + * if this walk has no more work then pop it from the queue + * and move on to the next. + */ + if (FIB_WALK_ADVANCE_MORE != rc) + { + fwalk = fib_walk_get(fwi); + fib_walk_destroy(fwalk); + fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++; + } + else + { + /* + * passed our work quota. sleep time. + */ + fwalk = fib_walk_get(fwi); + fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING; + sleep_time = SHORT_SLEEP; + goto that_will_do_for_now; + } + } + } + /* + * got to the end of all the work + */ + sleep_time = LONG_SLEEP; + +that_will_do_for_now: + return (sleep_time); +} + +/** + * @brief The 'fib-walk' process's main loop. + */ +static uword +fib_walk_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + f64 sleep_time; + + sleep_time = SHORT_SLEEP; + + while (1) + { + vlib_process_wait_for_event_or_clock(vm, sleep_time); + + /* + * there may be lots of event queued between the processes, + * but the walks we want to schedule are in the priority queues, + * so we ignore the process events. 
+ */ + vlib_process_get_events(vm, NULL); + + sleep_time = fib_walk_process_queues(vm, QUOTA); + } + + /* + * Unreached + */ + ASSERT(!"WTF"); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (fib_walk_process_node,static) = { + .function = fib_walk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "fib-walk", +}; +/* *INDENT-ON* */ + +/** + * @brief Allocate a new walk object + */ +static fib_walk_t * +fib_walk_alloc (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_flags_t flags, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_t *fwalk; + + pool_get(fib_walk_pool, fwalk); + + fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK); + + fwalk->fw_flags = flags; + fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID; + fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID; + fwalk->fw_parent.fnp_index = parent_index; + fwalk->fw_parent.fnp_type = parent_type; + fwalk->fw_ctx = NULL; + + /* + * make a copy of the backwalk context so the depth count remains + * the same for each sibling visitsed. This is important in the case + * where a parents has a loop via one child, but all the others are not. + * if the looped child were visited first, the depth count would exceed, the + * max and the walk would terminate before it reached the other siblings. + */ + vec_add1(fwalk->fw_ctx, *ctx); + + return (fwalk); +} + +/** + * @brief Enqueue a walk onto the appropriate priority queue. Then signal + * the background process there is work to do. + */ +static index_t +fib_walk_prio_queue_enquue (fib_walk_priority_t prio, + fib_walk_t *fwalk) +{ + index_t sibling; + + sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, + 0, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++; + + /* + * poke the fib-walk process to perform the async walk. 
+ * we are not passing it specific data, hence the last two args, + * the process will drain the queues + */ + vlib_process_signal_event(vlib_get_main(), + fib_walk_process_node.index, + FIB_WALK_EVENT, + FIB_WALK_EVENT); + + return (sibling); +} + +void +fib_walk_async (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_priority_t prio, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_t *fwalk; + + if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth) + { + /* + * The walk has reached the maximum depth. there is a loop in the graph. + * bail. + */ + return; + } + + fwalk = fib_walk_alloc(parent_type, + parent_index, + FIB_WALK_FLAG_ASYNC, + ctx); + + fwalk->fw_dep_sibling = fib_node_child_add(parent_type, + parent_index, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + + fwalk->fw_prio_sibling = fib_walk_prio_queue_enquue(prio, fwalk); +} + +/** + * @brief Back walk all the children of a FIB node. + * + * note this is a synchronous depth first walk. Children visited may propagate + * the walk to thier children. Other children node types may not propagate, + * synchronously but instead queue the walk for later async completion. + */ +void +fib_walk_sync (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_advance_rc_t rc; + fib_node_index_t fwi; + fib_walk_t *fwalk; + + if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth) + { + /* + * The walk has reached the maximum depth. there is a loop in the graph. + * bail. 
+ */ + return; + } + + fwalk = fib_walk_alloc(parent_type, + parent_index, + FIB_WALK_FLAG_SYNC, + ctx); + + fwalk->fw_dep_sibling = fib_node_child_add(parent_type, + parent_index, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + fwi = fib_walk_get_index(fwalk); + + while (1) + { + /* + * set this walk as executing + */ + fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING; + + do + { + rc = fib_walk_advance(fwi); + } while (FIB_WALK_ADVANCE_MORE == rc); + + + /* + * this walk function is re-entrant - walks can spawn walks. + * fib_walk_t objects come from a pool, so they can realloc. we need + * to re-fetch from said pool at the appropriate times. + */ + fwalk = fib_walk_get(fwi); + + if (FIB_WALK_ADVANCE_MERGE == rc) + { + /* + * this sync walk merged with an walk in front. + * by reqeusting a sync walk the client wanted all children walked, + * so we ditch the walk object in hand and continue with the one + * we merged into + */ + fib_node_ptr_t merged_walk; + + fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk); + + ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index); + ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type); + + fib_walk_destroy(fwalk); + + fwi = merged_walk.fnp_index; + fwalk = fib_walk_get(fwi); + + if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags) + { + /* + * we are executing a sync walk, and we have met with another + * walk that is also executing. since only one walk executs at once + * (there is no multi-threading) this implies we have met ourselves + * and hence the is a loop in the graph. + * This function is re-entrant, so the walk object we met is being + * acted on in a stack frame below this one. We must therefore not + * continue with it now, but let the stack unwind and along the + * appropriate frame to read the depth count and bail. + */ + fwalk = NULL; + break; + } + } + else + { + /* + * the walk reached the end of the depdency list. 
+ */ + break; + } + } + + if (NULL != fwalk) + { + fib_walk_destroy(fwalk); + } +} + +static fib_node_t * +fib_walk_get_node (fib_node_index_t index) +{ + fib_walk_t *fwalk; + + fwalk = fib_walk_get(index); + + return (&(fwalk->fw_node)); +} + +/** + * Walk objects are not parents, nor are they locked. + * are no-ops + */ +static void +fib_walk_last_lock_gone (fib_node_t *node) +{ + ASSERT(0); +} + +static fib_walk_t* +fib_walk_get_from_node (fib_node_t *node) +{ + return ((fib_walk_t*)(((char*)node) - + STRUCT_OFFSET_OF(fib_walk_t, fw_node))); +} + +/** + * @brief Another back walk has reach this walk. + * Megre them so there is only one left. It is this node being + * visited that will remain, so copy or merge the context onto it. + */ +static fib_node_back_walk_rc_t +fib_walk_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_back_walk_ctx_t *old; + fib_walk_t *fwalk; + + fwalk = fib_walk_get_from_node(node); + + /* + * check whether the walk context can be merge with another, + * or whether it needs to be appended. + */ + vec_foreach(old, fwalk->fw_ctx) + { + /* + * we can merge walks if the reason for the walk is the same. + */ + if (old->fnbw_reason == ctx->fnbw_reason) + { + /* + * copy the largest of the depth values. in the presence of a loop, + * the same walk will merge with itself. if we take the smaller depth + * then it will never end. + */ + old->fnbw_depth = ((old->fnbw_depth >= ctx->fnbw_depth) ? + old->fnbw_depth : + ctx->fnbw_depth); + goto out; + } + } + + /* + * walks could not be merged, this means that the walk infront needs to + * perform different action to this one that has caught up. the one in front + * was scheduled first so append the new walk context to the back of the list. 
+ */ + vec_add1(fwalk->fw_ctx, *ctx); + +out: + return (FIB_NODE_BACK_WALK_MERGE); +} + +/** + * The FIB walk's graph node virtual function table + */ +static const fib_node_vft_t fib_walk_vft = { + .fnv_get = fib_walk_get_node, + .fnv_last_lock = fib_walk_last_lock_gone, + .fnv_back_walk = fib_walk_back_walk_notify, +}; + +void +fib_walk_module_init (void) +{ + fib_walk_priority_t prio; + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + fib_walk_queues.fwqs_queues[prio].fwq_queue = fib_node_list_create(); + } + + fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft); +} + +static u8* +format_fib_walk (u8* s, va_list ap) +{ + fib_node_index_t fwi = va_arg(ap, fib_node_index_t); + fib_walk_t *fwalk; + + fwalk = fib_walk_get(fwi); + + return (format(s, " parent:{%s:%d} visits:%d flags:%d", + fib_node_type_get_name(fwalk->fw_parent.fnp_type), + fwalk->fw_parent.fnp_index, + fwalk->fw_n_visits, + fwalk->fw_flags)); +} + +static clib_error_t * +fib_walk_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_walk_queue_stats_t wqs; + fib_walk_priority_t prio; + fib_node_ptr_t sibling; + fib_node_index_t fwi; + fib_walk_t *fwalk; + int more_elts; + + vlib_cli_output(vm, "FIB Walk queues:"); + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + vlib_cli_output(vm, " %U priority queue:", + format_fib_walk_priority, prio); + vlib_cli_output(vm, " Stats: "); + + FOR_EACH_FIB_WALK_QUEUE_STATS(wqs) + { + vlib_cli_output(vm, " %U:%d", + format_fib_walk_queue_stats, wqs, + fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]); + } + vlib_cli_output(vm, " Occupancy:%d", + fib_node_list_get_size( + fib_walk_queues.fwqs_queues[prio].fwq_queue)); + + more_elts = fib_node_list_get_front( + fib_walk_queues.fwqs_queues[prio].fwq_queue, + &sibling); + + while (more_elts) + { + ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index); + ASSERT(FIB_NODE_TYPE_WALK == sibling.fnp_type); + + fwi = sibling.fnp_index; + fwalk = fib_walk_get(fwi); + + vlib_cli_output(vm, " %U", 
format_fib_walk, fwi); + + more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling, + &sibling); + } + } + return (NULL); +} + +VLIB_CLI_COMMAND (fib_walk_show_command, static) = { + .path = "show fib walk", + .short_help = "show fib walk", + .function = fib_walk_show, +}; diff --git a/vnet/vnet/fib/fib_walk.h b/vnet/vnet/fib/fib_walk.h new file mode 100644 index 00000000000..7ae99d0d8aa --- /dev/null +++ b/vnet/vnet/fib/fib_walk.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_WALK_H__ +#define __FIB_WALK_H__ + +#include <vnet/fib/fib_node.h> + +/** + * @brief Walk priorities. + * Strict priorities. All walks a priority n are completed before n+1 is started. + * Increasing numerical value implies decreasing priority. 
+ */ +typedef enum fib_walk_priority_t_ +{ + FIB_WALK_PRIORITY_HIGH = 0, + FIB_WALK_PRIORITY_LOW = 1, +} fib_walk_priority_t; + +#define FIB_WALK_PRIORITY_NUM (FIB_WALK_PRIORITY_LOW+1) + +#define FIB_WALK_PRIORITIES { \ + [FIB_WALK_PRIORITY_HIGH] = "high", \ + [FIB_WALK_PRIORITY_LOW] = "low", \ +} + +#define FOR_EACH_FIB_WALK_PRIORITY(_prio) \ + for ((_prio) = FIB_WALK_PRIORITY_HIGH; \ + (_prio) < FIB_WALK_PRIORITY_NUM; \ + (_prio)++) + +extern void fib_walk_module_init(void); + +extern void fib_walk_async(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_priority_t prio, + fib_node_back_walk_ctx_t *ctx); + +extern void fib_walk_sync(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_back_walk_ctx_t *ctx); + +extern u8* format_fib_walk_priority(u8 *s, va_list ap); + +#endif + diff --git a/vnet/vnet/fib/ip4_fib.c b/vnet/vnet/fib/ip4_fib.c new file mode 100644 index 00000000000..21ebb7afafc --- /dev/null +++ b/vnet/vnet/fib/ip4_fib.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/ip4_fib.h> + +/* + * A table of pefixes to be added to tables and the sources for them + */ +typedef struct ip4_fib_table_special_prefix_t_ { + fib_prefix_t ift_prefix; + fib_source_t ift_source; + fib_entry_flag_t ift_flag; +} ip4_fib_table_special_prefix_t; + +static const ip4_fib_table_special_prefix_t ip4_specials[] = { + { + /* 0.0.0.0/0*/ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0, + }, + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* 0.0.0.0/32*/ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0, + }, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* + * 240.0.0.0/8 + * drop class E + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xf0000000, + }, + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_SPECIAL, + .ift_flag = FIB_ENTRY_FLAG_DROP, + + }, + { + /* + * 224.0.0.0/8 + * drop all mcast + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xe0000000, + }, + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_SPECIAL, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* + * 255.255.255.255/32 + * drop, but we'll allow it to be usurped by the likes of DHCP + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xffffffff, + }, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + } +}; + + +static u32 +ip4_create_fib_with_table_id (u32 table_id) +{ + fib_table_t *fib_table; + + pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_IP4; + fib_table->ft_index = + fib_table->v4.index = + (fib_table - ip4_main.fibs); + + hash_set 
(ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + fib_table->v4.table_id = + table_id; + fib_table->ft_flow_hash_config = + fib_table->v4.flow_hash_config = + IP_FLOW_HASH_DEFAULT; + fib_table->v4.fwd_classify_table_index = ~0; + fib_table->v4.rev_classify_table_index = ~0; + + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); + + ip4_mtrie_init(&fib_table->v4.mtrie); + + /* + * add the special entries into the new FIB + */ + int ii; + + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + fib_prefix_t prefix = ip4_specials[ii].ift_prefix; + + prefix.fp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32); + + fib_table_entry_special_add(fib_table->ft_index, + &prefix, + ip4_specials[ii].ift_source, + ip4_specials[ii].ift_flag, + ADJ_INDEX_INVALID); + } + + return (fib_table->ft_index); +} + +void +ip4_fib_table_destroy (ip4_fib_t *fib) +{ + fib_table_t *fib_table = (fib_table_t*)fib; + int ii; + + /* + * remove all the specials we added when the table was created. + */ + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + fib_prefix_t prefix = ip4_specials[ii].ift_prefix; + + prefix.fp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32); + + fib_table_entry_special_remove(fib_table->ft_index, + &prefix, + ip4_specials[ii].ift_source); + } + + /* + * validate no more routes. 
+ */ + ASSERT(0 == fib_table->ft_total_route_counts); + FOR_EACH_FIB_SOURCE(ii) + { + ASSERT(0 == fib_table->ft_src_route_counts[ii]); + } + + if (~0 != fib_table->ft_table_id) + { + hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id); + } + pool_put(ip4_main.fibs, fib_table); +} + + +u32 +ip4_fib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = ip4_fib_index_from_table_id(table_id); + if (~0 == index) + return ip4_create_fib_with_table_id(table_id); + + fib_table_lock(index, FIB_PROTOCOL_IP4); + + return (index); +} + +u32 +ip4_fib_table_create_and_lock (void) +{ + return (ip4_create_fib_with_table_id(~0)); +} + +u32 +ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip4_main.fib_index_by_sw_if_index)) + { + /* + * This is the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip4_main.fib_index_by_sw_if_index[sw_if_index]); +} + +flow_hash_config_t +ip4_fib_table_get_flow_hash_config (u32 fib_index) +{ + return (ip4_fib_get(fib_index)->flow_hash_config); +} + +/* + * ip4_fib_table_lookup_exact_match + * + * Exact match prefix lookup + */ +fib_node_index_t +ip4_fib_table_lookup_exact_match (const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + u32 key; + + hash = fib->fib_entry_by_dst_address[len]; + key = (addr->data_u32 & ip4_main.fib_masks[len]); + + result = hash_get(hash, key); + + if (NULL != result) { + return (result[0]); + } + return (FIB_NODE_INDEX_INVALID); +} + +/* + * ip4_fib_table_lookup_adj + * + * Longest prefix match + */ +index_t +ip4_fib_table_lookup_lb (ip4_fib_t *fib, + const ip4_address_t *addr) +{ + fib_node_index_t fei; + + fei = ip4_fib_table_lookup(fib, addr, 32); + + if (FIB_NODE_INDEX_INVALID != fei) + { + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(fei); + + return (dpo->dpoi_index); + } + return (INDEX_INVALID); +} + +/* + * ip4_fib_table_lookup + * + 
* Longest prefix match + */ +fib_node_index_t +ip4_fib_table_lookup (const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + i32 mask_len; + u32 key; + + for (mask_len = len; mask_len >= 0; mask_len--) + { + hash = fib->fib_entry_by_dst_address[mask_len]; + key = (addr->data_u32 & ip4_main.fib_masks[mask_len]); + + result = hash_get (hash, key); + + if (NULL != result) { + return (result[0]); + } + } + return (FIB_NODE_INDEX_INVALID); +} + +void +ip4_fib_table_entry_insert (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index) +{ + uword * hash, * result; + u32 key; + + key = (addr->data_u32 & ip4_main.fib_masks[len]); + hash = fib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) { + /* + * adding a new entry + */ + if (NULL == hash) { + hash = hash_create (32 /* elts */, sizeof (uword)); + hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK); + } + hash = hash_set(hash, key, fib_entry_index); + fib->fib_entry_by_dst_address[len] = hash; + } + else + { + ASSERT(0); + } +} + +void +ip4_fib_table_entry_remove (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + u32 key; + + key = (addr->data_u32 & ip4_main.fib_masks[len]); + hash = fib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) + { + /* + * removing a non-existant entry. i'll allow it. 
+ */ + } + else + { + hash_unset(hash, key); + } + + fib->fib_entry_by_dst_address[len] = hash; +} + +void +ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD +} + +void +ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE +} + +static void +ip4_fib_table_show_all (ip4_fib_t *fib, + vlib_main_t * vm) +{ + fib_node_index_t *fib_entry_indicies; + fib_node_index_t *fib_entry_index; + int i; + + fib_entry_indicies = NULL; + + for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) + { + uword * hash = fib->fib_entry_by_dst_address[i]; + + if (NULL != hash) + { + hash_pair_t * p; + + hash_foreach_pair (p, hash, + ({ + vec_add1(fib_entry_indicies, p->value[0]); + })); + } + } + + vec_sort_with_function(fib_entry_indicies, fib_entry_cmp_for_sort); + + vec_foreach(fib_entry_index, fib_entry_indicies) + { + vlib_cli_output(vm, "%U", + format_fib_entry, + *fib_entry_index, + FIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(fib_entry_indicies); +} + +static void +ip4_fib_table_show_one (ip4_fib_t *fib, + vlib_main_t * vm, + ip4_address_t *address, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_fib_entry, + ip4_fib_table_lookup(fib, address, mask_len), + FIB_ENTRY_FORMAT_DETAIL); +} + +static clib_error_t * +ip4_show_fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_main_t * im4 = &ip4_main; + fib_table_t * fib_table; + int verbose, matching, mtrie; + ip4_address_t matching_address; + u32 matching_mask = 32; + int i, table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + mtrie = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + 
verbose = 0; + + else if (unformat (input, "mtrie")) + mtrie = 1; + + else if (unformat (input, "%U/%d", + unformat_ip4_address, &matching_address, &matching_mask)) + matching = 1; + + else if (unformat (input, "%U", unformat_ip4_address, &matching_address)) + matching = 1; + + else if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (fib_table, im4->fibs, + ({ + ip4_fib_t *fib = &fib_table->v4; + + if (table_id >= 0 && table_id != (int)fib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)fib->index) + continue; + + vlib_cli_output (vm, "%U, fib_index %d, flow hash: %U", + format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, + fib->index, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! verbose) + { + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) + { + uword * hash = fib->fib_entry_by_dst_address[i]; + uword n_elts = hash_elts (hash); + if (n_elts > 0) + vlib_cli_output (vm, "%20d%16d", i, n_elts); + } + continue; + } + + if (!matching) + { + ip4_fib_table_show_all(fib, vm); + } + else + { + ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); + } + + if (mtrie) + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + })); + + return 0; +} + +/*? + * Show FIB/route entries + * + * @cliexpar + * @cliexstart{show ip fib} + * Display the IPv4 FIB. + * This command will run for a long time when the FIBs comprise millions of entries. + * vpp# sh ip fib + * Table 0 + * Destination Packets Bytes Adjacency + * 6.0.0.0/8 0 0 weight 1, index 3 + * arp fake-eth0 6.0.0.1/8 + * 6.0.0.1/32 0 0 weight 1, index 4 + * local 6.0.0.1/8 + * + * And so forth. 
Use 'show ip fib summary' for a summary: + * + * vpp# sh ip fib summary + * Table 0 + * Prefix length Count + * 8 1 + * 32 4 + * @cliexend + ?*/ +VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { + .path = "show ip fib", + .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]", + .function = ip4_show_fib, +}; diff --git a/vnet/vnet/fib/ip4_fib.h b/vnet/vnet/fib/ip4_fib.h new file mode 100644 index 00000000000..cf312cdc629 --- /dev/null +++ b/vnet/vnet/fib/ip4_fib.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief The IPv4 FIB + * + * FIBs are composed of two prefix data-bases (akak tables). The non-forwarding + * table contains all the routes that the control plane has programmed, the + * forwarding table contains the sub-set of those routes that can be used to + * forward packets. + * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed + * by mask length, the forwarding table is an mtrie + * + * This IPv4 FIB is used by the protocol independent FIB. So directly using + * this APIs in client code is not encouraged. 
However, this IPv4 FIB can be + * used if all the client wants is an IPv4 prefix data-base + */ + +#ifndef __IP4_FIB_H__ +#define __IP4_FIB_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> + +extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); +extern fib_node_index_t ip4_fib_table_lookup_exact_match(const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); + +extern void ip4_fib_table_entry_remove(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); + +extern void ip4_fib_table_entry_insert(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip4_fib_table_destroy(ip4_fib_t *fib); + +extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo); +extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, + const ip4_address_t * dst); + +/** + * @brief Get the FIB at the given index + */ +static inline ip4_fib_t * +ip4_fib_get (u32 index) +{ + return (&(pool_elt_at_index(ip4_main.fibs, index)->v4)); +} + +always_inline u32 +ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) +{ + return (ip4_fib_table_lookup_lb( + ip4_fib_get(vec_elt (im->fib_index_by_sw_if_index, sw_if_index)), + dst)); +} + +/** + * @brief Get or create an IPv4 fib. + * + * Get or create an IPv4 fib with the provided table ID. + * + * @param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * @returns A pointer to the retrieved or created fib. 
+ * + */ +extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip4_fib_table_create_and_lock(void); + + +static inline +u32 ip4_fib_index_from_table_id (u32 table_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index); + +extern flow_hash_config_t ip4_fib_table_get_flow_hash_config(u32 fib_index); + + +always_inline index_t +ip4_fib_forwarding_lookup (u32 fib_index, + const ip4_address_t * addr) +{ + ip4_fib_mtrie_leaf_t leaf; + ip4_fib_mtrie_t * mtrie; + + mtrie = &ip4_fib_get(fib_index)->mtrie; + + leaf = IP4_FIB_MTRIE_LEAF_ROOT; + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); + + /* Handle default route. */ + leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf); + + return (ip4_fib_mtrie_leaf_get_adj_index(leaf)); +} + + +#endif + diff --git a/vnet/vnet/fib/ip6_fib.c b/vnet/vnet/fib/ip6_fib.c new file mode 100644 index 00000000000..772ce74430b --- /dev/null +++ b/vnet/vnet/fib/ip6_fib.c @@ -0,0 +1,698 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/fib_table.h> + +static void +vnet_ip6_fib_init (u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 0, + .fp_addr = { + .ip6 = { + { 0, 0, }, + }, + } + }; + + /* + * Add the default route. + */ + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_DEFAULT_ROUTE, + FIB_ENTRY_FLAG_DROP, + ADJ_INDEX_INVALID); + + /* + * Add ff02::1:ff00:0/104 via local route for all tables. + * This is required for neighbor discovery to work. + */ + ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0); + pfx.fp_len = 104; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-routers multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-nodes multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-mldv2 multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * all link local for us + */ + pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + pfx.fp_addr.ip6.as_u64[1] = 0; + pfx.fp_len = 10; + fib_table_entry_special_add(fib_index, + &pfx, + 
FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); +} + +static u32 +create_fib_with_table_id (u32 table_id) +{ + fib_table_t *fib_table; + + pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_IP6; + fib_table->ft_index = + fib_table->v6.index = + (fib_table - ip6_main.fibs); + + hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + fib_table->v6.table_id = + table_id; + fib_table->ft_flow_hash_config = + fib_table->v6.flow_hash_config = + IP_FLOW_HASH_DEFAULT; + + vnet_ip6_fib_init(fib_table->ft_index); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); + + return (fib_table->ft_index); +} + +u32 +ip6_fib_table_find_or_create_and_lock (u32 table_id) +{ + uword * p; + + p = hash_get (ip6_main.fib_index_by_table_id, table_id); + if (NULL == p) + return create_fib_with_table_id(table_id); + + fib_table_lock(p[0], FIB_PROTOCOL_IP6); + + return (p[0]); +} + +u32 +ip6_fib_table_create_and_lock (void) +{ + return (create_fib_with_table_id(~0)); +} + +void +ip6_fib_table_destroy (u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 0, + .fp_addr = { + .ip6 = { + { 0, 0, }, + }, + } + }; + + /* + * the default route. 
+ */ + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_DEFAULT_ROUTE); + + + /* + * ff02::1:ff00:0/104 + */ + ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0); + pfx.fp_len = 104; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-routers multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-nodes multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-mldv2 multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all link local + */ + pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + pfx.fp_addr.ip6.as_u64[1] = 0; + pfx.fp_len = 10; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + fib_table_t *fib_table = fib_table_get(fib_index, FIB_PROTOCOL_IP6); + fib_source_t source; + + /* + * validate no more routes. 
+ */ + ASSERT(0 == fib_table->ft_total_route_counts); + FOR_EACH_FIB_SOURCE(source) + { + ASSERT(0 == fib_table->ft_src_route_counts[source]); + } + + if (~0 != fib_table->ft_table_id) + { + hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id); + } + pool_put(ip6_main.fibs, fib_table); +} + +fib_node_index_t +ip6_fib_table_lookup (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + const ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv, value; + int i, n_p, rv; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + n_p = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = addr->as_u64[0]; + kv.key[1] = addr->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + /* + * start search from a mask length same length or shorter. + * we don't want matches longer than the mask passed + */ + i = 0; + while (i < n_p && table->prefix_lengths_in_search_order[i] > len) + { + i++; + } + + for (; i < n_p; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. 
+ kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +ip6_fib_table_lookup_exact_match (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + const ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv, value; + ip6_address_t *mask; + u64 fib; + int rv; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + + return (FIB_NODE_INDEX_INVALID); +} + +static void +compute_prefix_lengths_in_search_order (ip6_fib_table_instance_t *table) +{ + int i; + vec_reset_length (table->prefix_lengths_in_search_order); + /* Note: bitmap reversed so this is in fact a longest prefix match */ + clib_bitmap_foreach (i, table->non_empty_dst_address_length_bitmap, + ({ + int dst_address_length = 128 - i; + vec_add1(table->prefix_lengths_in_search_order, dst_address_length); + })); +} + +void +ip6_fib_table_entry_remove (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0); + + /* refcount accounting */ + ASSERT (table->dst_address_length_refcounts[len] > 0); + if (--table->dst_address_length_refcounts[len] == 0) + { + 
table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 0); + compute_prefix_lengths_in_search_order (table); + } +} + +void +ip6_fib_table_entry_insert (u32 fib_index, + const ip6_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = fib_entry_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1); + + table->dst_address_length_refcounts[len]++; + + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 1); + compute_prefix_lengths_in_search_order (table); +} + +u32 +ip6_fib_table_fwding_lookup (ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst) +{ + const ip6_fib_table_instance_t *table; + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + len = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = dst->as_u64[0]; + kv.key[1] = dst->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + for (i = 0; i < len; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. 
+ kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + /* default route is always present */ + ASSERT(0); + return 0; +} + +u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im, + u32 sw_if_index, + const ip6_address_t * dst) +{ + u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + return ip6_fib_table_fwding_lookup(im, fib_index, dst); +} + +flow_hash_config_t +ip6_fib_table_get_flow_hash_config (u32 fib_index) +{ + return (ip6_fib_get(fib_index)->flow_hash_config); +} + +u32 +ip6_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip6_main.fib_index_by_sw_if_index)) + { + /* + * This is the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip6_main.fib_index_by_sw_if_index[sw_if_index]); +} + +void +ip6_fib_table_fwding_dpo_update (u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = dpo->dpoi_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1); + + table->dst_address_length_refcounts[len]++; + + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 1); + compute_prefix_lengths_in_search_order (table); +} + +void +ip6_fib_table_fwding_dpo_remove (u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = 
&ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = dpo->dpoi_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0); + + /* refcount accounting */ + ASSERT (table->dst_address_length_refcounts[len] > 0); + if (--table->dst_address_length_refcounts[len] == 0) + { + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 0); + compute_prefix_lengths_in_search_order (table); + } +} + +typedef struct ip6_fib_show_ctx_t_ { + u32 fib_index; + fib_node_index_t *entries; +} ip6_fib_show_ctx_t; + +static void +ip6_fib_table_collect_entries (clib_bihash_kv_24_8_t * kvp, + void *arg) +{ + ip6_fib_show_ctx_t *ctx = arg; + + if ((kvp->key[2] >> 32) == ctx->fib_index) + { + vec_add1(ctx->entries, kvp->value); + } +} + +static void +ip6_fib_table_show_all (ip6_fib_t *fib, + vlib_main_t * vm) +{ + fib_node_index_t *fib_entry_index; + ip6_fib_show_ctx_t ctx = { + .fib_index = fib->index, + .entries = NULL, + }; + ip6_main_t *im = &ip6_main; + + BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + ip6_fib_table_collect_entries, + &ctx); + + vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort); + + vec_foreach(fib_entry_index, ctx.entries) + { + vlib_cli_output(vm, "%U", + format_fib_entry, + *fib_entry_index, + FIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(ctx.entries); +} + +static void +ip6_fib_table_show_one (ip6_fib_t *fib, + vlib_main_t * vm, + ip6_address_t *address, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_fib_entry, + ip6_fib_table_lookup(fib->index, address, mask_len), + FIB_ENTRY_FORMAT_DETAIL); +} + +typedef struct { + u32 fib_index; + u64 count_by_prefix_length[129]; +} count_routes_in_fib_at_prefix_length_arg_t; + +static void 
count_routes_in_fib_at_prefix_length +(BVT(clib_bihash_kv) * kvp, void *arg) +{ + count_routes_in_fib_at_prefix_length_arg_t * ap = arg; + int mask_width; + + if ((kvp->key[2]>>32) != ap->fib_index) + return; + + mask_width = kvp->key[2] & 0xFF; + + ap->count_by_prefix_length[mask_width]++; +} + +static clib_error_t * +ip6_show_fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca; + ip6_main_t * im6 = &ip6_main; + fib_table_t *fib_table; + ip6_fib_t * fib; + int verbose, matching; + ip6_address_t matching_address; + u32 mask_len = 128; + int table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || + unformat (input, "summary") || + unformat (input, "sum")) + verbose = 0; + + else if (unformat (input, "%U/%d", + unformat_ip6_address, &matching_address, &mask_len)) + matching = 1; + + else if (unformat (input, "%U", unformat_ip6_address, &matching_address)) + matching = 1; + + else if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (fib_table, im6->fibs, + ({ + fib = &(fib_table->v6); + if (table_id >= 0 && table_id != (int)fib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)fib->index) + continue; + + vlib_cli_output (vm, "%s, fib_index %d, flow hash: %U", + fib_table->ft_desc, fib->index, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! 
verbose) + { + BVT(clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash; + int len; + + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + + memset (ca, 0, sizeof(*ca)); + ca->fib_index = fib->index; + + BV(clib_bihash_foreach_key_value_pair) + (h, count_routes_in_fib_at_prefix_length, ca); + + for (len = 128; len >= 0; len--) + { + if (ca->count_by_prefix_length[len]) + vlib_cli_output (vm, "%=20d%=16lld", + len, ca->count_by_prefix_length[len]); + } + continue; + } + + if (!matching) + { + ip6_fib_table_show_all(fib, vm); + } + else + { + ip6_fib_table_show_one(fib, vm, &matching_address, mask_len); + } + })); + + return 0; +} + +/*? + * Show FIB6/route entries + * + * @cliexpar + * @cliexstart{show ip fib} + * Display the IPv6 FIB. + * This command will run for a long time when the FIBs comprise millions of entries. + * See 'show ip fib' + * @cliexend + ?*/ +VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { + .path = "show ip6 fib", + .short_help = "show ip6 fib [summary] [table <n>] [<ip6-addr>] [verboase]", + .function = ip6_show_fib, +}; diff --git a/vnet/vnet/fib/ip6_fib.h b/vnet/vnet/fib/ip6_fib.h new file mode 100644 index 00000000000..f6af993a3c2 --- /dev/null +++ b/vnet/vnet/fib/ip6_fib.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __IP6_FIB_H__ +#define __IP6_FIB_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> +#include <vnet/ip/lookup.h> +#include <vnet/dpo/load_balance.h> + +extern fib_node_index_t ip6_fib_table_lookup(u32 fib_index, + const ip6_address_t *addr, + u32 len); +extern fib_node_index_t ip6_fib_table_lookup_exact_match(u32 fib_index, + const ip6_address_t *addr, + u32 len); + +extern void ip6_fib_table_entry_remove(u32 fib_index, + const ip6_address_t *addr, + u32 len); + +extern void ip6_fib_table_entry_insert(u32 fib_index, + const ip6_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip6_fib_table_destroy(u32 fib_index); + +extern void ip6_fib_table_fwding_dpo_update(u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im, + u32 sw_if_index, + const ip6_address_t * dst); +u32 ip6_fib_table_fwding_lookup(ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst); + +/** + * @biref return the DPO that the LB stacks on. + */ +always_inline u32 +ip6_src_lookup_for_packet (ip6_main_t * im, + vlib_buffer_t * b, + ip6_header_t * i) +{ + if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0) + { + const dpo_id_t *dpo; + index_t lbi; + + lbi = ip6_fib_table_fwding_lookup_with_if_index( + im, + vnet_buffer (b)->sw_if_index[VLIB_RX], + &i->src_address); + + dpo = load_balance_get_bucket_i(load_balance_get(lbi), 0); + + if (dpo_is_adj(dpo)) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = dpo->dpoi_index; + } + } + return vnet_buffer (b)->ip.adj_index[VLIB_RX]; +} + +/** + * \brief Get or create an IPv6 fib. + * + * Get or create an IPv4 fib with the provided table ID. + * + * \param im + * ip4_main pointer. 
+ * \param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * \returns A pointer to the retrieved or created fib. + * + */ +extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip6_fib_table_create_and_lock(void); + +static inline ip6_fib_t * +ip6_fib_get (fib_node_index_t index) +{ + ASSERT(!pool_is_free_index(ip6_main.fibs, index)); + return (&pool_elt_at_index (ip6_main.fibs, index)->v6); +} + +static inline +u32 ip6_fib_index_from_table_id (u32 table_id) +{ + ip6_main_t * im = &ip6_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip6_fib_table_get_index_for_sw_if_index(u32 sw_if_index); + +extern flow_hash_config_t ip6_fib_table_get_flow_hash_config(u32 fib_index); + +#endif + diff --git a/vnet/vnet/fib/mpls_fib.c b/vnet/vnet/fib/mpls_fib.c new file mode 100644 index 00000000000..8f1ccef9061 --- /dev/null +++ b/vnet/vnet/fib/mpls_fib.c @@ -0,0 +1,439 @@ +/* + * mpls_fib.h: The Label/MPLS FIB + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * An MPLS_FIB table; + * + * The entries in the table are programmed wtih one or more MOIs. These MOIs + * may result in different forwarding actions for end-of-stack (EOS) and non-EOS + * packets. 
Whether the two actions are the same more often than they are + * different, or vice versa, is a function of the deployment in which the router + * is used and thus not predictable. + * The desgin choice to make with an MPLS_FIB table is: + * 1 - 20 bit key: label only. + * When the EOS and non-EOS actions differ the result is a 'EOS-choice' object. + * 2 - 21 bit key: label and EOS-bit. + * The result is then the specific action based on EOS-bit. + * + * 20 bit key: + * Advantages: + * - lower memory overhead, since there are few DB entries. + * Disadvantages: + * - slower DP performance in the case the chains differ, as more objects are + * encounterd in the switch path + * + * 21 bit key: + * Advantages: + * - faster DP performance + * Disadvantages + * - increased memory footprint. + * + * Switching between schemes based on observed/measured action similarity is not + * considered on the grounds of complexity and flip-flopping. + * + * VPP mantra - favour performance over memory. We choose a 21 bit key. + */ + +#include <vnet/fib/fib_table.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/mpls/mpls.h> + +/** + * All lookups in an MPLS_FIB table must result in a DPO of type load-balance. 
+ * This is the default result which links to drop + */ +static index_t mpls_fib_drop_dpo_index = INDEX_INVALID; + +/** + * FIXME + */ +#define MPLS_FLOW_HASH_DEFAULT 0 + +static inline u32 +mpls_fib_entry_mk_key (mpls_label_t label, + mpls_eos_bit_t eos) +{ + ASSERT(eos <= 1); + return (label << 1 | eos); +} + +u32 +mpls_fib_index_from_table_id (u32 table_id) +{ + mpls_main_t *mm = &mpls_main; + uword * p; + + p = hash_get (mm->fib_index_by_table_id, table_id); + if (!p) + return FIB_NODE_INDEX_INVALID; + + return p[0]; +} + +static u32 +mpls_fib_create_with_table_id (u32 table_id) +{ + dpo_id_t dpo = DPO_NULL; + fib_table_t *fib_table; + mpls_eos_bit_t eos; + mpls_fib_t *mf; + int i; + + pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_MPLS; + fib_table->ft_index = + (fib_table - mpls_main.fibs); + + hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + table_id; + fib_table->ft_flow_hash_config = + MPLS_FLOW_HASH_DEFAULT; + fib_table->v4.fwd_classify_table_index = ~0; + fib_table->v4.rev_classify_table_index = ~0; + + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); + + if (INDEX_INVALID == mpls_fib_drop_dpo_index) + { + mpls_fib_drop_dpo_index = load_balance_create(1, DPO_PROTO_MPLS, 0); + load_balance_set_bucket(mpls_fib_drop_dpo_index, + 0, + drop_dpo_get(DPO_PROTO_MPLS)); + } + + mf = &fib_table->mpls; + mf->mf_entries = hash_create(0, sizeof(fib_node_index_t)); + for (i = 0; i < MPLS_FIB_DB_SIZE; i++) + { + /* + * initialise each DPO in the data-path lookup table + * to be the special MPLS drop + */ + mf->mf_lbs[i] = mpls_fib_drop_dpo_index; + } + + /* + * non-default forwarding for the special labels. 
+ */ + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + /* + * PUNT the router alert, both EOS and non-eos + */ + prefix.fp_label = MPLS_IETF_ROUTER_ALERT_LABEL; + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_eos = eos; + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + punt_dpo_get(DPO_PROTO_MPLS)); + } + + /* + * IPv4 explicit NULL EOS lookup in the interface's IPv4 table + */ + prefix.fp_label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL; + prefix.fp_payload_proto = DPO_PROTO_IP4; + prefix.fp_eos = MPLS_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, // unused + DPO_PROTO_IP4, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + prefix.fp_payload_proto = DPO_PROTO_MPLS; + prefix.fp_eos = MPLS_NON_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, //unsued + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + /* + * IPv6 explicit NULL EOS lookup in the interface's IPv6 table + */ + prefix.fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL; + prefix.fp_payload_proto = DPO_PROTO_IP6; + prefix.fp_eos = MPLS_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, //unused + DPO_PROTO_IP6, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + prefix.fp_payload_proto = DPO_PROTO_MPLS; + prefix.fp_eos = MPLS_NON_EOS; + lookup_dpo_add_or_lock_w_fib_index(0, // unsued + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + 
FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + return (fib_table->ft_index); +} + +u32 +mpls_fib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = mpls_fib_index_from_table_id(table_id); + if (~0 == index) + return mpls_fib_create_with_table_id(table_id); + + fib_table_lock(index, FIB_PROTOCOL_MPLS); + + return (index); +} +u32 +mpls_fib_table_create_and_lock (void) +{ + return (mpls_fib_create_with_table_id(~0)); +} + +void +mpls_fib_table_destroy (mpls_fib_t *mf) +{ + fib_table_t *fib_table = (fib_table_t*)mf; + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + }; + mpls_label_t special_labels[] = { + MPLS_IETF_ROUTER_ALERT_LABEL, + MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL, + }; + mpls_eos_bit_t eos; + u32 ii; + + for (ii = 0; ii < ARRAY_LEN(special_labels); ii++) + { + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_label = special_labels[ii]; + prefix.fp_eos = eos; + + fib_table_entry_delete(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL); + } + } + if (~0 != fib_table->ft_table_id) + { + hash_unset(mpls_main.fib_index_by_table_id, + fib_table->ft_table_id); + } + hash_delete(mf->mf_entries); + + pool_put(mpls_main.fibs, fib_table); +} + +fib_node_index_t +mpls_fib_table_lookup (const mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + uword *p; + + p = hash_get(mf->mf_entries, mpls_fib_entry_mk_key(label, eos)); + + if (NULL == p) + return FIB_NODE_INDEX_INVALID; + + return p[0]; +} + +void +mpls_fib_table_entry_insert (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t lfei) +{ + hash_set(mf->mf_entries, mpls_fib_entry_mk_key(label, eos), lfei); +} + +void +mpls_fib_table_entry_remove (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + hash_unset(mf->mf_entries, mpls_fib_entry_mk_key(label, eos)); +} + +void +mpls_fib_forwarding_table_update (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + const dpo_id_t *dpo) +{ 
+ mpls_label_t key; + + ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); + + key = mpls_fib_entry_mk_key(label, eos); + + mf->mf_lbs[key] = dpo->dpoi_index; +} + +void +mpls_fib_forwarding_table_reset (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + mpls_label_t key; + + key = mpls_fib_entry_mk_key(label, eos); + + mf->mf_lbs[key] = mpls_fib_drop_dpo_index; +} + +flow_hash_config_t +mpls_fib_table_get_flow_hash_config (u32 fib_index) +{ + // FIXME. + return (0); +} + +static void +mpls_fib_table_show_all (const mpls_fib_t *mpls_fib, + vlib_main_t * vm) +{ + fib_node_index_t lfei, *lfeip, *lfeis = NULL; + mpls_label_t key; + + hash_foreach(key, lfei, mpls_fib->mf_entries, + ({ + vec_add1(lfeis, lfei); + })); + + vec_sort_with_function(lfeis, fib_entry_cmp_for_sort); + + vec_foreach(lfeip, lfeis) + { + vlib_cli_output (vm, "%U", + format_fib_entry, *lfeip, + FIB_ENTRY_FORMAT_DETAIL); + } + vec_free(lfeis); +} + +static void +mpls_fib_table_show_one (const mpls_fib_t *mpls_fib, + mpls_label_t label, + vlib_main_t * vm) +{ + fib_node_index_t lfei; + mpls_eos_bit_t eos; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + lfei = mpls_fib_table_lookup(mpls_fib, label, eos); + + if (FIB_NODE_INDEX_INVALID != lfei) + { + vlib_cli_output (vm, "%U", + format_fib_entry, lfei, FIB_ENTRY_FORMAT_DETAIL); + } + } +} + +static clib_error_t * +mpls_fib_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_table_t * fib_table; + mpls_label_t label; + int table_id; + + table_id = -1; + label = MPLS_LABEL_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* if (unformat (input, "brief") || unformat (input, "summary") */ + /* || unformat (input, "sum")) */ + /* verbose = 0; */ + + if (unformat (input, "%d", &label)) + continue; + else if (unformat (input, "table %d", &table_id)) + ; + else + break; + } + + pool_foreach (fib_table, mpls_main.fibs, + ({ + if (table_id >= 0 && table_id != fib_table->ft_table_id) + continue; + + 
vlib_cli_output (vm, "%v, fib_index %d", + fib_table->ft_desc, mpls_main.fibs - fib_table); + + if (MPLS_LABEL_INVALID == label) + { + mpls_fib_table_show_all(&(fib_table->mpls), vm); + } + else + { + mpls_fib_table_show_one(&(fib_table->mpls), label, vm); + } + })); + + return 0; +} + +VLIB_CLI_COMMAND (mpls_fib_show_command, static) = { + .path = "show mpls fib", + .short_help = "show mpls fib [summary] [table <n>]", + .function = mpls_fib_show, +}; diff --git a/vnet/vnet/fib/mpls_fib.h b/vnet/vnet/fib/mpls_fib.h new file mode 100644 index 00000000000..42c9a865276 --- /dev/null +++ b/vnet/vnet/fib/mpls_fib.h @@ -0,0 +1,106 @@ +/* + * mpls_fib.h: The Label/MPLS FIB + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MPLS_FIB_TABLE_H__ +#define __MPLS_FIB_TABLE_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/dpo.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_table.h> + +static inline mpls_fib_t* +mpls_fib_get (fib_node_index_t index) +{ + if (!pool_is_free_index(mpls_main.fibs, index)) + return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls)); + return (NULL); +} + +extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 mpls_fib_table_create_and_lock(void); +// extern mpls_fib_t * mpls_fib_find(u32 table_id); +extern u32 mpls_fib_index_from_table_id(u32 table_id); + +extern u8 *format_mpls_fib_table_name(u8 * s, va_list * args); + +extern fib_node_index_t mpls_fib_table_entry_add_from_ip_fib_entry ( + u32 table_id, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t fib_entry_index); + + +extern fib_node_index_t mpls_fib_table_lookup(const mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); + +extern void mpls_fib_table_entry_remove(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); +extern void mpls_fib_table_entry_insert(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t fei); +extern void mpls_fib_table_destroy(mpls_fib_t *mf); + + + +extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + const dpo_id_t *dpo); +extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); + +/** + * @brief + * Lookup a label and EOS bit in the MPLS_FIB table to retrieve the + * load-balance index to be used for packet forwarding. 
+ */ +static inline index_t +mpls_fib_table_forwarding_lookup (u32 mpls_fib_index, + const mpls_unicast_header_t *hdr) +{ + mpls_label_t label; + mpls_fib_t *mf; + u32 key; + + label = clib_net_to_host_u32(hdr->label_exp_s_ttl); + key = (vnet_mpls_uc_get_label(label) << 1) | vnet_mpls_uc_get_s(label); + + mf = mpls_fib_get(mpls_fib_index); + + return (mf->mf_lbs[key]); +} + +static inline u32 +mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + mpls_main_t *mm = &mpls_main; + + ASSERT(vec_len(mm->fib_index_by_sw_if_index) < sw_if_index); + + return (mm->fib_index_by_sw_if_index[sw_if_index]); +} + +extern flow_hash_config_t mpls_fib_table_get_flow_hash_config(u32 fib_index); + +#endif diff --git a/vnet/vnet/gre/gre.c b/vnet/vnet/gre/gre.c index f00977c8cd6..9f8adc79ff0 100644 --- a/vnet/vnet/gre/gre.c +++ b/vnet/vnet/gre/gre.c @@ -17,14 +17,10 @@ #include <vnet/vnet.h> #include <vnet/gre/gre.h> +#include <vnet/adj/adj.h> gre_main_t gre_main; -typedef CLIB_PACKED (struct { - ip4_header_t ip4; - gre_header_t gre; -}) ip4_and_gre_header_t; - typedef struct { union { ip4_and_gre_header_t ip4_and_gre; @@ -233,179 +229,39 @@ gre_interface_tx (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); /* - * As long as we have enough pkts left to process two pkts - * and prefetch two pkts... + * FIXME DUAL LOOP */ - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t * b0, * b1; - ip4_header_t * ip0, * ip1; - ip4_and_gre_union_t * h0, * h1; - u32 bi0, next0, bi1, next1; - __attribute__((unused)) u8 error0, error1; - u16 gre_protocol0, gre_protocol1; - - /* Prefetch the next iteration */ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - /* - * Prefetch packet data. We expect to overwrite - * the inbound L2 header with an ip header and a - * gre header. 
Might want to prefetch the last line - * of rewrite space as well; need profile data - */ - CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); - CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); - } - - /* Pick up the next two buffer indices */ - bi0 = from[0]; - bi1 = from[1]; - - /* Speculatively enqueue them where we sent the last buffer */ - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = t->outer_fib_index; - - if (PREDICT_FALSE(t->teb)) - { - gre_protocol0 = clib_net_to_host_u16(GRE_PROTOCOL_teb); - gre_protocol1 = clib_net_to_host_u16(GRE_PROTOCOL_teb); - } - else - { - ip0 = vlib_buffer_get_current (b0); - gre_protocol0 = clib_net_to_host_u16 (0x800); - gre_protocol0 = - ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? - 0x86DD : gre_protocol0; - - ip1 = vlib_buffer_get_current (b1); - gre_protocol1 = clib_net_to_host_u16 (0x800); - gre_protocol1 = - ((ip1->ip_version_and_header_length & 0xF0) == 0x60) ? 
- 0x86DD : gre_protocol1; - } - - vlib_buffer_advance (b0, -sizeof(*h0)); - vlib_buffer_advance (b1, -sizeof(*h1)); - - h0 = vlib_buffer_get_current (b0); - h1 = vlib_buffer_get_current (b1); - h0->as_u64[0] = 0; - h0->as_u64[1] = 0; - h0->as_u64[2] = 0; - - h1->as_u64[0] = 0; - h1->as_u64[1] = 0; - h1->as_u64[2] = 0; - - ip0 = &h0->ip4_and_gre.ip4; - h0->ip4_and_gre.gre.protocol = gre_protocol0; - ip0->ip_version_and_header_length = 0x45; - ip0->ttl = 254; - ip0->protocol = IP_PROTOCOL_GRE; - - ip1 = &h1->ip4_and_gre.ip4; - h1->ip4_and_gre.gre.protocol = gre_protocol1; - ip1->ip_version_and_header_length = 0x45; - ip1->ttl = 254; - ip1->protocol = IP_PROTOCOL_GRE; - - ip0->length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip1->length = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - ip0->src_address.as_u32 = t->tunnel_src.as_u32; - ip1->src_address.as_u32 = t->tunnel_src.as_u32; - ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; - ip1->dst_address.as_u32 = t->tunnel_dst.as_u32; - ip0->checksum = ip4_header_checksum (ip0); - ip1->checksum = ip4_header_checksum (ip1); - - /* ip4_lookup will route to the tunnel partner */ - next0 = GRE_OUTPUT_NEXT_LOOKUP; - next1 = GRE_OUTPUT_NEXT_LOOKUP; - error0 = GRE_ERROR_NONE; - error1 = GRE_ERROR_NONE; - - /* - * Enqueue 2 pkts. This macro deals with next0 != next1, - * acquiring enqueue rights to the indicated next - * node input frame, etc. 
- */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } while (n_left_from > 0 && n_left_to_next > 0) { - vlib_buffer_t * b0; + u32 bi0, adj_index0, next0; + const ip_adjacency_t * adj0; + const dpo_id_t *dpo0; ip4_header_t * ip0; - ip4_and_gre_union_t * h0; - u32 bi0, next0; - __attribute__((unused)) u8 error0; - u16 gre_protocol0; - - bi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t->outer_fib_index; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer(vm, bi0); ip0 = vlib_buffer_get_current (b0); - if (PREDICT_FALSE(t->teb)) - { - gre_protocol0 = clib_net_to_host_u16(GRE_PROTOCOL_teb); - } - else - { - gre_protocol0 = clib_net_to_host_u16 (0x800); - gre_protocol0 = - ((ip0->ip_version_and_header_length & 0xF0) == 0x60) ? 
- 0x86DD : gre_protocol0; - } - - vlib_buffer_advance (b0, -sizeof(*h0)); - - h0 = vlib_buffer_get_current (b0); - h0->as_u64[0] = 0; - h0->as_u64[1] = 0; - h0->as_u64[2] = 0; - - ip0 = &h0->ip4_and_gre.ip4; - h0->ip4_and_gre.gre.protocol = gre_protocol0; - ip0->ip_version_and_header_length = 0x45; - ip0->ttl = 254; - ip0->protocol = IP_PROTOCOL_GRE; + + /* Fixup the checksum and len fields in the LISP tunnel encap + * that was applied at the midchain node */ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - ip0->src_address.as_u32 = t->tunnel_src.as_u32; - ip0->dst_address.as_u32 = t->tunnel_dst.as_u32; ip0->checksum = ip4_header_checksum (ip0); - next0 = GRE_OUTPUT_NEXT_LOOKUP; - error0 = GRE_ERROR_NONE; + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get(adj_index0); + dpo0 = &adj0->sub_type.midchain.next_dpo; + next0 = dpo0->dpoi_next_node; + vnet_buffer(b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { diff --git a/vnet/vnet/gre/gre.h b/vnet/vnet/gre/gre.h index ad599d2f09e..beb13d989ee 100644 --- a/vnet/vnet/gre/gre.h +++ b/vnet/vnet/gre/gre.h @@ -25,6 +25,7 @@ #include <vnet/ip/ip4_packet.h> #include <vnet/pg/pg.h> #include <vnet/ip/format.h> +#include <vnet/adj/adj_types.h> extern vnet_hw_interface_class_t gre_hw_interface_class; @@ -50,12 +51,44 @@ typedef struct { } gre_protocol_info_t; typedef struct { + /** + * Linkage into the FIB object graph + */ + fib_node_t node; + + /** + * The tunnel's source/local address + */ ip4_address_t tunnel_src; + /** + * The tunnel's destination/remote address + */ ip4_address_t tunnel_dst; + /** + * The FIB in which the src.dst address are present + */ u32 outer_fib_index; u32 hw_if_index; u32 sw_if_index; u8 teb; + + /** + * The FIB entry sourced by the tunnel for its destination prefix + */ + fib_node_index_t fib_entry_index; + + /** + * The tunnel is a child of 
the FIB entry for its desintion. This is + * so it receives updates when the forwarding information for that entry + * changes. + * The tunnels sibling index on the FIB entry's dependency list. + */ + u32 sibling_index; + + /** + * The index of the midchain adjacency created for this tunnel + */ + adj_index_t adj_index[FIB_LINK_NUM]; } gre_tunnel_t; typedef struct { @@ -80,6 +113,15 @@ typedef struct { vnet_main_t * vnet_main; } gre_main_t; +/** + * @brief IPv4 and GRE header. + * +*/ +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + gre_header_t gre; +}) ip4_and_gre_header_t; + always_inline gre_protocol_info_t * gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol) { diff --git a/vnet/vnet/gre/interface.c b/vnet/vnet/gre/interface.c index 864c384b992..10e9ff9be8c 100644 --- a/vnet/vnet/gre/interface.c +++ b/vnet/vnet/gre/interface.c @@ -19,10 +19,24 @@ #include <vnet/pg/pg.h> #include <vnet/gre/gre.h> #include <vnet/ip/format.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/mpls/mpls.h> + +static inline u64 +gre_mk_key (const ip4_address_t *src, + const ip4_address_t *dst, + u32 out_fib_index) +{ + // FIXME. the fib index should be part of the key + return ((u64)src->as_u32 << 32 | (u64)dst->as_u32); +} -u8 * format_gre_tunnel (u8 * s, va_list * args) +static u8 * +format_gre_tunnel (u8 * s, va_list * args) { gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *); + int detail = va_arg (*args, int); gre_main_t * gm = &gre_main; s = format (s, @@ -32,11 +46,193 @@ u8 * format_gre_tunnel (u8 * s, va_list * args) format_ip4_address, &t->tunnel_dst, (t->teb ? 
"teb" : "ip"), t->outer_fib_index); + if (detail) + { + s = format (s, "\n fib-entry:%d adj-ip4:%d adj-ip6:%d adj-mpls:%d", + t->fib_entry_index, + t->adj_index[FIB_LINK_IP4], + t->adj_index[FIB_LINK_IP6], + t->adj_index[FIB_LINK_MPLS]); + } + return s; } -int vnet_gre_add_del_tunnel - (vnet_gre_add_del_tunnel_args_t *a, u32 * sw_if_indexp) +static gre_tunnel_t * +gre_tunnel_db_find (const ip4_address_t *src, + const ip4_address_t *dst, + u32 out_fib_index) +{ + gre_main_t * gm = &gre_main; + uword * p; + u64 key; + + key = gre_mk_key(src, dst, out_fib_index); + + p = hash_get (gm->tunnel_by_key, key); + + if (NULL == p) + return (NULL); + + return (pool_elt_at_index (gm->tunnels, p[0])); +} + +static void +gre_tunnel_db_add (const gre_tunnel_t *t) +{ + gre_main_t * gm = &gre_main; + u64 key; + + key = gre_mk_key(&t->tunnel_src, &t->tunnel_dst, t->outer_fib_index); + hash_set (gm->tunnel_by_key, key, t - gm->tunnels); +} + +static void +gre_tunnel_db_remove (const gre_tunnel_t *t) +{ + gre_main_t * gm = &gre_main; + u64 key; + + key = gre_mk_key(&t->tunnel_src, &t->tunnel_dst, t->outer_fib_index); + hash_unset (gm->tunnel_by_key, key); +} + +static gre_tunnel_t * +gre_tunnel_from_fib_node (fib_node_t *node) +{ +#if (CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_GRE_TUNNEL == node->fn_type); +#endif + return ((gre_tunnel_t*) (((char*)node) - + STRUCT_OFFSET_OF(gre_tunnel_t, node))); +} + +/* + * gre_tunnel_stack + * + * 'stack' (resolve the recursion for) the tunnel's midchain adjacency + */ +static void +gre_tunnel_stack (gre_tunnel_t *gt) +{ + fib_link_t linkt; + + /* + * find the adjacency that is contributed by the FIB entry + * that this tunnel resovles via, and use it as the next adj + * in the midchain + */ + FOR_EACH_FIB_LINK(linkt) + { + if (ADJ_INDEX_INVALID != gt->adj_index[linkt]) + { + adj_nbr_midchain_stack( + gt->adj_index[linkt], + fib_entry_contribute_ip_forwarding(gt->fib_entry_index)); + } + } +} + +/** + * Function definition to backwalk a FIB node + */ 
+static fib_node_back_walk_rc_t +gre_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + gre_tunnel_stack(gre_tunnel_from_fib_node(node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t* +gre_tunnel_fib_node_get (fib_node_index_t index) +{ + gre_tunnel_t * gt; + gre_main_t * gm; + + gm = &gre_main; + gt = pool_elt_at_index(gm->tunnels, index); + + return (>->node); +} + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +gre_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The MPLS GRE tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t gre_vft = { + .fnv_get = gre_tunnel_fib_node_get, + .fnv_last_lock = gre_tunnel_last_lock_gone, + .fnv_back_walk = gre_tunnel_back_walk, +}; + +static int +gre_proto_from_fib_link (fib_link_t link) +{ + switch (link) + { + case FIB_LINK_IP4: + return (GRE_PROTOCOL_ip4); + case FIB_LINK_IP6: + return (GRE_PROTOCOL_ip6); + case FIB_LINK_MPLS: + return (GRE_PROTOCOL_mpls_unicast); + } + ASSERT(0); + return (GRE_PROTOCOL_ip4); +} + +static u8 * +gre_rewrite (gre_tunnel_t * t, + fib_link_t link) +{ + ip4_and_gre_header_t * h0; + u8 * rewrite_data = 0; + + vec_validate_init_empty (rewrite_data, sizeof (*h0) - 1, 0); + + h0 = (ip4_and_gre_header_t *) rewrite_data; + + if (t->teb) + { + h0->gre.protocol = clib_net_to_host_u16(GRE_PROTOCOL_teb); + } + else + { + h0->gre.protocol = clib_host_to_net_u16(gre_proto_from_fib_link(link)); + } + + h0->ip4.ip_version_and_header_length = 0x45; + h0->ip4.ttl = 254; + h0->ip4.protocol = IP_PROTOCOL_GRE; + /* $$$ fixup ip4 header length and checksum after-the-fact */ + h0->ip4.src_address.as_u32 = t->tunnel_src.as_u32; + 
h0->ip4.dst_address.as_u32 = t->tunnel_dst.as_u32; + h0->ip4.checksum = ip4_header_checksum (&h0->ip4); + + return (rewrite_data); +} + +static int +vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a, + u32 * sw_if_indexp) { gre_main_t * gm = &gre_main; vnet_main_t * vnm = gm->vnet_main; @@ -44,49 +240,45 @@ int vnet_gre_add_del_tunnel gre_tunnel_t * t; vnet_hw_interface_t * hi; u32 hw_if_index, sw_if_index; - u32 slot; u32 outer_fib_index; - uword * p; - u64 key; u8 address[6]; clib_error_t *error; + fib_link_t linkt; + u8 *rewrite; - key = (u64)a->src.as_u32 << 32 | (u64)a->dst.as_u32; - p = hash_get (gm->tunnel_by_key, key); + outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id); - if (a->is_add) { - /* check if same src/dst pair exists */ - if (p) - return VNET_API_ERROR_INVALID_VALUE; + if (~0 == outer_fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; - p = hash_get (im->fib_index_by_table_id, a->outer_fib_id); - if (! p) - return VNET_API_ERROR_NO_SUCH_FIB; + t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id); - outer_fib_index = p[0]; + if (NULL != t) + return VNET_API_ERROR_INVALID_VALUE; - pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); - memset (t, 0, sizeof (*t)); + pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES); + memset (t, 0, sizeof (*t)); + fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL); - if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) { - vnet_interface_main_t * im = &vnm->interface_main; + if (vec_len (gm->free_gre_tunnel_hw_if_indices) > 0) { + vnet_interface_main_t * im = &vnm->interface_main; - hw_if_index = gm->free_gre_tunnel_hw_if_indices + hw_if_index = gm->free_gre_tunnel_hw_if_indices [vec_len (gm->free_gre_tunnel_hw_if_indices)-1]; - _vec_len (gm->free_gre_tunnel_hw_if_indices) -= 1; + _vec_len (gm->free_gre_tunnel_hw_if_indices) -= 1; - hi = vnet_get_hw_interface (vnm, hw_if_index); - hi->dev_instance = t - gm->tunnels; - hi->hw_instance = hi->dev_instance; + hi = vnet_get_hw_interface 
(vnm, hw_if_index); + hi->dev_instance = t - gm->tunnels; + hi->hw_instance = hi->dev_instance; - /* clear old stats of freed tunnel before reuse */ - sw_if_index = hi->sw_if_index; - vnet_interface_counter_lock(im); - vlib_zero_combined_counter + /* clear old stats of freed tunnel before reuse */ + sw_if_index = hi->sw_if_index; + vnet_interface_counter_lock(im); + vlib_zero_combined_counter (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index); - vlib_zero_combined_counter + vlib_zero_combined_counter (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index); - vlib_zero_simple_counter + vlib_zero_simple_counter (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index); vnet_interface_counter_unlock(im); } else { @@ -111,67 +303,186 @@ int vnet_gre_add_del_tunnel return VNET_API_ERROR_INVALID_REGISTRATION; } } else { - hw_if_index = vnet_register_interface - (vnm, gre_device_class.index, t - gm->tunnels, - gre_hw_interface_class.index, - t - gm->tunnels); + hw_if_index = vnet_register_interface + (vnm, gre_device_class.index, t - gm->tunnels, + gre_hw_interface_class.index, + t - gm->tunnels); } hi = vnet_get_hw_interface (vnm, hw_if_index); sw_if_index = hi->sw_if_index; } - t->hw_if_index = hw_if_index; - t->outer_fib_index = outer_fib_index; - t->sw_if_index = sw_if_index; + t->hw_if_index = hw_if_index; + t->outer_fib_index = outer_fib_index; + t->sw_if_index = sw_if_index; - vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); - gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels; + vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0); + gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels; - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = t->outer_fib_index; + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + im->fib_index_by_sw_if_index[sw_if_index] = t->outer_fib_index; + 
ip4_sw_interface_enable_disable(sw_if_index, 1); - hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t); - hi->per_packet_overhead_bytes = + hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t); + hi->per_packet_overhead_bytes = /* preamble */ 8 + /* inter frame gap */ 12; - /* Standard default gre MTU. */ - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; + /* Standard default gre MTU. */ + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000; + + clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); + clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); + + gre_tunnel_db_add(t); + + /* + * source the FIB entry for the tunnel's destination + * and become a child thereof. The tunnel will then get poked + * when the forwarding for the entry updates, and the tunnel can + * re-stack accordingly + */ + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = t->tunnel_dst, + } + }; + + t->fib_entry_index = + fib_table_entry_special_add(outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + t->sibling_index = + fib_entry_child_add(t->fib_entry_index, + FIB_NODE_TYPE_GRE_TUNNEL, + t - gm->tunnels); + + /* + * create and update the midchain adj this tunnel sources. + * We could be smarter here and trigger this on an interface proto enable, + * like we do for MPLS. 
+ */ + for (linkt = FIB_LINK_IP4; linkt <= FIB_LINK_IP6; linkt++) + { + t->adj_index[linkt] = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + linkt, + &zero_addr, + sw_if_index); + + rewrite = gre_rewrite(t, linkt); + adj_nbr_midchain_update_rewrite(t->adj_index[linkt], + hi->tx_node_index, + rewrite); + vec_free(rewrite); + } + t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; - t->teb = a->teb; - clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); - clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); + t->teb = a->teb; + clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src)); + clib_memcpy (&t->tunnel_dst, &a->dst, sizeof (t->tunnel_dst)); + gre_tunnel_stack(t); - hash_set (gm->tunnel_by_key, key, t - gm->tunnels); + if (sw_if_indexp) + *sw_if_indexp = sw_if_index; - slot = vlib_node_add_named_next_with_slot - (vnm->vlib_main, hi->tx_node_index, "ip4-lookup", GRE_OUTPUT_NEXT_LOOKUP); + return 0; +} - ASSERT (slot == GRE_OUTPUT_NEXT_LOOKUP); +static int +vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a, + u32 * sw_if_indexp) +{ + gre_main_t * gm = &gre_main; + vnet_main_t * vnm = gm->vnet_main; + gre_tunnel_t * t; + fib_link_t linkt; + u32 sw_if_index; + + t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id); - } else { /* !is_add => delete */ - /* tunnel needs to exist */ - if (! 
p) - return VNET_API_ERROR_NO_SUCH_ENTRY; + if (NULL == t) + return VNET_API_ERROR_NO_SUCH_ENTRY; - t = pool_elt_at_index (gm->tunnels, p[0]); + sw_if_index = t->sw_if_index; + vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */); + /* make sure tunnel is removed from l2 bd or xconnect */ + set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0); + vec_add1 (gm->free_gre_tunnel_hw_if_indices, t->hw_if_index); + gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; + ip4_sw_interface_enable_disable(sw_if_index, 0); - sw_if_index = t->sw_if_index; - vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */); - /* make sure tunnel is removed from l2 bd or xconnect */ - set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0); - vec_add1 (gm->free_gre_tunnel_hw_if_indices, t->hw_if_index); - gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0; + fib_entry_child_remove(t->fib_entry_index, + t->sibling_index); + fib_table_entry_delete_index(t->fib_entry_index, + FIB_SOURCE_RR); - hash_unset (gm->tunnel_by_key, key); - pool_put (gm->tunnels, t); + FOR_EACH_FIB_LINK(linkt) + { + adj_unlock(t->adj_index[linkt]); } + gre_tunnel_db_remove(t); + fib_node_deinit(&t->node); + pool_put (gm->tunnels, t); + if (sw_if_indexp) *sw_if_indexp = sw_if_index; return 0; } +int +vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a, + u32 * sw_if_indexp) +{ + if (a->is_add) + return (vnet_gre_tunnel_add(a, sw_if_indexp)); + else + return (vnet_gre_tunnel_delete(a, sw_if_indexp)); +} + +static void +gre_sw_interface_mpls_state_change (u32 sw_if_index, + u32 is_enable) +{ + gre_main_t *gm = &gre_main; + vnet_hw_interface_t * hi; + gre_tunnel_t *t; + u8 *rewrite; + + if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) || + (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index])) + return; + + t = pool_elt_at_index(gm->tunnels, + gm->tunnel_index_by_sw_if_index[sw_if_index]); + + if (is_enable) + { + hi = vnet_get_hw_interface (vnet_get_main(), 
t->hw_if_index); + t->adj_index[FIB_LINK_MPLS] = + adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_MPLS, + &zero_addr, + sw_if_index); + + rewrite = gre_rewrite(t, FIB_LINK_MPLS); + adj_nbr_midchain_update_rewrite(t->adj_index[FIB_LINK_MPLS], + hi->tx_node_index, + rewrite); + vec_free(rewrite); + } + else + { + adj_unlock(t->adj_index[FIB_LINK_MPLS]); + t->adj_index[FIB_LINK_MPLS] = ADJ_INDEX_INVALID; + } + + gre_tunnel_stack(t); +} static clib_error_t * create_gre_tunnel_command_fn (vlib_main_t * vm, @@ -216,13 +527,15 @@ create_gre_tunnel_command_fn (vlib_main_t * vm, return clib_error_return (0, "src and dst are identical"); memset (a, 0, sizeof (*a)); - a->is_add = is_add; a->outer_fib_id = outer_fib_id; a->teb = teb; clib_memcpy(&a->src, &src, sizeof(src)); clib_memcpy(&a->dst, &dst, sizeof(dst)); - rv = vnet_gre_add_del_tunnel (a, &sw_if_index); + if (is_add) + rv = vnet_gre_tunnel_add(a, &sw_if_index); + else + rv = vnet_gre_tunnel_delete(a, &sw_if_index); switch(rv) { @@ -255,14 +568,32 @@ show_gre_tunnel_command_fn (vlib_main_t * vm, { gre_main_t * gm = &gre_main; gre_tunnel_t * t; + u32 ti = ~0; if (pool_elts (gm->tunnels) == 0) vlib_cli_output (vm, "No GRE tunnels configured..."); - pool_foreach (t, gm->tunnels, - ({ - vlib_cli_output (vm, "%U", format_gre_tunnel, t); - })); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &ti)) + ; + else + break; + } + + if (~0 == ti) + { + pool_foreach (t, gm->tunnels, + ({ + vlib_cli_output (vm, "%U", format_gre_tunnel, t, 0); + })); + } + else + { + t = pool_elt_at_index(gm->tunnels, ti); + + vlib_cli_output (vm, "%U", format_gre_tunnel, t, 1); + } return 0; } @@ -275,6 +606,11 @@ VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = { /* force inclusion from application's main.c */ clib_error_t *gre_interface_init (vlib_main_t *vm) { + vec_add1(mpls_main.mpls_interface_state_change_callbacks, + gre_sw_interface_mpls_state_change); + + 
fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft); + return 0; } VLIB_INIT_FUNCTION(gre_interface_init); diff --git a/vnet/vnet/gre/node.c b/vnet/vnet/gre/node.c index d5ea4b65ddb..b55f5511916 100644 --- a/vnet/vnet/gre/node.c +++ b/vnet/vnet/gre/node.c @@ -18,6 +18,7 @@ #include <vlib/vlib.h> #include <vnet/pg/pg.h> #include <vnet/gre/gre.h> +#include <vnet/mpls/mpls.h> #include <vppinfra/sparse_vec.h> #define foreach_gre_input_next \ @@ -25,7 +26,8 @@ _(PUNT, "error-punt") \ _(DROP, "error-drop") \ _(ETHERNET_INPUT, "ethernet-input") \ _(IP4_INPUT, "ip4-input") \ -_(IP6_INPUT, "ip6-input") +_(IP6_INPUT, "ip6-input") \ +_(MPLS_INPUT, "mpls-input") typedef enum { #define _(s,n) GRE_INPUT_NEXT_##s, @@ -66,13 +68,17 @@ gre_input (vlib_main_t * vm, vlib_frame_t * from_frame) { gre_main_t * gm = &gre_main; + mpls_main_t * mm = &mpls_main; + ip4_main_t * ip4m = &ip4_main; gre_input_runtime_t * rt = (void *) node->runtime_data; __attribute__((unused)) u32 n_left_from, next_index, * from, * to_next; u64 cached_tunnel_key = (u64) ~0; - u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index; + u32 cached_tunnel_sw_if_index = 0, tunnel_sw_if_index = 0; u32 cached_tunnel_fib_index = 0, tunnel_fib_index; u32 cpu_index = os_get_cpu_number(); + u32 len; + vnet_interface_main_t *im = &gm->vnet_main->interface_main; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -141,7 +147,7 @@ gre_input (vlib_main_t * vm, /* Index sparse array with network byte order. */ protocol0 = h0->protocol; protocol1 = h1->protocol; - sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, + sparse_vec_index2 (rt->next_by_protocol, protocol0, protocol1, &i0, &i1); next0 = vec_elt(rt->next_by_protocol, i0); next1 = vec_elt(rt->next_by_protocol, i1); @@ -154,10 +160,10 @@ gre_input (vlib_main_t * vm, version1 = clib_net_to_host_u16 (h1->flags_and_version); verr1 = version1 & GRE_VERSION_MASK; - b0->error = verr0 ? 
node->errors[GRE_ERROR_UNSUPPORTED_VERSION] + b0->error = verr0 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b0->error; next0 = verr0 ? GRE_INPUT_NEXT_DROP : next0; - b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] + b1->error = verr1 ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b1->error; next1 = verr1 ? GRE_INPUT_NEXT_DROP : next1; @@ -176,7 +182,6 @@ gre_input (vlib_main_t * vm, gre_tunnel_t * t; uword * p; - ip4_main_t * ip4m = &ip4_main; p = hash_get (gm->tunnel_by_key, key); if (!p) { @@ -199,19 +204,56 @@ gre_input (vlib_main_t * vm, tunnel_sw_if_index = cached_tunnel_sw_if_index; tunnel_fib_index = cached_tunnel_fib_index; } + } + else if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_MPLS_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) | + (u64)(vnet_buffer(b0)->gre.src); + + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + mpls_gre_tunnel_t * t; + uword * p; - u32 len = vlib_buffer_length_in_chain (vm, b0); - vnet_interface_main_t *im = &gm->vnet_main->interface_main; - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, - tunnel_sw_if_index, - 1 /* packets */, - len /* bytes */); - - vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; - vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next0 = GRE_INPUT_NEXT_DROP; + b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop0; + } + t = pool_elt_at_index (mm->gre_tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } } + else + { + next0 = GRE_INPUT_NEXT_DROP; + goto drop0; + } 
+ len = vlib_buffer_length_in_chain (vm, b0); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; + vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; drop0: if (PREDICT_FALSE(next1 == GRE_INPUT_NEXT_IP4_INPUT @@ -227,7 +269,6 @@ drop0: gre_tunnel_t * t; uword * p; - ip4_main_t * ip4m = &ip4_main; p = hash_get (gm->tunnel_by_key, key); if (!p) { @@ -250,23 +291,62 @@ drop0: tunnel_sw_if_index = cached_tunnel_sw_if_index; tunnel_fib_index = cached_tunnel_fib_index; } + } + else if (PREDICT_TRUE(next1 == GRE_INPUT_NEXT_MPLS_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b1)->gre.dst) << 32) | + (u64)(vnet_buffer(b1)->gre.src); - u32 len = vlib_buffer_length_in_chain (vm, b1); - vnet_interface_main_t *im = &gm->vnet_main->interface_main; - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, - tunnel_sw_if_index, - 1 /* packets */, - len /* bytes */); - - vnet_buffer(b1)->sw_if_index[VLIB_TX] = tunnel_fib_index; - vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + mpls_gre_tunnel_t * t; + uword * p; + + ip4_main_t * ip4m = &ip4_main; + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next1 = GRE_INPUT_NEXT_DROP; + b1->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop1; + } + t = pool_elt_at_index (mm->gre_tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } } + else + { + next1 = 
GRE_INPUT_NEXT_DROP; + goto drop1; + } + len = vlib_buffer_length_in_chain (vm, b1); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b1)->sw_if_index[VLIB_TX] = tunnel_fib_index; + vnet_buffer(b1)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; + drop1: - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { - gre_rx_trace_t *tr = vlib_add_trace (vm, node, + gre_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); tr->tunnel_id = ~0; tr->length = ip0->length; @@ -274,9 +354,9 @@ drop1: tr->dst.as_u32 = ip0->dst_address.as_u32; } - if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED)) { - gre_rx_trace_t *tr = vlib_add_trace (vm, node, + gre_rx_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof (*tr)); tr->tunnel_id = ~0; tr->length = ip1->length; @@ -336,6 +416,7 @@ drop1: /* For IP payload we need to find source interface so we can increase counters and help forward node to pick right FIB */ + /* RPF check for ip4/ip6 input */ if (PREDICT_FALSE(next0 == GRE_INPUT_NEXT_IP4_INPUT || next0 == GRE_INPUT_NEXT_IP6_INPUT || next0 == GRE_INPUT_NEXT_ETHERNET_INPUT)) @@ -349,7 +430,6 @@ drop1: gre_tunnel_t * t; uword * p; - ip4_main_t * ip4m = &ip4_main; p = hash_get (gm->tunnel_by_key, key); if (!p) { @@ -372,26 +452,63 @@ drop1: tunnel_sw_if_index = cached_tunnel_sw_if_index; tunnel_fib_index = cached_tunnel_fib_index; } + } + else if (PREDICT_TRUE(next0 == GRE_INPUT_NEXT_MPLS_INPUT)) + { + u64 key = ((u64)(vnet_buffer(b0)->gre.dst) << 32) | + (u64)(vnet_buffer(b0)->gre.src); - u32 len = vlib_buffer_length_in_chain (vm, b0); - vnet_interface_main_t *im = &gm->vnet_main->interface_main; - vlib_increment_combined_counter (im->combined_sw_if_counters - + VNET_INTERFACE_COUNTER_RX, - cpu_index, - tunnel_sw_if_index, - 1 /* 
packets */, - len /* bytes */); - - vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; - vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; + if (cached_tunnel_key != key) + { + vnet_hw_interface_t * hi; + mpls_gre_tunnel_t * t; + uword * p; + + p = hash_get (gm->tunnel_by_key, key); + if (!p) + { + next0 = GRE_INPUT_NEXT_DROP; + b0->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL]; + goto drop; + } + t = pool_elt_at_index (mm->gre_tunnels, p[0]); + hi = vnet_get_hw_interface (gm->vnet_main, + t->hw_if_index); + tunnel_sw_if_index = hi->sw_if_index; + tunnel_fib_index = vec_elt (ip4m->fib_index_by_sw_if_index, + tunnel_sw_if_index); + + cached_tunnel_sw_if_index = tunnel_sw_if_index; + cached_tunnel_fib_index = tunnel_fib_index; + } + else + { + tunnel_sw_if_index = cached_tunnel_sw_if_index; + tunnel_fib_index = cached_tunnel_fib_index; + } + } + else + { + next0 = GRE_INPUT_NEXT_DROP; + goto drop; } + len = vlib_buffer_length_in_chain (vm, b0); + vlib_increment_combined_counter (im->combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + cpu_index, + tunnel_sw_if_index, + 1 /* packets */, + len /* bytes */); + + vnet_buffer(b0)->sw_if_index[VLIB_TX] = tunnel_fib_index; + vnet_buffer(b0)->sw_if_index[VLIB_RX] = tunnel_sw_if_index; drop: if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { gre_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_id = ~0; + tr->tunnel_id = tunnel_sw_if_index; tr->length = ip0->length; tr->src.as_u32 = ip0->src_address.as_u32; tr->dst.as_u32 = ip0->dst_address.as_u32; @@ -509,7 +626,7 @@ static clib_error_t * gre_input_init (vlib_main_t * vm) ASSERT(ip4_input); ip6_input = vlib_get_node_by_name (vm, (u8 *)"ip6-input"); ASSERT(ip6_input); - mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-gre-input"); + mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *)"mpls-input"); ASSERT(mpls_unicast_input); gre_register_input_protocol (vm, GRE_PROTOCOL_teb, diff --git a/vnet/vnet/handoff.c 
b/vnet/vnet/handoff.c index 67fc6417414..05eea0329b1 100644 --- a/vnet/vnet/handoff.c +++ b/vnet/vnet/handoff.c @@ -515,11 +515,11 @@ VLIB_REGISTER_NODE (handoff_dispatch_node) = { .n_next_nodes = HANDOFF_DISPATCH_N_NEXT, .next_nodes = { - [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop", - [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input", - [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum", - [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input", - [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-gre-input", + [HANDOFF_DISPATCH_NEXT_DROP] = "error-drop", + [HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input", + [HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum", + [HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input", + [HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input", }, }; /* *INDENT-ON* */ diff --git a/vnet/vnet/handoff.h b/vnet/vnet/handoff.h index 00832635a5a..9320f5602b5 100644 --- a/vnet/vnet/handoff.h +++ b/vnet/vnet/handoff.h @@ -20,7 +20,7 @@ #include <vnet/ethernet/ethernet.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> -#include <vnet/mpls-gre/packet.h> +#include <vnet/mpls/packet.h> typedef enum { diff --git a/vnet/vnet/interface.c b/vnet/vnet/interface.c index 772c3bca75f..595ed1432bc 100644 --- a/vnet/vnet/interface.c +++ b/vnet/vnet/interface.c @@ -449,8 +449,16 @@ vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index, mc_serialize (vm->mc_main, &vnet_sw_interface_set_flags_msg, &s); } - error = call_elf_section_interface_callbacks - (vnm, sw_if_index, flags, vnm->sw_interface_admin_up_down_functions); + /* set the flags now before invoking the registered clients + * so that the state they query is consistent with the state here notified */ + old_flags = si->flags; + si->flags &= ~mask; + si->flags |= flags; + if ((flags | old_flags) & VNET_SW_INTERFACE_FLAG_ADMIN_UP) + error = call_elf_section_interface_callbacks + (vnm, sw_if_index, flags, + vnm->sw_interface_admin_up_down_functions); + si->flags = 
old_flags; if (error) goto done; diff --git a/vnet/vnet/interface.h b/vnet/vnet/interface.h index 7738bb6edc7..9f032e987bb 100644 --- a/vnet/vnet/interface.h +++ b/vnet/vnet/interface.h @@ -459,7 +459,8 @@ typedef enum VNET_INTERFACE_COUNTER_RX_MISS = 5, VNET_INTERFACE_COUNTER_RX_ERROR = 6, VNET_INTERFACE_COUNTER_TX_ERROR = 7, - VNET_N_SIMPLE_INTERFACE_COUNTER = 8, + VNET_INTERFACE_COUNTER_MPLS = 8, + VNET_N_SIMPLE_INTERFACE_COUNTER = 9, /* Combined counters. */ VNET_INTERFACE_COUNTER_RX = 0, VNET_INTERFACE_COUNTER_TX = 1, diff --git a/vnet/vnet/interface_cli.c b/vnet/vnet/interface_cli.c index 7d828f54951..477716d4b97 100644 --- a/vnet/vnet/interface_cli.c +++ b/vnet/vnet/interface_cli.c @@ -45,6 +45,8 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vppinfra/bitmap.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> static int compare_interface_names (void *a1, void *a2) @@ -290,8 +292,8 @@ show_sw_interfaces (vlib_main_t * vm, fib_index6 = vec_elt (im6->fib_index_by_sw_if_index, si->sw_if_index); - fib4 = vec_elt_at_index (im4->fibs, fib_index4); - fib6 = vec_elt_at_index (im6->fibs, fib_index6); + fib4 = ip4_fib_get (fib_index4); + fib6 = ip6_fib_get (fib_index6); if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) vlib_cli_output diff --git a/vnet/vnet/interface_funcs.h b/vnet/vnet/interface_funcs.h index 81a819a64bd..735d47ec192 100644 --- a/vnet/vnet/interface_funcs.h +++ b/vnet/vnet/interface_funcs.h @@ -105,6 +105,7 @@ clib_error_t *vnet_create_sw_interface (vnet_main_t * vnm, void vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index); void vnet_delete_sw_interface (vnet_main_t * vnm, u32 sw_if_index); +int vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index); always_inline uword vnet_sw_interface_get_flags (vnet_main_t * vnm, u32 sw_if_index) diff --git a/vnet/vnet/ip/format.h b/vnet/vnet/ip/format.h index 4d73d6b1bf2..0d0eb6c9476 100644 --- a/vnet/vnet/ip/format.h +++ b/vnet/vnet/ip/format.h @@ -48,6 +48,12 @@ 
unformat_function_t unformat_ip_protocol; format_function_t format_tcp_udp_port; unformat_function_t unformat_tcp_udp_port; +typedef enum format_ip_adjacency_flags_t_ +{ + FORMAT_IP_ADJACENCY_NONE, + FORMAT_IP_ADJACENCY_DETAIL = (1 << 0), +} format_ip_adjacency_flags_t; + format_function_t format_ip_adjacency; format_function_t format_ip_adjacency_packet_data; diff --git a/vnet/vnet/ip/ip4.h b/vnet/vnet/ip/ip4.h index fc74e9d61ed..f9fe48687c1 100644 --- a/vnet/vnet/ip/ip4.h +++ b/vnet/vnet/ip/ip4.h @@ -47,10 +47,7 @@ typedef struct ip4_fib_t { /* Hash table for each prefix length mapping. */ - uword * adj_index_by_dst_address[33]; - - /* Temporary vectors for holding new/old values for hash_set. */ - uword * new_hash_values, * old_hash_values; + uword * fib_entry_by_dst_address[33]; /* Mtrie for fast lookups. Hash is used to maintain overlapping prefixes. */ ip4_fib_mtrie_t mtrie; @@ -62,7 +59,7 @@ typedef struct ip4_fib_t { u32 index; /* flow hash configuration */ - u32 flow_hash_config; + flow_hash_config_t flow_hash_config; /* N-tuple classifier indices */ u32 fwd_classify_table_index; @@ -72,22 +69,6 @@ typedef struct ip4_fib_t { struct ip4_main_t; -typedef void (ip4_add_del_route_function_t) - (struct ip4_main_t * im, - uword opaque, - ip4_fib_t * fib, - u32 flags, - ip4_address_t * address, - u32 address_length, - void * old_result, - void * new_result); - -typedef struct { - ip4_add_del_route_function_t * function; - uword required_flags; - uword function_opaque; -} ip4_add_del_route_callback_t; - typedef void (ip4_add_del_interface_address_function_t) (struct ip4_main_t * im, uword opaque, @@ -115,23 +96,20 @@ typedef struct ip4_main_t { ip_lookup_main_t lookup_main; /** Vector of FIBs. */ - ip4_fib_t * fibs; + struct fib_table_t_ * fibs; u32 fib_masks[33]; /** Table index indexed by software interface. 
*/ u32 * fib_index_by_sw_if_index; + /* IP4 enabled count by software interface */ + u8 * ip_enabled_by_sw_if_index; + /** Hash table mapping table id to fib index. ID space is not necessarily dense; index space is dense. */ uword * fib_index_by_table_id; - /** Vector of functions to call when routes are added/deleted. */ - ip4_add_del_route_callback_t * add_del_route_callbacks; - - /** Hash table mapping interface route rewrite adjacency index by sw if index. */ - uword * interface_route_adj_index_by_sw_if_index; - /** Functions to call when interface address changes. */ ip4_add_del_interface_address_callback_t * add_del_interface_address_callbacks; @@ -159,11 +137,15 @@ typedef struct ip4_main_t { u32 ip4_unicast_rx_feature_lookup; /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */ u32 ip4_unicast_rx_feature_source_and_port_range_check; + /** Built-in unicast feature path indice, see @ref ip_feature_init_cast() */ + u32 ip4_unicast_rx_feature_drop; /** Built-in multicast feature path index */ u32 ip4_multicast_rx_feature_vpath; /** Built-in multicast feature path index */ u32 ip4_multicast_rx_feature_lookup; + /** Built-in multicast feature path indices */ + u32 ip4_multicast_rx_feature_drop; /** Built-in unicast feature path index, see @ref ip_feature_init_cast() */ u32 ip4_unicast_tx_feature_source_and_port_range_check; @@ -235,30 +217,13 @@ extern vlib_node_registration_t ip4_lookup_node; extern vlib_node_registration_t ip4_rewrite_node; extern vlib_node_registration_t ip4_rewrite_local_node; extern vlib_node_registration_t ip4_arp_node; - -u32 ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, - u32 disable_default_route); - -always_inline u32 -ip4_fib_lookup_buffer (ip4_main_t * im, u32 fib_index, ip4_address_t * dst, - vlib_buffer_t * b) -{ - return ip4_fib_lookup_with_table (im, fib_index, dst, - /* disable_default_route */ 0); -} - -always_inline u32 -ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, 
ip4_address_t * dst) -{ - u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - return ip4_fib_lookup_with_table (im, fib_index, dst, - /* disable_default_route */ 0); -} +extern vlib_node_registration_t ip4_glean_node; +extern vlib_node_registration_t ip4_midchain_node; always_inline uword -ip4_destination_matches_route (ip4_main_t * im, - ip4_address_t * key, - ip4_address_t * dest, +ip4_destination_matches_route (const ip4_main_t * im, + const ip4_address_t * key, + const ip4_address_t * dest, uword dest_length) { return 0 == ((key->data_u32 ^ dest->data_u32) & im->fib_masks[dest_length]); } @@ -280,15 +245,26 @@ ip4_unaligned_destination_matches_route (ip4_main_t * im, { return 0 == ((clib_mem_unaligned (&key->data_u32, u32) ^ dest->data_u32) & im->fib_masks[dest_length]); } always_inline int -ip4_src_address_for_packet (ip4_main_t * im, vlib_buffer_t * p, ip4_address_t * src, u32 sw_if_index) +ip4_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, + ip4_address_t * src) { - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); - if (ia == NULL) - return -1; - ip4_address_t * a = ip_interface_address_get_address (lm, ia); - *src = a[0]; - return 0; + u32 if_add_index = + lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE(if_add_index != ~0)) { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index); + ip4_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + *src = *if_ip; + return 0; + } + else + { + ASSERT(0); + src->as_u32 = 0; + } + return (!0); } /* Find interface address which matches destination. 
*/ @@ -315,126 +291,20 @@ ip4_interface_address_matching_destination (ip4_main_t * im, ip4_address_t * dst return result; } +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia); + clib_error_t * ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, u32 is_del); -int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); - -/* Add/del a route to the FIB. */ - -#define IP4_ROUTE_FLAG_ADD (0 << 0) -#define IP4_ROUTE_FLAG_DEL (1 << 0) -#define IP4_ROUTE_FLAG_TABLE_ID (0 << 1) -#define IP4_ROUTE_FLAG_FIB_INDEX (1 << 1) -#define IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) -#define IP4_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) -/* Not last add/del in group. Facilities batching requests into packets. */ -#define IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) -/* Dynamic route created via ARP reply. */ -#define IP4_ROUTE_FLAG_NEIGHBOR (1 << 5) - -typedef struct { - /* IP4_ROUTE_FLAG_* */ - u32 flags; - - /* Either index of fib or table_id to hash and get fib. - IP4_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ - u32 table_index_or_table_id; - - /* Destination address (prefix) and length. */ - ip4_address_t dst_address; - u32 dst_address_length; - - /* Adjacency to use for this destination. */ - u32 adj_index; - - /* If specified adjacencies to add and then - use for this destination. add_adj/n_add_adj - are override adj_index if specified. */ - ip_adjacency_t * add_adj; - u32 n_add_adj; -} ip4_add_del_route_args_t; - -/** - * \brief Get or create an IPv4 fib. - * - * Get or create an IPv4 fib with the provided fib ID or index. - * The fib ID is a possibly-sparse user-defined value while - * the fib index defines the position of the fib in the fib vector. - * - * \param im - * ip4_main pointer. - * \param table_index_or_id - * The table index if \c IP4_ROUTE_FLAG_FIB_INDEX bit is set in \p flags. 
- * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked - * and can be retrieved with \c ret->table_id. - * Otherwise, the fib ID to be used to retrieve or create the desired fib. - * \param flags - * Indicates whether \p table_index_or_id is the fib index or ID. - * When the bit \c IP4_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id - * is considered as the fib index, and the fib ID otherwise. - * \returns A pointer to the retrieved or created fib. - * - * \remark When getting a fib with the fib index, the fib MUST already exist. - */ -ip4_fib_t * -find_ip4_fib_by_table_index_or_id (ip4_main_t * im, - u32 table_index_or_id, u32 flags); - -void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * args); - -void ip4_add_del_route_next_hop (ip4_main_t * im, - u32 flags, - ip4_address_t * dst_address, - u32 dst_address_length, - ip4_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index); - -u32 -ip4_route_get_next_hop_adj (ip4_main_t * im, - u32 fib_index, - ip4_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index); - -void * -ip4_get_route (ip4_main_t * im, - u32 fib_index_or_table_id, - u32 flags, - u8 * address, - u32 address_length); - void -ip4_foreach_matching_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length, - ip4_address_t ** results, - u8 ** result_lengths); - -void ip4_delete_matching_routes (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length); - -void ip4_maybe_remap_adjacencies (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags); - -void ip4_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index); +ip4_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable); -ip4_address_t * -ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, - 
ip_interface_address_t ** result_ia); +int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); /* Send an ARP request to see if given destination is reachable on given interface. */ clib_error_t * @@ -458,7 +328,7 @@ void ip4_register_protocol (u32 protocol, u32 node_index); serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; -int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config); +int vnet_set_ip4_flow_hash (u32 table_id, flow_hash_config_t flow_hash_config); void ip4_mtrie_init (ip4_fib_mtrie_t * m); @@ -468,7 +338,8 @@ int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, /* Compute flow hash. We'll use it to select which adjacency to use for this flow. And other things. */ always_inline u32 -ip4_compute_flow_hash (ip4_header_t * ip, u32 flow_hash_config) +ip4_compute_flow_hash (const ip4_header_t * ip, + flow_hash_config_t flow_hash_config) { tcp_header_t * tcp = (void *) (ip + 1); u32 a, b, c, t1, t2; diff --git a/vnet/vnet/ip/ip4_forward.c b/vnet/vnet/ip/ip4_forward.c index 751260a72ea..4c49d0e4916 100644 --- a/vnet/vnet/ip/ip4_forward.c +++ b/vnet/vnet/ip/ip4_forward.c @@ -39,668 +39,16 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> -/** for ethernet_header_t */ -#include <vnet/ethernet/ethernet.h> -/** for ethernet_arp_header_t */ -#include <vnet/ethernet/arp_packet.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */ #include <vnet/ppp/ppp.h> -/** for srp_hw_interface_class */ -#include <vnet/srp/srp.h> -/** for API error numbers */ -#include <vnet/api_errno.h> - -/** @file - vnet ip4 forwarding -*/ - -/* This is really, really simple but stupid fib. 
*/ -u32 -ip4_fib_lookup_with_table (ip4_main_t * im, u32 fib_index, - ip4_address_t * dst, - u32 disable_default_route) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip4_fib_t * fib = vec_elt_at_index (im->fibs, fib_index); - uword * p, * hash, key; - i32 i, i_min, dst_address, ai; - - i_min = disable_default_route ? 1 : 0; - dst_address = clib_mem_unaligned (&dst->data_u32, u32); - for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= i_min; i--) - { - hash = fib->adj_index_by_dst_address[i]; - if (! hash) - continue; - - key = dst_address & im->fib_masks[i]; - if ((p = hash_get (hash, key)) != 0) - { - ai = p[0]; - goto done; - } - } - - /* Nothing matches in table. */ - ai = lm->miss_adj_index; - - done: - return ai; -} - -/** @brief Create FIB from table ID and init all hashing. - @param im - @ref ip4_main_t - @param table_id - table ID - @return fib - @ref ip4_fib_t -*/ -static ip4_fib_t * -create_fib_with_table_id (ip4_main_t * im, u32 table_id) -{ - ip4_fib_t * fib; - hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); - vec_add2 (im->fibs, fib, 1); - fib->table_id = table_id; - fib->index = fib - im->fibs; - /* IP_FLOW_HASH_DEFAULT is net value of 5 tuple flags without "reverse" bit */ - fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - fib->fwd_classify_table_index = ~0; - fib->rev_classify_table_index = ~0; - ip4_mtrie_init (&fib->mtrie); - return fib; -} - -/** @brief Find existing or Create new FIB based on index - @param im @ref ip4_main_t - @param table_index_or_id - overloaded parameter referring - to the table or a table's index in the FIB vector - @param flags - used to check if table_index_or_id was a table or - an index (detected by @ref IP4_ROUTE_FLAG_FIB_INDEX) - @return either the existing or a new ip4_fib_t entry -*/ -ip4_fib_t * -find_ip4_fib_by_table_index_or_id (ip4_main_t * im, - u32 table_index_or_id, u32 flags) -{ - uword * p, fib_index; - - fib_index = table_index_or_id; - /* If this isn't a FIB_INDEX ... 
*/ - if (! (flags & IP4_ROUTE_FLAG_FIB_INDEX)) - { - /* If passed ~0 then request the next table available */ - if (table_index_or_id == ~0) { - table_index_or_id = 0; - while ((p = hash_get (im->fib_index_by_table_id, table_index_or_id))) { - table_index_or_id++; - } - /* Create the next table and return the ip4_fib_t associated with it */ - return create_fib_with_table_id (im, table_index_or_id); - } - /* A specific table_id was requested.. */ - p = hash_get (im->fib_index_by_table_id, table_index_or_id); - /* ... and if it doesn't exist create it else grab its index */ - if (! p) - return create_fib_with_table_id (im, table_index_or_id); - fib_index = p[0]; - } - /* Return the ip4_fib_t associated with this index */ - return vec_elt_at_index (im->fibs, fib_index); -} - -static void -ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, - ip4_fib_t * fib, - u32 address_length) -{ - hash_t * h; - uword max_index; - - ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); - lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) / sizeof (uword); - - fib->adj_index_by_dst_address[address_length] = - hash_create (32 /* elts */, lm->fib_result_n_words * sizeof (uword)); - - hash_set_flags (fib->adj_index_by_dst_address[address_length], - HASH_FLAG_NO_AUTO_SHRINK); - - h = hash_header (fib->adj_index_by_dst_address[address_length]); - max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; - - /* Initialize new/old hash value vectors. 
*/ - vec_validate_init_empty (fib->new_hash_values, max_index, ~0); - vec_validate_init_empty (fib->old_hash_values, max_index, ~0); -} - -static void -ip4_fib_set_adj_index (ip4_main_t * im, - ip4_fib_t * fib, - u32 flags, - u32 dst_address_u32, - u32 dst_address_length, - u32 adj_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - uword * hash; - - if (vec_bytes(fib->old_hash_values)) - memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); - if (vec_bytes(fib->new_hash_values)) - memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); - fib->new_hash_values[0] = adj_index; - - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - hash = _hash_set3 (hash, dst_address_u32, - fib->new_hash_values, - fib->old_hash_values); - - fib->adj_index_by_dst_address[dst_address_length] = hash; - - if (vec_len (im->add_del_route_callbacks) > 0) - { - ip4_add_del_route_callback_t * cb; - ip4_address_t d; - uword * p; - - d.data_u32 = dst_address_u32; - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags, - &d, dst_address_length, - fib->old_hash_values, - fib->new_hash_values); - - p = hash_get (hash, dst_address_u32); - /* hash_get should never return NULL here */ - if (p) - clib_memcpy (p, fib->new_hash_values, - vec_bytes (fib->new_hash_values)); - else - ASSERT(0); - } -} - -void ip4_add_del_route (ip4_main_t * im, ip4_add_del_route_args_t * a) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip4_fib_t * fib; - u32 dst_address, dst_address_length, adj_index, old_adj_index; - uword * hash, is_del; - ip4_add_del_route_callback_t * cb; - - /* Either create new adjacency or use given one depending on arguments. 
*/ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - } - else - adj_index = a->adj_index; - - dst_address = a->dst_address.data_u32; - dst_address_length = a->dst_address_length; - fib = find_ip4_fib_by_table_index_or_id (im, a->table_index_or_table_id, a->flags); - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - dst_address &= im->fib_masks[dst_address_length]; - - if (! fib->adj_index_by_dst_address[dst_address_length]) - ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; - - if (is_del) - { - fib->old_hash_values[0] = ~0; - hash = _hash_unset (hash, dst_address, fib->old_hash_values); - fib->adj_index_by_dst_address[dst_address_length] = hash; - - if (vec_len (im->add_del_route_callbacks) > 0 - && fib->old_hash_values[0] != ~0) /* make sure destination was found in hash */ - { - fib->new_hash_values[0] = ~0; - vec_foreach (cb, im->add_del_route_callbacks) - if ((a->flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, a->flags, - &a->dst_address, dst_address_length, - fib->old_hash_values, - fib->new_hash_values); - } - } - else - ip4_fib_set_adj_index (im, fib, a->flags, dst_address, dst_address_length, - adj_index); - - old_adj_index = fib->old_hash_values[0]; - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 - && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count --; - } - - ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, - is_del ? old_adj_index : adj_index, - is_del); - - /* Delete old adjacency index if present and changed. */ - if (! 
(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); -} - - -u32 -ip4_route_get_next_hop_adj (ip4_main_t * im, - u32 fib_index, - ip4_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - vnet_main_t * vnm = vnet_get_main(); - uword * nh_hash, * nh_result; - int is_interface_next_hop; - u32 nh_adj_index; - ip4_fib_t * fib; - - fib = vec_elt_at_index (im->fibs, fib_index); - - is_interface_next_hop = next_hop->data_u32 == 0; - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip4_adjacency_set_interface_route (vnm, adj, next_hop_sw_if_index, /* if_address_index */ ~0); - ip_call_add_del_adjacency_callbacks (lm, nh_adj_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, next_hop_sw_if_index, nh_adj_index); - } - } - else if (next_hop_sw_if_index == ~0) - { - /* next-hop is recursive. we always need a indirect adj - * for recursive paths. Any LPM we perform now will give - * us a valid adj, but without tracking the next-hop we - * have no way to keep it valid. - */ - ip_adjacency_t add_adj; - memset (&add_adj, 0, sizeof(add_adj)); - add_adj.n_adj = 1; - add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; - add_adj.indirect.next_hop.ip4.as_u32 = next_hop->as_u32; - add_adj.explicit_fib_index = explicit_fib_index; - ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); - } - else - { - nh_hash = fib->adj_index_by_dst_address[32]; - nh_result = hash_get (nh_hash, next_hop->data_u32); - - /* Next hop must be known. */ - if (! 
nh_result) - { - ip_adjacency_t * adj; - - /* no /32 exists, get the longest prefix match */ - nh_adj_index = ip4_fib_lookup_with_table (im, fib_index, - next_hop, 0); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ARP interface adjacency is present, we need to - install ARP adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip4.as_u32 == 0) - { - nh_adj_index = vnet_arp_glean_add(fib_index, next_hop); - } - } - else - { - nh_adj_index = *nh_result; - } - } - - return (nh_adj_index); -} - -void -ip4_add_del_route_next_hop (ip4_main_t * im, - u32 flags, - ip4_address_t * dst_address, - u32 dst_address_length, - ip4_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index) -{ - vnet_main_t * vnm = vnet_get_main(); - ip_lookup_main_t * lm = &im->lookup_main; - u32 fib_index; - ip4_fib_t * fib; - u32 dst_address_u32, old_mp_adj_index, new_mp_adj_index; - u32 dst_adj_index, nh_adj_index; - uword * dst_hash, * dst_result; - ip_adjacency_t * dst_adj; - ip_multipath_adjacency_t * old_mp, * new_mp; - int is_del = (flags & IP4_ROUTE_FLAG_DEL) != 0; - clib_error_t * error = 0; - - if (explicit_fib_index == (u32)~0) - fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); - else - fib_index = explicit_fib_index; - - fib = vec_elt_at_index (im->fibs, fib_index); - - /* Lookup next hop to be added or deleted. 
*/ - if (adj_index == (u32)~0) - { - nh_adj_index = ip4_route_get_next_hop_adj(im, fib_index, - next_hop, - next_hop_sw_if_index, - explicit_fib_index); - } - else - { - nh_adj_index = adj_index; - } - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - dst_address_u32 = dst_address->data_u32 & im->fib_masks[dst_address_length]; - - dst_hash = fib->adj_index_by_dst_address[dst_address_length]; - dst_result = hash_get (dst_hash, dst_address_u32); - if (dst_result) - { - dst_adj_index = dst_result[0]; - dst_adj = ip_get_adjacency (lm, dst_adj_index); - } - else - { - /* For deletes destination must be known. */ - if (is_del) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "unknown destination %U/%d", - format_ip4_address, dst_address, - dst_address_length); - goto done; - } - - dst_adj_index = ~0; - dst_adj = 0; - } - - /* Ignore adds of X/32 with next hop of X. */ - if (! is_del - && dst_address_length == 32 - && dst_address->data_u32 == next_hop->data_u32 - && adj_index != (u32)~0) - { - vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; - error = clib_error_return (0, "prefix matches next hop %U/%d", - format_ip4_address, dst_address, - dst_address_length); - goto done; - } - - /* Destination is not known and default weight is set so add route - to existing non-multipath adjacency */ - if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) - { - /* create / delete additional mapping of existing adjacency */ - ip4_add_del_route_args_t a; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? 
IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE - | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = nh_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip4_add_del_route (im, &a); - goto done; - } - - old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; - - if (! ip_multipath_adjacency_add_del_next_hop - (lm, is_del, - old_mp_adj_index, - nh_adj_index, - next_hop_weight, - &new_mp_adj_index)) - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; - error = clib_error_return (0, "requested deleting next-hop %U not found in multi-path", - format_ip4_address, next_hop); - goto done; - } - - old_mp = new_mp = 0; - if (old_mp_adj_index != ~0) - old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - if (new_mp_adj_index != ~0) - new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); - - if (old_mp != new_mp) - { - ip4_add_del_route_args_t a; - ip_adjacency_t * adj; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del && ! new_mp ? IP4_ROUTE_FLAG_DEL : IP4_ROUTE_FLAG_ADD) - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP4_ROUTE_FLAG_NO_REDISTRIBUTE | IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip4_add_del_route (im, &a); - - adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index); - if (adj->n_adj == 1) - adj->share_count += is_del ? 
-1 : 1; - } - - done: - if (error) - clib_error_report (error); -} - -void * -ip4_get_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - u8 * address, - u32 address_length) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - u32 dst_address = * (u32 *) address; - uword * hash, * p; - - ASSERT (address_length < ARRAY_LEN (im->fib_masks)); - dst_address &= im->fib_masks[address_length]; - - hash = fib->adj_index_by_dst_address[address_length]; - p = hash_get (hash, dst_address); - return (void *) p; -} - -void -ip4_foreach_matching_route (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length, - ip4_address_t ** results, - u8 ** result_lengths) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - u32 dst_address = address->data_u32; - u32 this_length = address_length; - - if (*results) - _vec_len (*results) = 0; - if (*result_lengths) - _vec_len (*result_lengths) = 0; - - while (this_length <= 32 && vec_len (results) == 0) - { - uword k, v; - hash_foreach (k, v, fib->adj_index_by_dst_address[this_length], ({ - if (0 == ((k ^ dst_address) & im->fib_masks[address_length])) - { - ip4_address_t a; - a.data_u32 = k; - vec_add1 (*results, a); - vec_add1 (*result_lengths, this_length); - } - })); - - this_length++; - } -} - -void ip4_maybe_remap_adjacencies (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags) -{ - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - ip_lookup_main_t * lm = &im->lookup_main; - u32 i, l; - ip4_address_t a; - ip4_add_del_route_callback_t * cb; - static ip4_address_t * to_delete; - - if (lm->n_adjacency_remaps == 0) - return; - - for (l = 0; l <= 32; l++) - { - hash_pair_t * p; - uword * hash = fib->adj_index_by_dst_address[l]; - - if (hash_elts (hash) == 0) - continue; - - if (to_delete) - _vec_len (to_delete) = 0; - - hash_foreach_pair 
(p, hash, ({ - u32 adj_index = p->value[0]; - u32 m = vec_elt (lm->adjacency_remap_table, adj_index); - - if (m) - { - /* Record destination address from hash key. */ - a.data_u32 = p->key; - - /* New adjacency points to nothing: so delete prefix. */ - if (m == ~0) - vec_add1 (to_delete, a); - else - { - /* Remap to new adjacency. */ - clib_memcpy (fib->old_hash_values, p->value, vec_bytes (fib->old_hash_values)); - - /* Set new adjacency value. */ - fib->new_hash_values[0] = p->value[0] = m - 1; - - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags | IP4_ROUTE_FLAG_ADD, - &a, l, - fib->old_hash_values, - fib->new_hash_values); - } - } - })); - - fib->new_hash_values[0] = ~0; - for (i = 0; i < vec_len (to_delete); i++) - { - hash = _hash_unset (hash, to_delete[i].data_u32, fib->old_hash_values); - vec_foreach (cb, im->add_del_route_callbacks) - if ((flags & cb->required_flags) == cb->required_flags) - cb->function (im, cb->function_opaque, - fib, flags | IP4_ROUTE_FLAG_DEL, - &a, l, - fib->old_hash_values, - fib->new_hash_values); - } - } - - /* Also remap adjacencies in mtrie. */ - ip4_mtrie_maybe_remap_adjacencies (lm, &fib->mtrie); - - /* Reset mapping table. */ - vec_zero (lm->adjacency_remap_table); - - /* All remaps have been performed. 
*/ - lm->n_adjacency_remaps = 0; -} - -void ip4_delete_matching_routes (ip4_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip4_address_t * address, - u32 address_length) -{ - static ip4_address_t * matching_addresses; - static u8 * matching_address_lengths; - u32 l, i; - ip4_add_del_route_args_t a; - - a.flags = IP4_ROUTE_FLAG_DEL | IP4_ROUTE_FLAG_NO_REDISTRIBUTE | flags; - a.table_index_or_table_id = table_index_or_table_id; - a.adj_index = ~0; - a.add_adj = 0; - a.n_add_adj = 0; - - for (l = address_length + 1; l <= 32; l++) - { - ip4_foreach_matching_route (im, table_index_or_table_id, flags, - address, - l, - &matching_addresses, - &matching_address_lengths); - for (i = 0; i < vec_len (matching_addresses); i++) - { - a.dst_address = matching_addresses[i]; - a.dst_address_length = matching_address_lengths[i]; - ip4_add_del_route (im, &a); - } - } - - ip4_maybe_remap_adjacencies (im, table_index_or_table_id, flags); -} +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vnet/api_errno.h> /* for API error numbers */ +#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */ +#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */ +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/classify_dpo.h> void ip4_forward_next_trace (vlib_main_t * vm, @@ -712,12 +60,10 @@ always_inline uword ip4_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets, - int is_indirect) + int lookup_for_responses_to_locally_received_packets) { ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -732,217 +78,194 @@ ip4_lookup_inline 
(vlib_main_t * vm, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t * p0, * p1; - ip4_header_t * ip0, * ip1; - __attribute__((unused)) tcp_header_t * tcp0, * tcp1; - ip_lookup_next_t next0, next1; - ip_adjacency_t * adj0, * adj1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_address_t * dst_addr0, *dst_addr1; - __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; - __attribute__((unused)) u32 pi1, fib_index1, adj_index1, is_tcp_udp1; - u32 flow_hash_config0, flow_hash_config1; + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + __attribute__((unused)) tcp_header_t * tcp0, * tcp1; + ip_lookup_next_t next0, next1; + const load_balance_t * lb0, * lb1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + ip4_address_t * dst_addr0, *dst_addr1; + __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0; + __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1; + flow_hash_config_t flow_hash_config0, flow_hash_config1; u32 hash_c0, hash_c1; - u32 wrong_next; + u32 wrong_next; + const dpo_id_t *dpo0, *dpo1; - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t * p2, * p3; - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); - } + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD); + } - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); - if (is_indirect) - { - ip_adjacency_t * iadj0, * iadj1; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip4; - dst_addr1 = &iadj1->indirect.next_hop.ip4; - } - else - { - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - } + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - if (! lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); - } - - tcp0 = (void *) (ip0 + 1); - tcp1 = (void *) (ip1 + 1); - - is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP - || ip0->protocol == IP_PROTOCOL_UDP); - is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP - || ip1->protocol == IP_PROTOCOL_UDP); - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); - } - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); - } - - if (! lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); - } - - if (lookup_for_responses_to_locally_received_packets) - { - adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; - } - else - { - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - } - - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - dst_addr0, - /* no_default_route */ 0)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - dst_addr1, - /* no_default_route */ 0)); - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; - - /* Use flow hash to compute multipath adjacency. */ + if (! lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; + + leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; + + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0); + } + + tcp0 = (void *) (ip0 + 1); + tcp1 = (void *) (ip1 + 1); + + is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP + || ip1->protocol == IP_PROTOCOL_UDP); + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); + } + + if (! lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); + } + + if (lookup_for_responses_to_locally_received_packets) + { + lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; + } + else + { + /* Handle default route. 
*/ + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1); + + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + } + + lb0 = load_balance_get (lb_index0); + lb1 = load_balance_get (lb_index1); + + /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; - if (PREDICT_FALSE (adj0->n_adj > 1)) + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; - hash_c0 = vnet_buffer (p0)->ip.flow_hash = + flow_hash_config0 = lb0->lb_hash_config; + hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); } - if (PREDICT_FALSE(adj1->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config1 = - vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = + flow_hash_config1 = lb1->lb_hash_config; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash (ip1, flow_hash_config1); } - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (hash_c0 & (adj0->n_adj - 1)); - adj_index1 += (hash_c1 & (adj1->n_adj - 1)); - - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; - - if (is_indirect) - { - /* ARP for next-hop not packet's destination address */ - if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip0->dst_address.as_u32 = dst_addr0->as_u32; - if (adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip1->dst_address.as_u32 = dst_addr1->as_u32; - } - - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, - vlib_buffer_length_in_chain (vm, p0) + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 
(lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + dpo1 = load_balance_get_bucket_i(lb1, + (hash_c1 & + (lb0->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + next1 = dpo1->dpoi_next_node; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lb_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + sizeof(ethernet_header_t)); - vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + vlib_increment_combined_counter + (cm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1) + sizeof(ethernet_header_t)); - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - wrong_next = (next0 != next) + 2*(next1 != next); - if (PREDICT_FALSE (wrong_next != 0)) - { - switch (wrong_next) - { - case 1: - /* A B A */ - to_next[-2] = pi1; - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - break; - - case 2: - /* A A B */ - to_next -= 1; - n_left_to_next += 1; - vlib_set_next_frame_buffer (vm, node, next1, pi1); - break; - - case 3: - /* A B C */ - to_next -= 2; - n_left_to_next += 2; - vlib_set_next_frame_buffer (vm, node, next0, pi0); - vlib_set_next_frame_buffer (vm, node, next1, pi1); - if (next0 == next1) - { - /* A B B */ - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next1; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - } - } - } - } + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + wrong_next = (next0 != next) + 2*(next1 != next); + if (PREDICT_FALSE (wrong_next != 0)) + { + switch (wrong_next) + { + case 1: + /* A B A */ + to_next[-2] = pi1; + to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + break; + + case 2: + /* A A B */ + 
to_next -= 1; + n_left_to_next += 1; + vlib_set_next_frame_buffer (vm, node, next1, pi1); + break; + + case 3: + /* A B C */ + to_next -= 2; + n_left_to_next += 2; + vlib_set_next_frame_buffer (vm, node, next0, pi0); + vlib_set_next_frame_buffer (vm, node, next1, pi1); + if (next0 == next1) + { + /* A B B */ + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next1; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + } + } + } + } while (n_left_from > 0 && n_left_to_next > 0) { @@ -950,12 +273,14 @@ ip4_lookup_inline (vlib_main_t * vm, ip4_header_t * ip0; __attribute__((unused)) tcp_header_t * tcp0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; + const load_balance_t *lb0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; ip4_address_t * dst_addr0; - __attribute__((unused)) u32 pi0, fib_index0, adj_index0, is_tcp_udp0; - u32 flow_hash_config0, hash_c0; + __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0; + flow_hash_config_t flow_hash_config0; + const dpo_id_t *dpo0; + u32 hash_c0; pi0 = from[0]; to_next[0] = pi0; @@ -964,16 +289,7 @@ ip4_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - if (is_indirect) - { - ip_adjacency_t * iadj0; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip4; - } - else - { - dst_addr0 = &ip0->dst_address; - } + dst_addr0 = &ip0->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? @@ -981,7 +297,7 @@ ip4_lookup_inline (vlib_main_t * vm, if (! 
lookup_for_responses_to_locally_received_packets) { - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get( fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -1003,50 +319,39 @@ ip4_lookup_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); if (lookup_for_responses_to_locally_received_packets) - adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; else { /* Handle default route. */ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); } - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - dst_addr0, - /* no_default_route */ 0)); - - adj0 = ip_get_adjacency (lm, adj_index0); - - next0 = adj0->lookup_next_index; + lb0 = load_balance_get (lbi0); /* Use flow hash to compute multipath adjacency. */ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; hash_c0 = vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash (ip0, flow_hash_config0); } - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (hash_c0 & (adj0->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); - if (is_indirect) - { - /* ARP for next-hop not packet's destination address */ - if (adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP) - ip0->dst_address.as_u32 = dst_addr0->as_u32; - } + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, - 
vlib_buffer_length_in_chain (vm, p0) - + sizeof(ethernet_header_t)); + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); from += 1; to_next += 1; @@ -1113,55 +418,135 @@ ip4_lookup (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_lookup_inline (vm, node, frame, - /* lookup_for_responses_to_locally_received_packets */ 0, - /* is_indirect */ 0); + /* lookup_for_responses_to_locally_received_packets */ 0); } -void ip4_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index) +static u8 * format_ip4_lookup_trace (u8 * s, va_list * args); + +VLIB_REGISTER_NODE (ip4_lookup_node) = { + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_lookup_trace, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup) + +always_inline uword +ip4_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ip_lookup_next_t n; - vnet_l3_packet_type_t packet_type; - u32 node_index; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); - if (hw->hw_class_index == ethernet_hw_interface_class.index - || hw->hw_class_index == srp_hw_interface_class.index) - { - /* - * We have a bit of a problem in this case. ip4-arp uses - * the rewrite_header.next_index to hand pkts to the - * indicated inteface output node. We can end up in - * ip4_rewrite_local, too, which also pays attention to - * rewrite_header.next index. Net result: a hack in - * ip4_rewrite_local... 
- */ - n = IP_LOOKUP_NEXT_ARP; - node_index = ip4_arp_node.index; - adj->if_address_index = if_address_index; - adj->arp.next_hop.ip4.as_u32 = 0; - ip46_address_reset(&adj->arp.next_hop); - packet_type = VNET_L3_PACKET_TYPE_ARP; - } - else + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace(vm, node, frame, VLIB_TX); + + while (n_left_from > 0) { - n = IP_LOOKUP_NEXT_REWRITE; - node_index = ip4_rewrite_node.index; - packet_type = VNET_L3_PACKET_TYPE_IP4; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const ip4_header_t *ip0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); } - adj->lookup_next_index = n; - vnet_rewrite_for_sw_interface - (vnm, - packet_type, - sw_if_index, - node_index, - 
VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + return frame->n_vectors; +} + +static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args); + +VLIB_REGISTER_NODE (ip4_load_balance_node) = { + .function = ip4_load_balance, + .name = "ip4-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + + .format_trace = format_ip4_forward_next_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance) + +/* get first interface address */ +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t * lm = &im->lookup_main; + ip_interface_address_t * ia = 0; + ip4_address_t * result = 0; + + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + if (result_ia) + *result_ia = result ? ia : 0; + return result; } static void @@ -1169,115 +554,160 @@ ip4_add_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, ip_interface_address_t * a) { - vnet_main_t * vnm = vnet_get_main(); ip_lookup_main_t * lm = &im->lookup_main; - ip_adjacency_t * adj; ip4_address_t * address = ip_interface_address_get_address (lm, a); - ip4_add_del_route_args_t x; - vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); - u32 classify_table_index; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP4_ROUTE_FLAG_ADD - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = a->address_length; - x.n_add_adj = 0; - x.add_adj = 0; + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; a->neighbor_probe_adj_index = ~0; - if (a->address_length < 32) - { - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - ip4_add_del_route (im, &x); - a->neighbor_probe_adj_index = x.adj_index; - } - - /* Add e.g. 1.1.1.1/32 as local to this host. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - - classify_table_index = ~0; + + if (pfx.fp_len < 32) + { + fib_node_index_t fei; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, /* No next-hop address */ + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + a->neighbor_probe_adj_index = fib_entry_get_adj(fei); + } + + pfx.fp_len = 32; + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) - classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; - if (classify_table_index != (u32) ~0) - { - adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj->classify.table_index = classify_table_index; - } - else - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - - adj->if_address_index = a - lm->if_address_pool; - adj->rewrite_header.sw_if_index = sw_if_index; - adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; - /* - * Local adjs are never to be rewritten. 
Spoofed pkts w/ src = dst = local - * fail an RPF-ish check, but still go thru the rewrite code... - */ - adj->rewrite_header.data_bytes = 0; + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_NULL; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP4, + classify_table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + } - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - x.dst_address_length = 32; - ip4_add_del_route (im, &x); + fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + &pfx.fp_addr, + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } static void -ip4_del_interface_routes (ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length) +ip4_del_interface_routes (ip4_main_t * im, + u32 fib_index, + ip4_address_t * address, + u32 address_length) { - ip4_add_del_route_args_t x; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP4_ROUTE_FLAG_DEL - | IP4_ROUTE_FLAG_FIB_INDEX - | IP4_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = address_length; - x.adj_index = ~0; - x.n_add_adj = 0; - x.add_adj = 0; - - if (address_length < 32) - ip4_add_del_route (im, &x); - - x.dst_address_length = 32; - ip4_add_del_route (im, &x); - - ip4_delete_matching_routes (im, - fib_index, - IP4_ROUTE_FLAG_FIB_INDEX, - address, - address_length); + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; + + if (pfx.fp_len < 32) + { + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); + } + + pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); } -typedef struct { - u32 sw_if_index; - ip4_address_t address; - u32 length; -} ip4_interface_address_t; +void +ip4_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable) +{ + vlib_main_t * vm = vlib_get_main(); + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + u32 lookup_feature_index; -static clib_error_t * -ip4_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip4_address_t * new_address, - u32 new_length, - u32 redistribute, - u32 insert_routes, - u32 is_del); + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + + for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++) + { + ip_config_main_t * cm = &lm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; + + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = 
cm->config_index_by_sw_if_index[sw_if_index]; + + if (cast == VNET_IP_RX_UNICAST_FEAT) + lookup_feature_index = im->ip4_unicast_rx_feature_lookup; + else + lookup_feature_index = im->ip4_multicast_rx_feature_lookup; + + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } +} static clib_error_t * ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, - u32 redistribute, - u32 insert_routes, u32 is_del) { vnet_main_t * vnm = vnet_get_main(); @@ -1292,9 +722,15 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); - /* When adding an address check that it does not conflict with an existing address. */ + /* FIXME-LATER + * there is no support for adj-fib handling in the presence of overlapping + * subnets on interfaces. Easy fix - disallow overlapping subnets, like + * most routers do. + */ if (! is_del) { + /* When adding an address check that it does not conflict + with an existing address. 
*/ ip_interface_address_t * ia; foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, @@ -1307,7 +743,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, format_ip4_address_and_length, address, address_length, format_ip4_address_and_length, x, ia->address_length, format_vnet_sw_if_index_name, vnm, sw_if_index); - })); + })); } elts_before = pool_elts (lm->if_address_pool); @@ -1322,18 +758,16 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, if (error) goto done; - if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) - { - if (is_del) - ip4_del_interface_routes (im, ip4_af.fib_index, address, - address_length); - - else - ip4_add_interface_routes (sw_if_index, - im, ip4_af.fib_index, - pool_elt_at_index - (lm->if_address_pool, if_address_index)); - } + ip4_sw_interface_enable_disable(sw_if_index, !is_del); + + if (is_del) + ip4_del_interface_routes (im, ip4_af.fib_index, address, + address_length); + else + ip4_add_interface_routes (sw_if_index, + im, ip4_af.fib_index, + pool_elt_at_index + (lm->if_address_pool, if_address_index)); /* If pool did not grow/shrink: add duplicate address. */ if (elts_before != pool_elts (lm->if_address_pool)) @@ -1358,48 +792,9 @@ ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, { return ip4_add_del_interface_address_internal (vm, sw_if_index, address, address_length, - /* redistribute */ 1, - /* insert_routes */ 1, is_del); } -static clib_error_t * -ip4_sw_interface_admin_up_down (vnet_main_t * vnm, - u32 sw_if_index, - u32 flags) -{ - ip4_main_t * im = &ip4_main; - ip_interface_address_t * ia; - ip4_address_t * a; - u32 is_admin_up, fib_index; - - /* Fill in lookup tables with default table (0). 
*/ - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - - vec_validate_init_empty (im->lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0); - - is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; - - fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - - foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */, - ({ - a = ip_interface_address_get_address (&im->lookup_main, ia); - if (is_admin_up) - ip4_add_interface_routes (sw_if_index, - im, fib_index, - ia); - else - ip4_del_interface_routes (im, fib_index, - a, ia->address_length); - })); - - return 0; -} - -VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); - /* Built-in ip4 unicast rx feature path definition */ VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = { .node_name = "ip4-inacl", @@ -1449,10 +844,17 @@ VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = { VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = { .node_name = "ip4-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0}, .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup, }; +VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = { + .node_name = "ip4-drop", + .runs_before = 0, /* not before any other features */ + .feature_index = &ip4_main.ip4_unicast_rx_feature_drop, +}; + + /* Built-in ip4 multicast rx feature path definition */ VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { .node_name = "vpath-input-ip4", @@ -1462,10 +864,16 @@ VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = { VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = { .node_name = "ip4-lookup-multicast", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0}, .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup, }; +VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = { + .node_name = "ip4-drop", + .runs_before = 0, /* 
last feature */ + .feature_index = &ip4_main.ip4_multicast_rx_feature_drop, +}; + static char * rx_feature_start_nodes[] = { "ip4-input", "ip4-input-no-checksum"}; @@ -1488,7 +896,6 @@ VNET_IP4_TX_FEATURE_INIT (interface_output, static) = { .feature_index = &ip4_main.ip4_tx_feature_interface_output, }; - static clib_error_t * ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) { @@ -1520,7 +927,7 @@ ip4_feature_init (vlib_main_t * vm, ip4_main_t * im) feature_start_nodes, feature_start_len, cast, - 1 /* is_ip4 */))) + VNET_L3_PACKET_TYPE_IP4))) return error; } @@ -1538,6 +945,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 ci, cast; u32 feature_index; + /* Fill in lookup tables with default table (0). */ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + for (cast = 0; cast < VNET_N_IP_FEAT; cast++) { ip_config_main_t * cm = &lm->feature_config_mains[cast]; @@ -1547,9 +957,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, ci = cm->config_index_by_sw_if_index[sw_if_index]; if (cast == VNET_IP_RX_UNICAST_FEAT) - feature_index = im->ip4_unicast_rx_feature_lookup; + feature_index = im->ip4_unicast_rx_feature_drop; else if (cast == VNET_IP_RX_MULTICAST_FEAT) - feature_index = im->ip4_multicast_rx_feature_lookup; + feature_index = im->ip4_multicast_rx_feature_drop; else feature_index = im->ip4_tx_feature_interface_output; @@ -1560,14 +970,16 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, /* config data */ 0, /* # bytes of config data */ 0); else - ci = vnet_config_del_feature (vm, vcm, - ci, - feature_index, - /* config data */ 0, - /* # bytes of config data */ 0); - + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index) + im->ip_enabled_by_sw_if_index[sw_if_index] = 0; + } cm->config_index_by_sw_if_index[sw_if_index] = ci; - /* + /* * note: do not update the tx feature count here. 
*/ } @@ -1577,44 +989,6 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); -static u8 * format_ip4_lookup_trace (u8 * s, va_list * args); - -VLIB_REGISTER_NODE (ip4_lookup_node) = { - .function = ip4_lookup, - .name = "ip4-lookup", - .vector_size = sizeof (u32), - - .format_trace = format_ip4_lookup_trace, - - .n_next_nodes = IP4_LOOKUP_N_NEXT, - .next_nodes = IP4_LOOKUP_NEXT_NODES, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); - -static uword -ip4_indirect (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return ip4_lookup_inline (vm, node, frame, - /* lookup_for_responses_to_locally_received_packets */ 0, - /* is_indirect */ 1); -} - -VLIB_REGISTER_NODE (ip4_indirect_node) = { - .function = ip4_indirect, - .name = "ip4-indirect", - .vector_size = sizeof (u32), - .sibling_of = "ip4-lookup", - .format_trace = format_ip4_lookup_trace, - - .n_next_nodes = 0, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_indirect_node, ip4_indirect); - - /* Global IP4 main. */ ip4_main_t ip4_main; @@ -1636,11 +1010,11 @@ ip4_lookup_init (vlib_main_t * vm) im->fib_masks[i] = clib_host_to_net_u32 (m); } - /* Create FIB with index 0 and table id of 0. */ - find_ip4_fib_by_table_index_or_id (im, /* table id */ 0, IP4_ROUTE_FLAG_TABLE_ID); - ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); + /* Create FIB with index 0 and table id of 0. 
*/ + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0); + { pg_node_t * pn; pn = pg_get_node (ip4_lookup_node.index); @@ -1708,12 +1082,12 @@ static u8 * format_ip4_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; uword indent = format_get_indent (s); s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header, t->packet_data); @@ -1726,16 +1100,16 @@ static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im = &ip4_main; uword indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, + vnm, t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1863,12 +1237,6 @@ ip4_punt (vlib_main_t * vm, vlib_frame_t * frame) { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); } -static uword -ip4_miss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_DST_LOOKUP_MISS); } - VLIB_REGISTER_NODE (ip4_drop_node,static) = { .function = ip4_drop, .name = "ip4-drop", @@ -1882,7 +1250,7 @@ VLIB_REGISTER_NODE 
(ip4_drop_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop) VLIB_REGISTER_NODE (ip4_punt_node,static) = { .function = ip4_punt, @@ -1897,22 +1265,7 @@ VLIB_REGISTER_NODE (ip4_punt_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt); - -VLIB_REGISTER_NODE (ip4_miss_node,static) = { - .function = ip4_miss, - .name = "ip4-miss", - .vector_size = sizeof (u32), - - .format_trace = format_ip4_forward_next_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_miss_node, ip4_miss); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt) /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 @@ -2009,26 +1362,27 @@ ip4_local (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t * p0, * p1; - ip4_header_t * ip0, * ip1; - udp_header_t * udp0, * udp1; - ip4_fib_mtrie_t * mtrie0, * mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip_adjacency_t * adj0, * adj1; - u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, adj_index0; - u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, adj_index1; - i32 len_diff0, len_diff1; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u8 enqueue_code; + { + vlib_buffer_t * p0, * p1; + ip4_header_t * ip0, * ip1; + udp_header_t * udp0, * udp1; + ip4_fib_mtrie_t * mtrie0, * mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; + u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0; + u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1; + i32 len_diff0, len_diff1; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; + u8 enqueue_code; - pi0 = to_next[0] = from[0]; - pi1 = 
to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); @@ -2041,8 +1395,8 @@ ip4_local (vlib_main_t * vm, fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer(p1)->sw_if_index[VLIB_RX]); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -2130,41 +1484,42 @@ ip4_local (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); + leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie1->default_leaf : leaf1); - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - - vnet_buffer (p1)->ip.adj_index[VLIB_RX] = adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - /* no_default_route */ 1)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - &ip1->src_address, - /* no_default_route */ 1)); + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1; - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); + lb0 = load_balance_get(lbi0); + lb1 = load_balance_get(lbi1); + dpo0 = load_balance_get_bucket_i(lb0, 0); + dpo1 = load_balance_get_bucket_i(lb1, 0); /* * Must have a route to source otherwise we drop the packet. * ip4 broadcasts are accepted, e.g. to make dhcp client work */ error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && dpo0->dpoi_type != DPO_ADJACENCY + && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE && ip0->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + error0 = (dpo0->dpoi_type == DPO_RECEIVE ? 
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error0); error1 = (error1 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj1->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj1->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj1->lookup_next_index != IP_LOOKUP_NEXT_LOCAL - && ip0->dst_address.as_u32 != 0xFFFFFFFF + && dpo1->dpoi_type != DPO_ADJACENCY + && dpo1->dpoi_type != DPO_ADJACENCY_INCOMPLETE + && ip1->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error1); + error1 = (dpo0->dpoi_type == DPO_RECEIVE ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error1); next0 = lm->local_next_by_ip_protocol[proto0]; next1 = lm->local_next_by_ip_protocol[proto1]; @@ -2220,11 +1575,12 @@ ip4_local (vlib_main_t * vm, udp_header_t * udp0; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; - ip_adjacency_t * adj0; - u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, adj_index0; + u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0; i32 len_diff0; u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - + load_balance_t *lb0; + const dpo_id_t *dpo0; + pi0 = to_next[0] = from[0]; from += 1; n_left_from -= 1; @@ -2238,7 +1594,7 @@ ip4_local (vlib_main_t * vm, fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer(p0)->sw_if_index[VLIB_RX]); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -2296,24 +1652,30 @@ ip4_local (vlib_main_t * vm, : error0); leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? 
mtrie0->default_leaf : leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - /* no_default_route */ 1)); + lb0 = load_balance_get(lbi0); + dpo0 = load_balance_get_bucket_i(lb0, 0); - adj0 = ip_get_adjacency (lm, adj_index0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = + dpo0->dpoi_index; /* Must have a route to source otherwise we drop the packet. */ error0 = (error0 == IP4_ERROR_UNKNOWN_PROTOCOL - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE - && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_LOCAL + && dpo0->dpoi_type != DPO_ADJACENCY + && dpo0->dpoi_type != DPO_ADJACENCY_INCOMPLETE + && dpo0->dpoi_type != DPO_RECEIVE && ip0->dst_address.as_u32 != 0xFFFFFFFF ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + /* Packet originated from a local address => spoofing */ + error0 = (dpo0->dpoi_type == DPO_RECEIVE ? 
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : + error0); next0 = lm->local_next_by_ip_protocol[proto0]; @@ -2356,7 +1718,7 @@ VLIB_REGISTER_NODE (ip4_local_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local) void ip4_register_protocol (u32 protocol, u32 node_index) { @@ -2394,10 +1756,11 @@ VLIB_CLI_COMMAND (show_ip_local, static) = { .short_help = "Show ip local protocol table", }; -static uword -ip4_arp (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +ip4_arp_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_glean) { vnet_main_t * vnm = vnet_get_main(); ip4_main_t * im = &ip4_main; @@ -2441,12 +1804,11 @@ ip4_arp (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next_drop > 0) { + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + ip_adjacency_t * adj0; vlib_buffer_t * p0; ip4_header_t * ip0; - ethernet_header_t * eh0; - u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; uword bm0; - ip_adjacency_t * adj0; pi0 = from[0]; @@ -2456,35 +1818,10 @@ ip4_arp (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); ip0 = vlib_buffer_get_current (p0); - /* If packet destination is not local, send ARP to next hop */ - if (adj0->arp.next_hop.ip4.as_u32) - ip0->dst_address.data_u32 = adj0->arp.next_hop.ip4.as_u32; - - /* - * if ip4_rewrite_local applied the IP_LOOKUP_NEXT_ARP - * rewrite to this packet, we need to skip it here. - * Note, to distinguish from src IP addr *.8.6.*, we - * check for a bcast eth dest instead of IPv4 version. 
- */ - eh0 = (ethernet_header_t*)ip0; - if ((ip0->ip_version_and_header_length & 0xF0) != 0x40) - { - u32 vlan_num = 0; - u16 * etype = &eh0->type; - while ((*etype == clib_host_to_net_u16 (0x8100)) //dot1q - || (*etype == clib_host_to_net_u16 (0x88a8)))//dot1ad - { - vlan_num += 1; - etype += 2; //vlan tag also 16 bits, same as etype - } - if (*etype == clib_host_to_net_u16 (0x0806)) //arp - { - vlib_buffer_advance ( - p0, sizeof(ethernet_header_t) + (4*vlan_num)); - ip0 = vlib_buffer_get_current (p0); - } - } - + /* + * this is the Glean case, so we are ARPing for the + * packet's destination + */ a0 = hash_seeds[0]; b0 = hash_seeds[1]; c0 = hash_seeds[2]; @@ -2492,7 +1829,14 @@ ip4_arp (vlib_main_t * vm, sw_if_index0 = adj0->rewrite_header.sw_if_index; vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; - a0 ^= ip0->dst_address.data_u32; + if (is_glean) + { + a0 ^= ip0->dst_address.data_u32; + } + else + { + a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32; + } b0 ^= sw_if_index0; hash_v3_finalize32 (a0, b0, c0); @@ -2522,10 +1866,11 @@ ip4_arp (vlib_main_t * vm, * Can happen if the control-plane is programming tables * with traffic flowing; at least that's today's lame excuse. */ - if (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP) - { - p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; - } + if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) || + (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)) + { + p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; + } else /* Send ARP request. 
*/ { @@ -2545,15 +1890,32 @@ ip4_arp (vlib_main_t * vm, clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address, sizeof (h0->ip4_over_ethernet[0].ethernet)); - if (ip4_src_address_for_packet (im, p0, &h0->ip4_over_ethernet[0].ip4, sw_if_index0)) { - //No source address available - p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; - vlib_buffer_free(vm, &bi0, 1); - continue; + if (is_glean) + { + /* The interface's source address is stashed in the Glean Adj */ + h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4; + + /* Copy in destination address we are requesting. This is the + * glean case, so it's the packet's destination.*/ + h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; } + else + { + /* Src IP address in ARP header. */ + if (ip4_src_address_for_packet(lm, sw_if_index0, + &h0->ip4_over_ethernet[0].ip4)) + { + /* No source address available */ + p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; + vlib_buffer_free(vm, &bi0, 1); + continue; + } - /* Copy in destination address we are requesting. 
*/ - h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32; + /* Copy in destination address we are requesting from the + incomplete adj */ + h0->ip4_over_ethernet[1].ip4.data_u32 = + adj0->sub_type.nbr.next_hop.ip4.as_u32; + } vlib_buffer_copy_trace_flag (vm, p0, bi0); b0 = vlib_get_buffer (vm, bi0); @@ -2571,6 +1933,22 @@ ip4_arp (vlib_main_t * vm, return frame->n_vectors; } +static uword +ip4_arp (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip4_arp_inline(vm, node, frame, 0)); +} + +static uword +ip4_glean (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip4_arp_inline(vm, node, frame, 1)); +} + static char * ip4_arp_error_strings[] = { [IP4_ARP_ERROR_DROP] = "address overflow drops", [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent", @@ -2596,6 +1974,22 @@ VLIB_REGISTER_NODE (ip4_arp_node) = { }, }; +VLIB_REGISTER_NODE (ip4_glean_node) = { + .function = ip4_glean, + .name = "ip4-glean", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; + #define foreach_notrace_ip4_arp_error \ _(DROP) \ _(REQUEST_SENT) \ @@ -2720,7 +2114,7 @@ ip4_rewrite_inline (vlib_main_t * vm, u32 pi1, rw_len1, next1, error1, checksum1, adj_index1; u32 next0_override, next1_override; u32 tx_sw_if_index0, tx_sw_if_index1; - + if (rewrite_for_locally_received_packets) next0_override = next1_override = 0; @@ -2818,21 +2212,9 @@ ip4_rewrite_inline (vlib_main_t * vm, if (rewrite_for_locally_received_packets) { - /* - * If someone sends e.g. an icmp4 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - * The local adj rewrite data is 0xfefe on purpose. - * Bad engineer, no donut for you. 
- */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; if (PREDICT_FALSE(adj0->lookup_next_index == IP_LOOKUP_NEXT_ARP)) next0_override = IP4_REWRITE_NEXT_ARP; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error1 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; if (PREDICT_FALSE(adj1->lookup_next_index == IP_LOOKUP_NEXT_ARP)) next1_override = IP4_REWRITE_NEXT_ARP; @@ -2869,14 +2251,14 @@ ip4_rewrite_inline (vlib_main_t * vm, */ if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0-sizeof(ethernet_header_t)); if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index1, /* packet increment */ 0, /* byte increment */ rw_len1-sizeof(ethernet_header_t)); @@ -2945,7 +2327,7 @@ ip4_rewrite_inline (vlib_main_t * vm, u32 pi0, rw_len0, adj_index0, next0, error0, checksum0; u32 next0_override; u32 tx_sw_if_index0; - + if (rewrite_for_locally_received_packets) next0_override = 0; @@ -3000,15 +2382,6 @@ ip4_rewrite_inline (vlib_main_t * vm, if (rewrite_for_locally_received_packets) { - /* - * If someone sends e.g. an icmp4 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - * The local adj rewrite data is 0xfefe on purpose. - * Bad engineer, no donut for you. - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP4_ERROR_SPOOFED_LOCAL_PACKETS; /* * We have to override the next_index in ARP adjacencies, * because they're set up for ip4-arp, not this node... 
@@ -3028,7 +2401,7 @@ ip4_rewrite_inline (vlib_main_t * vm, if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) vlib_increment_combined_counter - (&lm->adjacency_counters, + (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0-sizeof(ethernet_header_t)); @@ -3172,6 +2545,15 @@ ip4_rewrite_local (vlib_main_t * vm, /* rewrite_for_locally_received_packets */ 1); } +static uword +ip4_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + VLIB_REGISTER_NODE (ip4_rewrite_node) = { .function = ip4_rewrite_transit, .name = "ip4-rewrite-transit", @@ -3187,7 +2569,23 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit) + +VLIB_REGISTER_NODE (ip4_midchain_node) = { + .function = ip4_midchain, + .name = "ip4-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ARP] = "ip4-arp", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain) VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { .function = ip4_rewrite_local, @@ -3201,7 +2599,7 @@ VLIB_REGISTER_NODE (ip4_rewrite_local_node) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local) static clib_error_t * add_del_interface_table (vlib_main_t * vm, @@ -3232,13 +2630,18 @@ add_del_interface_table (vlib_main_t * vm, { ip4_main_t * im = &ip4_main; - ip4_fib_t * fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); - - if (fib) - { - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - 
im->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + u32 fib_index; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, + table_id); + + // + // FIXME-LATER + // changing an interface's table has consequences for any connecteds + // and adj-fibs already installed. + // + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + im->fib_index_by_sw_if_index[sw_if_index] = fib_index; } done: @@ -3272,8 +2675,7 @@ ip4_lookup_multicast (vlib_main_t * vm, vlib_frame_t * frame) { ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -3290,12 +2692,12 @@ ip4_lookup_multicast (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { vlib_buffer_t * p0, * p1; - u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + u32 pi0, pi1, lb_index0, lb_index1, wrong_next; ip_lookup_next_t next0, next1; ip4_header_t * ip0, * ip1; - ip_adjacency_t * adj0, * adj1; u32 fib_index0, fib_index1; - u32 flow_hash_config0, flow_hash_config1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t * lb0, * lb1; /* Prefetch next iteration. */ { @@ -3327,46 +2729,44 @@ ip4_lookup_multicast (vlib_main_t * vm, fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, - &ip0->dst_address, p0); - adj_index1 = ip4_fib_lookup_buffer (im, fib_index1, - &ip1->dst_address, p1); - - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; + lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), + &ip0->dst_address); + lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1), + &ip1->dst_address); - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + lb0 = load_balance_get (lb_index0); + lb1 = load_balance_get (lb_index1); - flow_hash_config1 = - vec_elt_at_index (im->fibs, fib_index1)->flow_hash_config; + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash - (ip0, flow_hash_config0); + (ip0, lb0->lb_hash_config); vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash - (ip1, flow_hash_config1); + (ip1, lb1->lb_hash_config); - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); - adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); + dpo1 = load_balance_get_bucket_i(lb1, + (vnet_buffer (p1)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + next1 = dpo1->dpoi_next_node; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; if (1) /* $$$$$$ HACK FIXME */ 
vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0)); if (1) /* $$$$$$ HACK FIXME */ vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + (cm, cpu_index, lb_index1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; @@ -3415,11 +2815,11 @@ ip4_lookup_multicast (vlib_main_t * vm, { vlib_buffer_t * p0; ip4_header_t * ip0; - u32 pi0, adj_index0; + u32 pi0, lb_index0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; u32 fib_index0; - u32 flow_hash_config0; + const dpo_id_t *dpo0; + const load_balance_t * lb0; pi0 = from[0]; to_next[0] = pi0; @@ -3433,28 +2833,27 @@ ip4_lookup_multicast (vlib_main_t * vm, fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; - adj_index0 = ip4_fib_lookup_buffer (im, fib_index0, - &ip0->dst_address, p0); - - adj0 = ip_get_adjacency (lm, adj_index0); + lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), + &ip0->dst_address); - next0 = adj0->lookup_next_index; + lb0 = load_balance_get (lb_index0); - flow_hash_config0 = - vec_elt_at_index (im->fibs, fib_index0)->flow_hash_config; + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); - vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); + vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash + (ip0, lb0->lb_hash_config); - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1))); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; if (1) /* $$$$$$ HACK FIXME */ vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lb_index0, 1, vlib_buffer_length_in_chain (vm, p0)); 
from += 1; @@ -3494,7 +2893,7 @@ VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = { .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast); +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast) VLIB_REGISTER_NODE (ip4_multicast_node,static) = { .function = ip4_drop, @@ -3511,12 +2910,11 @@ VLIB_REGISTER_NODE (ip4_multicast_node,static) = { int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0) { - ip4_main_t * im = &ip4_main; ip4_fib_mtrie_t * mtrie0; ip4_fib_mtrie_leaf_t leaf0; - u32 adj_index0; + u32 lbi0; - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0); @@ -3527,11 +2925,9 @@ int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0) /* Handle default route. */ leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - return adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - a, - /* no_default_route */ 0); + return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a); } static clib_error_t * @@ -3595,7 +2991,7 @@ int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) if (p == 0) return VNET_API_ERROR_NO_SUCH_FIB; - fib = vec_elt_at_index (im4->fibs, p[0]); + fib = ip4_fib_get (p[0]); fib->flow_hash_config = flow_hash_config; return 0; @@ -3719,44 +3115,3 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = { .function = set_ip_classify_command_fn, }; - -#define TEST_CODE 1 -#if TEST_CODE > 0 - -static clib_error_t * -set_interface_output_feature_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - u32 sw_if_index = ~0; - int is_add = 1; - ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = 
&im->lookup_main; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index)) - ; - else if (unformat (input, "del")) - is_add = 0; - else - break; - } - - if (sw_if_index == ~0) - return clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - - lm->tx_sw_if_has_ip_output_features = - clib_bitmap_set (lm->tx_sw_if_has_ip_output_features, sw_if_index, is_add); - - return 0; -} - -VLIB_CLI_COMMAND (set_interface_output_feature, static) = { - .path = "set interface output feature", - .function = set_interface_output_feature_command_fn, - .short_help = "set interface output feature <intfc>", -}; -#endif /* TEST_CODE */ diff --git a/vnet/vnet/ip/ip4_mtrie.c b/vnet/vnet/ip/ip4_mtrie.c index 006610a0f4e..364182415ba 100644 --- a/vnet/vnet/ip/ip4_mtrie.c +++ b/vnet/vnet/ip/ip4_mtrie.c @@ -38,6 +38,7 @@ */ #include <vnet/ip/ip.h> +#include <vnet/fib/fib_entry.h> static void ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len) @@ -401,21 +402,27 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, unset_leaf (m, &a, root_ply, 0); /* Find next less specific route and insert into mtrie. */ - for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--) + for (i = dst_address_length - 1; i >= 1; i--) { uword * p; + index_t lbi; ip4_address_t key; - if (! fib->adj_index_by_dst_address[i]) + if (! 
fib->fib_entry_by_dst_address[i]) continue; key.as_u32 = dst_address.as_u32 & im->fib_masks[i]; - p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32); + p = hash_get (fib->fib_entry_by_dst_address[i], key.as_u32); if (p) { + lbi = fib_entry_contribute_ip_forwarding(p[0])->dpoi_index; + if (INDEX_INVALID == lbi) + continue; + a.dst_address = key; + a.adj_index = lbi; a.dst_address_length = i; - a.adj_index = p[0]; + set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0); break; } @@ -424,65 +431,6 @@ ip4_fib_mtrie_add_del_route (ip4_fib_t * fib, } } -always_inline uword -maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p) -{ - ip4_fib_mtrie_leaf_t l = p[0]; - uword was_remapped_to_empty_leaf = 0; - if (ip4_fib_mtrie_leaf_is_terminal (l)) - { - u32 adj_index = ip4_fib_mtrie_leaf_get_adj_index (l); - u32 m = vec_elt (lm->adjacency_remap_table, adj_index); - if (m) - { - was_remapped_to_empty_leaf = m == ~0; - - /* - * The intent of the original form - which dates to 2013 or - * earlier - is not obvious. Here's the original: - * - * if (was_remapped_to_empty_leaf) - * p[0] = (was_remapped_to_empty_leaf - * ? IP4_FIB_MTRIE_LEAF_EMPTY - * : ip4_fib_mtrie_leaf_set_adj_index (m - 1)); - * - * Notice the outer "if (was_remapped_to_empty_leaf)" - * means that p[0] is always set to IP4_FIB_MTRIE_LEAF_EMPTY, - * and is otherwise left intact. - * - * It seems unlikely that the adjacency mapping scheme - * works in detail. Coverity correctly complains that the - * else-case of the original ternary expression is dead code. 
- */ - if (was_remapped_to_empty_leaf) - p[0] = IP4_FIB_MTRIE_LEAF_EMPTY; - } - } - return was_remapped_to_empty_leaf; -} - -static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply) -{ - u32 n_remapped_to_empty = 0; - u32 i; - for (i = 0; i < ARRAY_LEN (ply->leaves); i++) - n_remapped_to_empty += maybe_remap_leaf (lm, &ply->leaves[i]); - if (n_remapped_to_empty > 0) - { - ASSERT (n_remapped_to_empty <= ply->n_non_empty_leafs); - ply->n_non_empty_leafs -= n_remapped_to_empty; - if (ply->n_non_empty_leafs == 0) - os_panic (); - } -} - -void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m) -{ - ip4_fib_mtrie_ply_t * ply; - pool_foreach (ply, m->ply_pool, maybe_remap_ply (lm, ply)); - maybe_remap_leaf (lm, &m->default_leaf); -} - /* Returns number of bytes of memory used by mtrie. */ static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p) { diff --git a/vnet/vnet/ip/ip4_mtrie.h b/vnet/vnet/ip/ip4_mtrie.h index 31de41e14fa..c49937d6814 100644 --- a/vnet/vnet/ip/ip4_mtrie.h +++ b/vnet/vnet/ip/ip4_mtrie.h @@ -51,7 +51,7 @@ 1 => empty (adjacency index of zero is special miss adjacency). */ typedef u32 ip4_fib_mtrie_leaf_t; -#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*IP_LOOKUP_MISS_ADJ_INDEX) +#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) #define IP4_FIB_MTRIE_LEAF_ROOT (0 + 2*0) always_inline u32 ip4_fib_mtrie_leaf_is_empty (ip4_fib_mtrie_leaf_t n) @@ -115,6 +115,9 @@ typedef struct { - 1 * sizeof (i32)]; } ip4_fib_mtrie_ply_t; +_Static_assert(0 == sizeof(ip4_fib_mtrie_ply_t) % CLIB_CACHE_LINE_BYTES, + "IP4 Mtrie ply cache line"); + typedef struct { /* Pool of plies. Index zero is root ply. */ ip4_fib_mtrie_ply_t * ply_pool; @@ -136,15 +139,13 @@ void ip4_fib_mtrie_add_del_route (struct ip4_fib_t * f, /* Returns adjacency index. 
*/ u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst); -void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m); - format_function_t format_ip4_fib_mtrie; /* Lookup step. Processes 1 byte of 4 byte ip4 address. */ always_inline ip4_fib_mtrie_leaf_t ip4_fib_mtrie_lookup_step (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t current_leaf, - ip4_address_t * dst_address, + const ip4_address_t * dst_address, u32 dst_address_byte_index) { ip4_fib_mtrie_leaf_t next_leaf; diff --git a/vnet/vnet/ip/ip4_source_and_port_range_check.c b/vnet/vnet/ip/ip4_source_and_port_range_check.c index ebfa767d8f0..8a469baa804 100644 --- a/vnet/vnet/ip/ip4_source_and_port_range_check.c +++ b/vnet/vnet/ip/ip4_source_and_port_range_check.c @@ -14,7 +14,19 @@ */ #include <vnet/ip/ip.h> #include <vnet/ip/ip_source_and_port_range_check.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +/** + * @brief The pool of range chack DPOs + */ +static protocol_port_range_dpo_t *ppr_dpo_pool; + +/** + * @brief Dynamically registered DPO type + */ +static dpo_type_t ppr_dpo_type; vlib_node_registration_t ip4_source_port_and_range_check_rx; vlib_node_registration_t ip4_source_port_and_range_check_tx; @@ -73,23 +85,20 @@ typedef enum static inline u32 -check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) +check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo, + u16 dst_port, u32 next) { - protocol_port_range_t *range; + const protocol_port_range_t *range; u16x8vec_t key; u16x8vec_t diff1; u16x8vec_t diff2; u16x8vec_t sum, sum_equal_diff2; u16 sum_nonzero, sum_equal, winner_mask; int i; - u8 *rwh; - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR || dst_port == 0) + if (NULL == ppr_dpo || dst_port == 0) return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; - rwh = (u8 *) (&adj->rewrite_header); - range = (protocol_port_range_t *) rwh; - /* Make the obvious screw-case work. 
A variant also works w/ no MMX */ if (PREDICT_FALSE (dst_port == 65535)) { @@ -100,20 +109,20 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) i++) { for (j = 0; j < 8; j++) - if (range->low.as_u16[j] == 65535) + if (ppr_dpo->blocks[i].low.as_u16[j] == 65535) return next; - range++; } return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; } key.as_u16x8 = u16x8_splat (dst_port); - for (i = 0; i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t); - i++) + for (i = 0; i < ppr_dpo->n_used_blocks; i++) { - diff1.as_u16x8 = u16x8_sub_saturate (range->low.as_u16x8, key.as_u16x8); - diff2.as_u16x8 = u16x8_sub_saturate (range->hi.as_u16x8, key.as_u16x8); + diff1.as_u16x8 = + u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8); + diff2.as_u16x8 = + u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8); sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8); sum_equal_diff2.as_u16x8 = u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8); @@ -127,6 +136,12 @@ check_adj_port_range_x1 (ip_adjacency_t * adj, u16 dst_port, u32 next) return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP; } +always_inline protocol_port_range_dpo_t * +protocol_port_range_dpo_get (index_t index) +{ + return (pool_elt_at_index (ppr_dpo_pool, index)); +} + always_inline uword ip4_source_and_port_range_check_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -154,264 +169,263 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *b0, *b1; - ip4_header_t *ip0, *ip1; - ip4_fib_mtrie_t *mtrie0, *mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip_source_and_port_range_check_config_t *c0, *c1; - ip_adjacency_t *adj0 = 0, *adj1 = 0; - u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; - u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; - udp_header_t *udp0, *udp1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); - } - - bi0 = to_next[0] = from[0]; - bi1 = to_next[1] = from[1]; - from += 2; - to_next += 2; - n_left_from -= 2; - n_left_to_next -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (b0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (b1)->sw_if_index[VLIB_RX]); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - if (is_tx) - { - c0 = vnet_get_config_data (&tx_cm->config_main, - &b0->current_config_index, - &next0, sizeof (c0[0])); - c1 = vnet_get_config_data (&tx_cm->config_main, - &b1->current_config_index, - &next1, sizeof (c1[0])); - } - else - { - c0 = vnet_get_config_data (&rx_cm->config_main, - &b0->current_config_index, - &next0, sizeof (c0[0])); - c1 = vnet_get_config_data (&rx_cm->config_main, - &b1->current_config_index, - &next1, sizeof (c1[0])); - } - - /* we can't use the default VRF here... 
*/ - for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) - { - ASSERT (c0->fib_index[i] && c1->fib_index[i]); - } - - - if (is_tx) - { - if (ip0->protocol == IP_PROTOCOL_UDP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; - if (ip0->protocol == IP_PROTOCOL_TCP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; - } - else - { - if (ip0->protocol == IP_PROTOCOL_UDP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; - if (ip0->protocol == IP_PROTOCOL_TCP) - fib_index0 = - c0->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; - } - - if (PREDICT_TRUE (fib_index0 != ~0)) - { - - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 2); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 3); - - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, - &ip0->src_address, - 0 - /* use dflt rt */ - )); - adj0 = ip_get_adjacency (lm, adj_index0); - } - - if (is_tx) - { - if (ip1->protocol == IP_PROTOCOL_UDP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; - if (ip1->protocol == IP_PROTOCOL_TCP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; - } - else - { - if (ip1->protocol == IP_PROTOCOL_UDP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; - if (ip1->protocol == IP_PROTOCOL_TCP) - fib_index1 = - c1->fib_index - [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; - } - - if (PREDICT_TRUE (fib_index1 != ~0)) - { - - mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; - - leaf1 = 
IP4_FIB_MTRIE_LEAF_ROOT; - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 0); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 1); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 2); - - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, - &ip1->src_address, 3); - - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, - &ip1->src_address, - 0)); - adj1 = ip_get_adjacency (lm, adj_index1); - } - - pass0 = 0; - pass0 |= adj0 == 0; - pass0 |= ip4_address_is_multicast (&ip0->src_address); - pass0 |= - ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); - pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) - && (ip0->protocol != IP_PROTOCOL_TCP); - - pass1 = 0; - pass1 |= adj1 == 0; - pass1 |= ip4_address_is_multicast (&ip1->src_address); - pass1 |= - ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); - pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) - && (ip1->protocol != IP_PROTOCOL_TCP); - - save_next0 = next0; - udp0 = ip4_next_header (ip0); - save_next1 = next1; - udp1 = ip4_next_header (ip1); - - if (PREDICT_TRUE (pass0 == 0)) - { - good_packets++; - next0 = check_adj_port_range_x1 - (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); - good_packets -= (save_next0 != next0); - b0->error = error_node->errors - [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; - } - - if (PREDICT_TRUE (pass1 == 0)) - { - good_packets++; - next1 = check_adj_port_range_x1 - (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); - good_packets -= (save_next1 != next1); - b1->error = error_node->errors - [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b0->flags & VLIB_BUFFER_IS_TRACED))) - { - ip4_source_and_port_range_check_trace_t *t = - vlib_add_trace (vm, node, b0, sizeof (*t)); - t->pass = next0 == save_next0; - t->bypass = pass0; - t->fib_index = 
fib_index0; - t->src_addr.as_u32 = ip0->src_address.as_u32; - t->port = (pass0 == 0) ? - clib_net_to_host_u16 (udp0->dst_port) : 0; - t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) - && (b1->flags & VLIB_BUFFER_IS_TRACED))) - { - ip4_source_and_port_range_check_trace_t *t = - vlib_add_trace (vm, node, b1, sizeof (*t)); - t->pass = next1 == save_next1; - t->bypass = pass1; - t->fib_index = fib_index1; - t->src_addr.as_u32 = ip1->src_address.as_u32; - t->port = (pass1 == 0) ? - clib_net_to_host_u16 (udp1->dst_port) : 0; - t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* vlib_buffer_t *b0, *b1; */ + /* ip4_header_t *ip0, *ip1; */ + /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */ + /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */ + /* ip_source_and_port_range_check_config_t *c0, *c1; */ + /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */ + /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */ + /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */ + /* udp_header_t *udp0, *udp1; */ + + /* /\* Prefetch next iteration. 
*\/ */ + /* { */ + /* vlib_buffer_t *p2, *p3; */ + + /* p2 = vlib_get_buffer (vm, from[2]); */ + /* p3 = vlib_get_buffer (vm, from[3]); */ + + /* vlib_prefetch_buffer_header (p2, LOAD); */ + /* vlib_prefetch_buffer_header (p3, LOAD); */ + + /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */ + /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */ + /* } */ + + /* bi0 = to_next[0] = from[0]; */ + /* bi1 = to_next[1] = from[1]; */ + /* from += 2; */ + /* to_next += 2; */ + /* n_left_from -= 2; */ + /* n_left_to_next -= 2; */ + + /* b0 = vlib_get_buffer (vm, bi0); */ + /* b1 = vlib_get_buffer (vm, bi1); */ + + /* fib_index0 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */ + /* fib_index1 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */ + + /* ip0 = vlib_buffer_get_current (b0); */ + /* ip1 = vlib_buffer_get_current (b1); */ + + /* if (is_tx) */ + /* { */ + /* c0 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b1->current_config_index, */ + /* &next1, sizeof (c1[0])); */ + /* } */ + /* else */ + /* { */ + /* c0 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b1->current_config_index, */ + /* &next1, sizeof (c1[0])); */ + /* } */ + + /* /\* we can't use the default VRF here... 
*\/ */ + /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */ + /* { */ + /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */ + /* } */ + + + /* if (is_tx) */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index0 != ~0)) */ + /* { */ + + /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */ + + /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 0); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 1); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 2); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 3); */ + + /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */ + + /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */ + /* &ip0->src_address, */ + /* 0 */ + /* /\* use dflt rt *\/ */ + /* )); */ + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* } */ + + /* if (is_tx) */ + /* { */ + /* if (ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if 
(ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index1 != ~0)) */ + /* { */ + + /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */ + + /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 0); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 1); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 2); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 3); */ + + /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */ + + /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */ + /* &ip1->src_address, */ + /* 0)); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + /* } */ + + /* pass0 = 0; */ + /* pass0 |= adj0 == 0; */ + /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */ + /* pass0 |= */ + /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */ + /* && (ip0->protocol != IP_PROTOCOL_TCP); */ + + /* pass1 = 0; */ + /* pass1 |= adj1 == 0; */ + /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */ + /* pass1 |= */ + /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */ + /* && (ip1->protocol != IP_PROTOCOL_TCP); */ + + /* save_next0 = next0; */ + /* udp0 = ip4_next_header (ip0); */ + /* save_next1 = next1; */ + /* udp1 = ip4_next_header (ip1); */ + + /* if (PREDICT_TRUE (pass0 == 0)) */ + /* { */ + /* good_packets++; */ + /* next0 = check_adj_port_range_x1 */ + /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */ + /* good_packets -= 
(save_next0 != next0); */ + /* b0->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_TRUE (pass1 == 0)) */ + /* { */ + /* good_packets++; */ + /* next1 = check_adj_port_range_x1 */ + /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */ + /* good_packets -= (save_next1 != next1); */ + /* b1->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b0, sizeof (*t)); */ + /* t->pass = next0 == save_next0; */ + /* t->bypass = pass0; */ + /* t->fib_index = fib_index0; */ + /* t->src_addr.as_u32 = ip0->src_address.as_u32; */ + /* t->port = (pass0 == 0) ? */ + /* clib_net_to_host_u16 (udp0->dst_port) : 0; */ + /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b1, sizeof (*t)); */ + /* t->pass = next1 == save_next1; */ + /* t->bypass = pass1; */ + /* t->fib_index = fib_index1; */ + /* t->src_addr.as_u32 = ip1->src_address.as_u32; */ + /* t->port = (pass1 == 0) ? 
*/ + /* clib_net_to_host_u16 (udp1->dst_port) : 0; */ + /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ + /* to_next, n_left_to_next, */ + /* bi0, bi1, next0, next1); */ + /* } */ while (n_left_from > 0 && n_left_to_next > 0) { vlib_buffer_t *b0; ip4_header_t *ip0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; ip_source_and_port_range_check_config_t *c0; - ip_adjacency_t *adj0 = 0; - u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; + u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0; udp_header_t *udp0; + const protocol_port_range_dpo_t *ppr_dpo0 = NULL; + const dpo_id_t *dpo; bi0 = from[0]; to_next[0] = bi0; @@ -476,35 +490,25 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, if (fib_index0 != ~0) { + lb_index0 = ip4_fib_forwarding_lookup (fib_index0, + &ip0->src_address); - mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; - - leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 0); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 1); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 2); - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, - &ip0->src_address, 3); + dpo = + load_balance_get_bucket_i (load_balance_get (lb_index0), 0); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - - ASSERT (adj_index0 == ip4_fib_lookup_with_table - (im, fib_index0, - &ip0->src_address, 0 /* use default route */ )); - adj0 = ip_get_adjacency (lm, adj_index0); + if (ppr_dpo_type == dpo->dpoi_type) + { + ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index); + } + /* + * else the lookup hit an enty that was no inserted + * by this range checker, which is the default route + */ } /* * $$$ which (src,dst) categories should we always pass? 
*/ pass0 = 0; - pass0 |= adj0 == 0; pass0 |= ip4_address_is_multicast (&ip0->src_address); pass0 |= ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); @@ -518,7 +522,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, { good_packets++; next0 = check_adj_port_range_x1 - (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); + (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0); good_packets -= (save_next0 != next0); b0->error = error_node->errors [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; @@ -558,6 +562,7 @@ ip4_source_and_port_range_check_inline (vlib_main_t * vm, IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK, good_packets); return frame->n_vectors; + return 0; } static uword @@ -786,209 +791,299 @@ VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, /* *INDENT-ON* */ static u8 * -format_source_and_port_rc_adjacency (u8 * s, va_list * args) +format_ppr_dpo (u8 * s, va_list * args) { - CLIB_UNUSED (vnet_main_t * vnm) = va_arg (*args, vnet_main_t *); - ip_lookup_main_t *lm = va_arg (*args, ip_lookup_main_t *); - u32 adj_index = va_arg (*args, u32); - ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); - source_range_check_main_t *srm = &source_range_check_main; - u8 *rwh = (u8 *) (&adj->rewrite_header); - protocol_port_range_t *range; + index_t index = va_arg (args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (args, u32); + + protocol_port_range_dpo_t *ppr_dpo; int i, j; int printed = 0; - range = (protocol_port_range_t *) rwh; + ppr_dpo = protocol_port_range_dpo_get (index); s = format (s, "allow "); - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < ppr_dpo->n_used_blocks; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[j]) + if (ppr_dpo->blocks[i].low.as_u16[j]) { if (printed) s = format (s, ", "); - if (range->hi.as_u16[j] > (range->low.as_u16[j] + 1)) - s = format (s, "%d-%d", (u32) range->low.as_u16[j], - (u32) range->hi.as_u16[j] - 1); + if (ppr_dpo->blocks[i].hi.as_u16[j] 
> + (ppr_dpo->blocks[i].low.as_u16[j] + 1)) + s = + format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1); else - s = format (s, "%d", range->low.as_u16[j]); + s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]); printed = 1; } } - range++; } return s; } +static void +ppr_dpo_lock (dpo_id_t * dpo) +{ +} + +static void +ppr_dpo_unlock (dpo_id_t * dpo) +{ +} + +const static dpo_vft_t ppr_vft = { + .dv_lock = ppr_dpo_lock, + .dv_unlock = ppr_dpo_unlock, + .dv_format = format_ppr_dpo, +}; + +const static char *const ppr_ip4_nodes[] = { + "ip4-source-and-port-range-check-rx", + NULL, +}; + +const static char *const *const ppr_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = ppr_ip4_nodes, +}; + clib_error_t * ip4_source_and_port_range_check_init (vlib_main_t * vm) { source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; srm->vlib_main = vm; srm->vnet_main = vnet_get_main (); - srm->ranges_per_adjacency = - VLIB_BUFFER_PRE_DATA_SIZE / (2 * sizeof (u16x8)); - srm->special_adjacency_format_function_index = - vnet_register_special_adjacency_format_function (lm, - format_source_and_port_rc_adjacency); - ASSERT (srm->special_adjacency_format_function_index); + ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes); return 0; } VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init); -int -add_port_range_adjacency (ip4_address_t * address, - u32 length, - u32 adj_index, - u16 * low_ports, u16 * high_ports, u32 fib_index) +protocol_port_range_dpo_t * +protocol_port_range_dpo_alloc (void) { - ip_adjacency_t *adj; - int i, j, k; - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; - u8 *rwh; + protocol_port_range_dpo_t *ppr_dpo; - adj = ip_get_adjacency (lm, adj_index); - /* $$$$ fixme: add ports if address + mask match */ - if 
(adj->lookup_next_index == IP_LOOKUP_NEXT_ICMP_ERROR) - return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES); + memset (ppr_dpo, 0, sizeof (*ppr_dpo)); - ip_adjacency_t template_adj; - ip4_add_del_route_args_t a; + ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO; - memset (&template_adj, 0, sizeof (template_adj)); + return (ppr_dpo); +} - template_adj.lookup_next_index = IP_LOOKUP_NEXT_ICMP_ERROR; - template_adj.if_address_index = ~0; - template_adj.special_adjacency_format_function_index = - srm->special_adjacency_format_function_index; - rwh = (u8 *) (&template_adj.rewrite_header); +static int +add_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) +{ + protocol_port_range_dpo_t *ppr_dpo; + dpo_id_t dpop = DPO_NULL; + int i, j, k; - range = (protocol_port_range_t *) rwh; + fib_node_index_t fei; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if we have already sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * this is a first time add for this prefix. + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + else + { + /* + * the prefix is already there. + * check it was sourced by us, and if so get the ragne DPO from it. + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. 
we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * there is no PPR state associated with this prefix, + * so we'll need a new DPO + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + } - if (vec_len (low_ports) > 8 * srm->ranges_per_adjacency) + if (vec_len (low_ports) > ppr_dpo->n_free_ranges) return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY; j = k = 0; for (i = 0; i < vec_len (low_ports); i++) { - for (; j < srm->ranges_per_adjacency; j++) + for (; j < N_BLOCKS_PER_DPO; j++) { for (; k < 8; k++) { - if (range->low.as_u16[k] == 0) + if (ppr_dpo->blocks[j].low.as_u16[k] == 0) { - range->low.as_u16[k] = low_ports[i]; - range->hi.as_u16[k] = high_ports[i]; - k++; - if (k == 7) - { - k = 0; - j++; - } - goto doublebreak2; + ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i]; + ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i]; + goto doublebreak; } } - k = 0; - range++; } - j = 0; - /* Too many ports specified... 
*/ - return VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY; - - doublebreak2:; + doublebreak:; } + ppr_dpo->n_used_blocks = j + 1; - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX; - a.table_index_or_table_id = fib_index; - a.dst_address = address[0]; - a.dst_address_length = length; - a.add_adj = &template_adj; - a.n_add_adj = 1; + /* + * add or update the entry in the FIB + */ + dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool)); + + if (FIB_NODE_INDEX_INVALID == fei) + { + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } + else + { + fib_table_entry_special_dpo_update (fei, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } - ip4_add_del_route (im, &a); return 0; } -int -remove_port_range_adjacency (ip4_address_t * address, - u32 length, - u32 adj_index, - u16 * low_ports, u16 * high_ports, u32 fib_index) +static int +remove_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) { - ip_adjacency_t *adj; + protocol_port_range_dpo_t *ppr_dpo; + fib_node_index_t fei; int i, j, k; - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; - u8 *rwh; - adj = ip_get_adjacency (lm, adj_index); - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR) /* _ICMP_ERROR is a dummy placeholder */ - return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if we have sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); - rwh = (u8 *) (&adj->rewrite_header); + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + else + { + /* + * the prefix is already there. 
+ * check it was sourced by us + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + } for (i = 0; i < vec_len (low_ports); i++) { - range = (protocol_port_range_t *) rwh; - for (j = 0; j < srm->ranges_per_adjacency; j++) + for (j = 0; j < N_BLOCKS_PER_DPO; j++) { for (k = 0; k < 8; k++) { - if (low_ports[i] == range->low.as_u16[k] && - high_ports[i] == range->hi.as_u16[k]) + if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] && + high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k]) { - range->low.as_u16[k] = range->hi.as_u16[k] = 0; + ppr_dpo->blocks[j].low.as_u16[k] = + ppr_dpo->blocks[j].hi.as_u16[k] = 0; goto doublebreak; } } - range++; } doublebreak:; } - range = (protocol_port_range_t *) rwh; + ppr_dpo->n_free_ranges = 0; + /* Have we deleted all ranges yet? */ - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < N_BLOCKS_PER_DPO; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[i] != 0) - goto still_occupied; + if (ppr_dpo->blocks[j].low.as_u16[i] == 0) + ppr_dpo->n_free_ranges++; } - range++; } - /* Yes, lose the adjacency... */ - { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = fib_index; - a.dst_address = address[0]; - a.dst_address_length = length; - a.adj_index = adj_index; - ip4_add_del_route (im, &a); - } - -still_occupied: - ; + + if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges) + { + /* Yes, lose the adjacency... 
*/ + fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL); + } + else + { + /* + * compact the ranges down to a contiguous block + */ + // FIXME. TODO. + } + return 0; } @@ -1010,35 +1105,19 @@ ip4_source_and_port_range_check_add_del (ip4_address_t * address, u16 * low_ports, u16 * high_ports, int is_add) { - - ip4_main_t *im = &ip4_main; - // ip_lookup_main_t * lm = &im->lookup_main; - uword *p; u32 fib_index; - u32 adj_index; - - p = hash_get (im->fib_index_by_table_id, vrf_id); - if (!p) - { - ip4_fib_t *f; - f = find_ip4_fib_by_table_index_or_id (im, vrf_id, 0 /* flags */ ); - fib_index = f->index; - } - else - fib_index = p[0]; - adj_index = ip4_fib_lookup_with_table - (im, fib_index, address, 0 /* disable_default_route */ ); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id); if (is_add == 0) { - remove_port_range_adjacency (address, length, adj_index, low_ports, - high_ports, fib_index); + remove_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); } else { - add_port_range_adjacency (address, length, adj_index, low_ports, - high_ports, fib_index); + add_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); } return 0; @@ -1159,24 +1238,20 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - source_range_check_main_t *srm = &source_range_check_main; - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - protocol_port_range_t *range; + protocol_port_range_dpo_t *ppr_dpo; u32 fib_index; - ip4_address_t addr; u8 addr_set = 0; u32 vrf_id = ~0; int rv, i, j; - u32 adj_index; - ip_adjacency_t *adj; u32 port = 0; - u8 *rwh; - uword *p; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + }; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "%U", unformat_ip4_address, &addr)) + if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4)) 
addr_set = 1; else if (unformat (input, "vrf %d", &vrf_id)) ; @@ -1192,51 +1267,58 @@ show_source_and_port_range_check_fn (vlib_main_t * vm, if (vrf_id == ~0) return clib_error_return (0, "VRF ID required, not specified"); - p = hash_get (im->fib_index_by_table_id, vrf_id); - if (p == 0) + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + if (~0 == fib_index) return clib_error_return (0, "VRF %d not found", vrf_id); - fib_index = p[0]; - adj_index = ip4_fib_lookup_with_table - (im, fib_index, &addr, 0 /* disable_default_route */ ); + /* + * find the longest prefix match on the address requested, + * check it was sourced by us + */ + dpo_id_t dpo = DPO_NULL; + const dpo_id_t *bucket; - adj = ip_get_adjacency (lm, adj_index); - - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ICMP_ERROR) + if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx), + FIB_SOURCE_SPECIAL, &dpo)) { - vlib_cli_output (vm, "%U: src address drop", format_ip4_address, &addr); + /* + * not one of ours + */ + vlib_cli_output (vm, "%U: src address drop", format_ip4_address, + &pfx.fp_addr.ip4); return 0; } + bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + if (port) { - rv = check_adj_port_range_x1 (adj, (u16) port, 1234); + rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234); if (rv == 1234) vlib_cli_output (vm, "%U port %d PASS", format_ip4_address, - &addr, port); + &pfx.fp_addr.ip4, port); else vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address, - &addr, port); + &pfx.fp_addr.ip4, port); return 0; } else { u8 *s; - rwh = (u8 *) (&adj->rewrite_header); - - s = format (0, "%U: ", format_ip4_address, &addr); - range = (protocol_port_range_t *) rwh; + s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4); - for (i = 0; i < srm->ranges_per_adjacency; i++) + for (i = 0; i < N_BLOCKS_PER_DPO; i++) { for (j = 0; j < 8; j++) { - if (range->low.as_u16[j]) 
- s = format (s, "%d - %d ", (u32) range->low.as_u16[j], - (u32) range->hi.as_u16[j]); + if (ppr_dpo->blocks[i].low.as_u16[j]) + s = format (s, "%d - %d ", + (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j]); } - range++; } vlib_cli_output (vm, "%s", s); vec_free (s); diff --git a/vnet/vnet/ip/ip4_source_check.c b/vnet/vnet/ip/ip4_source_check.c index 1f8e7214ff1..2323ac291aa 100644 --- a/vnet/vnet/ip/ip4_source_check.c +++ b/vnet/vnet/ip/ip4_source_check.c @@ -38,6 +38,8 @@ */ #include <vnet/ip/ip.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> typedef struct { u8 packet_data[64]; @@ -110,9 +112,12 @@ ip4_source_check_inline (vlib_main_t * vm, ip4_fib_mtrie_t * mtrie0, * mtrie1; ip4_fib_mtrie_leaf_t leaf0, leaf1; ip4_source_check_config_t * c0, * c1; - ip_adjacency_t * adj0, * adj1; - u32 pi0, next0, pass0, adj_index0; - u32 pi1, next1, pass1, adj_index1; + const load_balance_t * lb0, * lb1; + u32 pi0, next0, pass0, lb_index0; + u32 pi1, next1, pass1, lb_index1; + const ip_adjacency_t *adj0, *adj1; + const dpo_id_t *dpo0, *dpo1; + u32 ii0, ii1; /* Prefetch next iteration. 
*/ { @@ -150,8 +155,8 @@ ip4_source_check_inline (vlib_main_t * vm, &next1, sizeof (c1[0])); - mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; - mtrie1 = &vec_elt_at_index (im->fibs, c1->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; + mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie; leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -167,29 +172,70 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, - &ip0->src_address, - c0->no_default_route)); - ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, c1->fib_index, - &ip1->src_address, - c1->no_default_route)); - - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); + lb0 = load_balance_get(lb_index0); + lb1 = load_balance_get(lb_index1); /* Pass multicast. 
*/ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); pass1 = ip4_address_is_multicast (&ip1->src_address) || ip1->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); - pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); - pass1 |= (adj1->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p1)->sw_if_index[VLIB_RX] == adj1->rewrite_header.sw_if_index)); + if (PREDICT_TRUE(1 == lb0->lb_n_buckets)) + { + dpo0 = load_balance_get_bucket_i(lb0, 0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + else + { + for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++) + { + dpo0 = load_balance_get_bucket_i(lb0, ii0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + } + if (PREDICT_TRUE(1 == lb1->lb_n_buckets)) + { + dpo1 = load_balance_get_bucket_i(lb1, 0); + if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY)) + { + pass1 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj1 = adj_get(dpo1->dpoi_index); + pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] == + adj1->rewrite_header.sw_if_index); + } + } + else + { + for (ii1 = 0; ii1 < lb1->lb_n_buckets && !pass1; ii1++) + { + dpo1 = load_balance_get_bucket_i(lb1, ii1); + if (PREDICT_TRUE(dpo1->dpoi_type == DPO_ADJACENCY)) + { + pass1 |= (source_check_type == + 
IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj1 = adj_get(dpo1->dpoi_index); + pass1 |= (vnet_buffer (p1)->sw_if_index[VLIB_RX] == + adj1->rewrite_header.sw_if_index); + } + } + } next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP); next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP); @@ -210,7 +256,10 @@ ip4_source_check_inline (vlib_main_t * vm, ip4_fib_mtrie_leaf_t leaf0; ip4_source_check_config_t * c0; ip_adjacency_t * adj0; - u32 pi0, next0, pass0, adj_index0; + u32 pi0, next0, pass0, lb_index0; + const load_balance_t * lb0; + const dpo_id_t *dpo0; + u32 ii0; pi0 = from[0]; to_next[0] = pi0; @@ -227,7 +276,7 @@ ip4_source_check_inline (vlib_main_t * vm, &next0, sizeof (c0[0])); - mtrie0 = &vec_elt_at_index (im->fibs, c0->fib_index)->mtrie; + mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie; leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; @@ -239,19 +288,40 @@ ip4_source_check_inline (vlib_main_t * vm, leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, c0->fib_index, - &ip0->src_address, - c0->no_default_route)); - adj0 = ip_get_adjacency (lm, adj_index0); + lb0 = load_balance_get(lb_index0); /* Pass multicast. 
*/ pass0 = ip4_address_is_multicast (&ip0->src_address) || ip0->src_address.as_u32 == clib_host_to_net_u32(0xFFFFFFFF); - pass0 |= (adj0->lookup_next_index == IP_LOOKUP_NEXT_REWRITE - && (source_check_type == IP4_SOURCE_CHECK_REACHABLE_VIA_ANY - || vnet_buffer (p0)->sw_if_index[VLIB_RX] == adj0->rewrite_header.sw_if_index)); + if (PREDICT_TRUE(1 == lb0->lb_n_buckets)) + { + dpo0 = load_balance_get_bucket_i(lb0, 0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + else + { + for (ii0 = 0; ii0 < lb0->lb_n_buckets && !pass0; ii0++) + { + dpo0 = load_balance_get_bucket_i(lb0, ii0); + if (PREDICT_TRUE(dpo0->dpoi_type == DPO_ADJACENCY)) + { + pass0 |= (source_check_type == + IP4_SOURCE_CHECK_REACHABLE_VIA_ANY); + adj0 = adj_get(dpo0->dpoi_index); + pass0 |= (vnet_buffer (p0)->sw_if_index[VLIB_RX] == + adj0->rewrite_header.sw_if_index); + } + } + } next0 = (pass0 ? 
next0 : IP4_SOURCE_CHECK_NEXT_DROP); p0->error = error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS]; diff --git a/vnet/vnet/ip/ip4_test.c b/vnet/vnet/ip/ip4_test.c index ff088e78f3e..b76a719fe13 100644 --- a/vnet/vnet/ip/ip4_test.c +++ b/vnet/vnet/ip/ip4_test.c @@ -142,7 +142,7 @@ thrash (vlib_main_t * vm, } /* Find or create FIB table 11 */ - fib = find_ip4_fib_by_table_index_or_id (im, table_id, IP4_ROUTE_FLAG_TABLE_ID); + fib = ip4_fib_find_or_create_fib_by_table_id (table_id); for (i = tm->test_interfaces_created; i < ninterfaces; i++) { @@ -164,6 +164,7 @@ thrash (vlib_main_t * vm, hw = vnet_get_hw_interface (vnm, hw_if_index); vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index); im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index; + ip4_sw_interface_enable_disable(sw_if_index, 1); } tm->test_interfaces_created = ninterfaces; diff --git a/vnet/vnet/ip/ip6.h b/vnet/vnet/ip/ip6.h index f5f3de84676..36be64948c9 100644 --- a/vnet/vnet/ip/ip6.h +++ b/vnet/vnet/ip/ip6.h @@ -71,27 +71,11 @@ typedef struct { u32 index; /* flow hash configuration */ - u32 flow_hash_config; + flow_hash_config_t flow_hash_config; } ip6_fib_t; struct ip6_main_t; -typedef void (ip6_add_del_route_function_t) - (struct ip6_main_t * im, - uword opaque, - ip6_fib_t * fib, - u32 flags, - ip6_address_t * address, - u32 address_length, - void * old_result, - void * new_result); - -typedef struct { - ip6_add_del_route_function_t * function; - uword required_flags; - uword function_opaque; -} ip6_add_del_route_callback_t; - typedef void (ip6_add_del_interface_address_function_t) (struct ip6_main_t * im, uword opaque, @@ -106,31 +90,63 @@ typedef struct { uword function_opaque; } ip6_add_del_interface_address_callback_t; -typedef struct ip6_main_t { - BVT(clib_bihash) ip6_lookup_table; +/** + * Enumeration of the FIB table instance types + */ +typedef enum ip6_fib_table_instance_type_t_ { + /** + * This table stores the routes that are used to forward traffic. 
+ * The key is the prefix, the result the adjacnecy to forward on. + */ + IP6_FIB_TABLE_FWDING, + /** + * The table that stores ALL routes learned by the DP. + * Some of these routes may not be ready to install in forwarding + * at a given time. + * The key in this table is the prefix, the result is the fib_entry_t + */ + IP6_FIB_TABLE_NON_FWDING, +} ip6_fib_table_instance_type_t; + +#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1) - ip_lookup_main_t lookup_main; +/** + * A represenation of a single IP6 table + */ +typedef struct ip6_fib_table_instance_t_ { + /* The hash table */ + BVT(clib_bihash) ip6_hash; /* bitmap / refcounts / vector of mask widths to search */ uword * non_empty_dst_address_length_bitmap; u8 * prefix_lengths_in_search_order; i32 dst_address_length_refcounts[129]; +} ip6_fib_table_instance_t; + +typedef struct ip6_main_t { + /** + * The two FIB tables; fwding and non-fwding + */ + ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES]; + + ip_lookup_main_t lookup_main; - /* Vector of FIBs. */ - ip6_fib_t * fibs; + /* Pool of FIBs. */ + struct fib_table_t_ * fibs; + /* Network byte orders subnet mask for each prefix length */ ip6_address_t fib_masks[129]; /* Table index indexed by software interface. */ u32 * fib_index_by_sw_if_index; + /* IP6 enabled count by software interface */ + u8 * ip_enabled_by_sw_if_index; + /* Hash table mapping table id to fib index. ID space is not necessarily dense; index space is dense. */ uword * fib_index_by_table_id; - /* Vector of functions to call when routes are added/deleted. */ - ip6_add_del_route_callback_t * add_del_route_callbacks; - /* Hash table mapping interface rewrite adjacency index by sw if index. 
*/ uword * interface_route_adj_index_by_sw_if_index; @@ -156,8 +172,10 @@ typedef struct ip6_main_t { u32 ip6_unicast_rx_feature_l2tp_decap; u32 ip6_unicast_rx_feature_vpath; u32 ip6_unicast_rx_feature_lookup; + u32 ip6_unicast_rx_feature_drop; /* Built-in multicast feature path indices */ + u32 ip6_multicast_rx_feature_drop; u32 ip6_multicast_rx_feature_vpath; u32 ip6_multicast_rx_feature_lookup; @@ -226,6 +244,8 @@ extern vlib_node_registration_t ip6_input_node; extern vlib_node_registration_t ip6_rewrite_node; extern vlib_node_registration_t ip6_rewrite_local_node; extern vlib_node_registration_t ip6_discover_neighbor_node; +extern vlib_node_registration_t ip6_glean_node; +extern vlib_node_registration_t ip6_midchain_node; extern vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node; @@ -242,40 +262,10 @@ typedef union { } up_down_event; } ip6_icmp_neighbor_discovery_event_data_t; -u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst); -u32 ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, - ip6_address_t * dst); - -/** - * \brief Get or create an IPv6 fib. - * - * Get or create an IPv6 fib with the provided fib ID or index. - * The fib ID is a possibly-sparse user-defined value while - * the fib index defines the position of the fib in the fib vector. - * - * \param im - * ip6_main pointer. - * \param table_index_or_id - * The table index if \c IP6_ROUTE_FLAG_FIB_INDEX bit is set in \p flags. - * Otherwise, when set to \c ~0, an arbitrary and unused fib ID is picked - * and can be retrieved with \c ret->table_id. - * Otherwise, it is the fib ID to be used to retrieve or create the desired fib. - * \param flags - * Indicates whether \p table_index_or_id is the fib index or ID. - * When the bit \c IP6_ROUTE_FLAG_FIB_INDEX is set, \p table_index_or_id - * is considered as the fib index, and the fib ID otherwise. - * \return A pointer to the retrieved or created fib. 
- * - * \remark When getting a fib with the fib index, the fib MUST already exist. - */ -ip6_fib_t * find_ip6_fib_by_table_index_or_id (ip6_main_t * im, - u32 table_index_or_id, - u32 flags); - always_inline uword -ip6_destination_matches_route (ip6_main_t * im, - ip6_address_t * key, - ip6_address_t * dest, +ip6_destination_matches_route (const ip6_main_t * im, + const ip6_address_t * key, + const ip6_address_t * dest, uword dest_length) { int i; @@ -313,25 +303,26 @@ ip6_unaligned_destination_matches_route (ip6_main_t * im, } always_inline int -ip6_src_address_for_packet (ip6_main_t * im, vlib_buffer_t * p, ip6_address_t * src, u32 sw_if_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip_interface_address_t * ia = ip_interface_address_for_packet (lm, p, sw_if_index); - if (ia == NULL) - return -1; - ip6_address_t * a = ip_interface_address_get_address (lm, ia); - *src = a[0]; - return 0; -} - -always_inline u32 -ip6_src_lookup_for_packet (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i) +ip6_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, + ip6_address_t * src) { - if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0) - vnet_buffer (b)->ip.adj_index[VLIB_RX] - = ip6_fib_lookup (im, vnet_buffer (b)->sw_if_index[VLIB_RX], - &i->src_address); - return vnet_buffer (b)->ip.adj_index[VLIB_RX]; + u32 if_add_index = + lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE(if_add_index != ~0)) { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index); + ip6_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + *src = *if_ip; + return (0); + } + else + { + src->as_u64[0] = 0; + src->as_u64[1] = 0; + } + return (!0); } /* Find interface address which matches destination. 
*/ @@ -362,95 +353,12 @@ clib_error_t * ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, ip6_address_t * address, u32 address_length, u32 is_del); +void +ip6_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable); int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2); -/* Add/del a route to the FIB. */ - -#define IP6_ROUTE_FLAG_ADD (0 << 0) -#define IP6_ROUTE_FLAG_DEL (1 << 0) -#define IP6_ROUTE_FLAG_TABLE_ID (0 << 1) -#define IP6_ROUTE_FLAG_FIB_INDEX (1 << 1) -#define IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY (1 << 2) -#define IP6_ROUTE_FLAG_NO_REDISTRIBUTE (1 << 3) -#define IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP (1 << 4) -/* Dynamic route created via neighbor discovery. */ -#define IP6_ROUTE_FLAG_NEIGHBOR (1 << 5) - -typedef struct { - /* IP6_ROUTE_FLAG_* */ - u32 flags; - - /* Either index of fib or table_id to hash and get fib. - IP6_ROUTE_FLAG_FIB_INDEX specifies index; otherwise table_id is assumed. */ - u32 table_index_or_table_id; - - /* Destination address (prefix) and length. */ - ip6_address_t dst_address; - u32 dst_address_length; - - /* Adjacency to use for this destination. */ - u32 adj_index; - - /* If specified adjacencies to add and then - use for this destination. add_adj/n_add_adj - are override adj_index if specified. 
*/ - ip_adjacency_t * add_adj; - u32 n_add_adj; -} ip6_add_del_route_args_t; - -void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * args); - -void ip6_add_del_route_next_hop (ip6_main_t * im, - u32 flags, - ip6_address_t * dst_address, - u32 dst_address_length, - ip6_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index); - -u32 -ip6_route_get_next_hop_adj (ip6_main_t * im, - u32 fib_index, - ip6_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index); - -u32 -ip6_get_route (ip6_main_t * im, - u32 fib_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length); - -void -ip6_foreach_matching_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length, - ip6_address_t ** results, - u8 ** result_length); - -void ip6_delete_matching_routes (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length); - -void ip6_maybe_remap_adjacencies (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags); - -void ip6_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index); - -u32 -vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg); - clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index); @@ -481,8 +389,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_address_t * a, u8 * link_layer_address, uword n_bytes_link_layer_address); -void -vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index); void ip6_link_local_address_from_ethernet_mac_address (ip6_address_t *ip, @@ -492,7 +398,8 @@ void ip6_ethernet_mac_address_from_link_local_address (u8 *mac, ip6_address_t *ip); -int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config); +int vnet_set_ip6_flow_hash (u32 table_id, + flow_hash_config_t flow_hash_config); int 
ip6_neighbor_ra_config(vlib_main_t * vm, u32 sw_if_index, @@ -560,7 +467,8 @@ extern vlib_node_registration_t ip6_lookup_node; /* Compute flow hash. We'll use it to select which Sponge to use for this flow. And other things. */ always_inline u32 -ip6_compute_flow_hash (ip6_header_t * ip, u32 flow_hash_config) +ip6_compute_flow_hash (const ip6_header_t * ip, + flow_hash_config_t flow_hash_config) { tcp_header_t * tcp = (void *) (ip + 1); u64 a, b, c; diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index c977960285d..f7514dc3cbf 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -42,668 +42,13 @@ #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ #include <vnet/srp/srp.h> /* for srp_hw_interface_class */ #include <vppinfra/cache.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/classify_dpo.h> #include <vppinfra/bihash_template.c> -static void compute_prefix_lengths_in_search_order (ip6_main_t * im) -{ - int i; - vec_reset_length (im->prefix_lengths_in_search_order); - /* Note: bitmap reversed so this is in fact a longest prefix match */ - clib_bitmap_foreach (i, im->non_empty_dst_address_length_bitmap, - ({ - int dst_address_length = 128 - i; - vec_add1 (im->prefix_lengths_in_search_order, dst_address_length); - })); -} - -u32 -ip6_fib_lookup_with_table (ip6_main_t * im, u32 fib_index, ip6_address_t * dst) -{ - ip_lookup_main_t * lm = &im->lookup_main; - int i, len; - int rv; - BVT(clib_bihash_kv) kv, value; - u64 fib; - - len = vec_len (im->prefix_lengths_in_search_order); - - kv.key[0] = dst->as_u64[0]; - kv.key[1] = dst->as_u64[1]; - fib = ((u64)((fib_index))<<32); - - for (i = 0; i < len; i++) - { - int dst_address_length = im->prefix_lengths_in_search_order[i]; - ip6_address_t * mask = &im->fib_masks[dst_address_length]; - - ASSERT(dst_address_length >= 0 && dst_address_length <= 128); - //As lengths are decreasing, masks are 
increasingly specific. - kv.key[0] &= mask->as_u64[0]; - kv.key[1] &= mask->as_u64[1]; - kv.key[2] = fib | dst_address_length; - - rv = BV(clib_bihash_search_inline_2)(&im->ip6_lookup_table, &kv, &value); - if (rv == 0) - return value.value; - } - - return lm->miss_adj_index; -} - -u32 ip6_fib_lookup (ip6_main_t * im, u32 sw_if_index, ip6_address_t * dst) -{ - u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); - return ip6_fib_lookup_with_table (im, fib_index, dst); -} - -void -vnet_ip6_fib_init (ip6_main_t * im, u32 fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_add_del_route_args_t a; - ip_adjacency_t * adj; - - memset(&a, 0x0, sizeof(ip6_add_del_route_args_t)); - - a.table_index_or_table_id = fib_index; - a.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - - /* Add ff02::1:ff00:0/104 via local route for all tables. - This is required for neighbor discovery to work. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_solicited_node_multicast_address (&a.dst_address, 0); - - a.dst_address_length = 104; - ip6_add_del_route (im, &a); - - /* Add all-routers multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-nodes multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = 
IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_all_hosts); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); - - /* Add all-mldv2 multicast address via local route for all tables */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a.adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - adj->if_address_index = ~0; - adj->rewrite_header.data_bytes = 0; - - ip6_set_reserved_multicast_address (&a.dst_address, - IP6_MULTICAST_SCOPE_link_local, - IP6_MULTICAST_GROUP_ID_mldv2_routers); - - a.dst_address_length = 128; - ip6_add_del_route (im, &a); -} - -static ip6_fib_t * -create_fib_with_table_id (ip6_main_t * im, u32 table_id) -{ - ip6_fib_t * fib; - hash_set (im->fib_index_by_table_id, table_id, vec_len (im->fibs)); - vec_add2 (im->fibs, fib, 1); - fib->table_id = table_id; - fib->index = fib - im->fibs; - fib->flow_hash_config = IP_FLOW_HASH_DEFAULT; - vnet_ip6_fib_init (im, fib->index); - return fib; -} - -ip6_fib_t * -find_ip6_fib_by_table_index_or_id (ip6_main_t * im, u32 table_index_or_id, u32 flags) -{ - uword * p, fib_index; - - fib_index = table_index_or_id; - if (! (flags & IP6_ROUTE_FLAG_FIB_INDEX)) - { - if (table_index_or_id == ~0) { - table_index_or_id = 0; - while (hash_get (im->fib_index_by_table_id, table_index_or_id)) { - table_index_or_id++; - } - return create_fib_with_table_id (im, table_index_or_id); - } - - p = hash_get (im->fib_index_by_table_id, table_index_or_id); - if (! 
p) - return create_fib_with_table_id (im, table_index_or_id); - fib_index = p[0]; - } - return vec_elt_at_index (im->fibs, fib_index); -} - -void ip6_add_del_route (ip6_main_t * im, ip6_add_del_route_args_t * a) -{ - ip_lookup_main_t * lm = &im->lookup_main; - ip6_fib_t * fib; - ip6_address_t dst_address; - u32 dst_address_length, adj_index; - uword is_del; - u32 old_adj_index = ~0; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; - - /* Either create new adjacency or use given one depending on arguments. */ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - } - else - adj_index = a->adj_index; - - dst_address = a->dst_address; - dst_address_length = a->dst_address_length; - fib = find_ip6_fib_by_table_index_or_id (im, a->table_index_or_table_id, - a->flags); - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - ip6_address_mask (&dst_address, &im->fib_masks[dst_address_length]); - - /* refcount accounting */ - if (is_del) - { - ASSERT (im->dst_address_length_refcounts[dst_address_length] > 0); - if (--im->dst_address_length_refcounts[dst_address_length] == 0) - { - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 0); - compute_prefix_lengths_in_search_order (im); - } - } - else - { - im->dst_address_length_refcounts[dst_address_length]++; - - im->non_empty_dst_address_length_bitmap = - clib_bitmap_set (im->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 1); - compute_prefix_lengths_in_search_order (im); - } - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - old_adj_index = value.value; - - if (is_del) - 
BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 0 /* is_add */); - else - { - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - kv.value = adj_index; - - BV(clib_bihash_add_del) (&im->ip6_lookup_table, &kv, 1 /* is_add */); - } - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 - && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count --; - } - - /* Delete old adjacency index if present and changed. */ - { - if (! (a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 - && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); - } -} - -u32 -ip6_route_get_next_hop_adj (ip6_main_t * im, - u32 fib_index, - ip6_address_t *next_hop, - u32 next_hop_sw_if_index, - u32 explicit_fib_index) -{ - ip_lookup_main_t * lm = &im->lookup_main; - vnet_main_t * vnm = vnet_get_main(); - int is_interface_next_hop; - uword * nh_result; - u32 nh_adj_index; - ip6_fib_t * fib; - - fib = vec_elt_at_index (im->fibs, fib_index); - - is_interface_next_hop = ip6_address_is_zero (next_hop); - - if (is_interface_next_hop) - { - nh_result = hash_get (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index); - if (nh_result) - nh_adj_index = *nh_result; - else - { - ip_adjacency_t * adj; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &nh_adj_index); - ip6_adjacency_set_interface_route (vnm, adj, - next_hop_sw_if_index, ~0); - ip_call_add_del_adjacency_callbacks - (lm, next_hop_sw_if_index, /* is_del */ 0); - hash_set (im->interface_route_adj_index_by_sw_if_index, - next_hop_sw_if_index, nh_adj_index); - } - } - else if (next_hop_sw_if_index == ~0) - { - /* next-hop is recursive. we always need a indirect adj - * for recursive paths. 
Any LPM we perform now will give - * us a valid adj, but without tracking the next-hop we - * have no way to keep it valid. - */ - ip_adjacency_t add_adj; - memset (&add_adj, 0, sizeof(add_adj)); - add_adj.n_adj = 1; - add_adj.lookup_next_index = IP_LOOKUP_NEXT_INDIRECT; - add_adj.indirect.next_hop.ip6.as_u64[0] = next_hop->as_u64[0]; - add_adj.indirect.next_hop.ip6.as_u64[1] = next_hop->as_u64[1]; - add_adj.explicit_fib_index = explicit_fib_index; - ip_add_adjacency (lm, &add_adj, 1, &nh_adj_index); - } - else - { - BVT(clib_bihash_kv) kv, value; - - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; -after_nd: - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - ip_adjacency_t * adj; - nh_adj_index = ip6_fib_lookup_with_table (im, fib_index, next_hop); - adj = ip_get_adjacency (lm, nh_adj_index); - /* if ND interface adjacencty is present, we need to - install ND adjaceny for specific next hop */ - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - adj->arp.next_hop.ip6.as_u64[0] == 0 && - adj->arp.next_hop.ip6.as_u64[1] == 0) - { - nh_adj_index = vnet_ip6_neighbor_glean_add(fib_index, next_hop); - } - else if (next_hop->as_u8[0] == 0xfe) - { - //Next hop is link-local. No indirect in this case. 
- //Let's add it as a possible neighbor on this interface - ip6_address_t null_addr= {}; - ip6_add_del_route_next_hop (im, IP6_ROUTE_FLAG_ADD, - next_hop, 128, - &null_addr, next_hop_sw_if_index, - 1, ~0, fib_index); - goto after_nd; - } - } - else - { - nh_adj_index = value.value; - } - } - - return (nh_adj_index); -} - -void -ip6_add_del_route_next_hop (ip6_main_t * im, - u32 flags, - ip6_address_t * dst_address, - u32 dst_address_length, - ip6_address_t * next_hop, - u32 next_hop_sw_if_index, - u32 next_hop_weight, u32 adj_index, - u32 explicit_fib_index) -{ - vnet_main_t * vnm = vnet_get_main(); - ip_lookup_main_t * lm = &im->lookup_main; - u32 fib_index; - ip6_fib_t * fib; - ip6_address_t masked_dst_address; - u32 old_mp_adj_index, new_mp_adj_index; - u32 dst_adj_index, nh_adj_index; - int rv; - ip_adjacency_t * dst_adj; - ip_multipath_adjacency_t * old_mp, * new_mp; - int is_del = (flags & IP6_ROUTE_FLAG_DEL) != 0; - clib_error_t * error = 0; - BVT(clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning(); - - if (explicit_fib_index == (u32)~0) - fib_index = vec_elt (im->fib_index_by_sw_if_index, next_hop_sw_if_index); - else - fib_index = explicit_fib_index; - - fib = vec_elt_at_index (im->fibs, fib_index); - - /* Lookup next hop to be added or deleted. 
*/ - if (adj_index == (u32)~0) - { - nh_adj_index = ip6_route_get_next_hop_adj(im, fib_index, - next_hop, - next_hop_sw_if_index, - explicit_fib_index); - } - else - { - /* Look for the interface /128 route */ - kv.key[0] = next_hop->as_u64[0]; - kv.key[1] = next_hop->as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | 128; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) < 0) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "next-hop %U/128 not in FIB", - format_ip6_address, next_hop); - goto done; - } - - nh_adj_index = value.value; - } - - ASSERT (dst_address_length < ARRAY_LEN (im->fib_masks)); - masked_dst_address = dst_address[0]; - ip6_address_mask (&masked_dst_address, &im->fib_masks[dst_address_length]); - - kv.key[0] = masked_dst_address.as_u64[0]; - kv.key[1] = masked_dst_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | dst_address_length; - - rv = BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value); - - if (rv == 0) - { - dst_adj_index = value.value; - dst_adj = ip_get_adjacency (lm, dst_adj_index); - } - else - { - /* For deletes destination must be known. */ - if (is_del) - { - vnm->api_errno = VNET_API_ERROR_UNKNOWN_DESTINATION; - error = clib_error_return (0, "unknown destination %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - dst_adj_index = ~0; - dst_adj = 0; - } - - /* Ignore adds of X/128 with next hop of X. */ - if (! 
is_del - && dst_address_length == 128 - && ip6_address_is_equal (dst_address, next_hop)) - { - vnm->api_errno = VNET_API_ERROR_PREFIX_MATCHES_NEXT_HOP; - error = clib_error_return (0, "prefix matches next hop %U/%d", - format_ip6_address, dst_address, - dst_address_length); - goto done; - } - - /* Destination is not known and default weight is set so add route - to existing non-multipath adjacency */ - if (dst_adj_index == ~0 && next_hop_weight == 1 && next_hop_sw_if_index == ~0) - { - /* create / delete additional mapping of existing adjacency */ - ip6_add_del_route_args_t a; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & (IP6_ROUTE_FLAG_NO_REDISTRIBUTE - | IP6_ROUTE_FLAG_NOT_LAST_IN_GROUP))); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = nh_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - goto done; - } - - old_mp_adj_index = dst_adj ? dst_adj->heap_handle : ~0; - - if (! ip_multipath_adjacency_add_del_next_hop - (lm, is_del, - dst_adj ? dst_adj->heap_handle : ~0, - nh_adj_index, - next_hop_weight, - &new_mp_adj_index)) - { - vnm->api_errno = VNET_API_ERROR_NEXT_HOP_NOT_FOUND_MP; - error = clib_error_return - (0, "requested deleting next-hop %U not found in multi-path", - format_ip6_address, next_hop); - goto done; - } - - old_mp = new_mp = 0; - if (old_mp_adj_index != ~0) - old_mp = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - if (new_mp_adj_index != ~0) - new_mp = vec_elt_at_index (lm->multipath_adjacencies, new_mp_adj_index); - - if (old_mp != new_mp) - { - ip6_add_del_route_args_t a; - ip_adjacency_t * adj; - - a.table_index_or_table_id = fib_index; - a.flags = ((is_del ? 
IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD) - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY - | (flags & IP6_ROUTE_FLAG_NO_REDISTRIBUTE)); - a.dst_address = dst_address[0]; - a.dst_address_length = dst_address_length; - a.adj_index = new_mp ? new_mp->adj_index : dst_adj_index; - a.add_adj = 0; - a.n_add_adj = 0; - - ip6_add_del_route (im, &a); - - adj = ip_get_adjacency (lm, new_mp ? new_mp->adj_index : dst_adj_index); - if (adj->n_adj == 1) - adj->share_count += is_del ? -1 : 1; - } - - done: - if (error) - clib_error_report (error); -} - -u32 -ip6_get_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - ip6_fib_t * fib = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - ip6_address_t masked_address; - BVT(clib_bihash_kv) kv, value; - - ASSERT (address_length < ARRAY_LEN (im->fib_masks)); - clib_memcpy (&masked_address, address, sizeof (masked_address)); - ip6_address_mask (&masked_address, &im->fib_masks[address_length]); - - kv.key[0] = masked_address.as_u64[0]; - kv.key[1] = masked_address.as_u64[1]; - kv.key[2] = ((u64)((fib - im->fibs))<<32) | address_length; - - if (BV(clib_bihash_search)(&im->ip6_lookup_table, &kv, &value) == 0) - return (value.value); - return 0; -} - -void -ip6_foreach_matching_route (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * dst_address, - u32 address_length, - ip6_address_t ** results, - u8 ** result_lengths) -{ - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); - BVT(clib_bihash) * h = &im->ip6_lookup_table; - BVT(clib_bihash_value) * v; - clib_bihash_bucket_t * b; - int i, j, k; - - if (*results) - _vec_len (*results) = 0; - if (*result_lengths) - _vec_len (*result_lengths) = 0; - - /* Walk the table looking for routes which match the supplied address */ - for (i = 0; i < h->nbuckets; i++) - { - b = &h->buckets [i]; - if (b->offset == 0) - continue; 
- - v = BV(clib_bihash_get_value) (h, b->offset); - for (j = 0; j < (1<<b->log2_pages); j++) - { - for (k = 0; k < BIHASH_KVP_PER_PAGE; k++) - { - if (BV(clib_bihash_is_free)(&v->kvp[k])) - continue; - - if ((v->kvp[k].key[2] - == (((u64)((fib - im->fibs))<<32) | address_length)) - && ip6_destination_matches_route - (im, dst_address, (ip6_address_t *) &v->kvp[k], - address_length)) - { - ip6_address_t * a; - - a = (ip6_address_t *)(&v->kvp[k]); - - vec_add1 (*results, a[0]); - vec_add1 (*result_lengths, address_length); - } - } - v++; - } - } -} - -void ip6_maybe_remap_adjacencies (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags) -{ -#if SOONE - ip6_fib_t * fib - = find_ip6_fib_by_table_index_or_id (im, table_index_or_table_id, flags); -#endif - ip_lookup_main_t * lm = &im->lookup_main; - - if (lm->n_adjacency_remaps == 0) - return; - - clib_warning ("unimplemented, please report to vpp-dev@cisco.com"); - - /* All remaps have been performed. */ - lm->n_adjacency_remaps = 0; -} - -void ip6_delete_matching_routes (ip6_main_t * im, - u32 table_index_or_table_id, - u32 flags, - ip6_address_t * address, - u32 address_length) -{ - /* $$$$ static may be OK - this should happen only on thread 0 */ - static ip6_address_t * matching_addresses; - static u8 * matching_address_lengths; - u32 l, i; - ip6_add_del_route_args_t a; - - vlib_smp_unsafe_warning(); - - a.flags = IP6_ROUTE_FLAG_DEL | IP6_ROUTE_FLAG_NO_REDISTRIBUTE | flags; - a.table_index_or_table_id = table_index_or_table_id; - a.adj_index = ~0; - a.add_adj = 0; - a.n_add_adj = 0; - - for (l = address_length + 1; l <= 128; l++) - { - ip6_foreach_matching_route (im, table_index_or_table_id, flags, - address, - l, - &matching_addresses, - &matching_address_lengths); - for (i = 0; i < vec_len (matching_addresses); i++) - { - a.dst_address = matching_addresses[i]; - a.dst_address_length = matching_address_lengths[i]; - ip6_add_del_route (im, &a); - } - } - - ip6_maybe_remap_adjacencies (im, 
table_index_or_table_id, flags); -} - void ip6_forward_next_trace (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -713,12 +58,10 @@ ip6_forward_next_trace (vlib_main_t * vm, always_inline uword ip6_lookup_inline (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame, - int is_indirect) + vlib_frame_t * frame) { ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; - vlib_combined_counter_main_t * cm = &im->lookup_main.adjacency_counters; + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; u32 n_left_from, n_left_to_next, * from, * to_next; ip_lookup_next_t next; u32 cpu_index = os_get_cpu_number(); @@ -735,13 +78,14 @@ ip6_lookup_inline (vlib_main_t * vm, while (n_left_from >= 4 && n_left_to_next >= 2) { vlib_buffer_t * p0, * p1; - u32 pi0, pi1, adj_index0, adj_index1, wrong_next; + u32 pi0, pi1, lbi0, lbi1, wrong_next; ip_lookup_next_t next0, next1; ip6_header_t * ip0, * ip1; - ip_adjacency_t * adj0, * adj1; ip6_address_t * dst_addr0, * dst_addr1; u32 fib_index0, fib_index1; u32 flow_hash_config0, flow_hash_config1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; /* Prefetch next iteration. 
*/ { @@ -765,19 +109,8 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); ip1 = vlib_buffer_get_current (p1); - if (PREDICT_FALSE(is_indirect)) - { - ip_adjacency_t * iadj0, * iadj1; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - iadj1 = ip_get_adjacency (lm, vnet_buffer(p1)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - dst_addr1 = &iadj1->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - } + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]); @@ -787,69 +120,60 @@ ip6_lookup_inline (vlib_main_t * vm, fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ? fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX]; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); - adj_index1 = ip6_fib_lookup_with_table (im, fib_index1, dst_addr1); + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); + lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1); - adj0 = ip_get_adjacency (lm, adj_index0); - adj1 = ip_get_adjacency (lm, adj_index1); - - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } - if (PREDICT_FALSE (adj1->explicit_fib_index != ~0)) - { - adj_index1 = ip6_fib_lookup_with_table - (im, adj1->explicit_fib_index, dst_addr1); - adj1 = ip_get_adjacency (lm, adj_index1); - } - - next0 = adj0->lookup_next_index; - next1 = adj1->lookup_next_index; - - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? 
(ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; - next1 = (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index1 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj1->lookup_next_index; + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); vnet_buffer (p0)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - - if (PREDICT_FALSE(adj1->n_adj > 1)) + if (PREDICT_FALSE(lb1->lb_n_buckets > 1)) { - flow_hash_config1 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; - + flow_hash_config1 = lb1->lb_hash_config; vnet_buffer (p1)->ip.flow_hash = ip6_compute_flow_hash (ip1, flow_hash_config1); } - ASSERT (adj0->n_adj > 0); - ASSERT (adj1->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - ASSERT (is_pow2 (adj1->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); - adj_index1 += (vnet_buffer (p1)->ip.flow_hash & (adj1->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + dpo1 = load_balance_get_bucket_i(lb1, + (vnet_buffer (p1)->ip.flow_hash & + lb1->lb_n_buckets_minus_1)); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = adj_index1; + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; + next1 = ((ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next1; + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); vlib_increment_combined_counter - (cm, cpu_index, adj_index1, 1, + (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); from += 2; @@ -898,11 +222,12 @@ ip6_lookup_inline (vlib_main_t * vm, { vlib_buffer_t * p0; ip6_header_t * ip0; - u32 pi0, adj_index0; + u32 pi0, lbi0; ip_lookup_next_t next0; - ip_adjacency_t * adj0; + load_balance_t * lb0; ip6_address_t * dst_addr0; u32 fib_index0, flow_hash_config0; + const dpo_id_t *dpo0; pi0 = from[0]; to_next[0] = pi0; @@ -911,57 +236,44 @@ ip6_lookup_inline (vlib_main_t * vm, ip0 = vlib_buffer_get_current (p0); - if (PREDICT_FALSE(is_indirect)) - { - ip_adjacency_t * iadj0; - iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]); - dst_addr0 = &iadj0->indirect.next_hop.ip6; - } - else - { - dst_addr0 = &ip0->dst_address; - } + dst_addr0 = &ip0->dst_address; fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]); fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ? 
fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX]; flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + ip6_fib_get (fib_index0)->flow_hash_config; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, dst_addr0); + lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - - if (PREDICT_FALSE (adj0->explicit_fib_index != ~0)) - { - adj_index0 = ip6_fib_lookup_with_table - (im, adj0->explicit_fib_index, dst_addr0); - adj0 = ip_get_adjacency (lm, adj_index0); - } - - /* Only process the HBH Option Header if explicitly configured to do so */ - next0 = (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && im->hbh_enabled && - adj_index0 ? (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : adj0->lookup_next_index; + lb0 = load_balance_get (lbi0); vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE(adj0->n_adj > 1)) + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) { - flow_hash_config0 = - vec_elt_at_index (im->fibs,fib_index0)->flow_hash_config; + flow_hash_config0 = lb0->lb_hash_config; vnet_buffer (p0)->ip.flow_hash = ip6_compute_flow_hash (ip0, flow_hash_config0); } - ASSERT (adj0->n_adj > 0); - ASSERT (is_pow2 (adj0->n_adj)); - adj_index0 += (vnet_buffer (p0)->ip.flow_hash & (adj0->n_adj - 1)); + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + dpo0 = load_balance_get_bucket_i(lb0, + (vnet_buffer (p0)->ip.flow_hash & + lb0->lb_n_buckets_minus_1)); + next0 = dpo0->dpoi_next_node; + /* Only process the HBH Option Header if explicitly configured to do so */ + next0 = ((ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) && + im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : + next0; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; vlib_increment_combined_counter - (cm, cpu_index, adj_index0, 1, + (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); from += 1; @@ -986,163 +298,171 @@ ip6_lookup_inline (vlib_main_t * vm, } if (node->flags & VLIB_NODE_FLAG_TRACE) - ip6_forward_next_trace(vm, node, frame, VLIB_TX); + ip6_forward_next_trace(vm, node, frame, VLIB_TX); return frame->n_vectors; } -void ip6_adjacency_set_interface_route (vnet_main_t * vnm, - ip_adjacency_t * adj, - u32 sw_if_index, - u32 if_address_index) -{ - vnet_hw_interface_t * hw = vnet_get_sup_hw_interface (vnm, sw_if_index); - ip_lookup_next_t n; - u32 node_index; - - if (hw->hw_class_index == ethernet_hw_interface_class.index - || hw->hw_class_index == srp_hw_interface_class.index) - { - n = IP_LOOKUP_NEXT_ARP; - node_index = ip6_discover_neighbor_node.index; - adj->if_address_index = if_address_index; - adj->arp.next_hop.ip6.as_u64[0] = 0; - adj->arp.next_hop.ip6.as_u64[1] = 0; - } - else - { - n = IP_LOOKUP_NEXT_REWRITE; - node_index = ip6_rewrite_node.index; - } - - adj->lookup_next_index = n; - adj->explicit_fib_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - node_index, - VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); -} - static void ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, ip6_main_t * im, u32 fib_index, ip_interface_address_t * a) { ip_lookup_main_t * lm = &im->lookup_main; - ip_adjacency_t * adj; ip6_address_t * address = ip_interface_address_get_address (lm, a); - ip6_add_del_route_args_t x; - vnet_hw_interface_t * hw_if = vnet_get_sup_hw_interface (vnm, sw_if_index); - u32 classify_table_index; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_ADD - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = a->address_length; - x.n_add_adj = 0; - x.add_adj = 0; + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; a->neighbor_probe_adj_index = ~0; if (a->address_length < 128) - { - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); - ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a - lm->if_address_pool); - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - ip6_add_del_route (im, &x); - a->neighbor_probe_adj_index = x.adj_index; - } - - /* Add e.g. ::1/128 as local to this host. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &x.adj_index); + { + fib_node_index_t fei; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, /* No next-hop address */ + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + a->neighbor_probe_adj_index = fib_entry_get_adj(fei); + } - classify_table_index = ~0; + pfx.fp_len = 128; if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) - classify_table_index = lm->classify_table_index_by_sw_if_index [sw_if_index]; - if (classify_table_index != (u32) ~0) + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index [sw_if_index]; + if (classify_table_index != (u32) ~0) + { + dpo_id_t dpo = DPO_NULL; + + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP6, + classify_table_index)); + + fib_table_entry_special_dpo_add(fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, + &dpo); + dpo_reset(&dpo); + } + } + + fib_table_entry_update_one_path(fib_index, + &pfx, + 
FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + sw_if_index, + ~0, // invalid FIB index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); +} + +static void +ip6_del_interface_routes (ip6_main_t * im, + u32 fib_index, + ip6_address_t * address, + u32 address_length) +{ + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + if (pfx.fp_len < 128) { - adj->lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj->classify.table_index = classify_table_index; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); + } - else - adj->lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - - adj->if_address_index = a - lm->if_address_pool; - adj->rewrite_header.sw_if_index = sw_if_index; - adj->rewrite_header.max_l3_packet_bytes = hw_if->max_l3_packet_bytes[VLIB_RX]; - adj->rewrite_header.data_bytes = 0; - ip_call_add_del_adjacency_callbacks (lm, x.adj_index, /* is_del */ 0); - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + + pfx.fp_len = 128; + fib_table_entry_delete(fib_index, + &pfx, + FIB_SOURCE_INTERFACE); } -static void -ip6_del_interface_routes (ip6_main_t * im, u32 fib_index, - ip6_address_t * address, u32 address_length) +void +ip6_sw_interface_enable_disable (u32 sw_if_index, + u32 is_enable) { - ip6_add_del_route_args_t x; - - /* Add e.g. 1.0.0.0/8 as interface route (arp for Ethernet). 
*/ - x.table_index_or_table_id = fib_index; - x.flags = (IP6_ROUTE_FLAG_DEL - | IP6_ROUTE_FLAG_FIB_INDEX - | IP6_ROUTE_FLAG_NO_REDISTRIBUTE); - x.dst_address = address[0]; - x.dst_address_length = address_length; - x.adj_index = ~0; - x.n_add_adj = 0; - x.add_adj = 0; - - if (address_length < 128) + vlib_main_t * vm = vlib_get_main(); + ip6_main_t * im = &ip6_main; + ip_lookup_main_t * lm = &im->lookup_main; + u32 ci, cast; + u32 lookup_feature_index; + + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) { - /* Don't wipe out fe80::0/64 */ - if (address_length != 64 || - address[0].as_u64[0] != clib_net_to_host_u64(0xfe80000000000000ULL)) - ip6_add_del_route (im, &x); + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; } - x.dst_address_length = 128; - ip6_add_del_route (im, &x); + for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++) + { + ip_config_main_t * cm = &lm->feature_config_mains[cast]; + vnet_config_main_t * vcm = &cm->config_main; - ip6_delete_matching_routes (im, - fib_index, - IP6_ROUTE_FLAG_FIB_INDEX, - address, - address_length); -} + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; -typedef struct { - u32 sw_if_index; - ip6_address_t address; - u32 length; -} ip6_interface_address_t; + if (cast == VNET_IP_RX_UNICAST_FEAT) + lookup_feature_index = im->ip6_unicast_rx_feature_lookup; + else + lookup_feature_index = im->ip6_multicast_rx_feature_lookup; -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * new_address, - u32 new_length, - u32 redistribute, - u32 insert_routes, - u32 is_del); + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + 
lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); -static clib_error_t * -ip6_add_del_interface_address_internal (vlib_main_t * vm, - u32 sw_if_index, - ip6_address_t * address, - u32 address_length, - u32 redistribute, - u32 insert_routes, - u32 is_del) + cm->config_index_by_sw_if_index[sw_if_index] = ci; + } +} + +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 is_del) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -1174,17 +494,13 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, goto done; } - if (vnet_sw_interface_is_admin_up (vnm, sw_if_index) && insert_routes) - { - if (is_del) - ip6_del_interface_routes (im, ip6_af.fib_index, address, - address_length); - - else - ip6_add_interface_routes (vnm, sw_if_index, - im, ip6_af.fib_index, - pool_elt_at_index (lm->if_address_pool, if_address_index)); - } + if (is_del) + ip6_del_interface_routes (im, ip6_af.fib_index, address, + address_length); + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, if_address_index)); { ip6_add_del_interface_address_callback_t * cb; @@ -1201,18 +517,6 @@ ip6_add_del_interface_address_internal (vlib_main_t * vm, } clib_error_t * -ip6_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, - ip6_address_t * address, u32 address_length, - u32 is_del) -{ - return ip6_add_del_interface_address_internal - (vm, sw_if_index, address, address_length, - /* redistribute */ 1, - /* insert_routes */ 1, - is_del); -} - -clib_error_t * ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) @@ -1282,10 +586,16 @@ VNET_IP6_UNICAST_FEATURE_INIT (ip6_vpath, static) = { VNET_IP6_UNICAST_FEATURE_INIT (ip6_lookup, static) = { 
.node_name = "ip6-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_unicast_rx_feature_lookup, }; +VNET_IP6_UNICAST_FEATURE_INIT (ip6_drop, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /*last feature*/ + .feature_index = &ip6_main.ip6_unicast_rx_feature_drop, +}; + /* Built-in ip6 multicast rx feature path definition (none now) */ VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = { .node_name = "vpath-input-ip6", @@ -1295,10 +605,16 @@ VNET_IP6_MULTICAST_FEATURE_INIT (ip6_vpath_mc, static) = { VNET_IP6_MULTICAST_FEATURE_INIT (ip6_lookup, static) = { .node_name = "ip6-lookup", - .runs_before = 0, /* not before any other features */ + .runs_before = ORDER_CONSTRAINTS {"ip6-drop", 0}, .feature_index = &ip6_main.ip6_multicast_rx_feature_lookup, }; +VNET_IP6_MULTICAST_FEATURE_INIT (ip6_drop_mc, static) = { + .node_name = "ip6-drop", + .runs_before = 0, /* last feature */ + .feature_index = &ip6_main.ip6_multicast_rx_feature_drop, +}; + static char * rx_feature_start_nodes[] = {"ip6-input"}; @@ -1343,7 +659,7 @@ ip6_feature_init (vlib_main_t * vm, ip6_main_t * im) feature_start_nodes, feature_start_len, cast, - 0 /* is_ip4 */))) + VNET_L3_PACKET_TYPE_IP6))) return error; } return 0; @@ -1369,9 +685,9 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, ci = cm->config_index_by_sw_if_index[sw_if_index]; if (cast == VNET_IP_RX_UNICAST_FEAT) - feature_index = im->ip6_unicast_rx_feature_lookup; + feature_index = im->ip6_unicast_rx_feature_drop; else if (cast == VNET_IP_RX_MULTICAST_FEAT) - feature_index = im->ip6_multicast_rx_feature_lookup; + feature_index = im->ip6_multicast_rx_feature_drop; else feature_index = im->ip6_tx_feature_interface_output; @@ -1382,12 +698,14 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, /* config data */ 0, /* # bytes of config data */ 0); else - ci = vnet_config_del_feature (vm, vcm, - ci, - feature_index, - /* config data */ 0, - 
/* # bytes of config data */ 0); - + { + ci = vnet_config_del_feature (vm, vcm, ci, + feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index) + im->ip_enabled_by_sw_if_index[sw_if_index] = 0; + } cm->config_index_by_sw_if_index[sw_if_index] = ci; /* * note: do not update the tx feature count here. @@ -1403,7 +721,7 @@ ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 0); + return ip6_lookup_inline (vm, node, frame); } static u8 * format_ip6_lookup_trace (u8 * s, va_list * args); @@ -1419,27 +737,97 @@ VLIB_REGISTER_NODE (ip6_lookup_node) = { .next_nodes = IP6_LOOKUP_NEXT_NODES, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup) -static uword -ip6_indirect (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +always_inline uword +ip6_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { - return ip6_lookup_inline (vm, node, frame, /* is_indirect */ 1); -} + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace(vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t * p0; + u32 pi0, lbi0, hc0; + const ip6_header_t *ip0; + const dpo_id_t *dpo0; + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = 
vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = ip6_compute_flow_hash(ip0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets - 1)); -VLIB_REGISTER_NODE (ip6_indirect_node) = { - .function = ip6_indirect, - .name = "ip6-indirect", + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip6_load_balance_node) = { + .function = ip6_load_balance, + .name = "ip6-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip6-lookup", .format_trace = format_ip6_lookup_trace, .n_next_nodes = 0, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_indirect_node, ip6_indirect); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance) typedef struct { /* Adjacency taken. 
*/ @@ -1469,13 +857,10 @@ static u8 * format_ip6_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); - s = format (s, "fib %d adj-idx %d : %U flow hash: 0x%08x", - t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + s = format (s, "fib %d dpo-idx %d : flow hash: 0x%08x", + t->fib_index, t->adj_index, t->flow_hash); s = format(s, "\n%U%U", format_white_space, indent, format_ip6_header, t->packet_data); @@ -1489,16 +874,16 @@ static u8 * format_ip6_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip6_forward_next_trace_t * t = va_arg (*args, ip6_forward_next_trace_t *); vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im = &ip6_main; uword indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", t->fib_index, t->adj_index, format_ip_adjacency, - vnm, &im->lookup_main, t->adj_index, t->flow_hash); + vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, - vnm, &im->lookup_main, t->adj_index, + vnm, t->adj_index, t->packet_data, sizeof (t->packet_data)); return s; } @@ -1628,12 +1013,6 @@ ip6_punt (vlib_main_t * vm, vlib_frame_t * frame) { return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); } -static uword -ip6_miss (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_DST_LOOKUP_MISS); } - VLIB_REGISTER_NODE (ip6_drop_node,static) = { .function = ip6_drop, .name = "ip6-drop", @@ -1647,7 +1026,7 @@ VLIB_REGISTER_NODE (ip6_drop_node,static) = 
{ }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop) VLIB_REGISTER_NODE (ip6_punt_node,static) = { .function = ip6_punt, @@ -1662,22 +1041,7 @@ VLIB_REGISTER_NODE (ip6_punt_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt); - -VLIB_REGISTER_NODE (ip6_miss_node,static) = { - .function = ip6_miss, - .name = "ip6-miss", - .vector_size = sizeof (u32), - - .format_trace = format_ip6_forward_next_trace, - - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip6_miss_node, ip6_miss); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt) VLIB_REGISTER_NODE (ip6_multicast_node,static) = { .function = ip6_drop, @@ -1931,17 +1295,21 @@ ip6_local (vlib_main_t * vm, /* Drop packets from unroutable hosts. */ /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } - if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && type1 != IP_BUILTIN_PROTOCOL_ICMP) + if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && + type1 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip1->src_address)) { u32 src_adj_index1 = ip6_src_lookup_for_packet (im, p1, ip1); - error1 = (lm->miss_adj_index == src_adj_index1 + error1 = (ADJ_INDEX_INVALID == src_adj_index1 ? 
IP6_ERROR_SRC_LOOKUP_MISS : error1); } @@ -2018,10 +1386,12 @@ ip6_local (vlib_main_t * vm, : error0); /* If this is a neighbor solicitation (ICMP), skip source RPF check */ - if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && type0 != IP_BUILTIN_PROTOCOL_ICMP) + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast(&ip0->src_address)) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - error0 = (lm->miss_adj_index == src_adj_index0 + error0 = (ADJ_INDEX_INVALID == src_adj_index0 ? IP6_ERROR_SRC_LOOKUP_MISS : error0); } @@ -2057,7 +1427,7 @@ VLIB_REGISTER_NODE (ip6_local_node,static) = { }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local); +VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local) void ip6_register_protocol (u32 protocol, u32 node_index) { @@ -2082,9 +1452,10 @@ typedef enum { } ip6_discover_neighbor_error_t; static uword -ip6_discover_neighbor (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip6_discover_neighbor_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int is_glean) { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; @@ -2144,11 +1515,11 @@ ip6_discover_neighbor (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); - if (adj0->arp.next_hop.ip6.as_u64[0] || - adj0->arp.next_hop.ip6.as_u64[1]) { - ip0->dst_address.as_u64[0] = adj0->arp.next_hop.ip6.as_u64[0]; - ip0->dst_address.as_u64[1] = adj0->arp.next_hop.ip6.as_u64[1]; - } + if (!is_glean) + { + ip0->dst_address.as_u64[0] = adj0->sub_type.nbr.next_hop.ip6.as_u64[0]; + ip0->dst_address.as_u64[1] = adj0->sub_type.nbr.next_hop.ip6.as_u64[1]; + } a0 = hash_seeds[0]; b0 = hash_seeds[1]; @@ -2209,13 +1580,15 @@ ip6_discover_neighbor (vlib_main_t * vm, * Choose source address based on destination lookup * adjacency. 
*/ - if (ip6_src_address_for_packet (im, p0, &h0->ip.src_address, - sw_if_index0)) { - //There is no address on the interface + if (ip6_src_address_for_packet (lm, + sw_if_index0, + &h0->ip.src_address)) + { + /* There is no address on the interface */ p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; vlib_buffer_free(vm, &bi0, 1); continue; - } + } /* * Destination address is a solicited node multicast address. @@ -2262,6 +1635,22 @@ ip6_discover_neighbor (vlib_main_t * vm, return frame->n_vectors; } +static uword +ip6_discover_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 0)); +} + +static uword +ip6_glean (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline(vm, node, frame, 1)); +} + static char * ip6_discover_neighbor_error_strings[] = { [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] @@ -2287,6 +1676,23 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = { }, }; +VLIB_REGISTER_NODE (ip6_glean_node) = { + .function = ip6_glean, + .name = "ip6-glean", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + }, +}; + clib_error_t * ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) { @@ -2474,31 +1880,17 @@ ip6_rewrite_inline (vlib_main_t * vm, adj0 = ip_get_adjacency (lm, adj_index0); adj1 = ip_get_adjacency (lm, adj_index1); - if (rewrite_for_locally_received_packets) - { - /* - * If someone sends e.g. 
an icmp6 w/ src = dst = interface addr, - * we end up here with a local adjacency in hand - */ - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - if (PREDICT_FALSE(adj1->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error1 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - rw_len0 = adj0[0].rewrite_header.data_bytes; rw_len1 = adj1[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; vnet_buffer(p1)->ip.save_rewrite_length = rw_len1; - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, /* byte increment */ rw_len0); - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index1, /* packet increment */ 0, @@ -2621,13 +2013,6 @@ ip6_rewrite_inline (vlib_main_t * vm, } } - if (rewrite_for_locally_received_packets) - { - if (PREDICT_FALSE(adj0->lookup_next_index - == IP_LOOKUP_NEXT_LOCAL)) - error0 = IP6_ERROR_SPOOFED_LOCAL_PACKETS; - } - /* Guess we are only writing on simple Ethernet header. 
*/ vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); @@ -2635,7 +2020,7 @@ ip6_rewrite_inline (vlib_main_t * vm, rw_len0 = adj0[0].rewrite_header.data_bytes; vnet_buffer(p0)->ip.save_rewrite_length = rw_len0; - vlib_increment_combined_counter (&lm->adjacency_counters, + vlib_increment_combined_counter (&adjacency_counters, cpu_index, adj_index0, /* packet increment */ 0, @@ -2712,6 +2097,29 @@ ip6_rewrite_local (vlib_main_t * vm, /* rewrite_for_locally_received_packets */ 1); } +static uword +ip6_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip6_rewrite_inline (vm, node, frame, + /* rewrite_for_locally_received_packets */ 0); +} + +VLIB_REGISTER_NODE (ip6_midchain_node) = { + .function = ip6_midchain, + .name = "ip6-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + + .next_nodes = { + [IP6_REWRITE_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain) + VLIB_REGISTER_NODE (ip6_rewrite_node) = { .function = ip6_rewrite_transit, .name = "ip6-rewrite", @@ -3207,12 +2615,17 @@ ip6_lookup_init (vlib_main_t * vm) if (im->lookup_table_size == 0) im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; - BV(clib_bihash_init) (&im->ip6_lookup_table, "ip6 lookup table", + BV(clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash), + "ip6 FIB fwding table", im->lookup_table_nbuckets, im->lookup_table_size); - + BV(clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + "ip6 FIB non-fwding table", + im->lookup_table_nbuckets, + im->lookup_table_size); + /* Create FIB with index 0 and table id of 0. 
*/ - find_ip6_fib_by_table_index_or_id (im, /* table id */ 0, IP6_ROUTE_FLAG_TABLE_ID); + fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 0); { pg_node_t * pn; @@ -3282,17 +2695,14 @@ add_del_ip6_interface_table (vlib_main_t * vm, } { - ip6_main_t * im = &ip6_main; - ip6_fib_t * fib = - find_ip6_fib_by_table_index_or_id (im, table_id, IP6_ROUTE_FLAG_TABLE_ID); + u32 fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, + table_id); - if (fib) - { - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; } + done: return error; } @@ -3368,7 +2778,7 @@ int vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) if (p == 0) return -1; - fib = vec_elt_at_index (im6->fibs, p[0]); + fib = ip6_fib_get (p[0]); fib->flow_hash_config = flow_hash_config; return 1; diff --git a/vnet/vnet/ip/ip6_hop_by_hop.c b/vnet/vnet/ip/ip6_hop_by_hop.c index 2a037033d13..d927d279bff 100644 --- a/vnet/vnet/ip/ip6_hop_by_hop.c +++ b/vnet/vnet/ip/ip6_hop_by_hop.c @@ -24,6 +24,7 @@ #include <vppinfra/elog.h> #include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/fib/ip6_fib.h> char *ppc_state[] = { "None", "Encap", "Decap" }; @@ -935,48 +936,22 @@ ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width, u32 vrf_id, ip_lookup_main_t *lm = &im->lookup_main; ip_adjacency_t *adj; u32 fib_index; - u32 len, adj_index; - int i, rv; - uword *p; - BVT (clib_bihash_kv) kv, value; + u32 adj_index; if ((is_add + is_pop + is_none) != 1) return VNET_API_ERROR_INVALID_VALUE_2; /* Go find the adjacency we're supposed to tickle */ - p = hash_get (im->fib_index_by_table_id, vrf_id); + fib_index = ip6_fib_index_from_table_id (vrf_id); - if (p == 0) + if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_FIB; - fib_index = p[0]; + adj_index = ip6_fib_table_fwding_lookup (im, fib_index, addr); - len = vec_len 
(im->prefix_lengths_in_search_order); - - for (i = 0; i < len; i++) - { - int dst_address_length = im->prefix_lengths_in_search_order[i]; - ip6_address_t *mask = &im->fib_masks[dst_address_length]; - - if (dst_address_length != mask_width) - continue; - - kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; - kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; - kv.key[2] = ((u64) ((fib_index)) << 32) | dst_address_length; - - rv = - BV (clib_bihash_search_inline_2) (&im->ip6_lookup_table, &kv, &value); - if (rv == 0) - goto found; - - } - return VNET_API_ERROR_NO_SUCH_ENTRY; - -found: + ASSERT (!"Not an ADJ"); /* Got it, modify as directed... */ - adj_index = value.value; adj = ip_get_adjacency (lm, adj_index); /* Restore original lookup-next action */ @@ -1015,7 +990,7 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm, int is_pop = 0; int is_none = 0; u32 vrf_id = 0; - int rv; + // int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -1038,19 +1013,23 @@ ip6_set_ioam_destination_command_fn (vlib_main_t * vm, if (mask_width == ~0) return clib_error_return (0, "<address>/<mask-width> required"); - rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, - is_add, is_pop, is_none); + /* rv = ip6_ioam_set_destination (&addr, mask_width, vrf_id, */ + /* is_add, is_pop, is_none); */ - switch (rv) - { - case 0: - break; - default: - return clib_error_return (0, "ip6_ioam_set_destination returned %d", - rv); - } + /* switch (rv) */ + /* { */ + /* case 0: */ + /* break; */ + /* default: */ + /* return clib_error_return (0, "ip6_ioam_set_destination returned %d", */ + /* rv); */ + /* } */ - return 0; + /* return 0; */ + + return clib_error_return (0, + "ip6_ioam_set_destination Currnetly Disabled due to FIB2.0", + 1); } /* *INDENT-OFF* */ diff --git a/vnet/vnet/ip/ip6_neighbor.c b/vnet/vnet/ip/ip6_neighbor.c index a35f58a3039..11df776e1fc 100644 --- a/vnet/vnet/ip/ip6_neighbor.c +++ b/vnet/vnet/ip/ip6_neighbor.c @@ -19,6 +19,9 @@ #include 
<vnet/ethernet/ethernet.h> #include <vppinfra/mhash.h> #include <vppinfra/md5.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip6_fib.h> #if DPDK==1 #include <vnet/devices/dpdk/dpdk.h> @@ -38,9 +41,9 @@ typedef struct { u8 link_layer_address[8]; u16 flags; #define IP6_NEIGHBOR_FLAG_STATIC (1 << 0) -#define IP6_NEIGHBOR_FLAG_GLEAN (2 << 0) +#define IP6_NEIGHBOR_FLAG_DYNAMIC (2 << 0) u64 cpu_time_last_updated; - u32 *adjacencies; + adj_index_t adj_index; } ip6_neighbor_t; /* advertised prefix option */ @@ -121,9 +124,9 @@ typedef struct { u32 seed; u64 randomizer; int ref_count; - u32 all_nodes_adj_index; - u32 all_routers_adj_index; - u32 all_mldv2_routers_adj_index; + adj_index_t all_nodes_adj_index; + adj_index_t all_routers_adj_index; + adj_index_t all_mldv2_routers_adj_index; /* timing information */ #define DEF_MAX_RADV_INTERVAL 200 @@ -217,8 +220,8 @@ static u8 * format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) if (! n) return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags", "Link layer", "Interface"); - if (n->flags & IP6_NEIGHBOR_FLAG_GLEAN) - flags = format(flags, "G"); + if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC) + flags = format(flags, "D"); if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) flags = format(flags, "S"); @@ -330,6 +333,52 @@ static void set_unset_ip6_neighbor_rpc } #endif +static void +ip6_nd_mk_complete (ip6_neighbor_t * nbr) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = nbr->key.ip6_address, + }, + }; + ip6_main_t *im; + u32 fib_index; + + im = &ip6_main; + fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; + + /* only once please */ + if (ADJ_INDEX_INVALID == nbr->adj_index) + { + nbr->adj_index = + adj_nbr_add_or_lock_w_rewrite(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + &pfx.fp_addr, + nbr->key.sw_if_index, + nbr->link_layer_address); + ASSERT(ADJ_INDEX_INVALID != nbr->adj_index); + + fib_table_entry_update_one_path(fib_index, + 
&pfx, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &pfx.fp_addr, + nbr->key.sw_if_index, + ~0, + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); + } + else + { + adj_nbr_update_rewrite(nbr->adj_index, + nbr->link_layer_address); + } +} + int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -338,17 +387,12 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, uword n_bytes_link_layer_address, int is_static) { - vnet_main_t * vnm = vnet_get_main(); ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_neighbor_key_t k; ip6_neighbor_t * n = 0; - ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; int make_new_nd_cache_entry=1; uword * p; u32 next_index; - u32 adj_index; - ip_adjacency_t *existing_adj; pending_resolution_t * pr, * mc; #if DPDK > 0 @@ -376,77 +420,26 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, make_new_nd_cache_entry = 0; } - /* Note: always install the route. It might have been deleted */ - ip6_add_del_route_args_t args; - ip_adjacency_t adj; - - memset (&adj, 0, sizeof(adj)); - adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj.explicit_fib_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj.rewrite_header, - sizeof (adj.rewrite_data)); - - /* result of this lookup should be next-hop adjacency */ - adj_index = ip6_fib_lookup_with_table (im, im->fib_index_by_sw_if_index[sw_if_index], a); - existing_adj = ip_get_adjacency(lm, adj_index); - - if (existing_adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - existing_adj->arp.next_hop.ip6.as_u64[0] == a->as_u64[0] && - existing_adj->arp.next_hop.ip6.as_u64[1] == a->as_u64[1]) - { - u32 * ai; - u32 * adjs = 0; - - if (n) - adjs = vec_dup(n->adjacencies); - else - clib_warning ("ip6 neighbor n not set"); - - /* Update all adj assigned to this arp entry */ - vec_foreach(ai, adjs) - { - int i; - ip_adjacency_t * uadj = ip_get_adjacency(lm, 
*ai); - for (i = 0; i < uadj->n_adj; i++) - if (uadj[i].lookup_next_index == IP_LOOKUP_NEXT_ARP && - uadj[i].arp.next_hop.ip6.as_u64[0] == a->as_u64[0] && - uadj[i].arp.next_hop.ip6.as_u64[1] == a->as_u64[1]) - ip_update_adjacency (lm, *ai + i, &adj); - } - vec_free(adjs); - } - else - { - /* create new adj */ - args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a[0]; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = &adj; - args.n_add_adj = 1; - ip6_add_del_route (im, &args); - } - if (make_new_nd_cache_entry) { pool_get (nm->neighbor_pool, n); mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); n->key = k; + n->adj_index = ADJ_INDEX_INVALID; } /* Update time stamp and ethernet address. */ - clib_memcpy (n->link_layer_address, link_layer_address, n_bytes_link_layer_address); + clib_memcpy (n->link_layer_address, + link_layer_address, + n_bytes_link_layer_address); + n->cpu_time_last_updated = clib_cpu_time_now (); if (is_static) n->flags |= IP6_NEIGHBOR_FLAG_STATIC; + else + n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC; + + ip6_nd_mk_complete(n); /* Customer(s) waiting for this address to be resolved? 
*/ p = mhash_get (&nm->pending_resolutions_by_address, a); @@ -499,6 +492,40 @@ vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, return 0; } +static void +ip6_nd_mk_incomplete (ip6_neighbor_t *nbr) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = nbr->key.ip6_address, + }, + }; + u32 fib_index; + ip6_main_t *im; + + im = &ip6_main; + fib_index = im->fib_index_by_sw_if_index[nbr->key.sw_if_index]; + + /* + * revert the adj this ND entry sourced to incomplete + */ + adj_nbr_update_rewrite(nbr->adj_index, + NULL); + + /* + * remove the FIB entry the ND entry sourced + */ + fib_table_entry_delete(fib_index, &pfx, FIB_SOURCE_ADJ); + + /* + * Unlock the adj now that the ARP entry is no longer a source + */ + adj_unlock(nbr->adj_index); + nbr->adj_index = ADJ_INDEX_INVALID; +} + int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, u32 sw_if_index, @@ -509,8 +536,6 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_neighbor_key_t k; ip6_neighbor_t * n; - ip6_main_t * im = &ip6_main; - ip6_add_del_route_args_t args; uword * p; int rv = 0; @@ -537,73 +562,16 @@ vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, } n = pool_elt_at_index (nm->neighbor_pool, p[0]); + + ip6_nd_mk_incomplete(n); mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); pool_put (nm->neighbor_pool, n); - args.table_index_or_table_id = im->fib_index_by_sw_if_index[sw_if_index]; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_DEL - | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = a[0]; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = NULL; - args.n_add_adj = 0; - ip6_add_del_route (im, &args); out: vlib_worker_thread_barrier_release(vm); return rv; } - -u32 -vnet_ip6_neighbor_glean_add(u32 fib_index, void * next_hop_arg) -{ - ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; - ip6_address_t * 
next_hop = next_hop_arg; - ip_adjacency_t add_adj, *adj; - ip6_add_del_route_args_t args; - ip6_neighbor_t * n; - ip6_neighbor_key_t k; - u32 adj_index; - - adj_index = ip6_fib_lookup_with_table(im, fib_index, next_hop); - adj = ip_get_adjacency(lm, adj_index); - - if (!adj || adj->lookup_next_index != IP_LOOKUP_NEXT_ARP) - return ~0; - - if (adj->arp.next_hop.ip6.as_u64[0] || - adj->arp.next_hop.ip6.as_u64[1]) - return adj_index; - - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.ip6_address = *next_hop; - k.pad = 0; - if (mhash_get (&nm->neighbor_index_by_key, &k)) - return adj_index; - - pool_get (nm->neighbor_pool, n); - mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, /* old value */ 0); - n->key = k; - n->cpu_time_last_updated = clib_cpu_time_now (); - n->flags = IP6_NEIGHBOR_FLAG_GLEAN; - - memset(&args, 0, sizeof(args)); - memcpy(&add_adj, adj, sizeof(add_adj)); - add_adj.arp.next_hop.ip6 = *next_hop; /* install neighbor /128 route */ - args.table_index_or_table_id = fib_index; - args.flags = IP6_ROUTE_FLAG_FIB_INDEX | IP6_ROUTE_FLAG_ADD | IP6_ROUTE_FLAG_NEIGHBOR; - args.dst_address = *next_hop; - args.dst_address_length = 128; - args.adj_index = ~0; - args.add_adj = &add_adj; - args.n_add_adj = 1; - ip6_add_del_route (im, &args); - return ip6_fib_lookup_with_table (im, fib_index, next_hop); -} - #if DPDK > 0 static void ip6_neighbor_set_unset_rpc_callback ( ip6_neighbor_set_unset_rpc_args_t * a) @@ -728,7 +696,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, { vnet_main_t * vnm = vnet_get_main(); ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; uword n_packets = frame->n_vectors; u32 * from, * to_next; u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent; @@ -787,17 +754,25 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, if (!ip6_sadd_unspecified && !ip6_sadd_link_local) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - ip_adjacency_t * adj0 
= ip_get_adjacency (&im->lookup_main, src_adj_index0); - /* Allow all realistic-looking rewrite adjacencies to pass */ - ni0 = adj0->lookup_next_index; - is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && - (ni0 < IP6_LOOKUP_N_NEXT); + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); - error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 - || ! is_rewrite0) - ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK - : error0); + /* Allow all realistic-looking rewrite adjacencies to pass */ + ni0 = adj0->lookup_next_index; + is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && + (ni0 < IP6_LOOKUP_N_NEXT); + + error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || ! is_rewrite0) + ? ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK; + } } o0 = (void *) (h0 + 1); @@ -820,21 +795,28 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, if (is_solicitation && error0 == ICMP6_ERROR_NONE) { - /* Check that target address is one that we know about. */ - ip_interface_address_t * ia0; - ip6_address_fib_t ip6_af0; - void * oldheap; - - ip6_addr_fib_init (&ip6_af0, &h0->target_address, - vec_elt (im->fib_index_by_sw_if_index, - sw_if_index0)); - - /* Gross kludge, "thank you" MJ, don't even ask */ - oldheap = clib_mem_set_heap (clib_per_cpu_mheaps[0]); - ia0 = ip_get_interface_address (lm, &ip6_af0); - clib_mem_set_heap (oldheap); - error0 = ia0 == 0 ? - ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN : error0; + /* Check that target address is local to this router. 
*/ + fib_node_index_t fei; + u32 fib_index; + + fib_index = ip6_fib_table_get_index_for_sw_if_index(sw_if_index0); + + if (~0 == fib_index) + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + else + { + fei = ip6_fib_table_lookup_exact_match(fib_index, + &h0->target_address, + 128); + + if (FIB_NODE_INDEX_INVALID == fei || + !(FIB_ENTRY_FLAG_LOCAL & fib_entry_get_flags(fei))) + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + } } if (is_solicitation) @@ -1052,13 +1034,20 @@ icmp6_router_solicitation(vlib_main_t * vm, if (!is_unspecified && !is_link_local) { u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); - ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, src_adj_index0); - error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 - || (adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj0->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)) - ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK - : error0); + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t * adj0 = ip_get_adjacency (&im->lookup_main, + src_adj_index0); + + error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0 + ? ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK; + } } /* check for source LL option and process */ @@ -1472,8 +1461,7 @@ icmp6_router_advertisement(vlib_main_t * vm, /* check for MTU or prefix options or .. 
*/ u8 * opt_hdr = (u8 *)(h0 + 1); - while( options_len0 > 0 && - opt_hdr < p0->data + p0->current_data) + while( options_len0 > 0) { icmp6_neighbor_discovery_option_header_t *o0 = ( icmp6_neighbor_discovery_option_header_t *)opt_hdr; int opt_len = o0->n_data_u64s << 3; @@ -1606,11 +1594,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) { - ip6_main_t * im = &ip6_main; ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip_lookup_main_t * lm = &im->lookup_main; ip6_radv_t * a= 0; - u32 ri = ~0;; + u32 ri = ~0; vnet_sw_interface_t * sw_if0; ethernet_interface_t * eth_if0 = 0; @@ -1636,9 +1622,9 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, ip6_mldp_group_t *m; /* remove adjacencies */ - ip_del_adjacency (lm, a->all_nodes_adj_index); - ip_del_adjacency (lm, a->all_routers_adj_index); - ip_del_adjacency (lm, a->all_mldv2_routers_adj_index); + adj_unlock(a->all_nodes_adj_index); + adj_unlock(a->all_routers_adj_index); + adj_unlock(a->all_mldv2_routers_adj_index); /* clean up prefix_pool */ pool_foreach (p, a->adv_prefixes_pool, ({ @@ -1672,6 +1658,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, pool_put (nm->if_radv_pool, a); nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0; ri = ~0; + ip6_sw_interface_enable_disable(sw_if_index, 0); } } else @@ -1680,6 +1667,7 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, { vnet_hw_interface_t * hw_if0; + ip6_sw_interface_enable_disable(sw_if_index, 1); hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index); pool_get (nm->if_radv_pool, a); @@ -1702,10 +1690,11 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS; a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS; a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME; - a->seed = (u32) (clib_cpu_time_now() & 0xFFFFFFFF); + a->seed = random_default_seed(); /* for generating random interface ids */ - a->randomizer = random_u64 (&a->seed); + a->randomizer = 
0x1119194911191949; + a->randomizer = random_u64 ((u32 *)&a->randomizer); a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS ; a->initial_adverts_sent = a->initial_adverts_count-1; @@ -1727,66 +1716,34 @@ ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, mhash_init (&a->address_to_mldp_index, sizeof (uword), sizeof (ip6_address_t)); { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_hosts}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_nodes_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_nodes_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_all_routers}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_routers_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_routers_adj_index = adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } { - ip_adjacency_t *adj; u8 link_layer_address[6] = {0x33, 0x33, 0x00, 0x00, 0x00, IP6_MULTICAST_GROUP_ID_mldv2_routers}; - adj = ip_add_adjacency (lm, /* template */ 0, /* block size */ 1, - &a->all_mldv2_routers_adj_index); - - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj->if_address_index = ~0; - - vnet_rewrite_for_sw_interface - (vnm, - VNET_L3_PACKET_TYPE_IP6, - sw_if_index, - ip6_rewrite_node.index, - 
link_layer_address, - &adj->rewrite_header, - sizeof (adj->rewrite_data)); + a->all_mldv2_routers_adj_index = + adj_rewrite_add_and_lock(FIB_PROTOCOL_IP6, + FIB_LINK_IP6, + sw_if_index, + link_layer_address); } /* add multicast groups we will always be reporting */ @@ -2969,7 +2926,8 @@ enable_ip6_interface(vlib_main_t * vm, /* essentially "enables" ipv6 on this interface */ error = ip6_add_del_interface_address (vm, sw_if_index, - &link_local_address, 64 /* address width */, + &link_local_address, + 128 /* address width */, 0 /* is_del */); if(error) @@ -3255,87 +3213,10 @@ clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit) return 0; } - -static void -ip6_neighbor_entry_del_adj(ip6_neighbor_t *n, u32 adj_index) -{ - int done = 0; - int i; - while (!done) - { - vec_foreach_index(i, n->adjacencies) - if (vec_elt(n->adjacencies, i) == adj_index) - { - vec_del1(n->adjacencies, i); - continue; - } - done = 1; - } -} - -static void -ip6_neighbor_entry_add_adj(ip6_neighbor_t *n, u32 adj_index) -{ - int i; - vec_foreach_index(i, n->adjacencies) - if (vec_elt(n->adjacencies, i) == adj_index) - return; - vec_add1(n->adjacencies, adj_index); -} - -static void -ip6_neighbor_add_del_adj_cb (struct ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * adj, - u32 is_del) -{ - ip6_neighbor_main_t * nm = &ip6_neighbor_main; - ip6_neighbor_key_t k; - ip6_neighbor_t *n = 0; - uword * p; - u32 ai; - - for(ai = adj->heap_handle; ai < adj->heap_handle + adj->n_adj ; ai++) - { - adj = ip_get_adjacency (lm, ai); - if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP && - (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1])) - { - k.sw_if_index = adj->rewrite_header.sw_if_index; - k.ip6_address.as_u64[0] = adj->arp.next_hop.ip6.as_u64[0]; - k.ip6_address.as_u64[1] = adj->arp.next_hop.ip6.as_u64[1]; - k.pad = 0; - p = mhash_get (&nm->neighbor_index_by_key, &k); - if (p) - n = pool_elt_at_index (nm->neighbor_pool, p[0]); - } - else - continue; - - if (is_del) - { - 
if (!n) - clib_warning("Adjacency contains unknown ND next hop %U (del)", - format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6); - else - ip6_neighbor_entry_del_adj(n, adj->heap_handle); - } - else /* add */ - { - if (!n) - clib_warning("Adjacency contains unknown ND next hop %U (add)", - format_ip46_address, &adj->arp.next_hop, IP46_TYPE_IP6); - else - ip6_neighbor_entry_add_adj(n, adj->heap_handle); - } - } -} - static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) { ip6_neighbor_main_t * nm = &ip6_neighbor_main; ip6_main_t * im = &ip6_main; - ip_lookup_main_t * lm = &im->lookup_main; mhash_init (&nm->neighbor_index_by_key, /* value size */ sizeof (uword), @@ -3375,8 +3256,6 @@ static clib_error_t * ip6_neighbor_init (vlib_main_t * vm) (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */); #endif - ip_register_add_del_adjacency_callback(lm, ip6_neighbor_add_del_adj_cb); - return 0; } @@ -3593,5 +3472,3 @@ int vnet_ip6_nd_term (vlib_main_t * vm, return 0; } - - diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h index c83e5764803..29fa4a4e128 100644 --- a/vnet/vnet/ip/ip6_packet.h +++ b/vnet/vnet/ip/ip6_packet.h @@ -70,6 +70,8 @@ typedef CLIB_PACKED (union { #define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0) #define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0]) #define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0) +#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1))) +#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0)) always_inline void ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address, @@ -303,6 +305,22 @@ ip6_next_header (ip6_header_t * i) { return (void *) (i + 1); } always_inline void +ip6_copy_header (ip6_header_t * dst, + const ip6_header_t *src) +{ + dst->ip_version_traffic_class_and_flow_label = + 
src->ip_version_traffic_class_and_flow_label; + dst->payload_length = src->payload_length; + dst->protocol = src->protocol; + dst->hop_limit = src->hop_limit; + + dst->src_address.as_uword[0] = src->src_address.as_uword[0]; + dst->src_address.as_uword[1] = src->src_address.as_uword[1]; + dst->dst_address.as_uword[0] = src->dst_address.as_uword[0]; + dst->dst_address.as_uword[1] = src->dst_address.as_uword[1]; +} + +always_inline void ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) { { diff --git a/vnet/vnet/ip/ip_feature_registration.c b/vnet/vnet/ip/ip_feature_registration.c index 9505a09e20d..b96f81bd58d 100644 --- a/vnet/vnet/ip/ip_feature_registration.c +++ b/vnet/vnet/ip/ip_feature_registration.c @@ -15,6 +15,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> /** \file @@ -131,7 +132,7 @@ ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, int is_ip4) + vnet_cast_t cast, vnet_l3_packet_type_t proto) { uword *index_by_name; uword *reg_by_index; @@ -155,33 +156,43 @@ ip_feature_init_cast (vlib_main_t * vm, u8 **keys_to_delete = 0; ip4_main_t *im4 = &ip4_main; ip6_main_t *im6 = &ip6_main; + mpls_main_t *mm = &mpls_main; index_by_name = hash_create_string (0, sizeof (uword)); reg_by_index = hash_create (0, sizeof (uword)); if (cast == VNET_IP_RX_UNICAST_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_uc_feature; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) first_reg = im6->next_uc_feature; + else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) + first_reg = mm->next_feature; + else + return clib_error_return (0, + "protocol %d cast %d unsupport for features", + proto, cast); } else if (cast == VNET_IP_RX_MULTICAST_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_mc_feature; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) first_reg = im6->next_mc_feature; 
+ else + return clib_error_return (0, + "protocol %d cast %d unsupport for features", + proto, cast); } else if (cast == VNET_IP_TX_FEAT) { - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) first_reg = im4->next_tx_feature; else first_reg = im6->next_tx_feature; } - this_reg = first_reg; /* pass 1, collect feature node names, construct a before b pairs */ @@ -281,8 +292,7 @@ again: /* see if we got a partial order... */ if (vec_len (result) != n_features) return clib_error_return - (0, "ip%s_feature_init_cast (cast=%d), no partial order!", - is_ip4 ? "4" : "6", cast); + (0, "%d feature_init_cast (cast=%d), no partial order!", proto, cast); /* * We win. @@ -308,10 +318,12 @@ again: feature_nodes, vec_len (feature_nodes)); /* Save a copy for show command */ - if (is_ip4) + if (proto == VNET_L3_PACKET_TYPE_IP4) im4->feature_nodes[cast] = feature_nodes; - else + else if (proto == VNET_L3_PACKET_TYPE_IP6) im6->feature_nodes[cast] = feature_nodes; + else if (proto == VNET_L3_PACKET_TYPE_MPLS_UNICAST) + mm->feature_nodes = feature_nodes; /* Finally, clean up all the shit we allocated */ /* *INDENT-OFF* */ diff --git a/vnet/vnet/ip/ip_feature_registration.h b/vnet/vnet/ip/ip_feature_registration.h index 2d9a15bcf2c..95ee78ad8fe 100644 --- a/vnet/vnet/ip/ip_feature_registration.h +++ b/vnet/vnet/ip/ip_feature_registration.h @@ -39,7 +39,8 @@ clib_error_t *ip_feature_init_cast (vlib_main_t * vm, vnet_config_main_t * vcm, char **feature_start_nodes, int num_feature_start_nodes, - vnet_cast_t cast, int is_ip4); + vnet_cast_t cast, + vnet_l3_packet_type_t proto); #endif /* included_ip_feature_registration_h */ diff --git a/vnet/vnet/ip/ip_source_and_port_range_check.h b/vnet/vnet/ip/ip_source_and_port_range_check.h index 5b49aabd849..fefe5ff1fd9 100644 --- a/vnet/vnet/ip/ip_source_and_port_range_check.h +++ b/vnet/vnet/ip/ip_source_and_port_range_check.h @@ -19,9 +19,6 @@ typedef struct { - u32 ranges_per_adjacency; - u32 special_adjacency_format_function_index; - /* 
convenience */ vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -60,6 +57,69 @@ typedef struct u16x8vec_t hi; } protocol_port_range_t; +/** + * @brief The number of supported ranges per-data path object. + * If more ranges are required, bump this number. + */ +#define N_PORT_RANGES_PER_DPO 64 +#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2) +#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK) + +/** + * @brief + * The object that is in the data-path to perform the check. + * + * Some trade-offs here; memory vs performance. + * + * performance: + * the principle factor is d-cache line misses/hits. + * so we want the data layout to minimise the d-cache misses. This + * means not following dependent reads. i.e. not doing + * + * struct B { + * u16 n_ranges; + * range_t *ragnes; // vector of ranges. + * } + * + * so to read ranges[0] we would first d-cache miss on the address + * of the object of type B, for which we would need to wait before we + * can get the address of B->ranges. + * So this layout is better: + * + * struct B { + * u16 n_ranges; + * range_t ragnes[N]; + * } + * + * memory: + * the latter layout above is more memory hungry. And N needs to be: + * 1 - sized for the maximum required + * 2 - fixed, so that objects of type B can be pool allocated and so + * 'get'-able using an index. + * An option over fixed might be to allocate contiguous chunk from + * the pool (like we used to do for multi-path adjs). + */ +typedef struct protocol_port_range_dpo_t_ +{ + /** + * The number of blocks from the 'block' array below + * that have rnages configured. We keep this count so that in the data-path + * we can limit the loop to be only over the blocks we need + */ + u16 n_used_blocks; + + /** + * The total number of free ranges from all blocks. + * Used to prevent overrun of the ranges available. 
+ */ + u16 n_free_ranges; + + /** + * the fixed size array of ranges + */ + protocol_port_range_t blocks[N_BLOCKS_PER_DPO]; +} protocol_port_range_dpo_t; + int ip4_source_and_port_range_check_add_del (ip4_address_t * address, u32 length, u32 vrf_id, diff --git a/vnet/vnet/ip/lookup.c b/vnet/vnet/ip/lookup.c index 47138071639..a695ef765a0 100644 --- a/vnet/vnet/ip/lookup.c +++ b/vnet/vnet/ip/lookup.c @@ -37,728 +37,16 @@ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include <vppinfra/math.h> /* for fabs */ #include <vnet/ip/ip.h> -#include <vnet/ip/adj_alloc.h> - -static void -ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index); - -always_inline void -ip_poison_adjacencies (ip_adjacency_t * adj, uword n_adj) -{ - if (CLIB_DEBUG > 0) - { - u32 save_handle = adj->heap_handle;; - u32 save_n_adj = adj->n_adj; - - memset (adj, 0xfe, n_adj * sizeof (adj[0])); - - adj->heap_handle = save_handle; - adj->n_adj = save_n_adj; - } -} - -static void -ip_share_adjacency(ip_lookup_main_t * lm, u32 adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - uword * p; - u32 old_ai; - uword signature = vnet_ip_adjacency_signature (adj); - - p = hash_get (lm->adj_index_by_signature, signature); - /* Hash collision? */ - if (p) - { - /* Save the adj index, p[0] will be toast after the unset! 
*/ - old_ai = p[0]; - hash_unset (lm->adj_index_by_signature, signature); - hash_set (lm->adj_index_by_signature, signature, adj_index); - adj->next_adj_with_signature = old_ai; - } - else - { - adj->next_adj_with_signature = 0; - hash_set (lm->adj_index_by_signature, signature, adj_index); - } -} - -static void -ip_unshare_adjacency(ip_lookup_main_t * lm, u32 adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - uword signature; - uword * p; - u32 this_ai; - ip_adjacency_t * this_adj, * prev_adj = 0; - - signature = vnet_ip_adjacency_signature (adj); - p = hash_get (lm->adj_index_by_signature, signature); - if (p == 0) - return; - - this_ai = p[0]; - /* At the top of the signature chain (likely)? */ - if (this_ai == adj_index) - { - if (adj->next_adj_with_signature == 0) - { - hash_unset (lm->adj_index_by_signature, signature); - return; - } - else - { - this_adj = ip_get_adjacency (lm, adj->next_adj_with_signature); - hash_unset (lm->adj_index_by_signature, signature); - hash_set (lm->adj_index_by_signature, signature, - this_adj->heap_handle); - } - } - else /* walk signature chain */ - { - this_adj = ip_get_adjacency (lm, this_ai); - while (this_adj != adj) - { - prev_adj = this_adj; - this_adj = ip_get_adjacency - (lm, this_adj->next_adj_with_signature); - /* - * This can happen when creating the first multipath adj of a set - * We end up looking at the miss adjacency (handle==0). 
- */ - if (this_adj->heap_handle == 0) - return; - } - prev_adj->next_adj_with_signature = this_adj->next_adj_with_signature; - } -} - -int ip_register_adjacency(vlib_main_t *vm, - u8 is_ip4, - ip_adj_register_t *reg) -{ - ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main; - vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) ((is_ip4)?"ip4-lookup":"ip6-lookup")); - vlib_node_t *next_node = vlib_get_node_by_name(vm, (u8 *) reg->node_name); - *reg->next_index = vlib_node_add_next (vm, node->index, next_node->index); - vec_validate(lm->registered_adjacencies, *reg->next_index); - lm->registered_adjacencies[*reg->next_index] = *reg; - return 0; -} - -int ip_init_registered_adjacencies(u8 is_ip4) -{ - vlib_main_t *vm = vlib_get_main(); - ip_lookup_main_t *lm = (is_ip4)?&ip4_main.lookup_main:&ip6_main.lookup_main; - ip_adj_register_t *reg = lm->registered_adjacencies; - lm->registered_adjacencies = 0; //Init vector - int rv; - while (reg) { - if((rv = ip_register_adjacency(vm, is_ip4, reg))) - return rv; - reg = reg->next; - } - return 0; -} - -/* Create new block of given number of contiguous adjacencies. 
*/ -ip_adjacency_t * -ip_add_adjacency (ip_lookup_main_t * lm, - ip_adjacency_t * copy_adj, - u32 n_adj, - u32 * adj_index_return) -{ - ip_adjacency_t * adj; - u32 ai, i, handle; - - /* See if we know enough to attempt to share an existing adjacency */ - if (copy_adj && n_adj == 1) - { - uword signature; - uword * p; - - switch (copy_adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_DROP: - if (lm->drop_adj_index) - { - adj = ip_get_adjacency (lm, lm->drop_adj_index); - *adj_index_return = lm->drop_adj_index; - return (adj); - } - break; - - case IP_LOOKUP_NEXT_LOCAL: - if (lm->local_adj_index) - { - adj = ip_get_adjacency (lm, lm->local_adj_index); - *adj_index_return = lm->local_adj_index; - return (adj); - } - default: - break; - } - - signature = vnet_ip_adjacency_signature (copy_adj); - p = hash_get (lm->adj_index_by_signature, signature); - if (p) - { - adj = vec_elt_at_index (lm->adjacency_heap, p[0]); - while (1) - { - if (vnet_ip_adjacency_share_compare (adj, copy_adj)) - { - adj->share_count++; - *adj_index_return = p[0]; - return adj; - } - if (adj->next_adj_with_signature == 0) - break; - adj = vec_elt_at_index (lm->adjacency_heap, - adj->next_adj_with_signature); - } - } - } - - lm->adjacency_heap = aa_alloc (lm->adjacency_heap, &adj, n_adj); - handle = ai = adj->heap_handle; - - ip_poison_adjacencies (adj, n_adj); - - /* Validate adjacency counters. */ - vlib_validate_combined_counter (&lm->adjacency_counters, ai + n_adj - 1); - - for (i = 0; i < n_adj; i++) - { - /* Make sure certain fields are always initialized. 
*/ - adj[i].rewrite_header.sw_if_index = ~0; - adj[i].explicit_fib_index = ~0; - adj[i].mcast_group_index = ~0; - adj[i].classify.table_index = ~0; - adj[i].saved_lookup_next_index = 0; - adj[i].special_adjacency_format_function_index = 0; - - if (copy_adj) - adj[i] = copy_adj[i]; - - adj[i].heap_handle = handle; - adj[i].n_adj = n_adj; - adj[i].share_count = 0; - adj[i].next_adj_with_signature = 0; - - /* Zero possibly stale counters for re-used adjacencies. */ - vlib_zero_combined_counter (&lm->adjacency_counters, ai + i); - } - - /* Set up to share the adj later */ - if (copy_adj && n_adj == 1) - ip_share_adjacency(lm, ai); - - *adj_index_return = ai; - return adj; -} - -void -ip_update_adjacency (ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * copy_adj) -{ - ip_adjacency_t * adj = ip_get_adjacency(lm, adj_index); - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); - ip_unshare_adjacency(lm, adj_index); - - /* temporary redirect to drop while updating rewrite data */ - adj->lookup_next_index = IP_LOOKUP_NEXT_ARP; - CLIB_MEMORY_BARRIER(); - - clib_memcpy (&adj->rewrite_header, ©_adj->rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE); - adj->lookup_next_index = copy_adj->lookup_next_index; - ip_share_adjacency(lm, adj_index); - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); -} - -static void ip_del_adjacency2 (ip_lookup_main_t * lm, u32 adj_index, u32 delete_multipath_adjacency) -{ - ip_adjacency_t * adj; - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 1); - - adj = ip_get_adjacency (lm, adj_index); - - /* Special-case miss, local, drop adjs */ - if (adj_index < 3) - return; - - if (adj->n_adj == 1) - { - if (adj->share_count > 0) - { - adj->share_count --; - return; - } - - ip_unshare_adjacency(lm, adj_index); - } - - if (delete_multipath_adjacency) - ip_multipath_del_adjacency (lm, adj_index); - - ip_poison_adjacencies (adj, adj->n_adj); - - aa_free (lm->adjacency_heap, adj); -} - -void 
ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index) -{ ip_del_adjacency2 (lm, adj_index, /* delete_multipath_adjacency */ 1); } - -static int -next_hop_sort_by_weight (ip_multipath_next_hop_t * n1, - ip_multipath_next_hop_t * n2) -{ - int cmp = (int) n1->weight - (int) n2->weight; - return (cmp == 0 - ? (int) n1->next_hop_adj_index - (int) n2->next_hop_adj_index - : (cmp > 0 ? +1 : -1)); -} - -/* Given next hop vector is over-written with normalized one with sorted weights and - with weights corresponding to the number of adjacencies for each next hop. - Returns number of adjacencies in block. */ -static u32 ip_multipath_normalize_next_hops (ip_lookup_main_t * lm, - ip_multipath_next_hop_t * raw_next_hops, - ip_multipath_next_hop_t ** normalized_next_hops) -{ - ip_multipath_next_hop_t * nhs; - uword n_nhs, n_adj, n_adj_left, i; - f64 sum_weight, norm, error; - - n_nhs = vec_len (raw_next_hops); - ASSERT (n_nhs > 0); - if (n_nhs == 0) - return 0; - - /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */ - nhs = *normalized_next_hops; - vec_validate (nhs, 2*n_nhs - 1); - - /* Fast path: 1 next hop in block. */ - n_adj = n_nhs; - if (n_nhs == 1) - { - nhs[0] = raw_next_hops[0]; - nhs[0].weight = 1; - _vec_len (nhs) = 1; - goto done; - } - - else if (n_nhs == 2) - { - int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0; - - /* Fast sort. */ - nhs[0] = raw_next_hops[cmp]; - nhs[1] = raw_next_hops[cmp ^ 1]; - - /* Fast path: equal cost multipath with 2 next hops. */ - if (nhs[0].weight == nhs[1].weight) - { - nhs[0].weight = nhs[1].weight = 1; - _vec_len (nhs) = 2; - goto done; - } - } - else - { - clib_memcpy (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0])); - qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight); - } - - /* Find total weight to normalize weights. 
*/ - sum_weight = 0; - for (i = 0; i < n_nhs; i++) - sum_weight += nhs[i].weight; - - /* In the unlikely case that all weights are given as 0, set them all to 1. */ - if (sum_weight == 0) - { - for (i = 0; i < n_nhs; i++) - nhs[i].weight = 1; - sum_weight = n_nhs; - } - - /* Save copies of all next hop weights to avoid being overwritten in loop below. */ - for (i = 0; i < n_nhs; i++) - nhs[n_nhs + i].weight = nhs[i].weight; - - /* Try larger and larger power of 2 sized adjacency blocks until we - find one where traffic flows to within 1% of specified weights. */ - for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) - { - error = 0; - - norm = n_adj / sum_weight; - n_adj_left = n_adj; - for (i = 0; i < n_nhs; i++) - { - f64 nf = nhs[n_nhs + i].weight * norm; /* use saved weights */ - word n = flt_round_nearest (nf); - - n = n > n_adj_left ? n_adj_left : n; - n_adj_left -= n; - error += fabs (nf - n); - nhs[i].weight = n; - } - - nhs[0].weight += n_adj_left; - - /* Less than 5% average error per adjacency with this size adjacency block? */ - if (error <= lm->multipath_next_hop_error_tolerance*n_adj) - { - /* Truncate any next hops with zero weight. */ - _vec_len (nhs) = i; - break; - } - } - - done: - /* Save vector for next call. 
*/ - *normalized_next_hops = nhs; - return n_adj; -} - -always_inline uword -ip_next_hop_hash_key_from_handle (uword handle) -{ return 1 + 2*handle; } - -always_inline uword -ip_next_hop_hash_key_is_heap_handle (uword k) -{ return k & 1; } - -always_inline uword -ip_next_hop_hash_key_get_heap_handle (uword k) -{ - ASSERT (ip_next_hop_hash_key_is_heap_handle (k)); - return k / 2; -} - -static u32 -ip_multipath_adjacency_get (ip_lookup_main_t * lm, - ip_multipath_next_hop_t * raw_next_hops, - uword create_if_non_existent) -{ - uword * p; - u32 i, j, n_adj, adj_index, adj_heap_handle; - ip_adjacency_t * adj, * copy_adj; - ip_multipath_next_hop_t * nh, * nhs; - ip_multipath_adjacency_t * madj; - - n_adj = ip_multipath_normalize_next_hops (lm, raw_next_hops, &lm->next_hop_hash_lookup_key_normalized); - nhs = lm->next_hop_hash_lookup_key_normalized; - - /* Basic sanity. */ - ASSERT (n_adj >= vec_len (raw_next_hops)); - - /* Use normalized next hops to see if we've seen a block equivalent to this one before. */ - p = hash_get_mem (lm->multipath_adjacency_by_next_hops, nhs); - if (p) - return p[0]; - - if (! create_if_non_existent) - return 0; - - adj = ip_add_adjacency (lm, /* copy_adj */ 0, n_adj, &adj_index); - adj_heap_handle = adj[0].heap_handle; - - /* Fill in adjacencies in block based on corresponding next hop adjacencies. */ - i = 0; - vec_foreach (nh, nhs) - { - copy_adj = ip_get_adjacency (lm, nh->next_hop_adj_index); - for (j = 0; j < nh->weight; j++) - { - adj[i] = copy_adj[0]; - adj[i].heap_handle = adj_heap_handle; - adj[i].n_adj = n_adj; - i++; - } - } - - /* All adjacencies should have been initialized. */ - ASSERT (i == n_adj); - - vec_validate (lm->multipath_adjacencies, adj_heap_handle); - madj = vec_elt_at_index (lm->multipath_adjacencies, adj_heap_handle); - - madj->adj_index = adj_index; - madj->n_adj_in_block = n_adj; - madj->reference_count = 0; /* caller will set to one. 
*/ - - madj->normalized_next_hops.count = vec_len (nhs); - madj->normalized_next_hops.heap_offset - = heap_alloc (lm->next_hop_heap, vec_len (nhs), - madj->normalized_next_hops.heap_handle); - clib_memcpy (lm->next_hop_heap + madj->normalized_next_hops.heap_offset, - nhs, vec_bytes (nhs)); - - hash_set (lm->multipath_adjacency_by_next_hops, - ip_next_hop_hash_key_from_handle (madj->normalized_next_hops.heap_handle), - madj - lm->multipath_adjacencies); - - madj->unnormalized_next_hops.count = vec_len (raw_next_hops); - madj->unnormalized_next_hops.heap_offset - = heap_alloc (lm->next_hop_heap, vec_len (raw_next_hops), - madj->unnormalized_next_hops.heap_handle); - clib_memcpy (lm->next_hop_heap + madj->unnormalized_next_hops.heap_offset, - raw_next_hops, vec_bytes (raw_next_hops)); - - ip_call_add_del_adjacency_callbacks (lm, adj_index, /* is_del */ 0); - - return adj_heap_handle; -} - -/* Returns 0 for next hop not found. */ -u32 -ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, - u32 is_del, - u32 old_mp_adj_index, - u32 next_hop_adj_index, - u32 next_hop_weight, - u32 * new_mp_adj_index) -{ - ip_multipath_adjacency_t * mp_old, * mp_new; - ip_multipath_next_hop_t * nh, * nhs, * hash_nhs; - u32 n_nhs, i_nh; - - mp_new = mp_old = 0; - n_nhs = 0; - i_nh = 0; - nhs = 0; - - /* If old adj is not multipath, we need to "convert" it by calling this - * function recursively */ - if (old_mp_adj_index != ~0 && !ip_adjacency_is_multipath(lm, old_mp_adj_index)) - { - ip_multipath_adjacency_add_del_next_hop(lm, /* is_del */ 0, - /* old_mp_adj_index */ ~0, - /* nh_adj_index */ old_mp_adj_index, - /* weight * */ 1, - &old_mp_adj_index); - } - - /* If old multipath adjacency is valid, find requested next hop. 
*/ - if (old_mp_adj_index < vec_len (lm->multipath_adjacencies) - && lm->multipath_adjacencies[old_mp_adj_index].normalized_next_hops.count > 0) - { - mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - - nhs = vec_elt_at_index (lm->next_hop_heap, mp_old->unnormalized_next_hops.heap_offset); - n_nhs = mp_old->unnormalized_next_hops.count; - - /* Linear search: ok since n_next_hops is small. */ - for (i_nh = 0; i_nh < n_nhs; i_nh++) - if (nhs[i_nh].next_hop_adj_index == next_hop_adj_index) - break; - - /* Given next hop not found. */ - if (i_nh >= n_nhs && is_del) - return 0; - } - - hash_nhs = lm->next_hop_hash_lookup_key; - if (hash_nhs) - _vec_len (hash_nhs) = 0; - - if (is_del) - { - if (n_nhs > 1) - { - /* Prepare lookup key for multipath with target next hop deleted. */ - if (i_nh > 0) - vec_add (hash_nhs, nhs + 0, i_nh); - if (i_nh + 1 < n_nhs) - vec_add (hash_nhs, nhs + i_nh + 1, n_nhs - (i_nh + 1)); - } - } - else /* it's an add. */ - { - /* If next hop is already there with the same weight, we have nothing to do. */ - if (i_nh < n_nhs && nhs[i_nh].weight == next_hop_weight) - { - new_mp_adj_index[0] = ~0; - goto done; - } - - /* Copy old next hops to lookup key vector. */ - if (n_nhs > 0) - vec_add (hash_nhs, nhs, n_nhs); - - if (i_nh < n_nhs) - { - /* Change weight of existing next hop. */ - nh = vec_elt_at_index (hash_nhs, i_nh); - } - else - { - /* Add a new next hop. */ - vec_add2 (hash_nhs, nh, 1); - nh->next_hop_adj_index = next_hop_adj_index; - } - - /* Set weight for added or old next hop. */ - nh->weight = next_hop_weight; - } - - if (vec_len (hash_nhs) > 0) - { - u32 tmp = ip_multipath_adjacency_get (lm, hash_nhs, - /* create_if_non_existent */ 1); - if (tmp != ~0) - mp_new = vec_elt_at_index (lm->multipath_adjacencies, tmp); - - /* Fetch again since pool may have moved. */ - if (mp_old) - mp_old = vec_elt_at_index (lm->multipath_adjacencies, old_mp_adj_index); - } - - new_mp_adj_index[0] = mp_new ? 
mp_new - lm->multipath_adjacencies : ~0; - - if (mp_new != mp_old) - { - if (mp_old) - { - ASSERT (mp_old->reference_count > 0); - mp_old->reference_count -= 1; - } - if (mp_new) - mp_new->reference_count += 1; - } - - if (mp_old && mp_old->reference_count == 0) - ip_multipath_adjacency_free (lm, mp_old); - - done: - /* Save key vector next call. */ - lm->next_hop_hash_lookup_key = hash_nhs; - - return 1; -} - -static void -ip_multipath_del_adjacency (ip_lookup_main_t * lm, u32 del_adj_index) -{ - ip_adjacency_t * adj = ip_get_adjacency (lm, del_adj_index); - ip_multipath_adjacency_t * madj, * new_madj; - ip_multipath_next_hop_t * nhs, * hash_nhs; - u32 i, n_nhs, madj_index, new_madj_index; - - if (adj->heap_handle >= vec_len (lm->multipath_adjacencies)) - return; - - vec_validate (lm->adjacency_remap_table, vec_len (lm->adjacency_heap) - 1); - - for (madj_index = 0; madj_index < vec_len (lm->multipath_adjacencies); madj_index++) - { - madj = vec_elt_at_index (lm->multipath_adjacencies, madj_index); - if (madj->n_adj_in_block == 0) - continue; - - nhs = heap_elt_at_index (lm->next_hop_heap, madj->unnormalized_next_hops.heap_offset); - n_nhs = madj->unnormalized_next_hops.count; - for (i = 0; i < n_nhs; i++) - if (nhs[i].next_hop_adj_index == del_adj_index) - break; - - /* del_adj_index not found in unnormalized_next_hops? We're done. */ - if (i >= n_nhs) - continue; - - new_madj = 0; - if (n_nhs > 1) - { - hash_nhs = lm->next_hop_hash_lookup_key; - if (hash_nhs) - _vec_len (hash_nhs) = 0; - if (i > 0) - vec_add (hash_nhs, nhs + 0, i); - if (i + 1 < n_nhs) - vec_add (hash_nhs, nhs + i + 1, n_nhs - (i + 1)); - - new_madj_index = ip_multipath_adjacency_get (lm, hash_nhs, /* create_if_non_existent */ 1); - - lm->next_hop_hash_lookup_key = hash_nhs; - - if (new_madj_index == madj_index) - continue; - - new_madj = vec_elt_at_index (lm->multipath_adjacencies, new_madj_index); - } - - lm->adjacency_remap_table[madj->adj_index] = new_madj ? 
1 + new_madj->adj_index : ~0; - lm->n_adjacency_remaps += 1; - ip_multipath_adjacency_free (lm, madj); - } -} - -void -ip_multipath_adjacency_free (ip_lookup_main_t * lm, - ip_multipath_adjacency_t * a) -{ - hash_unset (lm->multipath_adjacency_by_next_hops, - ip_next_hop_hash_key_from_handle (a->normalized_next_hops.heap_handle)); - heap_dealloc (lm->next_hop_heap, a->normalized_next_hops.heap_handle); - heap_dealloc (lm->next_hop_heap, a->unnormalized_next_hops.heap_handle); - - ip_del_adjacency2 (lm, a->adj_index, a->reference_count == 0); - memset (a, 0, sizeof (a[0])); -} - -always_inline ip_multipath_next_hop_t * -ip_next_hop_hash_key_get_next_hops (ip_lookup_main_t * lm, uword k, - uword * n_next_hops) -{ - ip_multipath_next_hop_t * nhs; - uword n_nhs; - if (ip_next_hop_hash_key_is_heap_handle (k)) - { - uword handle = ip_next_hop_hash_key_get_heap_handle (k); - nhs = heap_elt_with_handle (lm->next_hop_heap, handle); - n_nhs = heap_len (lm->next_hop_heap, handle); - } - else - { - nhs = uword_to_pointer (k, ip_multipath_next_hop_t *); - n_nhs = vec_len (nhs); - } - *n_next_hops = n_nhs; - return nhs; -} - -static uword -ip_next_hop_hash_key_sum (hash_t * h, uword key0) -{ - ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); - ip_multipath_next_hop_t * k0; - uword n0; - - k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); - return hash_memory (k0, n0 * sizeof (k0[0]), /* seed */ n0); -} - -static uword -ip_next_hop_hash_key_equal (hash_t * h, uword key0, uword key1) -{ - ip_lookup_main_t * lm = uword_to_pointer (h->user, ip_lookup_main_t *); - ip_multipath_next_hop_t * k0, * k1; - uword n0, n1; - - k0 = ip_next_hop_hash_key_get_next_hops (lm, key0, &n0); - k1 = ip_next_hop_hash_key_get_next_hops (lm, key1, &n1); - - return n0 == n1 && ! 
memcmp (k0, k1, n0 * sizeof (k0[0])); -} +#include <vnet/adj/adj_alloc.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/mpls/mpls.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/receive_dpo.h> clib_error_t * ip_interface_address_add_del (ip_lookup_main_t * lm, @@ -869,52 +157,16 @@ ip_interface_address_add_del (ip_lookup_main_t * lm, void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) { - ip_adjacency_t * adj; - ip_adjacency_t template_adj; - /* ensure that adjacency is cacheline aligned and sized */ ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0); ASSERT(STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == CLIB_CACHE_LINE_BYTES); - lm->adj_index_by_signature = hash_create (0, sizeof (uword)); - memset (&template_adj, 0, sizeof (template_adj)); - /* Preallocate three "special" adjacencies */ - lm->adjacency_heap = aa_bootstrap (0, 3 /* n=1 free items */); - - /* Hand-craft special miss adjacency to use when nothing matches in the - routing table. Same for drop adjacency. */ - adj = ip_add_adjacency (lm, /* template */ 0, /* n-adj */ 1, - &lm->miss_adj_index); - adj->lookup_next_index = IP_LOOKUP_NEXT_MISS; - ASSERT (lm->miss_adj_index == IP_LOOKUP_MISS_ADJ_INDEX); - - /* Make the "drop" adj sharable */ - template_adj.lookup_next_index = IP_LOOKUP_NEXT_DROP; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, - &lm->drop_adj_index); - - /* Make the "local" adj sharable */ - template_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - template_adj.if_address_index = ~0; - adj = ip_add_adjacency (lm, &template_adj, /* n-adj */ 1, - &lm->local_adj_index); + lm->adjacency_heap = adj_heap; if (! 
lm->fib_result_n_bytes) lm->fib_result_n_bytes = sizeof (uword); - lm->multipath_adjacency_by_next_hops - = hash_create2 (/* elts */ 0, - /* user */ pointer_to_uword (lm), - /* value_bytes */ sizeof (uword), - ip_next_hop_hash_key_sum, - ip_next_hop_hash_key_equal, - /* format pair/arg */ - 0, 0); - - /* 1% max error tolerance for multipath. */ - lm->multipath_next_hop_error_tolerance = .01; - lm->is_ip6 = is_ip6; if (is_ip6) { @@ -944,14 +196,12 @@ void ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = IP_BUILTIN_PROTOCOL_UDP; lm->builtin_protocol_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : IP_PROTOCOL_ICMP] = IP_BUILTIN_PROTOCOL_ICMP; } - - ip_init_registered_adjacencies(!is_ip6); } u8 * format_ip_flow_hash_config (u8 * s, va_list * args) { - u32 flow_hash_config = va_arg (*args, u32); - + flow_hash_config_t flow_hash_config = va_arg (*args, u32); + #define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n); foreach_flow_hash_bit; #undef _ @@ -961,31 +211,20 @@ u8 * format_ip_flow_hash_config (u8 * s, va_list * args) u8 * format_ip_lookup_next (u8 * s, va_list * args) { - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - ip_lookup_next_t n = va_arg (*args, u32); - ip_adj_register_t *reg; - + ip_lookup_next_t n = va_arg (*args, ip_lookup_next_t); char * t = 0; switch (n) { default: - vec_validate(lm->registered_adjacencies, n); - reg = vec_elt_at_index(lm->registered_adjacencies, n); - if (reg->node_name) { - s = format (s, "%s:", reg->node_name); - } + s = format (s, "unknown %d", n); return s; - case IP_LOOKUP_NEXT_MISS: t = "miss"; break; case IP_LOOKUP_NEXT_DROP: t = "drop"; break; case IP_LOOKUP_NEXT_PUNT: t = "punt"; break; - case IP_LOOKUP_NEXT_LOCAL: t = "local"; break; case IP_LOOKUP_NEXT_ARP: t = "arp"; break; - case IP_LOOKUP_NEXT_CLASSIFY: t = "classify"; break; - case IP_LOOKUP_NEXT_MAP: t = "map"; break; - case IP_LOOKUP_NEXT_MAP_T: t = "map-t"; break; - case 
IP_LOOKUP_NEXT_INDIRECT: t="indirect"; break; + case IP_LOOKUP_NEXT_MIDCHAIN: t="midchain"; break; + case IP_LOOKUP_NEXT_GLEAN: t="glean"; break; case IP_LOOKUP_NEXT_REWRITE: break; } @@ -996,120 +235,13 @@ u8 * format_ip_lookup_next (u8 * s, va_list * args) return s; } -static u8 * format_ip_interface_address (u8 * s, va_list * args) -{ - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - u32 if_address_index = va_arg (*args, u32); - ip_interface_address_t * ia = pool_elt_at_index (lm->if_address_pool, if_address_index); - void * a = ip_interface_address_get_address (lm, ia); - - if (lm->is_ip6) - return format (s, "%U", format_ip6_address_and_length, a, ia->address_length); - else - return format (s, "%U", format_ip4_address_and_length, a, ia->address_length); -} - -u32 vnet_register_special_adjacency_format_function -(ip_lookup_main_t * lm, format_function_t * fp) -{ - u32 rv; - /* - * Initialize the format function registration vector - * Index 0 must be invalid, to avoid finding and fixing trivial bugs - * all over the place - */ - if (vec_len (lm->special_adjacency_format_functions) == 0) - { - vec_add1 (lm->special_adjacency_format_functions, - (format_function_t *) 0); - } - - rv = vec_len (lm->special_adjacency_format_functions); - vec_add1 (lm->special_adjacency_format_functions, fp); - return rv; -} - -/** @brief Pretty print helper function for formatting specific adjacencies. 
- @param s - input string to format - @param args - other args passed to format function such as: - - vnet_main_t - - ip_lookup_main_t - - adj_index -*/ -u8 * format_ip_adjacency (u8 * s, va_list * args) -{ - vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); - u32 adj_index = va_arg (*args, u32); - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); - ip_adj_register_t *reg; - - if (adj->lookup_next_index < vec_len (lm->registered_adjacencies)) - { - reg = vec_elt_at_index(lm->registered_adjacencies, - adj->lookup_next_index); - if (reg->fn) - { - s = format(s, " %U", reg->fn, lm, adj); - goto format_done; - } - } - - switch (adj->lookup_next_index) - { - case IP_LOOKUP_NEXT_REWRITE: - s = format (s, "%U", - format_vnet_rewrite, - vnm->vlib_main, &adj->rewrite_header, - sizeof (adj->rewrite_data)); - break; - - case IP_LOOKUP_NEXT_ARP: - if (adj->if_address_index != ~0) - s = format (s, " %U", format_ip_interface_address, lm, - adj->if_address_index); - if (adj->arp.next_hop.ip6.as_u64[0] || adj->arp.next_hop.ip6.as_u64[1]) - s = format (s, " via %U", format_ip46_address, - &adj->arp.next_hop, IP46_TYPE_ANY); - break; - case IP_LOOKUP_NEXT_LOCAL: - if (adj->if_address_index != ~0) - s = format (s, " %U", format_ip_interface_address, lm, - adj->if_address_index); - break; - - case IP_LOOKUP_NEXT_CLASSIFY: - s = format (s, " table %d", adj->classify.table_index); - break; - case IP_LOOKUP_NEXT_INDIRECT: - s = format (s, " via %U", format_ip46_address, - &adj->indirect.next_hop, IP46_TYPE_ANY); - break; - - default: - s = format (s, " unknown %d", adj->lookup_next_index); - break; - } - - format_done: - if (adj->explicit_fib_index != ~0 && adj->explicit_fib_index != 0) - s = format (s, " lookup fib index %d", adj->explicit_fib_index); - if (adj->share_count > 0) - s = format (s, " shared %d", adj->share_count + 1); - if (adj->next_adj_with_signature) - s = format (s, " next_adj_with_signature %d", 
adj->next_adj_with_signature); - - return s; -} - u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) { vnet_main_t * vnm = va_arg (*args, vnet_main_t *); - ip_lookup_main_t * lm = va_arg (*args, ip_lookup_main_t *); u32 adj_index = va_arg (*args, u32); u8 * packet_data = va_arg (*args, u8 *); u32 n_packet_data_bytes = va_arg (*args, u32); - ip_adjacency_t * adj = ip_get_adjacency (lm, adj_index); + ip_adjacency_t * adj = adj_get(adj_index); switch (adj->lookup_next_index) { @@ -1126,119 +258,90 @@ u8 * format_ip_adjacency_packet_data (u8 * s, va_list * args) return s; } -static uword unformat_ip_lookup_next (unformat_input_t * input, va_list * args) +static uword unformat_dpo (unformat_input_t * input, va_list * args) { - ip_lookup_next_t * result = va_arg (*args, ip_lookup_next_t *); - ip_lookup_next_t n; + dpo_id_t *dpo = va_arg (*args, dpo_id_t *); + fib_protocol_t fp = va_arg (*args, int); + dpo_proto_t proto; - if (unformat (input, "drop")) - n = IP_LOOKUP_NEXT_DROP; + proto = fib_proto_to_dpo(fp); + if (unformat (input, "drop")) + dpo_copy(dpo, drop_dpo_get(proto)); else if (unformat (input, "punt")) - n = IP_LOOKUP_NEXT_PUNT; - + dpo_copy(dpo, punt_dpo_get(proto)); else if (unformat (input, "local")) - n = IP_LOOKUP_NEXT_LOCAL; - - else if (unformat (input, "arp")) - n = IP_LOOKUP_NEXT_ARP; - + receive_dpo_add_or_lock(proto, ~0, NULL, dpo); else if (unformat (input, "classify")) - n = IP_LOOKUP_NEXT_CLASSIFY; + { + u32 classify_table_index; + + if (!unformat (input, "%d", &classify_table_index)) + { + clib_warning ("classify adj must specify table index"); + return 0; + } + dpo_set(dpo, DPO_CLASSIFY, proto, + classify_dpo_create(fp, classify_table_index)); + } else return 0; - - *result = n; + return 1; } -static uword unformat_ip_adjacency (unformat_input_t * input, va_list * args) -{ - vlib_main_t * vm = va_arg (*args, vlib_main_t *); - ip_adjacency_t * adj = va_arg (*args, ip_adjacency_t *); - u32 node_index = va_arg (*args, u32); - vnet_main_t 
* vnm = vnet_get_main(); - u32 sw_if_index, is_ip6; - ip46_address_t a46; - ip_lookup_next_t next; +const ip46_address_t zero_addr = { + .as_u64 = { + 0, 0 + }, +}; - is_ip6 = node_index == ip6_rewrite_node.index; - adj->rewrite_header.node_index = node_index; - adj->explicit_fib_index = ~0; +u32 +fib_table_id_find_fib_index (fib_protocol_t proto, + u32 table_id) +{ + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + uword * p; - if (unformat (input, "arp %U %U", - unformat_vnet_sw_interface, vnm, &sw_if_index, - unformat_ip46_address, &a46, is_ip6?IP46_TYPE_IP6:IP46_TYPE_IP4)) + switch (proto) { - ip_lookup_main_t * lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main; - ip_adjacency_t * a_adj; - u32 adj_index; - - if (is_ip6) - adj_index = ip6_fib_lookup (&ip6_main, sw_if_index, &a46.ip6); - else - adj_index = ip4_fib_lookup (&ip4_main, sw_if_index, &a46.ip4); - - a_adj = ip_get_adjacency (lm, adj_index); - - if (a_adj->rewrite_header.sw_if_index != sw_if_index) - return 0; - - if (is_ip6) - ip6_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); - else - ip4_adjacency_set_interface_route (vnm, adj, sw_if_index, a_adj->if_address_index); + case FIB_PROTOCOL_IP4: + p = hash_get(im4->fib_index_by_table_id, table_id); + break; + case FIB_PROTOCOL_IP6: + p = hash_get(im6->fib_index_by_table_id, table_id); + break; + default: + p = NULL; + break; } - - else if (unformat_user (input, unformat_ip_lookup_next, &next)) + if (NULL != p) { - adj->lookup_next_index = next; - adj->if_address_index = ~0; - if (next == IP_LOOKUP_NEXT_LOCAL) - (void) unformat (input, "%d", &adj->if_address_index); - else if (next == IP_LOOKUP_NEXT_CLASSIFY) - { - if (!unformat (input, "%d", &adj->classify.table_index)) - { - clib_warning ("classify adj must specify table index"); - return 0; - } - } - else if (next == IP_LOOKUP_NEXT_DROP) - { - adj->rewrite_header.node_index = 0; - } + return (p[0]); } - - else if (unformat_user (input, - 
unformat_vnet_rewrite, - vm, &adj->rewrite_header, sizeof (adj->rewrite_data))) - adj->lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - - else - return 0; - - return 1; + return (~0); } clib_error_t * -vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_command_t * cmd) +vnet_ip_route_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd) { - vnet_main_t * vnm = vnet_get_main(); - clib_error_t * error = 0; - u32 table_id, is_del; - u32 weight, * weights = 0; - u32 * table_ids = 0; - u32 sw_if_index, * sw_if_indices = 0; - ip4_address_t ip4_addr, * ip4_dst_addresses = 0, * ip4_via_next_hops = 0; - ip6_address_t ip6_addr, * ip6_dst_addresses = 0, * ip6_via_next_hops = 0; - u32 dst_address_length, * dst_address_lengths = 0; - ip_adjacency_t parse_adj, * add_adj = 0; unformat_input_t _line_input, * line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + dpo_id_t dpo = DPO_NULL, *dpos = NULL; + fib_prefix_t *prefixs = NULL, pfx; + clib_error_t * error = NULL; + mpls_label_t out_label; + u32 table_id, is_del; + vnet_main_t * vnm; + u32 fib_index; f64 count; - u32 outer_table_id; + int i; + vnm = vnet_get_main(); is_del = 0; table_id = 0; count = 1; @@ -1247,410 +350,311 @@ vnet_ip_route_cmd (vlib_main_t * vm, unformat_input_t * main_input, vlib_cli_com if (! 
unformat_user (main_input, unformat_line_input, line_input)) return 0; - memset(&parse_adj, 0, sizeof (parse_adj)); - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + memset(&rpath, 0, sizeof(rpath)); + memset(&pfx, 0, sizeof(pfx)); + if (unformat (line_input, "table %d", &table_id)) ; else if (unformat (line_input, "del")) is_del = 1; else if (unformat (line_input, "add")) is_del = 0; + else if (unformat (line_input, "resolve-via-host")) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then flags"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST; + } + else if (unformat (line_input, "resolve-via-attached")) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then flags"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_flags |= + FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + } + else if (unformat (line_input, "out-label %U", + unformat_mpls_unicast_label, &out_label)) + { + if (vec_len(rpaths) == 0) + { + error = clib_error_return(0 , "Paths then labels"); + goto done; + } + rpaths[vec_len(rpaths)-1].frp_label = out_label; + } else if (unformat (line_input, "count %f", &count)) ; else if (unformat (line_input, "%U/%d", - unformat_ip4_address, &ip4_addr, - &dst_address_length)) - { - vec_add1 (ip4_dst_addresses, ip4_addr); - vec_add1 (dst_address_lengths, dst_address_length); - } - + unformat_ip4_address, + &pfx.fp_addr.ip4, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + vec_add1(prefixs, pfx); + } else if (unformat (line_input, "%U/%d", - unformat_ip6_address, &ip6_addr, - &dst_address_length)) - { - vec_add1 (ip6_dst_addresses, ip6_addr); - vec_add1 (dst_address_lengths, dst_address_length); - } - + unformat_ip6_address, + &pfx.fp_addr.ip6, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + vec_add1(prefixs, pfx); + } else if (unformat (line_input, "via %U %U weight %u", - unformat_ip4_address, &ip4_addr, - unformat_vnet_sw_interface, vnm, 
&sw_if_index, - &weight)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, weight); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U weight %u", - unformat_ip6_address, &ip6_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index, - &weight)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, weight); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index, + &rpath.frp_weight)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", - unformat_ip4_address, &ip4_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U %U", - unformat_ip6_address, &ip6_addr, - unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, sw_if_index); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)~0); - } + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_weight = 1; + rpath.frp_proto = FIB_PROTOCOL_IP6; + 
vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip4_address, + &rpath.frp_addr.ip4, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip6_address, + &rpath.frp_addr.ip6, + &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U", - unformat_ip4_address, &ip4_addr)) - { - vec_add1 (ip4_via_next_hops, ip4_addr); - vec_add1 (sw_if_indices, (u32)~0); - vec_add1 (weights, 1); - vec_add1 (table_ids, table_id); - } + unformat_ip4_address, + &rpath.frp_addr.ip4)) + { + /* + * the recursive next-hops are by default in the same table + * as the prefix + */ + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + vec_add1(rpaths, rpath); + } else if (unformat (line_input, "via %U", - unformat_ip6_address, &ip6_addr)) - { - vec_add1 (ip6_via_next_hops, ip6_addr); - vec_add1 (sw_if_indices, (u32)~0); - vec_add1 (weights, 1); - vec_add1 (table_ids, (u32)table_id); - } - - else if (vec_len (ip4_dst_addresses) > 0 - && unformat (line_input, "via %U", - unformat_ip_adjacency, vm, &parse_adj, ip4_rewrite_node.index)) - vec_add1 (add_adj, parse_adj); - - else if (vec_len (ip6_dst_addresses) > 0 - && unformat (line_input, "via %U", - unformat_ip_adjacency, vm, &parse_adj, ip6_rewrite_node.index)) - vec_add1 (add_adj, parse_adj); - else if (unformat (line_input, "lookup in table %d", &outer_table_id)) - { - uword * p; - - if (vec_len (ip4_dst_addresses) > 0) - p = hash_get (ip4_main.fib_index_by_table_id, outer_table_id); - else - p = hash_get 
(ip6_main.fib_index_by_table_id, outer_table_id); - - if (p == 0) - { - error = clib_error_return (0, "Nonexistent outer table id %d", - outer_table_id); - goto done; - } - - parse_adj.lookup_next_index = IP_LOOKUP_NEXT_LOCAL; - parse_adj.explicit_fib_index = p[0]; - vec_add1 (add_adj, parse_adj); - } + unformat_ip6_address, + &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "lookup in table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = pfx.fp_proto; + vec_add1(rpaths, rpath); + } + else if (vec_len (prefixs) > 0 && + unformat (line_input, "via %U", + unformat_dpo, &dpo, prefixs[0].fp_proto)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + vec_add1 (dpos, dpo); + } else - { + { error = unformat_parse_error (line_input); goto done; - } + } } unformat_free (line_input); - if (vec_len (ip4_dst_addresses) + vec_len (ip6_dst_addresses) == 0) - { + if (vec_len (prefixs) == 0) + { error = clib_error_return (0, "expected ip4/ip6 destination address/length."); goto done; } - if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_dst_addresses) > 0) - { - error = clib_error_return (0, "mixed ip4/ip6 address/length."); - goto done; - } - - if (vec_len (ip4_dst_addresses) > 0 && vec_len (ip6_via_next_hops) > 0) - { - error = clib_error_return (0, "ip4 destinations with ip6 next hops."); - goto done; - } - - if (vec_len (ip6_dst_addresses) > 0 && vec_len (ip4_via_next_hops) > 0) - { - error = clib_error_return (0, "ip6 destinations with ip4 next hops."); - goto done; - } - - if (! 
is_del && vec_len (add_adj) + vec_len (weights) == 0) + if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0) { - error = clib_error_return (0, "no next hops or adjacencies to add."); + error = clib_error_return (0, "expected paths."); goto done; } + if (~0 == table_id) { - int i; - ip4_main_t * im4 = &ip4_main; - ip6_main_t * im6 = &ip6_main; + /* + * if no table_id is passed we will manipulate the default + */ + fib_index = 0; + } + else + { + fib_index = fib_table_id_find_fib_index(prefixs[0].fp_proto, + table_id); - for (i = 0; i < vec_len (ip4_dst_addresses); i++) + if (~0 == fib_index) { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.dst_address = ip4_dst_addresses[i]; - a.dst_address_length = dst_address_lengths[i]; - a.adj_index = ~0; - - if (is_del) - { - if (vec_len (ip4_via_next_hops) == 0) - { - uword * dst_hash, * dst_result; - u32 dst_address_u32; - ip4_fib_t * fib; - - fib = find_ip4_fib_by_table_index_or_id (im4, table_id, - 0 /* by table id */); - - a.flags |= IP4_ROUTE_FLAG_DEL; - dst_address_u32 = a.dst_address.as_u32 - & im4->fib_masks[a.dst_address_length]; - - dst_hash = - fib->adj_index_by_dst_address[a.dst_address_length]; - dst_result = hash_get (dst_hash, dst_address_u32); - if (dst_result) - a.adj_index = dst_result[0]; - else - { - clib_warning ("%U/%d not in FIB", - format_ip4_address, &a.dst_address, - a.dst_address_length); - continue; - } - - ip4_add_del_route (im4, &a); - ip4_maybe_remap_adjacencies (im4, table_id, - IP4_ROUTE_FLAG_TABLE_ID); - } - else - { - u32 i, j, n, f, incr; - ip4_address_t dst = a.dst_address; - f64 t[2]; - n = count; - t[0] = vlib_time_now (vm); - incr = 1<<(32 - a.dst_address_length); - for (i = 0; i < n; i++) - { - f = i + 1 < n ? 
IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; - a.dst_address = dst; - for (j = 0; j < vec_len (ip4_via_next_hops); j++) - { - if (table_ids[j] != (u32)~0) - { - uword * p = hash_get (im4->fib_index_by_table_id, - table_ids[j]); - if (p == 0) - { - clib_warning ("no such FIB table %d", - table_ids[j]); - continue; - } - table_ids[j] = p[0]; - } - - ip4_add_del_route_next_hop (im4, - IP4_ROUTE_FLAG_DEL | f, - &a.dst_address, - a.dst_address_length, - &ip4_via_next_hops[j], - sw_if_indices[j], - weights[j], (u32)~0, - table_ids[j] /* fib index */); - } - dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); - } - t[1] = vlib_time_now (vm); - if (count > 1) - vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); - } - } - else - { - if (vec_len (add_adj) > 0) - { - a.flags |= IP4_ROUTE_FLAG_ADD; - a.add_adj = add_adj; - a.n_add_adj = vec_len (add_adj); - - ip4_add_del_route (im4, &a); - } - else if (vec_len (ip4_via_next_hops) > 0) - { - u32 i, j, n, f, incr; - ip4_address_t dst = a.dst_address; - f64 t[2]; - n = count; - t[0] = vlib_time_now (vm); - incr = 1<<(32 - a.dst_address_length); - for (i = 0; i < n; i++) - { - f = i + 1 < n ? 
IP4_ROUTE_FLAG_NOT_LAST_IN_GROUP : 0; - a.dst_address = dst; - for (j = 0; j < vec_len (ip4_via_next_hops); j++) - { - if (table_ids[j] != (u32)~0) - { - uword * p = hash_get (im4->fib_index_by_table_id, - table_ids[j]); - if (p == 0) - { - clib_warning ("no such FIB table %d", - table_ids[j]); - continue; - } - table_ids[j] = p[0]; - } - ip4_add_del_route_next_hop (im4, - IP4_ROUTE_FLAG_ADD | f, - &a.dst_address, - a.dst_address_length, - &ip4_via_next_hops[j], - sw_if_indices[j], - weights[j], (u32)~0, - table_ids[j] /* fib index */); - } - dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32)); - } - t[1] = vlib_time_now (vm); - if (count > 1) - vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); - } - } + error = clib_error_return (0, + "Nonexistent table id %d", + table_id); + goto done; } + } - for (i = 0; i < vec_len (ip6_dst_addresses); i++) + for (i = 0; i < vec_len (prefixs); i++) + { + if (is_del && 0 == vec_len (rpaths)) { - ip6_add_del_route_args_t a; - - - memset (&a, 0, sizeof (a)); - a.flags = IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.dst_address = ip6_dst_addresses[i]; - a.dst_address_length = dst_address_lengths[i]; - a.adj_index = ~0; - - if (is_del) + fib_table_entry_delete(fib_index, + &prefixs[i], + FIB_SOURCE_CLI); + } + else if (!is_del && 1 == vec_len (dpos)) + { + fib_table_entry_special_dpo_add(fib_index, + &prefixs[i], + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpos[0]); + dpo_reset(&dpos[0]); + } + else if (vec_len (dpos) > 0) + { + error = clib_error_return(0 , "Load-balancing over multiple special adjacencies is unsupported"); + goto done; + } + else if (0 < vec_len (rpaths)) + { + u32 k, j, n, incr; + ip46_address_t dst = prefixs[i].fp_addr; + f64 t[2]; + n = count; + t[0] = vlib_time_now (vm); + incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 
32 : 128) - + prefixs[i].fp_len); + + for (k = 0; k < n; k++) { - if (vec_len (ip6_via_next_hops) == 0) + for (j = 0; j < vec_len (rpaths); j++) { - BVT(clib_bihash_kv) kv, value; - ip6_address_t dst_address; - ip6_fib_t * fib; - - fib = find_ip6_fib_by_table_index_or_id (im6, table_id, - 0 /* by table id */); - - a.flags |= IP4_ROUTE_FLAG_DEL; - - dst_address = ip6_dst_addresses[i]; - - ip6_address_mask (&dst_address, - &im6->fib_masks[dst_address_length]); - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64)(fib - im6->fibs)<<32) - | a.dst_address_length; - - if (BV(clib_bihash_search)(&im6->ip6_lookup_table, - &kv, &value) == 0) - a.adj_index = value.value; - else - { - clib_warning ("%U/%d not in FIB", - format_ip6_address, &a.dst_address, - a.dst_address_length); - continue; - } - - a.flags |= IP6_ROUTE_FLAG_DEL; - ip6_add_del_route (im6, &a); - ip6_maybe_remap_adjacencies (im6, table_id, - IP6_ROUTE_FLAG_TABLE_ID); + /* + * the CLI parsing stored table Ids, swap to FIB indicies + */ + rpaths[i].frp_fib_index = + fib_table_id_find_fib_index(prefixs[i].fp_proto, + rpaths[i].frp_fib_index); + + fib_prefix_t rpfx = { + .fp_len = prefixs[i].fp_len, + .fp_proto = prefixs[i].fp_proto, + .fp_addr = dst, + }; + + if (is_del) + fib_table_entry_path_remove2(fib_index, + &rpfx, + FIB_SOURCE_CLI, + &rpaths[j]); + else + fib_table_entry_path_add2(fib_index, + &rpfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + &rpaths[j]); } - else - { - u32 i; - for (i = 0; i < vec_len (ip6_via_next_hops); i++) - { - ip6_add_del_route_next_hop (im6, - IP6_ROUTE_FLAG_DEL, - &a.dst_address, - a.dst_address_length, - &ip6_via_next_hops[i], - sw_if_indices[i], - weights[i], (u32)~0, - table_ids[i] /* fib index */); - } - } - } - else - { - if (vec_len (add_adj) > 0) + + if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto) { - a.flags |= IP6_ROUTE_FLAG_ADD; - a.add_adj = add_adj; - a.n_add_adj = vec_len (add_adj); - - ip6_add_del_route (im6, &a); + 
dst.ip4.as_u32 = + clib_host_to_net_u32(incr + + clib_net_to_host_u32 (dst.ip4.as_u32)); } - else if (vec_len (ip6_via_next_hops) > 0) + else { - u32 i; - for (i = 0; i < vec_len (ip6_via_next_hops); i++) - { - ip6_add_del_route_next_hop (im6, - IP6_ROUTE_FLAG_ADD, - &a.dst_address, - a.dst_address_length, - &ip6_via_next_hops[i], - sw_if_indices[i], - weights[i], (u32)~0, - table_ids[i]); - } + int bucket = (incr < 64 ? 0 : 1); + dst.ip6.as_u64[bucket] = + clib_host_to_net_u64(incr + + clib_net_to_host_u64 ( + dst.ip6.as_u64[bucket])); + } } + t[1] = vlib_time_now (vm); + if (count > 1) + vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); + } + else + { + error = clib_error_return(0 , "Don't understand what you want..."); + goto done; } } + done: - vec_free (add_adj); - vec_free (weights); - vec_free (dst_address_lengths); - vec_free (ip4_dst_addresses); - vec_free (ip6_dst_addresses); - vec_free (ip4_via_next_hops); - vec_free (ip6_via_next_hops); + vec_free (dpos); + vec_free (prefixs); + vec_free (rpaths); return error; } @@ -1708,14 +712,14 @@ VLIB_CLI_COMMAND (ip_route_command, static) = { .is_mp_safe = 1, }; -/* +/* * The next two routines address a longstanding script hemorrhoid. * Probing a v4 or v6 neighbor needs to appear to be synchronous, * or dependent route-adds will simply fail. 
*/ static clib_error_t * ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, - int retry_count) + int retry_count) { vnet_main_t * vnm = vnet_get_main(); clib_error_t * e; @@ -1727,7 +731,7 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, ASSERT (vlib_in_process_context(vm)); if (retry_count > 0) - vnet_register_ip6_neighbor_resolution_event + vnet_register_ip6_neighbor_resolution_event (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, 1 /* event */, 0 /* data */); @@ -1735,17 +739,17 @@ ip6_probe_neighbor_wait (vlib_main_t *vm, ip6_address_t * a, u32 sw_if_index, { /* The interface may be down, etc. */ e = ip6_probe_neighbor (vm, a, sw_if_index); - + if (e) - return e; - + return e; + vlib_process_wait_for_event_or_clock (vm, 1.0); event_type = vlib_process_get_events (vm, &event_data); - switch (event_type) - { - case 1: /* resolved... */ - vlib_cli_output (vm, "Resolved %U", - format_ip6_address, a); + switch (event_type) + { + case 1: /* resolved... */ + vlib_cli_output (vm, "Resolved %U", + format_ip6_address, a); resolved = 1; goto done; @@ -1883,526 +887,3 @@ VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { .short_help = "ip probe-neighbor <intfc> <ip4-addr> | <ip6-addr> [retry nn]", .is_mp_safe = 1, }; - -typedef CLIB_PACKED (struct { - ip4_address_t address; - - u32 address_length : 6; - - u32 index : 26; -}) ip4_route_t; - -static int -ip4_route_cmp (void * a1, void * a2) -{ - ip4_route_t * r1 = a1; - ip4_route_t * r2 = a2; - - int cmp = ip4_address_compare (&r1->address, &r2->address); - return cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); -} - -static clib_error_t * -ip4_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - ip4_main_t * im4 = &ip4_main; - ip4_route_t * routes, * r; - ip4_fib_t * fib; - ip_lookup_main_t * lm = &im4->lookup_main; - uword * results, i; - int verbose, matching, mtrie, include_empty_fibs; - ip4_address_t matching_address; - u8 clear = 0; - int table_id = -1; - - routes = 0; - results = 0; - verbose = 1; - include_empty_fibs = 0; - matching = 0; - mtrie = 0; - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "brief") || unformat (input, "summary") - || unformat (input, "sum")) - verbose = 0; - - else if (unformat (input, "mtrie")) - mtrie = 1; - - else if (unformat (input, "include-empty")) - include_empty_fibs = 1; - - else if (unformat (input, "%U", unformat_ip4_address, &matching_address)) - matching = 1; - - else if (unformat (input, "clear")) - clear = 1; - - else if (unformat (input, "table %d", &table_id)) - ; - else - break; - } - - vec_foreach (fib, im4->fibs) - { - int fib_not_empty; - - fib_not_empty = 0; - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - uword n_elts = hash_elts (hash); - if (n_elts) - { - fib_not_empty = 1; - break; - } - } - - if (fib_not_empty == 0 && include_empty_fibs == 0) - continue; - - if (table_id >= 0 && table_id != (int)fib->table_id) - continue; - - if (include_empty_fibs) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - - /* Show summary? */ - if (! 
verbose) - { - if (include_empty_fibs == 0) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - uword n_elts = hash_elts (hash); - if (n_elts > 0) - vlib_cli_output (vm, "%20d%16d", i, n_elts); - } - continue; - } - - if (routes) - _vec_len (routes) = 0; - if (results) - _vec_len (results) = 0; - - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword * hash = fib->adj_index_by_dst_address[i]; - hash_pair_t * p; - ip4_route_t x; - - x.address_length = i; - - if (matching) - { - x.address.as_u32 = matching_address.as_u32 & im4->fib_masks[i]; - p = hash_get_pair (hash, x.address.as_u32); - if (p) - { - if (lm->fib_result_n_words > 1) - { - x.index = vec_len (results); - vec_add (results, p->value, lm->fib_result_n_words); - } - else - x.index = p->value[0]; - vec_add1 (routes, x); - } - } - else - { - hash_foreach_pair (p, hash, ({ - x.address.data_u32 = p->key; - if (lm->fib_result_n_words > 1) - { - x.index = vec_len (results); - vec_add (results, p->value, lm->fib_result_n_words); - } - else - x.index = p->value[0]; - - vec_add1 (routes, x); - })); - } - } - - vec_sort_with_function (routes, ip4_route_cmp); - if (vec_len(routes)) { - if (include_empty_fibs == 0) - vlib_cli_output (vm, "Table %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im4->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - if (mtrie) - vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); - vlib_cli_output (vm, "%=20s%=16s%=16s%=16s", - "Destination", "Packets", "Bytes", "Adjacency"); - } - vec_foreach (r, routes) - { - vlib_counter_t c, sum; - uword i, j, n_left, n_nhs, adj_index, * result = 0; - ip_adjacency_t * adj; - ip_multipath_next_hop_t * nhs, 
tmp_nhs[1]; - - adj_index = r->index; - if (lm->fib_result_n_words > 1) - { - result = vec_elt_at_index (results, adj_index); - adj_index = result[0]; - } - - adj = ip_get_adjacency (lm, adj_index); - if (adj->n_adj == 1) - { - nhs = &tmp_nhs[0]; - nhs[0].next_hop_adj_index = ~0; /* not used */ - nhs[0].weight = 1; - n_nhs = 1; - } - else - { - ip_multipath_adjacency_t * madj; - madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); - nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); - n_nhs = madj->normalized_next_hops.count; - } - - n_left = nhs[0].weight; - vlib_counter_zero (&sum); - for (i = j = 0; i < adj->n_adj; i++) - { - n_left -= 1; - vlib_get_combined_counter (&lm->adjacency_counters, - adj_index + i, &c); - if (clear) - vlib_zero_combined_counter (&lm->adjacency_counters, - adj_index + i); - vlib_counter_add (&sum, &c); - if (n_left == 0) - { - u8 * msg = 0; - uword indent; - - if (j == 0) - msg = format (msg, "%-20U", - format_ip4_address_and_length, - r->address.data, r->address_length); - else - msg = format (msg, "%U", format_white_space, 20); - - msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); - - indent = vec_len (msg); - msg = format (msg, "weight %d, index %d", - nhs[j].weight, adj_index + i); - - if (ip_adjacency_is_multipath(lm, adj_index)) - msg = format (msg, ", multipath"); - - msg = format (msg, "\n%U%U", - format_white_space, indent, - format_ip_adjacency, - vnm, lm, adj_index + i); - - vlib_cli_output (vm, "%v", msg); - vec_free (msg); - - if (result && lm->format_fib_result) - vlib_cli_output (vm, "%20s%U", "", - lm->format_fib_result, vm, lm, result, - i + 1 - nhs[j].weight, - nhs[j].weight); - - j++; - if (j < n_nhs) - { - n_left = nhs[j].weight; - vlib_counter_zero (&sum); - } - } - } - } - } - - vec_free (routes); - vec_free (results); - - return 0; -} - -/*? - * Show FIB/route entries - * - * @cliexpar - * @cliexstart{show ip fib} - * Display the IPv4 FIB. 
- * This command will run for a long time when the FIBs comprise millions of entries. - * vpp# sh ip fib - * Table 0 - * Destination Packets Bytes Adjacency - * 6.0.0.0/8 0 0 weight 1, index 3 - * arp fake-eth0 6.0.0.1/8 - * 6.0.0.1/32 0 0 weight 1, index 4 - * local 6.0.0.1/8 - * - * And so forth. Use 'show ip fib summary' for a summary: - * - * vpp# sh ip fib summary - * Table 0 - * Prefix length Count - * 8 1 - * 32 4 - * @cliexend - ?*/ -VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { - .path = "show ip fib", - .short_help = "show ip fib [mtrie] [summary] [table <n>] [<ip4-addr>] [clear] [include-empty]", - .function = ip4_show_fib, -}; - -typedef struct { - ip6_address_t address; - - u32 address_length; - - u32 index; -} ip6_route_t; - -typedef struct { - u32 fib_index; - ip6_route_t ** routep; -} add_routes_in_fib_arg_t; - -static void add_routes_in_fib (BVT(clib_bihash_kv) * kvp, void *arg) -{ - add_routes_in_fib_arg_t * ap = arg; - - if (kvp->key[2]>>32 == ap->fib_index) - { - ip6_address_t *addr; - ip6_route_t * r; - addr = (ip6_address_t *) kvp; - vec_add2 (*ap->routep, r, 1); - r->address = addr[0]; - r->address_length = kvp->key[2] & 0xFF; - r->index = kvp->value; - } -} - -typedef struct { - u32 fib_index; - u64 count_by_prefix_length[129]; -} count_routes_in_fib_at_prefix_length_arg_t; - -static void count_routes_in_fib_at_prefix_length -(BVT(clib_bihash_kv) * kvp, void *arg) -{ - count_routes_in_fib_at_prefix_length_arg_t * ap = arg; - int mask_width; - - if ((kvp->key[2]>>32) != ap->fib_index) - return; - - mask_width = kvp->key[2] & 0xFF; - - ap->count_by_prefix_length[mask_width]++; -} - -static int -ip6_route_cmp (void * a1, void * a2) -{ - ip6_route_t * r1 = a1; - ip6_route_t * r2 = a2; - - int cmp = ip6_address_compare (&r1->address, &r2->address); - return cmp ? 
cmp : ((int) r1->address_length - (int) r2->address_length); -} - -static clib_error_t * -ip6_show_fib (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t * vnm = vnet_get_main(); - ip6_main_t * im6 = &ip6_main; - ip6_route_t * routes, * r; - ip6_fib_t * fib; - ip_lookup_main_t * lm = &im6->lookup_main; - uword * results; - int verbose; - BVT(clib_bihash) * h = &im6->ip6_lookup_table; - __attribute__((unused)) u8 clear = 0; - add_routes_in_fib_arg_t _a, *a=&_a; - count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca; - - routes = 0; - results = 0; - verbose = 1; - if (unformat (input, "brief") || unformat (input, "summary") - || unformat (input, "sum")) - verbose = 0; - - if (unformat (input, "clear")) - clear = 1; - - vlib_cli_output (vm, "FIB lookup table: %d buckets, %lld MB heap", - im6->lookup_table_nbuckets, im6->lookup_table_size>>20); - vlib_cli_output (vm, "%U", format_mheap, h->mheap, 0 /*verbose*/); - vlib_cli_output (vm, " "); - - vec_foreach (fib, im6->fibs) - { - vlib_cli_output (vm, "VRF %d, fib_index %d, flow hash: %U", - fib->table_id, fib - im6->fibs, - format_ip_flow_hash_config, fib->flow_hash_config); - - /* Show summary? */ - if (! 
verbose) - { - int len; - vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); - - memset (ca, 0, sizeof(*ca)); - ca->fib_index = fib - im6->fibs; - - BV(clib_bihash_foreach_key_value_pair) - (h, count_routes_in_fib_at_prefix_length, ca); - - for (len = 128; len >= 0; len--) - { - if (ca->count_by_prefix_length[len]) - vlib_cli_output (vm, "%=20d%=16lld", - len, ca->count_by_prefix_length[len]); - } - continue; - } - - if (routes) - _vec_len (routes) = 0; - if (results) - _vec_len (results) = 0; - - a->fib_index = fib - im6->fibs; - a->routep = &routes; - - BV(clib_bihash_foreach_key_value_pair)(h, add_routes_in_fib, a); - - vec_sort_with_function (routes, ip6_route_cmp); - - vlib_cli_output (vm, "%=45s%=16s%=16s%=16s", - "Destination", "Packets", "Bytes", "Adjacency"); - vec_foreach (r, routes) - { - vlib_counter_t c, sum; - uword i, j, n_left, n_nhs, adj_index, * result = 0; - ip_adjacency_t * adj; - ip_multipath_next_hop_t * nhs, tmp_nhs[1]; - - adj_index = r->index; - if (lm->fib_result_n_words > 1) - { - result = vec_elt_at_index (results, adj_index); - adj_index = result[0]; - } - - adj = ip_get_adjacency (lm, adj_index); - if (adj->n_adj == 1) - { - nhs = &tmp_nhs[0]; - nhs[0].next_hop_adj_index = ~0; /* not used */ - nhs[0].weight = 1; - n_nhs = 1; - } - else - { - ip_multipath_adjacency_t * madj; - madj = vec_elt_at_index (lm->multipath_adjacencies, adj->heap_handle); - nhs = heap_elt_at_index (lm->next_hop_heap, madj->normalized_next_hops.heap_offset); - n_nhs = madj->normalized_next_hops.count; - } - - n_left = nhs[0].weight; - vlib_counter_zero (&sum); - for (i = j = 0; i < adj->n_adj; i++) - { - n_left -= 1; - vlib_get_combined_counter (&lm->adjacency_counters, - adj_index + i, &c); - if (clear) - vlib_zero_combined_counter (&lm->adjacency_counters, - adj_index + i); - vlib_counter_add (&sum, &c); - if (n_left == 0) - { - u8 * msg = 0; - uword indent; - - if (j == 0) - msg = format (msg, "%-45U", - format_ip6_address_and_length, - 
r->address.as_u8, r->address_length); - else - msg = format (msg, "%U", format_white_space, 20); - - msg = format (msg, "%16Ld%16Ld ", sum.packets, sum.bytes); - - indent = vec_len (msg); - msg = format (msg, "weight %d, index %d", - nhs[j].weight, adj_index + i); - - if (ip_adjacency_is_multipath(lm, adj_index + i)) - msg = format (msg, ", multipath"); - - msg = format (msg, "\n%U%U", - format_white_space, indent, - format_ip_adjacency, - vnm, lm, adj_index + i); - - vlib_cli_output (vm, "%v", msg); - vec_free (msg); - - j++; - if (j < n_nhs) - { - n_left = nhs[j].weight; - vlib_counter_zero (&sum); - } - } - } - - if (result && lm->format_fib_result) - vlib_cli_output (vm, "%20s%U", "", lm->format_fib_result, vm, lm, result, 0); - } - vlib_cli_output (vm, " "); - } - - vec_free (routes); - vec_free (results); - - return 0; -} - -/*? - * Show FIB6/route entries - * - * @cliexpar - * @cliexstart{show ip fib} - * Display the IPv6 FIB. - * This command will run for a long time when the FIBs comprise millions of entries. - * See 'show ip fib' - * @cliexend - ?*/ -VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { - .path = "show ip6 fib", - .short_help = "show ip6 fib [summary] [clear]", - .function = ip6_show_fib, -}; diff --git a/vnet/vnet/ip/lookup.h b/vnet/vnet/ip/lookup.h index dcc9d25fed0..c8dcc141430 100644 --- a/vnet/vnet/ip/lookup.h +++ b/vnet/vnet/ip/lookup.h @@ -45,7 +45,6 @@ * - Callbacks on route add. * - Callbacks on interface address change. */ - #ifndef included_ip_lookup_h #define included_ip_lookup_h @@ -53,12 +52,11 @@ #include <vlib/buffer.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/dpo/dpo.h> /** @brief Common (IP4/IP6) next index stored in adjacency. */ typedef enum { - /** Packet does not match any route in table. */ - IP_LOOKUP_NEXT_MISS, - /** Adjacency to drop this packet. */ IP_LOOKUP_NEXT_DROP, /** Adjacency to punt this packet. 
*/ @@ -67,27 +65,26 @@ typedef enum { /** This packet is for one of our own IP addresses. */ IP_LOOKUP_NEXT_LOCAL, - /** This packet matches an "interface route" and packets + /** This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite string for this destination. */ IP_LOOKUP_NEXT_ARP, + /** This packet matches an "interface route" and packets + need to be passed to ARP to find rewrite string for + this destination. */ + IP_LOOKUP_NEXT_GLEAN, + /** This packet is to be rewritten and forwarded to the next processing node. This is typically the output interface but might be another node for further output processing. */ IP_LOOKUP_NEXT_REWRITE, - /** This packet needs to be classified */ - IP_LOOKUP_NEXT_CLASSIFY, - - /** This packet needs to go to MAP - RFC7596, RFC7597 */ - IP_LOOKUP_NEXT_MAP, + /** This packets follow a load-balance */ + IP_LOOKUP_NEXT_LOAD_BALANCE, - /** This packet needs to go to MAP with Translation - RFC7599 */ - IP_LOOKUP_NEXT_MAP_T, - - /** This packets needs to go to indirect next hop */ - IP_LOOKUP_NEXT_INDIRECT, + /** This packets follow a mid-chain adjacency */ + IP_LOOKUP_NEXT_MIDCHAIN, /** This packets needs to go to ICMP error */ IP_LOOKUP_NEXT_ICMP_ERROR, @@ -100,7 +97,7 @@ typedef enum { } ip4_lookup_next_t; typedef enum { - /** Hop-by-hop header handling */ + /* Hop-by-hop header handling */ IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT, IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP, IP6_LOOKUP_NEXT_POP_HOP_BY_HOP, @@ -108,30 +105,26 @@ typedef enum { } ip6_lookup_next_t; #define IP4_LOOKUP_NEXT_NODES { \ - [IP_LOOKUP_NEXT_MISS] = "ip4-miss", \ [IP_LOOKUP_NEXT_DROP] = "ip4-drop", \ [IP_LOOKUP_NEXT_PUNT] = "ip4-punt", \ [IP_LOOKUP_NEXT_LOCAL] = "ip4-local", \ [IP_LOOKUP_NEXT_ARP] = "ip4-arp", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite-transit", \ - [IP_LOOKUP_NEXT_CLASSIFY] = "ip4-classify", \ - [IP_LOOKUP_NEXT_MAP] = "ip4-map", \ - [IP_LOOKUP_NEXT_MAP_T] = 
"ip4-map-t", \ - [IP_LOOKUP_NEXT_INDIRECT] = "ip4-indirect", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain", \ + [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip4-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error", \ } #define IP6_LOOKUP_NEXT_NODES { \ - [IP_LOOKUP_NEXT_MISS] = "ip6-miss", \ [IP_LOOKUP_NEXT_DROP] = "ip6-drop", \ [IP_LOOKUP_NEXT_PUNT] = "ip6-punt", \ [IP_LOOKUP_NEXT_LOCAL] = "ip6-local", \ [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor", \ + [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean", \ [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite", \ - [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify", \ - [IP_LOOKUP_NEXT_MAP] = "ip6-map", \ - [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t", \ - [IP_LOOKUP_NEXT_INDIRECT] = "ip6-indirect", \ + [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain", \ + [IP_LOOKUP_NEXT_LOAD_BALANCE] = "ip6-load-balance", \ [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error", \ [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop", \ [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", \ @@ -157,20 +150,20 @@ _(dport, IP_FLOW_HASH_DST_PORT) \ _(proto, IP_FLOW_HASH_PROTO) \ _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) +/** + * A flow hash configuration is a mask of the flow hash options + */ +typedef u32 flow_hash_config_t; + #define IP_ADJACENCY_OPAQUE_SZ 16 /** @brief IP unicast adjacency. @note cache aligned. */ typedef struct { CLIB_CACHE_LINE_ALIGN_MARK(cacheline0); - /** Handle for this adjacency in adjacency heap. */ + /* Handle for this adjacency in adjacency heap. */ u32 heap_handle; - STRUCT_MARK(signature_start); - - /** Interface address index for this local/arp adjacency. */ - u32 if_address_index; - /** Number of adjecencies in block. Greater than 1 means multipath; otherwise equal to 1. */ u16 n_adj; @@ -181,27 +174,63 @@ typedef struct { u16 lookup_next_index_as_int; }; + /** Interface address index for this local/arp adjacency. */ + u32 if_address_index; + /** Force re-lookup in a different FIB. 
~0 => normal behavior */ - i16 explicit_fib_index; u16 mcast_group_index; /** Highest possible perf subgraph arc interposition, e.g. for ip6 ioam */ u16 saved_lookup_next_index; + /* + * link/ether-type + */ + u8 ia_link; + u8 ia_nh_proto; + union { - /** IP_LOOKUP_NEXT_ARP only */ - struct { - ip46_address_t next_hop; - } arp; - /** IP_LOOKUP_NEXT_CLASSIFY only */ - struct { - u16 table_index; - } classify; - /** IP_LOOKUP_NEXT_INDIRECT only */ - struct { - ip46_address_t next_hop; - } indirect; - u8 opaque[IP_ADJACENCY_OPAQUE_SZ]; + union { + /** + * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE + * + * neighbour adjacency sub-type; + */ + struct { + ip46_address_t next_hop; + } nbr; + /** + * IP_LOOKUP_NEXT_MIDCHAIN + * + * A nbr adj that is also recursive. Think tunnels. + * A nbr adj can transition to be of type MDICHAIN + * so be sure to leave the two structs with the next_hop + * fields aligned. + */ + struct { + /** + * The recursive next-hop + */ + ip46_address_t next_hop; + /** + * The node index of the tunnel's post rewrite/TX function. + */ + u32 tx_function_node; + /** + * The next DPO to use + */ + dpo_id_t next_dpo; + } midchain; + /** + * IP_LOOKUP_NEXT_GLEAN + * + * Glean the address to ARP for from the packet's destination + */ + struct { + ip46_address_t receive_addr; + } glean; + } sub_type; + u16 opaque[IP_ADJACENCY_OPAQUE_SZ]; }; /** @brief Special format function for this adjacency. @@ -210,63 +239,32 @@ typedef struct { * the first cache line reads "full" on the free space gas gauge. 
*/ u32 special_adjacency_format_function_index; /* 0 is invalid */ - STRUCT_MARK(signature_end); - - /** Number of FIB entries sharing this adjacency */ - u32 share_count; - /** Use this adjacency instead */ - u32 next_adj_with_signature; CLIB_CACHE_LINE_ALIGN_MARK(cacheline1); - /** Rewrite in second/third cache lines */ + /* Rewrite in second/third cache lines */ vnet_declare_rewrite (VLIB_BUFFER_PRE_DATA_SIZE); + + /* + * member not accessed in the data plane are relgated to the + * remaining cachelines + */ + fib_node_t ia_node; } ip_adjacency_t; -static inline uword -vnet_ip_adjacency_signature (ip_adjacency_t * adj) -{ - uword signature = 0xfeedfaceULL; - - /* Skip heap handle, sum everything up to but not including share_count */ - signature = hash_memory - (STRUCT_MARK_PTR(adj, signature_start), - STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) - - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start), - signature); - - /* and the rewrite */ - signature = hash_memory (&adj->rewrite_header, VLIB_BUFFER_PRE_DATA_SIZE, - signature); - return signature; -} +_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline0) == 0), + "IP adjacency cachline 0 is not offset"); +_Static_assert((STRUCT_OFFSET_OF(ip_adjacency_t, cacheline1) == + CLIB_CACHE_LINE_BYTES), + "IP adjacency cachline 1 is more than one cachline size offset"); -static inline int -vnet_ip_adjacency_share_compare (ip_adjacency_t * a1, ip_adjacency_t *a2) -{ - if (memcmp (STRUCT_MARK_PTR(a1, signature_start), - STRUCT_MARK_PTR(a2, signature_start), - STRUCT_OFFSET_OF(ip_adjacency_t, signature_end) - - STRUCT_OFFSET_OF(ip_adjacency_t, signature_start))) - return 0; - if (memcmp (&a1->rewrite_header, &a2->rewrite_header, - VLIB_BUFFER_PRE_DATA_SIZE)) - return 0; - return 1; -} +/* An all zeros address */ +extern const ip46_address_t zero_addr; /* Index into adjacency table. */ typedef u32 ip_adjacency_index_t; typedef struct { - /* Directly connected next-hop adjacency index. 
*/ - u32 next_hop_adj_index; - - /* Path weight for this adjacency. */ - u32 weight; -} ip_multipath_next_hop_t; - -typedef struct { /* Adjacency index of first index in block. */ u32 adj_index; @@ -276,11 +274,7 @@ typedef struct { /* Number of prefixes that point to this adjacency. */ u32 reference_count; - /* Normalized next hops are used as hash keys: they are sorted by weight - and weights are chosen so they add up to 1 << log2_n_adj_in_block (with - zero-weighted next hops being deleted). - Unnormalized next hops are saved so that control plane has a record of exactly - what the RIB told it. */ + /* Normalized next hops are saved for stats/display purposes */ struct { /* Number of hops in the multipath. */ u32 count; @@ -290,7 +284,7 @@ typedef struct { /* Heap handle used to for example free block when we're done with it. */ u32 heap_handle; - } normalized_next_hops, unnormalized_next_hops; + } normalized_next_hops; } ip_multipath_adjacency_t; /* IP multicast adjacency. */ @@ -397,20 +391,11 @@ typedef struct ip_adj_register_struct { } ip_adj_register_t; typedef struct ip_lookup_main_t { - /** Adjacency heap. */ + /* Adjacency heap. */ ip_adjacency_t * adjacency_heap; - /** Adjacency packet/byte counters indexed by adjacency index. */ - vlib_combined_counter_main_t adjacency_counters; - - /** Heap of (next hop, weight) blocks. Sorted by next hop. */ - ip_multipath_next_hop_t * next_hop_heap; - - /** Indexed by heap_handle from ip_adjacency_t. */ - ip_multipath_adjacency_t * multipath_adjacencies; - - /** Adjacency by signature hash */ - uword * adj_index_by_signature; + /** load-balance packet/byte counters indexed by LB index. 
*/ + vlib_combined_counter_main_t load_balance_counters; /** any-tx-feature-enabled interface bitmap */ uword * tx_sw_if_has_ip_output_features; @@ -418,29 +403,6 @@ typedef struct ip_lookup_main_t { /** count of enabled features, per sw_if_index, to maintain bitmap */ i16 * tx_feature_count_by_sw_if_index; - /** Temporary vectors for looking up next hops in hash. */ - ip_multipath_next_hop_t * next_hop_hash_lookup_key; - ip_multipath_next_hop_t * next_hop_hash_lookup_key_normalized; - - /** Hash table mapping normalized next hops and weights - to multipath adjacency index. */ - uword * multipath_adjacency_by_next_hops; - - u32 * adjacency_remap_table; - u32 n_adjacency_remaps; - - /** If average error per adjacency is less than this threshold adjacency block - size is accepted. */ - f64 multipath_next_hop_error_tolerance; - - /** Adjacency index for routing table misses, local punts, and drops. */ - u32 miss_adj_index, drop_adj_index, local_adj_index; - - /** Miss adjacency is always first in adjacency table. */ -#define IP_LOOKUP_MISS_ADJ_INDEX 0 - - ip_add_del_adjacency_callback_t * add_del_adjacency_callbacks; - /** Pool of addresses that are assigned to interfaces. */ ip_interface_address_t * if_address_pool; @@ -501,54 +463,6 @@ do { \ CLIB_PREFETCH (_adj, sizeof (_adj[0]), type); \ } while (0) -/* Adds a next node to ip4 or ip6 lookup node which can be then used in adjacencies. - * @param vlib_main pointer - * @param lm ip4_main.lookup_main or ip6_main.lookup_main - * @param reg registration structure - * @param next_node_index Returned index to be used in adjacencies. - * @return 0 on success. -1 on failure. - */ -int ip_register_adjacency(vlib_main_t *vm, u8 is_ip4, - ip_adj_register_t *reg); - -/* - * Construction helpers to add IP adjacency at init. - */ -#define VNET_IP_REGISTER_ADJACENCY(ip,x,...) 
\ - __VA_ARGS__ ip_adj_register_t ip##adj_##x; \ -static void __vnet_##ip##_register_adjacency_##x (void) \ - __attribute__((__constructor__)) ; \ -static void __vnet_##ip##_register_adjacency_##x (void) \ -{ \ - ip_lookup_main_t *lm = &ip##_main.lookup_main; \ - ip##adj_##x.next = lm->registered_adjacencies; \ - lm->registered_adjacencies = &ip##adj_##x; \ -} \ -__VA_ARGS__ ip_adj_register_t ip##adj_##x - -#define VNET_IP4_REGISTER_ADJACENCY(x,...) \ - VNET_IP_REGISTER_ADJACENCY(ip4, x, __VA_ARGS__) - -#define VNET_IP6_REGISTER_ADJACENCY(x,...) \ - VNET_IP_REGISTER_ADJACENCY(ip6, x, __VA_ARGS__) - -static inline void -ip_register_add_del_adjacency_callback(ip_lookup_main_t * lm, - ip_add_del_adjacency_callback_t cb) -{ - vec_add1(lm->add_del_adjacency_callbacks, cb); -} - -always_inline void -ip_call_add_del_adjacency_callbacks (ip_lookup_main_t * lm, u32 adj_index, u32 is_del) -{ - ip_adjacency_t * adj; - uword i; - adj = ip_get_adjacency (lm, adj_index); - for (i = 0; i < vec_len (lm->add_del_adjacency_callbacks); i++) - lm->add_del_adjacency_callbacks[i] (lm, adj_index, adj, is_del); -} - /* Create new block of given number of contiguous adjacencies. 
*/ ip_adjacency_t * ip_add_adjacency (ip_lookup_main_t * lm, @@ -556,38 +470,6 @@ ip_add_adjacency (ip_lookup_main_t * lm, u32 n_adj, u32 * adj_index_result); -void ip_del_adjacency (ip_lookup_main_t * lm, u32 adj_index); -void -ip_update_adjacency (ip_lookup_main_t * lm, - u32 adj_index, - ip_adjacency_t * copy_adj); - -static inline int -ip_adjacency_is_multipath(ip_lookup_main_t * lm, u32 adj_index) -{ - if (!vec_len(lm->multipath_adjacencies)) - return 0; - - if (vec_len(lm->multipath_adjacencies) < adj_index - 1) - return 0; - - - return (lm->multipath_adjacencies[adj_index].adj_index == adj_index && - lm->multipath_adjacencies[adj_index].n_adj_in_block > 0); -} - -void -ip_multipath_adjacency_free (ip_lookup_main_t * lm, - ip_multipath_adjacency_t * a); - -u32 -ip_multipath_adjacency_add_del_next_hop (ip_lookup_main_t * lm, - u32 is_del, - u32 old_mp_adj_index, - u32 next_hop_adj_index, - u32 next_hop_weight, - u32 * new_mp_adj_index); - clib_error_t * ip_interface_address_add_del (ip_lookup_main_t * lm, u32 sw_if_index, @@ -596,6 +478,9 @@ ip_interface_address_add_del (ip_lookup_main_t * lm, u32 is_del, u32 * result_index); +u8 * +format_ip_flow_hash_config (u8 * s, va_list * args); + always_inline ip_interface_address_t * ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib) { @@ -603,28 +488,14 @@ ip_get_interface_address (ip_lookup_main_t * lm, void * addr_fib) return p ? 
pool_elt_at_index (lm->if_address_pool, p[0]) : 0; } +u32 +fib_table_id_find_fib_index (fib_protocol_t proto, + u32 table_id); + always_inline void * ip_interface_address_get_address (ip_lookup_main_t * lm, ip_interface_address_t * a) { return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); } -always_inline ip_interface_address_t * -ip_interface_address_for_packet (ip_lookup_main_t * lm, vlib_buffer_t * b, u32 sw_if_index) -{ - ip_adjacency_t * adj; - u32 if_address_index; - - adj = ip_get_adjacency (lm, vnet_buffer (b)->ip.adj_index[VLIB_TX]); - - ASSERT (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP - || adj->lookup_next_index == IP_LOOKUP_NEXT_LOCAL); - if_address_index = adj->if_address_index; - if_address_index = (if_address_index == ~0 ? - vec_elt (lm->if_address_pool_index_by_sw_if_index, sw_if_index) - : if_address_index); - - return (if_address_index != ~0)?pool_elt_at_index (lm->if_address_pool, if_address_index):NULL; -} - #define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ do { \ vnet_main_t *_vnm = vnet_get_main(); \ @@ -653,7 +524,5 @@ do { \ } while (0) void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); -u32 vnet_register_special_adjacency_format_function -(ip_lookup_main_t * lm, format_function_t * fp); #endif /* included_ip_lookup_h */ diff --git a/vnet/vnet/ip/ping.c b/vnet/vnet/ip/ping.c index b5842a69c50..3bc4da882d5 100644 --- a/vnet/vnet/ip/ping.c +++ b/vnet/vnet/ip/ping.c @@ -14,6 +14,9 @@ */ #include <vnet/ip/ping.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry.h> u8 * format_icmp4_input_trace (u8 * s, va_list * va) @@ -278,7 +281,14 @@ send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, ip6_address_t * pa6, vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ fib_index0 = 0; - adj_index0 = ip6_fib_lookup_with_table (im, fib_index0, pa6); + adj_index0 = 
fib_entry_get_adj(ip6_fib_table_lookup(fib_index0, pa6, 128)); + + if (ADJ_INDEX_INVALID == adj_index0) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + sw_if_index0 = adj_index_to_sw_if_index (vm, lm, ip6_lookup_next_nodes, adj_index0, sw_if_index, verbose); @@ -362,7 +372,15 @@ send_ip4_ping (vlib_main_t * vm, vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0; vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0; /* use interface VRF */ fib_index0 = 0; - adj_index0 = ip4_fib_lookup_with_table (im, fib_index0, pa4, 0); + adj_index0 = fib_entry_get_adj(ip4_fib_table_lookup( + ip4_fib_get(fib_index0), pa4, 32)); + + if (ADJ_INDEX_INVALID == adj_index0) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + sw_if_index0 = adj_index_to_sw_if_index (vm, lm, ip4_lookup_next_nodes, adj_index0, sw_if_index, verbose); diff --git a/vnet/vnet/ip/udp.h b/vnet/vnet/ip/udp.h index 1cf525c6093..1845fa74a46 100644 --- a/vnet/vnet/ip/udp.h +++ b/vnet/vnet/ip/udp.h @@ -115,14 +115,13 @@ void udp_register_dst_port (vlib_main_t * vm, u32 node_index, u8 is_ip4); always_inline void -ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, +ip_udp_fixup_one (vlib_main_t * vm, + vlib_buffer_t * b0, u8 is_ip4) { u16 new_l0; udp_header_t * udp0; - vlib_buffer_advance (b0, - ec_len); - if (is_ip4) { ip4_header_t * ip0; @@ -131,9 +130,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, ip0 = vlib_buffer_get_current(b0); - /* Apply the encap string. */ - clib_memcpy(ip0, ec0, ec_len); - /* fix the <bleep>ing outer-IP checksum */ sum0 = ip0->checksum; /* old_l0 always 0, see the rewrite setup */ @@ -157,9 +153,6 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, ip0 = vlib_buffer_get_current(b0); - /* Apply the encap string. 
*/ - clib_memcpy(ip0, ec0, ec_len); - new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip0)); ip0->payload_length = new_l0; @@ -175,6 +168,33 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, udp0->checksum = 0xffff; } } +always_inline void +ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len, + u8 is_ip4) +{ + vlib_buffer_advance (b0, - ec_len); + + if (is_ip4) + { + ip4_header_t * ip0; + + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. */ + clib_memcpy(ip0, ec0, ec_len); + ip_udp_fixup_one(vm, b0, 1); + } + else + { + ip6_header_t * ip0; + + ip0 = vlib_buffer_get_current(b0); + + /* Apply the encap string. */ + clib_memcpy(ip0, ec0, ec_len); + ip_udp_fixup_one(vm, b0, 0); + } +} always_inline void ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1, diff --git a/vnet/vnet/ipsec-gre/ipsec_gre.c b/vnet/vnet/ipsec-gre/ipsec_gre.c index 3d1b54fc7f9..cf0f391fede 100644 --- a/vnet/vnet/ipsec-gre/ipsec_gre.c +++ b/vnet/vnet/ipsec-gre/ipsec_gre.c @@ -25,18 +25,6 @@ ipsec_gre_main_t ipsec_gre_main; /** - * @brief IPv4 and GRE header. - * -*/ -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct -{ - ip4_header_t ip4; - gre_header_t gre; -}) ip4_and_gre_header_t; -/* *INDENT-OFF* */ - -/** * @brief IPv4 and GRE header union. 
* */ diff --git a/vnet/vnet/lisp-cp/control.c b/vnet/vnet/lisp-cp/control.c index 16d7bfa0e1f..5de30d5bb64 100644 --- a/vnet/vnet/lisp-cp/control.c +++ b/vnet/vnet/lisp-cp/control.c @@ -18,6 +18,8 @@ #include <vnet/lisp-cp/packets.h> #include <vnet/lisp-cp/lisp_msg_serdes.h> #include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> typedef struct { @@ -74,37 +76,36 @@ ip_interface_get_first_ip_address (lisp_cp_main_t * lcm, u32 sw_if_index, return 1; } -static u32 -ip_fib_lookup_with_table (lisp_cp_main_t * lcm, u32 fib_index, - ip_address_t * dst) +/** + * convert from a LISP address to a FIB prefix + */ +void +ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix) { - if (ip_addr_version (dst) == IP4) - return ip4_fib_lookup_with_table (lcm->im4, fib_index, &ip_addr_v4 (dst), - 0); + if (addr->version == IP4) + { + prefix->fp_len = 32; + prefix->fp_proto = FIB_PROTOCOL_IP4; + memset (&prefix->fp_addr.pad, 0, sizeof (prefix->fp_addr.pad)); + memcpy (&prefix->fp_addr.ip4, &addr->ip, sizeof (prefix->fp_addr.ip4)); + } else - return ip6_fib_lookup_with_table (lcm->im6, fib_index, &ip_addr_v6 (dst)); + { + prefix->fp_len = 128; + prefix->fp_proto = FIB_PROTOCOL_IP6; + memcpy (&prefix->fp_addr.ip6, &addr->ip, sizeof (prefix->fp_addr.ip6)); + } } -u32 -ip_fib_get_egress_iface_for_dst_with_lm (lisp_cp_main_t * lcm, - ip_address_t * dst, - ip_lookup_main_t * lm) +/** + * convert from a LISP to a FIB prefix + */ +void +ip_prefix_to_fib_prefix (const ip_prefix_t * ip_prefix, + fib_prefix_t * fib_prefix) { - u32 adj_index; - ip_adjacency_t *adj; - - adj_index = ip_fib_lookup_with_table (lcm, 0, dst); - adj = ip_get_adjacency (lm, adj_index); - - if (adj == 0) - return ~0; - - /* we only want outgoing routes */ - if (adj->lookup_next_index != IP_LOOKUP_NEXT_ARP - && adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) - return ~0; - - return adj->rewrite_header.sw_if_index; + ip_address_to_fib_prefix 
(&ip_prefix->addr, fib_prefix); + fib_prefix->fp_len = ip_prefix->len; } /** @@ -114,12 +115,14 @@ ip_fib_get_egress_iface_for_dst_with_lm (lisp_cp_main_t * lcm, u32 ip_fib_get_egress_iface_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst) { - ip_lookup_main_t *lm; + fib_node_index_t fei; + fib_prefix_t prefix; + + ip_address_to_fib_prefix (dst, &prefix); - lm = ip_addr_version (dst) == IP4 ? - &lcm->im4->lookup_main : &lcm->im6->lookup_main; + fei = fib_table_lookup (0, &prefix); - return ip_fib_get_egress_iface_for_dst_with_lm (lcm, dst, lm); + return (fib_entry_get_resolving_interface (fei)); } /** @@ -140,7 +143,7 @@ ip_fib_get_first_egress_ip_for_dst (lisp_cp_main_t * lcm, ip_address_t * dst, ipver = ip_addr_version (dst); lm = (ipver == IP4) ? &lcm->im4->lookup_main : &lcm->im6->lookup_main; - si = ip_fib_get_egress_iface_for_dst_with_lm (lcm, dst, lm); + si = ip_fib_get_egress_iface_for_dst (lcm, dst); if ((u32) ~ 0 == si) return 0; @@ -2871,28 +2874,14 @@ lisp_get_vni_from_buffer_ip (lisp_cp_main_t * lcm, vlib_buffer_t * b, u8 version) { uword *vnip; - u32 vni = ~0, table_id = ~0, fib_index; + u32 vni = ~0, table_id = ~0; - if (version == IP4) - { - ip4_fib_t *fib; - ip4_main_t *im4 = &ip4_main; - fib_index = vec_elt (im4->fib_index_by_sw_if_index, - vnet_buffer (b)->sw_if_index[VLIB_RX]); - fib = find_ip4_fib_by_table_index_or_id (im4, fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); - table_id = fib->table_id; - } - else - { - ip6_fib_t *fib; - ip6_main_t *im6 = &ip6_main; - fib_index = vec_elt (im6->fib_index_by_sw_if_index, - vnet_buffer (b)->sw_if_index[VLIB_RX]); - fib = find_ip6_fib_by_table_index_or_id (im6, fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); - table_id = fib->table_id; - } + table_id = + fib_table_get_table_id_for_sw_if_index (vnet_buffer (b)->sw_if_index + [VLIB_RX], + (version == + IP4 ? 
FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6)); vnip = hash_get (lcm->vni_by_table_id, table_id); if (vnip) @@ -2979,8 +2968,9 @@ get_src_and_dst_eids_from_buffer (lisp_cp_main_t * lcm, vlib_buffer_t * b, } static uword -lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +lisp_cp_lookup_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int overlay) { u32 *from, *to_next_drop, di, si; lisp_cp_main_t *lcm = vnet_lisp_cp_get_main (); @@ -3010,6 +3000,7 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, pi0); b0->error = node->errors[LISP_CP_LOOKUP_ERROR_DROP]; + vnet_buffer (b0)->lisp.overlay_afi = overlay; /* src/dst eid pair */ get_src_and_dst_eids_from_buffer (lcm, b0, &src, &dst); @@ -3070,10 +3061,45 @@ lisp_cp_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, return from_frame->n_vectors; } +static uword +lisp_cp_lookup_ip4 (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP)); +} + +static uword +lisp_cp_lookup_ip6 (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_IP6)); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = { + .function = lisp_cp_lookup_ip4, + .name = "lisp-cp-lookup-ip4", + .vector_size = sizeof (u32), + .format_trace = format_lisp_cp_lookup_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = LISP_CP_LOOKUP_N_ERROR, + .error_strings = lisp_cp_lookup_error_strings, + + .n_next_nodes = LISP_CP_LOOKUP_N_NEXT, + + .next_nodes = { + [LISP_CP_LOOKUP_NEXT_DROP] = "error-drop", + [LISP_CP_LOOKUP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [LISP_CP_LOOKUP_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, +}; +/* *INDENT-ON* */ + /* *INDENT-OFF* */ -VLIB_REGISTER_NODE (lisp_cp_lookup_node) = { - .function = lisp_cp_lookup, - .name = "lisp-cp-lookup", 
+VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = { + .function = lisp_cp_lookup_ip6, + .name = "lisp-cp-lookup-ip6", .vector_size = sizeof (u32), .format_trace = format_lisp_cp_lookup_trace, .type = VLIB_NODE_TYPE_INTERNAL, diff --git a/vnet/vnet/lisp-cp/control.h b/vnet/vnet/lisp-cp/control.h index 76590b2c36b..02efd046170 100644 --- a/vnet/vnet/lisp-cp/control.h +++ b/vnet/vnet/lisp-cp/control.h @@ -149,7 +149,8 @@ typedef struct lisp_cp_main_t lisp_control_main; extern vlib_node_registration_t lisp_cp_input_node; -extern vlib_node_registration_t lisp_cp_lookup_node; +extern vlib_node_registration_t lisp_cp_lookup_ip4_node; +extern vlib_node_registration_t lisp_cp_lookup_ip6_node; clib_error_t *lisp_cp_init (); diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.c b/vnet/vnet/lisp-cp/lisp_cp_dpo.c new file mode 100644 index 00000000000..0bb8098d6fc --- /dev/null +++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.c @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/dpo/dpo.h> +#include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-cp/control.h> + +index_t +lisp_cp_dpo_get (fib_protocol_t proto) +{ + /* + * there are only two instances of this DPO type. 
+ * we can use the protocol as the index + */ + return (proto); +} + +static u8* +format_lisp_cp_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + + return (format(s, "lisp-cp-punt-%U", + format_fib_protocol, index)); +} + +static void +lisp_cp_dpo_lock (dpo_id_t *dpo) +{ +} + +static void +lisp_cp_dpo_unlock (dpo_id_t *dpo) +{ +} + +const static dpo_vft_t lisp_cp_vft = { + .dv_lock = lisp_cp_dpo_lock, + .dv_unlock = lisp_cp_dpo_unlock, + .dv_format = format_lisp_cp_dpo, +}; + +/** + * @brief The per-protocol VLIB graph nodes that are assigned to a LISP-CP + * object. + * + * this means that these graph nodes are ones from which a LISP-CP is the + * parent object in the DPO-graph. + */ +const static char* const lisp_cp_ip4_nodes[] = +{ + "lisp-cp-lookup-ip4", + NULL, +}; +const static char* const lisp_cp_ip6_nodes[] = +{ + "lisp-cp-lookup-ip6", + NULL, +}; + +const static char* const * const lisp_cp_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = lisp_cp_ip4_nodes, + [DPO_PROTO_IP6] = lisp_cp_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +clib_error_t * +lisp_cp_dpo_module_init (vlib_main_t * vm) +{ + /* + * there are no exit arcs from the LIS-CP VLIB node, so we + * pass NULL as said node array. + */ + dpo_register(DPO_LISP_CP, &lisp_cp_vft, lisp_cp_nodes); + + return (NULL); +} + +VLIB_INIT_FUNCTION(lisp_cp_dpo_module_init); diff --git a/vnet/vnet/lisp-cp/lisp_cp_dpo.h b/vnet/vnet/lisp-cp/lisp_cp_dpo.h new file mode 100644 index 00000000000..ea97711a8de --- /dev/null +++ b/vnet/vnet/lisp-cp/lisp_cp_dpo.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LISP_CP_DPO_H__ +#define __LISP_CP_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of punt to the LISP control plane. + */ +typedef struct lisp_cp_dpo_t +{ + /** + * The transport payload type. + */ + fib_protocol_t lcd_proto; +} lisp_cp_dpo_t; + +extern index_t lisp_cp_dpo_get(fib_protocol_t proto); + +extern void lisp_cp_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/lisp-cp/lisp_types.c b/vnet/vnet/lisp-cp/lisp_types.c index b4fb1d91bfc..a2edb487cca 100644 --- a/vnet/vnet/lisp-cp/lisp_types.c +++ b/vnet/vnet/lisp-cp/lisp_types.c @@ -147,6 +147,8 @@ uword unformat_ip_address (unformat_input_t * input, va_list * args) { ip_address_t *a = va_arg (*args, ip_address_t *); + + memset (a, 0, sizeof (*a)); if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a))) ip_addr_version (a) = IP4; else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a))) @@ -331,8 +333,32 @@ unformat_negative_mapping_action (unformat_input_t * input, va_list * args) return 1; } +u8 * +format_negative_mapping_action (u8 * s, va_list * args) +{ + lisp_action_e action = va_arg (*args, lisp_action_e); + + switch (action) + { + case LISP_NO_ACTION: + s = format (s, "no-action"); + break; + case LISP_FORWARD_NATIVE: + s = format (s, "natively-forward"); + break; + case LISP_SEND_MAP_REQUEST: + s = format (s, "send-map-request"); + break; + case LISP_DROP: + default: + s = format (s, "drop"); + break; + } + return (s); +} + u16 -ip_address_size (ip_address_t * a) 
+ip_address_size (const ip_address_t * a) { switch (ip_addr_version (a)) { @@ -653,7 +679,7 @@ gid_address_free (gid_address_t * a) } int -ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2) +ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2) { int res = 0; if (ip_addr_version (ip1) != ip_addr_version (ip2)) @@ -670,19 +696,19 @@ ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2) } void -ip_address_copy (ip_address_t * dst, ip_address_t * src) +ip_address_copy (ip_address_t * dst, const ip_address_t * src) { clib_memcpy (dst, src, sizeof (ip_address_t)); } void -ip_address_copy_addr (void *dst, ip_address_t * src) +ip_address_copy_addr (void *dst, const ip_address_t * src) { clib_memcpy (dst, src, ip_address_size (src)); } void -ip_address_set (ip_address_t * dst, void *src, u8 version) +ip_address_set (ip_address_t * dst, const void *src, u8 version) { clib_memcpy (dst, src, ip_version_to_size (version)); ip_addr_version (dst) = version; diff --git a/vnet/vnet/lisp-cp/lisp_types.h b/vnet/vnet/lisp-cp/lisp_types.h index cb1b277b530..cd1d1b9a642 100644 --- a/vnet/vnet/lisp-cp/lisp_types.h +++ b/vnet/vnet/lisp-cp/lisp_types.h @@ -42,10 +42,10 @@ typedef CLIB_PACKED(struct ip_address #define ip_addr_v6(_a) (_a)->ip.v6 #define ip_addr_version(_a) (_a)->version -int ip_address_cmp (ip_address_t * ip1, ip_address_t * ip2); -void ip_address_copy (ip_address_t * dst, ip_address_t * src); -void ip_address_copy_addr (void *dst, ip_address_t * src); -void ip_address_set (ip_address_t * dst, void *src, u8 version); +int ip_address_cmp (const ip_address_t * ip1, const ip_address_t * ip2); +void ip_address_copy (ip_address_t * dst, const ip_address_t * src); +void ip_address_copy_addr (void *dst, const ip_address_t * src); +void ip_address_set (ip_address_t * dst, const void *src, u8 version); /* *INDENT-OFF* */ typedef CLIB_PACKED(struct ip_prefix @@ -63,6 +63,11 @@ typedef CLIB_PACKED(struct ip_prefix void ip_prefix_normalize (ip_prefix_t * a); 
+extern void ip_address_to_fib_prefix (const ip_address_t * addr, + fib_prefix_t * prefix); +extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp, + fib_prefix_t * fibp); + typedef enum { /* NOTE: ip addresses are left out on purpose. Use max masked ip-prefixes @@ -107,6 +112,7 @@ typedef fid_address_t dp_address_t; #define fid_addr_ippref(_a) (_a)->ippref #define fid_addr_mac(_a) (_a)->mac #define fid_addr_type(_a) (_a)->type +u8 *format_fid_address (u8 * s, va_list * args); typedef struct { @@ -293,6 +299,7 @@ typedef struct uword unformat_negative_mapping_action (unformat_input_t * input, va_list * args); +u8 *format_negative_mapping_action (u8 *, va_list * args); typedef struct locator_pair { diff --git a/vnet/vnet/lisp-gpe/interface.c b/vnet/vnet/lisp-gpe/interface.c index abfdfdb89f2..52db1eb3628 100644 --- a/vnet/vnet/lisp-gpe/interface.c +++ b/vnet/vnet/lisp-gpe/interface.c @@ -26,6 +26,10 @@ #include <vnet/ip/udp.h> #include <vnet/ethernet/ethernet.h> #include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> #define foreach_lisp_gpe_tx_next \ _(DROP, "error-drop") \ @@ -56,147 +60,6 @@ format_lisp_gpe_tx_trace (u8 * s, va_list * args) return s; } -always_inline void -get_one_tunnel_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, - lisp_gpe_tunnel_t ** t0, u8 is_v4) -{ - u32 adj_index0, tunnel_index0; - ip_adjacency_t *adj0; - - /* Get adjacency and from it the tunnel_index */ - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - - if (is_v4) - adj0 = ip_get_adjacency (lgm->lm4, adj_index0); - else - adj0 = ip_get_adjacency (lgm->lm6, adj_index0); - - tunnel_index0 = adj0->if_address_index; - t0[0] = pool_elt_at_index (lgm->tunnels, tunnel_index0); - - ASSERT (t0[0] != 0); -} - -always_inline void -encap_one_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, - lisp_gpe_tunnel_t * t0, u32 * next0) -{ - ASSERT (sizeof (ip4_udp_lisp_gpe_header_t) 
== 36); - ASSERT (sizeof (ip6_udp_lisp_gpe_header_t) == 56); - - lisp_gpe_sub_tunnel_t *st0; - u32 *sti0; - - sti0 = vec_elt_at_index (t0->sub_tunnels_lbv, - vnet_buffer (b0)->ip.flow_hash % - t0->sub_tunnels_lbv_count); - st0 = vec_elt_at_index (t0->sub_tunnels, sti0[0]); - if (st0->is_ip4) - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1); - next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; - } - else - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0); - next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; - } - - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; -} - -always_inline void -get_two_tunnels_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, - vlib_buffer_t * b1, lisp_gpe_tunnel_t ** t0, - lisp_gpe_tunnel_t ** t1, u8 is_v4) -{ - u32 adj_index0, adj_index1, tunnel_index0, tunnel_index1; - ip_adjacency_t *adj0, *adj1; - - /* Get adjacency and from it the tunnel_index */ - adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; - - if (is_v4) - { - adj0 = ip_get_adjacency (lgm->lm4, adj_index0); - adj1 = ip_get_adjacency (lgm->lm4, adj_index1); - } - else - { - adj0 = ip_get_adjacency (lgm->lm6, adj_index0); - adj1 = ip_get_adjacency (lgm->lm6, adj_index1); - } - - tunnel_index0 = adj0->if_address_index; - tunnel_index1 = adj1->if_address_index; - - t0[0] = pool_elt_at_index (lgm->tunnels, tunnel_index0); - t1[0] = pool_elt_at_index (lgm->tunnels, tunnel_index1); - - ASSERT (t0[0] != 0); - ASSERT (t1[0] != 0); -} - -always_inline void -encap_two_inline (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, - vlib_buffer_t * b1, lisp_gpe_tunnel_t * t0, - lisp_gpe_tunnel_t * t1, u32 * next0, u32 * next1) -{ - ASSERT (sizeof (ip4_udp_lisp_gpe_header_t) == 36); - ASSERT (sizeof (ip6_udp_lisp_gpe_header_t) == 56); - - lisp_gpe_sub_tunnel_t *st0, *st1; - u32 *sti0, *sti1; - sti0 = vec_elt_at_index (t0->sub_tunnels_lbv, - vnet_buffer 
(b0)->ip.flow_hash % - t0->sub_tunnels_lbv_count); - sti1 = - vec_elt_at_index (t1->sub_tunnels_lbv, - vnet_buffer (b1)->ip.flow_hash % - t1->sub_tunnels_lbv_count); - st0 = vec_elt_at_index (t0->sub_tunnels, sti0[0]); - st1 = vec_elt_at_index (t1->sub_tunnels, sti1[0]); - - if (PREDICT_TRUE (st0->is_ip4 == st1->is_ip4)) - { - if (st0->is_ip4) - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1); - ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1); - next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; - } - else - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 0); - ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 0); - next0[0] = next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; - } - } - else - { - if (st0->is_ip4) - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 36, 1); - ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 56, 1); - next0[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; - next1[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; - } - else - { - ip_udp_encap_one (lgm->vlib_main, b0, st0->rewrite, 56, 1); - ip_udp_encap_one (lgm->vlib_main, b1, st1->rewrite, 36, 1); - next0[0] = LISP_GPE_TX_NEXT_IP6_LOOKUP; - next1[0] = LISP_GPE_TX_NEXT_IP4_LOOKUP; - } - } - - /* Reset to look up tunnel partner in the configured FIB */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->encap_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->encap_fib_index; -} - #define is_v4_packet(_h) ((*(u8*) _h) & 0xF0) == 0x40 /** @@ -233,81 +96,12 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - u32 next0, next1; - lisp_gpe_tunnel_t *t0 = 0, *t1 = 0; - u8 is_v4_eid0, is_v4_eid1; - - next0 = next1 = LISP_GPE_TX_NEXT_IP4_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - is_v4_eid0 = is_v4_packet (vlib_buffer_get_current (b0)); - is_v4_eid1 = is_v4_packet (vlib_buffer_get_current (b1)); - - if (PREDICT_TRUE (is_v4_eid0 == is_v4_eid1)) - { - get_two_tunnels_inline (lgm, b0, b1, &t0, &t1, - is_v4_eid0 ? 1 : 0); - } - else - { - get_one_tunnel_inline (lgm, b0, &t0, is_v4_eid0 ? 1 : 0); - get_one_tunnel_inline (lgm, b1, &t1, is_v4_eid1 ? 1 : 0); - } - - encap_two_inline (lgm, b0, b1, t0, t1, &next0, &next1); - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, - sizeof (*tr)); - tr->tunnel_index = t0 - lgm->tunnels; - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b1, - sizeof (*tr)); - tr->tunnel_index = t1 - lgm->tunnels; - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { + u32 bi0, adj_index0, next0; + const ip_adjacency_t *adj0; + const dpo_id_t *dpo0; vlib_buffer_t *b0; - u32 bi0, next0 = LISP_GPE_TX_NEXT_IP4_LOOKUP; - lisp_gpe_tunnel_t *t0 = 0; u8 is_v4_0; bi0 = from[0]; @@ -319,16 +113,23 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); + /* Fixup the checksum and len fields in the LISP tunnel encap + * that was applied at the midchain node */ is_v4_0 = is_v4_packet 
(vlib_buffer_get_current (b0)); - get_one_tunnel_inline (lgm, b0, &t0, is_v4_0 ? 1 : 0); + ip_udp_fixup_one (lgm->vlib_main, b0, is_v4_0); - encap_one_inline (lgm, b0, t0, &next0); + /* Follow the DPO on which the midchain is stacked */ + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get (adj_index0); + dpo0 = &adj0->sub_type.midchain.next_dpo; + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { lisp_gpe_tx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); - tr->tunnel_index = t0 - lgm->tunnels; + tr->tunnel_index = adj_index0; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -348,7 +149,7 @@ format_lisp_gpe_name (u8 * s, va_list * args) } /* *INDENT-OFF* */ -VNET_DEVICE_CLASS (lisp_gpe_device_class,static) = { +VNET_DEVICE_CLASS (lisp_gpe_device_class) = { .name = "LISP_GPE", .format_device_name = format_lisp_gpe_name, .format_tx_trace = format_lisp_gpe_tx_trace, @@ -394,133 +195,51 @@ VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = { }; /* *INDENT-ON* */ -int -add_del_ip_prefix_route (ip_prefix_t * dst_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add, u32 * adj_index) +static void +add_del_lisp_gpe_default_route (u32 table_id, fib_protocol_t proto, u8 is_add) { - uword *p; + fib_prefix_t prefix = { + .fp_proto = proto, + }; + u32 fib_index; - if (ip_prefix_version (dst_prefix) == IP4) + if (is_add) { - ip4_main_t *im4 = &ip4_main; - ip4_add_del_route_args_t a; - ip4_address_t addr = ip_prefix_v4 (dst_prefix); - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.adj_index = ~0; - a.dst_address_length = ip_prefix_len (dst_prefix); - a.dst_address = addr; - a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL; - a.add_adj = add_adj; - a.n_add_adj = is_add ? 
1 : 0; - - ip4_add_del_route (im4, &a); - - if (is_add) - { - p = ip4_get_route (im4, table_id, 0, addr.as_u8, - ip_prefix_len (dst_prefix)); - if (p == 0) - { - clib_warning ("Failed to insert route for eid %U!", - format_ip4_address_and_length, addr.as_u8, - ip_prefix_len (dst_prefix)); - return -1; - } - adj_index[0] = p[0]; - } + /* + * Add a deafult route that results in a control plane punt DPO + */ + dpo_id_t cp_punt = DPO_NULL; + + dpo_set (&cp_punt, DPO_LISP_CP, fib_proto_to_dpo (proto), proto); + + fib_index = + fib_table_find_or_create_and_lock (prefix.fp_proto, table_id); + fib_table_entry_special_dpo_add (fib_index, &prefix, FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, &cp_punt); + dpo_unlock (&cp_punt); } else { - ip6_main_t *im6 = &ip6_main; - ip6_add_del_route_args_t a; - ip6_address_t addr = ip_prefix_v6 (dst_prefix); - - memset (&a, 0, sizeof (a)); - a.flags = IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; - a.adj_index = ~0; - a.dst_address_length = ip_prefix_len (dst_prefix); - a.dst_address = addr; - a.flags |= is_add ? IP6_ROUTE_FLAG_ADD : IP6_ROUTE_FLAG_DEL; - a.add_adj = add_adj; - a.n_add_adj = is_add ? 
1 : 0; - - ip6_add_del_route (im6, &a); - - if (is_add) - { - adj_index[0] = ip6_get_route (im6, table_id, 0, &addr, - ip_prefix_len (dst_prefix)); - if (adj_index[0] == 0) - { - clib_warning ("Failed to insert route for eid %U!", - format_ip6_address_and_length, addr.as_u8, - ip_prefix_len (dst_prefix)); - return -1; - } - } + fib_index = fib_table_find (prefix.fp_proto, table_id); + fib_table_entry_special_remove (fib_index, &prefix, FIB_SOURCE_LISP); + fib_table_unlock (fib_index, prefix.fp_proto); } - return 0; } -static void -add_del_lisp_gpe_default_route (u32 table_id, u8 is_v4, u8 is_add) +void +lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id) { - lisp_gpe_main_t *lgm = &lisp_gpe_main; - ip_adjacency_t adj; - ip_prefix_t prefix; - u32 adj_index = 0; - - /* setup adjacency */ - memset (&adj, 0, sizeof (adj)); - - adj.n_adj = 1; - adj.explicit_fib_index = ~0; - adj.lookup_next_index = is_v4 ? lgm->ip4_lookup_next_lgpe_ip4_lookup : - lgm->ip6_lookup_next_lgpe_ip6_lookup; - /* default route has tunnel_index ~0 */ - adj.rewrite_header.sw_if_index = ~0; - - /* set prefix to 0/0 */ - memset (&prefix, 0, sizeof (prefix)); - ip_prefix_version (&prefix) = is_v4 ? 
IP4 : IP6; - - /* add/delete route for prefix */ - add_del_ip_prefix_route (&prefix, table_id, &adj, is_add, &adj_index); -} + fib_node_index_t fib_index; -static void -lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id, u8 is_ip4) -{ - if (is_ip4) - { - ip4_main_t *im4 = &ip4_main; - ip4_fib_t *fib; - fib = find_ip4_fib_by_table_index_or_id (im4, table_id, - IP4_ROUTE_FLAG_TABLE_ID); + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip4_sw_interface_enable_disable (sw_if_index, 1); - /* fib's created if it doesn't exist */ - ASSERT (fib != 0); - - vec_validate (im4->fib_index_by_sw_if_index, sw_if_index); - im4->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } - else - { - ip6_main_t *im6 = &ip6_main; - ip6_fib_t *fib; - fib = find_ip6_fib_by_table_index_or_id (im6, table_id, - IP6_ROUTE_FLAG_TABLE_ID); - - /* fib's created if it doesn't exist */ - ASSERT (fib != 0); - - vec_validate (im6->fib_index_by_sw_if_index, sw_if_index); - im6->fib_index_by_sw_if_index[sw_if_index] = fib->index; - } + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + ip6_sw_interface_enable_disable (sw_if_index, 1); } #define foreach_l2_lisp_gpe_tx_next \ @@ -605,71 +324,71 @@ l2_flow_hash (vlib_buffer_t * b0) return (u32) c; } -always_inline void -l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, - u32 * next0) -{ - lisp_gpe_tunnel_t *t0; - - t0 = pool_elt_at_index (lgm->tunnels, ti0); - ASSERT (0 != t0); - - if (PREDICT_TRUE (LISP_NO_ACTION == t0->action)) - { - /* compute 'flow' hash */ - if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) - vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); - encap_one_inline (lgm, b0, t0, next0); - } - else - { - 
l2_process_tunnel_action (b0, t0->action, next0); - } -} - -always_inline void -l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, - u32 ti0, u32 ti1, u32 * next0, u32 * next1) -{ - lisp_gpe_tunnel_t *t0, *t1; - - t0 = pool_elt_at_index (lgm->tunnels, ti0); - t1 = pool_elt_at_index (lgm->tunnels, ti1); - - ASSERT (0 != t0 && 0 != t1); - - if (PREDICT_TRUE (LISP_NO_ACTION == t0->action - && LISP_NO_ACTION == t1->action)) - { - if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) - vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); - if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) - vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); - encap_two_inline (lgm, b0, b1, t0, t1, next0, next1); - } - else - { - if (LISP_NO_ACTION == t0->action) - { - if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) - vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); - encap_one_inline (lgm, b0, t0, next0); - l2_process_tunnel_action (b1, t1->action, next1); - } - else if (LISP_NO_ACTION == t1->action) - { - if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) - vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); - encap_one_inline (lgm, b1, t1, next1); - l2_process_tunnel_action (b0, t0->action, next0); - } - else - { - l2_process_tunnel_action (b0, t0->action, next0); - l2_process_tunnel_action (b1, t1->action, next1); - } - } -} +/* always_inline void */ +/* l2_process_one (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, u32 ti0, */ +/* u32 * next0) */ +/* { */ +/* lisp_gpe_tunnel_t *t0; */ + +/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */ +/* ASSERT (0 != t0); */ + +/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action)) */ +/* { */ +/* /\* compute 'flow' hash *\/ */ +/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ +/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ +/* encap_one_inline (lgm, b0, t0, next0); */ +/* } */ +/* else */ +/* { */ +/* l2_process_tunnel_action (b0, t0->action, next0); */ +/* } */ +/* } */ + +/* always_inline void */ +/* 
l2_process_two (lisp_gpe_main_t * lgm, vlib_buffer_t * b0, vlib_buffer_t * b1, */ +/* u32 ti0, u32 ti1, u32 * next0, u32 * next1) */ +/* { */ +/* lisp_gpe_tunnel_t *t0, *t1; */ + +/* t0 = pool_elt_at_index (lgm->tunnels, ti0); */ +/* t1 = pool_elt_at_index (lgm->tunnels, ti1); */ + +/* ASSERT (0 != t0 && 0 != t1); */ + +/* if (PREDICT_TRUE (LISP_NO_ACTION == t0->action */ +/* && LISP_NO_ACTION == t1->action)) */ +/* { */ +/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ +/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ +/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */ +/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */ +/* encap_two_inline (lgm, b0, b1, t0, t1, next0, next1); */ +/* } */ +/* else */ +/* { */ +/* if (LISP_NO_ACTION == t0->action) */ +/* { */ +/* if (PREDICT_TRUE (t0->sub_tunnels_lbv_count > 1)) */ +/* vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); */ +/* encap_one_inline (lgm, b0, t0, next0); */ +/* l2_process_tunnel_action (b1, t1->action, next1); */ +/* } */ +/* else if (LISP_NO_ACTION == t1->action) */ +/* { */ +/* if (PREDICT_TRUE (t1->sub_tunnels_lbv_count > 1)) */ +/* vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); */ +/* encap_one_inline (lgm, b1, t1, next1); */ +/* l2_process_tunnel_action (b0, t0->action, next0); */ +/* } */ +/* else */ +/* { */ +/* l2_process_tunnel_action (b0, t0->action, next0); */ +/* l2_process_tunnel_action (b1, t1->action, next1); */ +/* } */ +/* } */ +/* } */ /** * @brief LISP-GPE interface TX (encap) function for L2 overlays. 
@@ -710,9 +429,9 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, { u32 bi0, bi1; vlib_buffer_t *b0, *b1; - u32 next0, next1, ti0, ti1; + u32 next0, next1; lisp_gpe_tunnel_t *t0 = 0, *t1 = 0; - ethernet_header_t *e0, *e1; + // ethernet_header_t *e0, *e1; next0 = next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; @@ -742,49 +461,49 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); - e0 = vlib_buffer_get_current (b0); - e1 = vlib_buffer_get_current (b1); + /* e0 = vlib_buffer_get_current (b0); */ + /* e1 = vlib_buffer_get_current (b1); */ /* lookup dst + src mac */ - ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, - e0->src_address, e0->dst_address); - ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer (b1)->l2.bd_index, - e1->src_address, e1->dst_address); - - if (PREDICT_TRUE ((u32) ~ 0 != ti0) && (u32) ~ 0 != ti1) - { - /* process both tunnels */ - l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1); - } - else - { - if ((u32) ~ 0 != ti0) - { - /* process tunnel for b0 */ - l2_process_one (lgm, b0, ti0, &next0); - - /* no tunnel found for b1, send to control plane */ - next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; - } - else if ((u32) ~ 0 != ti1) - { - /* process tunnel for b1 */ - l2_process_one (lgm, b1, ti1, &next1); - - /* no tunnel found b0, send to control plane */ - next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; - } - else - { - /* no tunnels found */ - next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; - next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; - } - } + /* ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, */ + /* e0->src_address, e0->dst_address); */ + /* ti1 = lisp_l2_fib_lookup (lgm, vnet_buffer (b1)->l2.bd_index, */ + /* 
e1->src_address, e1->dst_address); */ + + /* if (PREDICT_TRUE ((u32) ~ 0 != ti0) && (u32) ~ 0 != ti1) */ + /* { */ + /* /\* process both tunnels *\/ */ + /* l2_process_two (lgm, b0, b1, ti0, ti1, &next0, &next1); */ + /* } */ + /* else */ + /* { */ + /* if ((u32) ~ 0 != ti0) */ + /* { */ + /* /\* process tunnel for b0 *\/ */ + /* l2_process_one (lgm, b0, ti0, &next0); */ + + /* /\* no tunnel found for b1, send to control plane *\/ */ + /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ + /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */ + /* } */ + /* else if ((u32) ~ 0 != ti1) */ + /* { */ + /* /\* process tunnel for b1 *\/ */ + /* l2_process_one (lgm, b1, ti1, &next1); */ + + /* /\* no tunnel found b0, send to control plane *\/ */ + /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ + /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ + /* } */ + /* else */ + /* { */ + /* /\* no tunnels found *\/ */ + /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ + /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ + /* next1 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ + /* vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_MAC; */ + /* } */ + /* } */ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -824,16 +543,16 @@ l2_lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node, ti0 = lisp_l2_fib_lookup (lgm, vnet_buffer (b0)->l2.bd_index, e0->src_address, e0->dst_address); - if (PREDICT_TRUE ((u32) ~ 0 != ti0)) - { - l2_process_one (lgm, b0, ti0, &next0); - } - else - { - /* no tunnel found send to control plane */ - next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; - } + /* if (PREDICT_TRUE ((u32) ~ 0 != ti0)) */ + /* { */ + /* l2_process_one (lgm, b0, ti0, &next0); */ + /* } */ + /* else */ + /* { */ + /* /\* no tunnel found send to control plane *\/ */ + /* next0 = L2_LISP_GPE_TX_NEXT_LISP_CP_LOOKUP; */ + /* vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_MAC; */ + /* } */ if (PREDICT_FALSE 
(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -973,7 +692,6 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, vnet_main_t *vnm = lgm->vnet_main; tunnel_lookup_t *l3_ifaces = &lgm->l3_ifaces; vnet_hw_interface_t *hi; - u32 lookup_next_index4, lookup_next_index6; uword *hip, *si; hip = hash_get (l3_ifaces->hw_if_index_by_dp_table, a->table_id); @@ -997,30 +715,10 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, hi = create_lisp_gpe_iface (lgm, a->vni, a->table_id, &lisp_gpe_device_class, l3_ifaces); - /* set ingress arc from lgpe_ipX_lookup */ - lookup_next_index4 = vlib_node_add_next (lgm->vlib_main, - lgpe_ip4_lookup_node.index, - hi->output_node_index); - lookup_next_index6 = vlib_node_add_next (lgm->vlib_main, - lgpe_ip6_lookup_node.index, - hi->output_node_index); - hash_set (lgm->lgpe_ip4_lookup_next_index_by_table_id, a->table_id, - lookup_next_index4); - hash_set (lgm->lgpe_ip6_lookup_next_index_by_table_id, a->table_id, - lookup_next_index6); - - /* insert default routes that point to lgpe-ipx-lookup */ - add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 1, 1); - add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 0, 1); - - /* set egress arcs */ -#define _(sym,str) vlib_node_add_named_next_with_slot (vnm->vlib_main, \ - hi->tx_node_index, str, LISP_GPE_TX_NEXT_##sym); - foreach_lisp_gpe_tx_next -#undef _ - /* set interface in appropriate v4 and v6 FIBs */ - lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id, 1); - lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id, 0); + /* insert default routes that point to lisp-cp lookup */ + lisp_gpe_iface_set_table (hi->sw_if_index, a->table_id); + add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 1); + add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 1); /* enable interface */ vnet_sw_interface_set_flags (vnm, hi->sw_if_index, @@ -1037,11 +735,15 @@ lisp_gpe_add_del_l3_iface (lisp_gpe_main_t * lgm, return -1; } + hi = vnet_get_hw_interface (vnm, hip[0]); + 
remove_lisp_gpe_iface (lgm, hip[0], a->table_id, &lgm->l3_ifaces); /* unset default routes */ - add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 1, 0); - add_del_lisp_gpe_default_route (a->table_id, /* is_v4 */ 0, 0); + ip4_sw_interface_enable_disable (hi->sw_if_index, 0); + ip6_sw_interface_enable_disable (hi->sw_if_index, 0); + add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP4, 0); + add_del_lisp_gpe_default_route (a->table_id, FIB_PROTOCOL_IP6, 0); } return 0; diff --git a/vnet/vnet/lisp-gpe/ip_forward.c b/vnet/vnet/lisp-gpe/ip_forward.c index bd9951acefa..8a24ec0322c 100644 --- a/vnet/vnet/lisp-gpe/ip_forward.c +++ b/vnet/vnet/lisp-gpe/ip_forward.c @@ -12,1492 +12,257 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/** - * @file - * @brief LISP-GPE overlay IP forwarding logic and lookup data structures. - * - * Provides an implementation of a Source/Dest (SD) IP FIB that leverages the - * existing destination only FIB. Lookups are done in two stages, first the - * destination FIB looks up a packet's destination address and then if a - * an SD entry is hit, the destination adjacency will point to the second - * stage, the source FIB, where the packet's source is looked up. Note that a - * miss in the source FIB does not result in an overall SD lookup retry with - * a less specific entry from the destination FIB. - */ -#include <vnet/lisp-gpe/lisp_gpe.h> - -/** Sets adj index for destination address in IP4 FIB. 
Similar to the function - * in ip4_forward but this one avoids calling route callbacks */ -static void -ip4_sd_fib_set_adj_index (lisp_gpe_main_t * lgm, ip4_fib_t * fib, u32 flags, - u32 dst_address_u32, u32 dst_address_length, - u32 adj_index) -{ - ip_lookup_main_t *lm = lgm->lm4; - uword *hash; - - if (vec_bytes (fib->old_hash_values)) - memset (fib->old_hash_values, ~0, vec_bytes (fib->old_hash_values)); - if (vec_bytes (fib->new_hash_values)) - memset (fib->new_hash_values, ~0, vec_bytes (fib->new_hash_values)); - fib->new_hash_values[0] = adj_index; - - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - hash = _hash_set3 (hash, dst_address_u32, - fib->new_hash_values, fib->old_hash_values); - - fib->adj_index_by_dst_address[dst_address_length] = hash; -} - -/** Initialize the adjacency index by destination address vector for IP4 FIB. - * Copied from ip4_forward since it's static */ -static void -ip4_fib_init_adj_index_by_dst_address (ip_lookup_main_t * lm, - ip4_fib_t * fib, u32 address_length) -{ - hash_t *h; - uword max_index; - - ASSERT (lm->fib_result_n_bytes >= sizeof (uword)); - lm->fib_result_n_words = round_pow2 (lm->fib_result_n_bytes, sizeof (uword)) - / sizeof (uword); - - fib->adj_index_by_dst_address[address_length] = - hash_create (32 /* elts */ , lm->fib_result_n_words * sizeof (uword)); - - hash_set_flags (fib->adj_index_by_dst_address[address_length], - HASH_FLAG_NO_AUTO_SHRINK); - - h = hash_header (fib->adj_index_by_dst_address[address_length]); - max_index = (hash_value_bytes (h) / sizeof (fib->new_hash_values[0])) - 1; - - /* Initialize new/old hash value vectors. */ - vec_validate_init_empty (fib->new_hash_values, max_index, ~0); - vec_validate_init_empty (fib->old_hash_values, max_index, ~0); -} - -/** Add/del src route to IP4 SD FIB. 
*/ -static void -ip4_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm, - ip4_add_del_route_args_t * a) -{ - ip_lookup_main_t *lm = lgm->lm4; - ip4_fib_t *fib; - u32 dst_address, dst_address_length, adj_index, old_adj_index; - uword *hash, is_del; - - /* Either create new adjacency or use given one depending on arguments. */ - if (a->n_add_adj > 0) - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - else - adj_index = a->adj_index; - - dst_address = a->dst_address.data_u32; - dst_address_length = a->dst_address_length; - - fib = pool_elt_at_index (lgm->ip4_src_fibs, a->table_index_or_table_id); - - if (!fib->adj_index_by_dst_address[dst_address_length]) - ip4_fib_init_adj_index_by_dst_address (lm, fib, dst_address_length); - - hash = fib->adj_index_by_dst_address[dst_address_length]; - - is_del = (a->flags & IP4_ROUTE_FLAG_DEL) != 0; - - if (is_del) - { - fib->old_hash_values[0] = ~0; - hash = _hash_unset (hash, dst_address, fib->old_hash_values); - fib->adj_index_by_dst_address[dst_address_length] = hash; - } - else - ip4_sd_fib_set_adj_index (lgm, fib, a->flags, dst_address, - dst_address_length, adj_index); - - old_adj_index = fib->old_hash_values[0]; - - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && adj_index != ~0 && !(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count--; - } - - ip4_fib_mtrie_add_del_route (fib, a->dst_address, dst_address_length, - is_del ? old_adj_index : adj_index, is_del); - - /* Delete old adjacency index if present and changed. */ - if (!(a->flags & IP4_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); -} - -/** Get src route from IP4 SD FIB. 
*/ -static void * -ip4_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index, - ip4_address_t * src, u32 address_length) -{ - ip4_fib_t *fib = pool_elt_at_index (lgm->ip4_src_fibs, src_fib_index); - uword *hash, *p; - hash = fib->adj_index_by_dst_address[address_length]; - p = hash_get (hash, src->as_u32); - return (void *) p; -} - -/* *INDENT-OFF* */ -typedef CLIB_PACKED (struct ip4_route { - ip4_address_t address; - u32 address_length : 6; - u32 index : 26; -}) ip4_route_t; -/* *INDENT-ON* */ - -/** Remove all routes from src IP4 FIB */ -void -ip4_sd_fib_clear_src_fib (lisp_gpe_main_t * lgm, ip4_fib_t * fib) -{ - ip4_route_t *routes = 0, *r; - u32 i; - - vec_reset_length (routes); - - for (i = 0; i < ARRAY_LEN (fib->adj_index_by_dst_address); i++) - { - uword *hash = fib->adj_index_by_dst_address[i]; - hash_pair_t *p; - ip4_route_t x; - - x.address_length = i; - x.index = 0; /* shut up coverity */ - - /* *INDENT-OFF* */ - hash_foreach_pair (p, hash, - ({ - x.address.data_u32 = p->key; - vec_add1 (routes, x); - })); - /* *INDENT-ON* */ - } - - vec_foreach (r, routes) - { - ip4_add_del_route_args_t a; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = fib - lgm->ip4_src_fibs; - a.dst_address = r->address; - a.dst_address_length = r->address_length; - a.adj_index = ~0; - - ip4_sd_fib_add_del_src_route (lgm, &a); - } -} - -/** Test if IP4 FIB is empty */ -static u8 -ip4_fib_is_empty (ip4_fib_t * fib) -{ - u8 fib_is_empty; - int i; - - fib_is_empty = 1; - for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 0; i--) - { - uword *hash = fib->adj_index_by_dst_address[i]; - uword n_elts = hash_elts (hash); - if (n_elts) - { - fib_is_empty = 0; - break; - } - } - return fib_is_empty; -} +#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/lookup_dpo.h> 
+#include <vnet/dpo/load_balance.h> /** - * @brief Add/del route to IP4 SD FIB. - * - * Adds/remove routes to both destination and source FIBs. Entries added - * to destination FIB are associated to adjacencies that point to the source - * FIB and store the index of the particular source FIB associated to the - * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs - * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the - * global adjacency pool. + * @brief Add route to IP4 or IP6 Destination FIB. * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP4 prefix. - * @param[in] src_prefix Source IP4 prefix. - * @param[in] table_id Table id. - * @param[in] add_adj Pointer to the adjacency to be added. - * @param[in] is_add Add/del flag. + * Add a route to the destination FIB that results in the lookup + * in the SRC FIB. The SRC FIB is created is it does not yet exist. * - * @return 0 on success. + * @param[in] dst_table_id Destination FIB Table-ID + * @param[in] dst_prefix Destination IP prefix. + * @param[out] src_fib_index The index/ID of the SRC FIB created. 
*/ -static int -ip4_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add) -{ - uword *p; - ip4_add_del_route_args_t a; - ip_adjacency_t *dst_adjp, dst_adj; - ip4_address_t dst = ip_prefix_v4 (dst_prefix), src; - u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0; - ip4_fib_t *src_fib; - - if (src_prefix) - { - src = ip_prefix_v4 (src_prefix); - src_address_length = ip_prefix_len (src_prefix); - } - else - memset (&src, 0, sizeof (src)); - - /* lookup dst adj */ - p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); - - if (is_add) - { - /* insert dst prefix to ip4 fib, if it's not in yet */ - if (p == 0) - { - /* allocate and init src ip4 fib */ - pool_get (lgm->ip4_src_fibs, src_fib); - ip4_mtrie_init (&src_fib->mtrie); - - /* configure adjacency */ - memset (&dst_adj, 0, sizeof (dst_adj)); - - /* reuse rewrite header to store pointer to src fib */ - dst_adj.rewrite_header.sw_if_index = src_fib - lgm->ip4_src_fibs; - - /* dst adj should point to lisp gpe lookup */ - dst_adj.lookup_next_index = lgm->ip4_lookup_next_lgpe_ip4_lookup; - - /* explicit_fib_index is used in IP6 FIB lookup, don't reuse it */ - dst_adj.explicit_fib_index = ~0; - dst_adj.n_adj = 1; - - /* make sure we have different signatures for adj in different tables - * but with the same lookup_next_index and for adj in the same table - * but associated to different destinations */ - dst_adj.if_address_index = table_id; - dst_adj.indirect.next_hop.ip4 = dst; - - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = dst; - a.flags |= IP4_ROUTE_FLAG_ADD; - a.add_adj = &dst_adj; - a.n_add_adj = 1; - - ip4_add_del_route (lgm->im4, &a); - - /* lookup dst adj to obtain the adj index */ - p = ip4_get_route (lgm->im4, table_id, 
0, dst.as_u8, - dst_address_length); - - /* make sure insertion succeeded */ - if (CLIB_DEBUG) - { - ASSERT (p != 0); - dst_adjp = ip_get_adjacency (lgm->lm4, p[0]); - ASSERT (dst_adjp->rewrite_header.sw_if_index - == dst_adj.rewrite_header.sw_if_index); - } - } - } - else - { - if (p == 0) - { - clib_warning - ("Trying to delete inexistent dst route for %U. Aborting", - format_ip4_address_and_length, dst.as_u8, dst_address_length); - return -1; - } - } - - dst_adjp = ip_get_adjacency (lgm->lm4, p[0]); - - /* add/del src prefix to src fib */ - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index; - a.adj_index = ~0; - a.flags |= is_add ? IP4_ROUTE_FLAG_ADD : IP4_ROUTE_FLAG_DEL; - a.add_adj = add_adj; - a.n_add_adj = is_add ? 1 : 0; - /* if src prefix is null, add 0/0 */ - a.dst_address_length = src_address_length; - a.dst_address = src; - ip4_sd_fib_add_del_src_route (lgm, &a); - - /* make sure insertion succeeded */ - if (CLIB_DEBUG && is_add) - { - uword *sai; - ip_adjacency_t *src_adjp; - sai = ip4_sd_get_src_route (lgm, dst_adjp->rewrite_header.sw_if_index, - &src, src_address_length); - ASSERT (sai != 0); - src_adjp = ip_get_adjacency (lgm->lm4, sai[0]); - ASSERT (src_adjp->if_address_index == add_adj->if_address_index); - } - - /* if a delete, check if there are elements left in the src fib */ - if (!is_add) - { - src_fib = pool_elt_at_index (lgm->ip4_src_fibs, - dst_adjp->rewrite_header.sw_if_index); - if (!src_fib) - return 0; - - /* if there's nothing left */ - if (ip4_fib_is_empty (src_fib)) - { - /* remove the src fib .. */ - pool_put (lgm->ip4_src_fibs, src_fib); - - /* .. 
and remove dst route */ - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = dst; - a.flags |= IP4_ROUTE_FLAG_DEL; - - ip4_add_del_route (lgm->im4, &a); - } - } - - return 0; -} - -/** - * @brief Retrieve IP4 SD FIB entry. - * - * Looks up SD IP4 route by first looking up the destination in VPP's main FIB - * and subsequently the source in the src FIB. The index of the source FIB is - * stored in the dst adjacency's rewrite_header.sw_if_index. If source is 0 - * do search with 0/0 src. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP4 prefix. - * @param[in] src_prefix Source IP4 prefix. - * @param[in] table_id Table id. - * - * @return pointer to the adjacency if route found. - */ -static void * -ip4_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id) -{ - uword *p; - ip4_address_t dst = ip_prefix_v4 (dst_prefix), src; - u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0; - ip_adjacency_t *dst_adj; - - if (src_prefix) - { - src = ip_prefix_v4 (src_prefix); - src_address_length = ip_prefix_len (src_prefix); - } - else - memset (&src, 0, sizeof (src)); - - /* lookup dst adj */ - p = ip4_get_route (lgm->im4, table_id, 0, dst.as_u8, dst_address_length); - if (p == 0) - return p; - - dst_adj = ip_get_adjacency (lgm->lm4, p[0]); - return ip4_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src, - src_address_length); -} - -/** Get src route from IP6 SD FIB. 
*/ -static u32 -ip6_sd_get_src_route (lisp_gpe_main_t * lgm, u32 src_fib_index, - ip6_address_t * src, u32 address_length) -{ - int rv; - BVT (clib_bihash_kv) kv, value; - ip6_src_fib_t *fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index); - - ip6_address_t *mask; - - ASSERT (address_length <= 128); - - mask = &fib->fib_masks[address_length]; - - kv.key[0] = src->as_u64[0] & mask->as_u64[0]; - kv.key[1] = src->as_u64[1] & mask->as_u64[1]; - kv.key[2] = address_length; - - rv = BV (clib_bihash_search_inline_2) (&fib->ip6_lookup_table, &kv, &value); - if (rv == 0) - return value.value; - - return 0; -} - -static void -compute_prefix_lengths_in_search_order (ip6_src_fib_t * fib) -{ - int i; - vec_reset_length (fib->prefix_lengths_in_search_order); - /* Note: bitmap reversed so this is in fact a longest prefix match */ - - /* *INDENT-OFF* */ - clib_bitmap_foreach(i, fib->non_empty_dst_address_length_bitmap, ({ - int dst_address_length = 128 - i; - vec_add1 (fib->prefix_lengths_in_search_order, dst_address_length); - })); - /* *INDENT-ON* */ -} - -/** Add/del src route to IP6 SD FIB. Rewrite of ip6_add_del_route() because - * it uses im6 to find the FIB .*/ -static void -ip6_sd_fib_add_del_src_route (lisp_gpe_main_t * lgm, - ip6_add_del_route_args_t * a) +u32 +ip_dst_fib_add_route (u32 dst_fib_index, const ip_prefix_t * dst_prefix) { - ip_lookup_main_t *lm = lgm->lm6; - ip6_src_fib_t *fib; - ip6_address_t dst_address; - u32 dst_address_length, adj_index; - uword is_del; - u32 old_adj_index = ~0; - BVT (clib_bihash_kv) kv, value; - - vlib_smp_unsafe_warning (); - - is_del = (a->flags & IP6_ROUTE_FLAG_DEL) != 0; + fib_node_index_t src_fib_index; + fib_prefix_t dst_fib_prefix; + fib_node_index_t dst_fei; - /* Either create new adjacency or use given one depending on arguments. 
*/ - if (a->n_add_adj > 0) - { - ip_add_adjacency (lm, a->add_adj, a->n_add_adj, &adj_index); - } - else - adj_index = a->adj_index; + ASSERT (NULL != dst_prefix); - dst_address = a->dst_address; - dst_address_length = a->dst_address_length; - fib = pool_elt_at_index (lgm->ip6_src_fibs, a->table_index_or_table_id); + ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); - ASSERT (dst_address_length < ARRAY_LEN (fib->fib_masks)); - ip6_address_mask (&dst_address, &fib->fib_masks[dst_address_length]); + /* + * lookup the destination prefix in the VRF table and retrieve the + * LISP associated data + */ + dst_fei = fib_table_lookup_exact_match (dst_fib_index, &dst_fib_prefix); - /* refcount accounting */ - if (is_del) + /* + * If the FIB entry is not present, or not LISP sourced, add it + */ + if (dst_fei == FIB_NODE_INDEX_INVALID || + NULL == fib_entry_get_source_data (dst_fei, FIB_SOURCE_LISP)) { - ASSERT (fib->dst_address_length_refcounts[dst_address_length] > 0); - if (--fib->dst_address_length_refcounts[dst_address_length] == 0) - { - fib->non_empty_dst_address_length_bitmap = - clib_bitmap_set (fib->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 0); - compute_prefix_lengths_in_search_order (fib); - } + dpo_id_t src_lkup_dpo = DPO_NULL; + + /* create a new src FIB. */ + src_fib_index = + fib_table_create_and_lock (dst_fib_prefix.fp_proto, + "LISP-src for [%d,%U]", + dst_fib_index, + format_fib_prefix, &dst_fib_prefix); + + /* + * create a data-path object to perform the source address lookup + * in the SRC FIB + */ + lookup_dpo_add_or_lock_w_fib_index (src_fib_index, + (ip_prefix_version (dst_prefix) == + IP6 ? 
DPO_PROTO_IP6 : + DPO_PROTO_IP4), + LOOKUP_INPUT_SRC_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &src_lkup_dpo); + + /* + * add the entry to the destination FIB that uses the lookup DPO + */ + dst_fei = fib_table_entry_special_dpo_add (dst_fib_index, + &dst_fib_prefix, + FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, + &src_lkup_dpo); + + /* + * the DPO is locked by the FIB entry, and we have no further + * need for it. + */ + dpo_unlock (&src_lkup_dpo); + + /* + * save the SRC FIB index on the entry so we can retrieve it for + * subsequent routes. + */ + fib_entry_set_source_data (dst_fei, FIB_SOURCE_LISP, &src_fib_index); } else { - fib->dst_address_length_refcounts[dst_address_length]++; - - fib->non_empty_dst_address_length_bitmap = - clib_bitmap_set (fib->non_empty_dst_address_length_bitmap, - 128 - dst_address_length, 1); - compute_prefix_lengths_in_search_order (fib); - } - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = dst_address_length; - - if (BV (clib_bihash_search) (&fib->ip6_lookup_table, &kv, &value) == 0) - old_adj_index = value.value; - - if (is_del) - BV (clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 0 /* is_add */ ); - else - { - /* Make sure adj index is valid. */ - if (CLIB_DEBUG > 0) - (void) ip_get_adjacency (lm, adj_index); - - kv.value = adj_index; - - BV (clib_bihash_add_del) (&fib->ip6_lookup_table, &kv, 1 /* is_add */ ); + /* + * destination FIB entry already present + */ + src_fib_index = *(u32 *) fib_entry_get_source_data (dst_fei, + FIB_SOURCE_LISP); } - /* Avoid spurious reference count increments */ - if (old_adj_index == adj_index - && !(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY)) - { - ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index); - if (adj->share_count > 0) - adj->share_count--; - } - - /* Delete old adjacency index if present and changed. 
*/ - { - if (!(a->flags & IP6_ROUTE_FLAG_KEEP_OLD_ADJACENCY) - && old_adj_index != ~0 && old_adj_index != adj_index) - ip_del_adjacency (lm, old_adj_index); - } -} - -static void -ip6_src_fib_init (ip6_src_fib_t * fib) -{ - uword i; - - for (i = 0; i < ARRAY_LEN (fib->fib_masks); i++) - { - u32 j, i0, i1; - - i0 = i / 32; - i1 = i % 32; - - for (j = 0; j < i0; j++) - fib->fib_masks[i].as_u32[j] = ~0; - - if (i1) - fib->fib_masks[i].as_u32[i0] = - clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1)); - } - - if (fib->lookup_table_nbuckets == 0) - fib->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS; - - fib->lookup_table_nbuckets = 1 << max_log2 (fib->lookup_table_nbuckets); - - if (fib->lookup_table_size == 0) - fib->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; - - BV (clib_bihash_init) (&fib->ip6_lookup_table, "ip6 lookup table", - fib->lookup_table_nbuckets, fib->lookup_table_size); - + return (src_fib_index); } /** - * @brief Add/del route to IP6 SD FIB. - * - * Adds/remove routes to both destination and source FIBs. Entries added - * to destination FIB are associated to adjacencies that point to the source - * FIB and store the index of the particular source FIB associated to the - * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs - * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the - * global adjacency pool. + * @brief Del route to IP4 or IP6 SD FIB. * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP6 prefix. - * @param[in] src_prefix Source IP6 prefix. - * @param[in] table_id Table id. - * @param[in] add_adj Pointer to the adjacency to be added. - * @param[in] is_add Add/del flag. + * Remove routes from both destination and source FIBs. * - * @return 0 on success. + * @param[in] src_fib_index The index/ID of the SRC FIB + * @param[in] src_prefix Source IP prefix. 
+ * @param[in] dst_fib_index The index/ID of the DST FIB + * @param[in] dst_prefix Destination IP prefix. */ -static int -ip6_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add) +void +ip_src_dst_fib_del_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + u32 dst_fib_index, const ip_prefix_t * dst_prefix) { - u32 adj_index; - ip6_add_del_route_args_t a; - ip_adjacency_t *dst_adjp, dst_adj; - ip6_address_t dst = ip_prefix_v6 (dst_prefix), src; - u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0; - ip6_src_fib_t *src_fib; - - if (src_prefix) - { - src = ip_prefix_v6 (src_prefix); - src_address_length = ip_prefix_len (src_prefix); - } - else - memset (&src, 0, sizeof (src)); - - /* lookup dst adj and create it if it doesn't exist */ - adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length); + fib_prefix_t dst_fib_prefix, src_fib_prefix; - if (is_add) - { - /* insert dst prefix to ip6 fib, if it's not in yet */ - if (adj_index == 0) - { - /* allocate and init src ip6 fib */ - pool_get (lgm->ip6_src_fibs, src_fib); - memset (src_fib, 0, sizeof (src_fib[0])); - ip6_src_fib_init (src_fib); - - memset (&dst_adj, 0, sizeof (dst_adj)); - - /* reuse rewrite header to store pointer to src fib */ - dst_adj.rewrite_header.sw_if_index = src_fib - lgm->ip6_src_fibs; - - /* dst adj should point to lisp gpe ip lookup */ - dst_adj.lookup_next_index = lgm->ip6_lookup_next_lgpe_ip6_lookup; - - /* explicit_fib_index is used in IP6 FIB lookup, don't reuse it */ - dst_adj.explicit_fib_index = ~0; - dst_adj.n_adj = 1; - - /* make sure we have different signatures for adj in different tables - * but with the same lookup_next_index and for adj in the same table - * but associated to different destinations */ - dst_adj.if_address_index = table_id; - dst_adj.indirect.next_hop.ip6 = dst; - - memset (&a, 0, sizeof (a)); - a.flags = 
IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = dst; - a.flags |= IP6_ROUTE_FLAG_ADD; - a.add_adj = &dst_adj; - a.n_add_adj = 1; - - ip6_add_del_route (lgm->im6, &a); - - /* lookup dst adj to obtain the adj index */ - adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, - dst_address_length); - - /* make sure insertion succeeded */ - if (CLIB_DEBUG) - { - ASSERT (adj_index != 0); - dst_adjp = ip_get_adjacency (lgm->lm6, adj_index); - ASSERT (dst_adjp->rewrite_header.sw_if_index - == dst_adj.rewrite_header.sw_if_index); - } - } - } - else - { - if (adj_index == 0) - { - clib_warning - ("Trying to delete inexistent dst route for %U. Aborting", - format_ip_prefix, dst_prefix); - return -1; - } - } - - dst_adjp = ip_get_adjacency (lgm->lm6, adj_index); - - /* add/del src prefix to src fib */ - memset (&a, 0, sizeof (a)); - a.flags = IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = dst_adjp->rewrite_header.sw_if_index; - a.adj_index = ~0; - a.flags |= is_add ? IP6_ROUTE_FLAG_ADD : IP6_ROUTE_FLAG_DEL; - a.add_adj = add_adj; - a.n_add_adj = is_add ? 
1 : 0; - /* if src prefix is null, add ::0 */ - a.dst_address_length = src_address_length; - a.dst_address = src; - ip6_sd_fib_add_del_src_route (lgm, &a); - - /* make sure insertion succeeded */ - if (CLIB_DEBUG && is_add) - { - u32 sai; - ip_adjacency_t *src_adjp; - sai = ip6_sd_get_src_route (lgm, dst_adjp->rewrite_header.sw_if_index, - &src, src_address_length); - ASSERT (sai != 0); - src_adjp = ip_get_adjacency (lgm->lm6, sai); - ASSERT (src_adjp->if_address_index == add_adj->if_address_index); - } - - /* if a delete, check if there are elements left in the src fib */ - if (!is_add) - { - src_fib = pool_elt_at_index (lgm->ip6_src_fibs, - dst_adjp->rewrite_header.sw_if_index); - if (!src_fib) - return 0; - - /* if there's nothing left */ - if (clib_bitmap_count_set_bits - (src_fib->non_empty_dst_address_length_bitmap) == 0) - { - /* remove src fib .. */ - pool_put (lgm->ip6_src_fibs, src_fib); + ASSERT (NULL != dst_prefix); + ASSERT (NULL != src_prefix); - /* .. and remove dst route */ - memset (&a, 0, sizeof (a)); - a.flags = IP6_ROUTE_FLAG_TABLE_ID; - a.table_index_or_table_id = table_id; /* vrf */ - a.adj_index = ~0; - a.dst_address_length = dst_address_length; - a.dst_address = dst; - a.flags |= IP6_ROUTE_FLAG_DEL; + ip_prefix_to_fib_prefix (dst_prefix, &dst_fib_prefix); + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - ip6_add_del_route (lgm->im6, &a); - } - } - - return 0; -} + fib_table_entry_delete (src_fib_index, &src_fib_prefix, FIB_SOURCE_LISP); -/** - * @brief Retrieve IP6 SD FIB entry. - * - * Looks up SD IP6 route by first looking up the destination in VPP's main FIB - * and subsequently the source in the src FIB. The index of the source FIB is - * stored in the dst adjacency's @ref rewrite_header.sw_if_index. If source is - * 0 do search with ::/0 src. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP6 prefix. - * @param[in] src_prefix Source IP6 prefix. - * @param[in] table_id Table id. 
- * - * @return adjacency index if route found. - */ -static u32 -ip6_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id) -{ - u32 adj_index; - ip6_address_t dst = ip_prefix_v6 (dst_prefix), src; - u32 dst_address_length = ip_prefix_len (dst_prefix), src_address_length = 0; - ip_adjacency_t *dst_adj; - - if (src_prefix) + if (0 == fib_table_get_num_entries (src_fib_index, + src_fib_prefix.fp_proto, + FIB_SOURCE_LISP)) { - src = ip_prefix_v6 (src_prefix); - src_address_length = ip_prefix_len (src_prefix); + /* + * there's nothing left, unlock the source FIB and the + * destination route + */ + fib_table_entry_special_remove (dst_fib_index, + &dst_fib_prefix, FIB_SOURCE_LISP); + fib_table_unlock (src_fib_index, src_fib_prefix.fp_proto); } - else - memset (&src, 0, sizeof (src)); - - /* lookup dst adj */ - adj_index = ip6_get_route (lgm->im6, table_id, 0, &dst, dst_address_length); - if (adj_index == 0) - return adj_index; - - dst_adj = ip_get_adjacency (lgm->lm6, adj_index); - return ip6_sd_get_src_route (lgm, dst_adj->rewrite_header.sw_if_index, &src, - src_address_length); -} - -/** - * @brief Add/del route to IP4 or IP6 SD FIB. - * - * Adds/remove routes to both destination and source FIBs. Entries added - * to destination FIB are associated to adjacencies that point to the source - * FIB and store the index of the particular source FIB associated to the - * destination. Source FIBs are locally managed (see @ref lgm->ip4_src_fibs - * and @ref lgm->ip6_src_fibs), but the adjacencies are allocated out of the - * global adjacency pool. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP prefix. - * @param[in] src_prefix Source IP prefix. - * @param[in] table_id Table id. - * @param[in] add_adj Pointer to the adjacency to be added. - * @param[in] is_add Add/del flag. - * - * @return 0 on success. 
- */ -int -ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add) -{ - return (ip_prefix_version (dst_prefix) == IP4 ? - ip4_sd_fib_add_del_route : ip6_sd_fib_add_del_route) (lgm, - dst_prefix, - src_prefix, - table_id, - add_adj, - is_add); } /** - * @brief Retrieve IP4 or IP6 SD FIB entry. + * @brief Add route to IP4 or IP6 SRC FIB. * - * Looks up SD IP route by first looking up the destination in VPP's main FIB - * and subsequently the source in the src FIB. The index of the source FIB is - * stored in the dst adjacency's @ref rewrite_header.sw_if_index. If source is - * 0 do search with ::/0 src. + * Adds a route to in the LISP SRC FIB with the result of the route + * being the DPO passed. * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] dst_prefix Destination IP prefix. + * @param[in] src_fib_index The index/ID of the SRC FIB * @param[in] src_prefix Source IP prefix. - * @param[in] table_id Table id. - * - * @return adjacency index if route found. + * @param[in] src_dpo The DPO the route will link to. */ -u32 -ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id) -{ - if (ip_prefix_version (dst_prefix) == IP4) - { - u32 *adj_index = ip4_sd_fib_get_route (lgm, dst_prefix, src_prefix, - table_id); - return (adj_index == 0) ? 
0 : adj_index[0]; - } - else - return ip6_sd_fib_get_route (lgm, dst_prefix, src_prefix, table_id); -} - -always_inline void -ip4_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0, - ip4_address_t * addr0, u32 * src_adj_index0) -{ - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_fib_mtrie_t *mtrie0; - - /* if default route not hit in ip4 lookup */ - if (PREDICT_TRUE (src_fib_index0 != (u32) ~ 0)) - { - mtrie0 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index0)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY) ? - mtrie0->default_leaf : leaf0; - src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - } - else - src_adj_index0[0] = ~0; -} - -always_inline void -ip4_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0, - u32 src_fib_index1, ip4_address_t * addr0, - ip4_address_t * addr1, u32 * src_adj_index0, - u32 * src_adj_index1) +void +ip_src_fib_add_route_w_dpo (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const dpo_id_t * src_dpo) { - ip4_fib_mtrie_leaf_t leaf0, leaf1; - ip4_fib_mtrie_t *mtrie0, *mtrie1; - - /* if default route not hit in ip4 lookup */ - if (PREDICT_TRUE - (src_fib_index0 != (u32) ~ 0 && src_fib_index1 != (u32) ~ 0)) - { - mtrie0 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index0)->mtrie; - mtrie1 = &vec_elt_at_index (lgm->ip4_src_fibs, src_fib_index1)->mtrie; - - leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; - - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 0); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 0); + fib_prefix_t src_fib_prefix; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 1); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, 
addr1, 1); + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2); + /* + * add the entry into the source fib. + */ + fib_node_index_t src_fei; - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3); + src_fei = fib_table_lookup_exact_match (src_fib_index, &src_fib_prefix); - /* Handle default route. */ - leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY) ? - mtrie0->default_leaf : leaf0; - leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY) ? - mtrie1->default_leaf : leaf1; - src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - } - else + if (FIB_NODE_INDEX_INVALID == src_fei || + !fib_entry_is_sourced (src_fei, FIB_SOURCE_LISP)) { - ip4_src_fib_lookup_one (lgm, src_fib_index0, addr0, src_adj_index0); - ip4_src_fib_lookup_one (lgm, src_fib_index1, addr1, src_adj_index1); + fib_table_entry_special_dpo_add (src_fib_index, + &src_fib_prefix, + FIB_SOURCE_LISP, + FIB_ENTRY_FLAG_EXCLUSIVE, src_dpo); } } -/** - * @brief IPv4 src lookup node. - * @node lgpe-ip4-lookup - * - * The LISP IPv4 source lookup dispatch node. - * - * This is the IPv4 source lookup dispatch node. It first looks up the - * adjacency hit in the main (destination) FIB and then uses its - * <code>rewrite_header.sw_if_index</code>to find the source FIB wherein - * the source IP is subsequently looked up. Data in the resulting adjacency - * is used to decide the next node (the lisp_gpe interface) and if a flow - * hash must be computed, when traffic can be load balanced over multiple - * tunnels. - * - * - * @param[in] vm vlib_main_t corresponding to current thread. - * @param[in] node vlib_node_runtime_t data for this node. - * @param[in] frame vlib_frame_t whose contents should be dispatched. - * - * @return number of vectors in frame. 
- */ -always_inline uword -lgpe_ip4_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +static void +ip_address_to_46 (const ip_address_t * addr, + ip46_address_t * a, fib_protocol_t * proto) { - u32 n_left_from, next_index, *from, *to_next; - lisp_gpe_main_t *lgm = &lisp_gpe_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) + *proto = (IP4 == ip_addr_version (addr) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + switch (*proto) { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip4_header_t *ip0, *ip1; - u32 dst_adj_index0, src_adj_index0, src_fib_index0; - u32 dst_adj_index1, src_adj_index1, src_fib_index1; - ip_adjacency_t *dst_adj0, *src_adj0, *dst_adj1, *src_adj1; - u32 next0, next1; - - next0 = next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* dst lookup was done by ip4 lookup */ - dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - dst_adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; - - dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0); - dst_adj1 = ip_get_adjacency (lgm->lm4, dst_adj_index1); - - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - src_fib_index1 = dst_adj1->rewrite_header.sw_if_index; - - ip4_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1, - &ip0->src_address, &ip1->src_address, - &src_adj_index0, &src_adj_index1); - - /* if a source fib exists */ - if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0 - && (u32) ~ 0 != src_adj_index1)) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1; - - src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); - src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1); - - next0 = src_adj0->explicit_fib_index; - next1 = src_adj1->explicit_fib_index; - - /* prepare buffer for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = - src_adj1->rewrite_header.sw_if_index; - - /* if multipath: saved_lookup_next_index is reused to store - * nb of sub-tunnels. If greater than 1, multipath is on. - * Note that flow hash should be 0 after ipx lookup! 
*/ - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - - if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1)) - vnet_buffer (b1)->ip.flow_hash = - ip4_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT); - } - else - { - if ((u32) ~ 0 != src_adj_index0) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); - next0 = src_adj0->explicit_fib_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - } - else - { - next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - - if ((u32) ~ 0 != src_adj_index1) - { - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1; - src_adj1 = ip_get_adjacency (lgm->lm4, src_adj_index1); - next1 = src_adj1->explicit_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = - src_adj1->rewrite_header.sw_if_index; - if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1)) - vnet_buffer (b1)->ip.flow_hash = - ip4_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT); - } - else - { - next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - } - - /* mark the packets for CP lookup if needed */ - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0)) - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next1)) - vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP; - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *b0; - ip4_header_t *ip0; - u32 bi0, dst_adj_index0, src_adj_index0, src_fib_index0; - u32 next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - ip_adjacency_t *dst_adj0, *src_adj0; - - bi0 = from[0]; - to_next[0] = 
bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* dst lookup was done by ip4 lookup */ - dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - dst_adj0 = ip_get_adjacency (lgm->lm4, dst_adj_index0); - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - - /* do src lookup */ - ip4_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address, - &src_adj_index0); - - /* if a source fib exists */ - if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0)) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lm4, src_adj_index0); - next0 = src_adj0->explicit_fib_index; - - /* prepare packet for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - - /* if multipath: saved_lookup_next_index is reused to store - * nb of sub-tunnels. If greater than 1, multipath is on */ - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - } - else - { - next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0)) - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + case FIB_PROTOCOL_IP4: + a->ip4 = addr->ip.v4; + break; + case FIB_PROTOCOL_IP6: + a->ip6 = addr->ip.v6; + break; + default: + ASSERT (0); + break; } - return from_frame->n_vectors; } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (lgpe_ip4_lookup_node) = { - .function = lgpe_ip4_lookup, - .name = "lgpe-ip4-lookup", - .vector_size = sizeof (u32), - - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_next_nodes = LGPE_IP4_LOOKUP_N_NEXT, - .next_nodes = { -#define _(sym,str) [LGPE_IP4_LOOKUP_NEXT_##sym] = str, - 
foreach_lgpe_ip4_lookup_next -#undef _ - }, -}; -/* *INDENT-ON* */ - -static u32 -ip6_src_fib_lookup (lisp_gpe_main_t * lgm, u32 src_fib_index, - ip6_address_t * src) +static fib_route_path_t * +ip_src_fib_mk_paths (const lisp_fwd_path_t * paths) { - int i, len; - int rv; - BVT (clib_bihash_kv) kv, value; - ip6_src_fib_t *fib = pool_elt_at_index (lgm->ip6_src_fibs, src_fib_index); + const lisp_gpe_adjacency_t *ladj; + fib_route_path_t *rpaths = NULL; + u8 best_priority; + u32 ii; - len = vec_len (fib->prefix_lengths_in_search_order); + vec_validate (rpaths, vec_len (paths) - 1); - for (i = 0; i < len; i++) - { - int dst_address_length = fib->prefix_lengths_in_search_order[i]; - ip6_address_t *mask; + best_priority = paths[0].priority; - ASSERT (dst_address_length >= 0 && dst_address_length <= 128); + vec_foreach_index (ii, paths) + { + if (paths[0].priority != best_priority) + break; - mask = &fib->fib_masks[dst_address_length]; + ladj = lisp_gpe_adjacency_get (paths[ii].lisp_adj); - kv.key[0] = src->as_u64[0] & mask->as_u64[0]; - kv.key[1] = src->as_u64[1] & mask->as_u64[1]; - kv.key[2] = dst_address_length; + ip_address_to_46 (&ladj->remote_rloc, + &rpaths[ii].frp_addr, &rpaths[ii].frp_proto); - rv = - BV (clib_bihash_search_inline_2) (&fib->ip6_lookup_table, &kv, - &value); - if (rv == 0) - return value.value; - } - - return 0; -} + rpaths[ii].frp_sw_if_index = ladj->sw_if_index; + rpaths[ii].frp_weight = (paths[ii].weight ? 
paths[ii].weight : 1); + rpaths[ii].frp_label = MPLS_LABEL_INVALID; + } -always_inline void -ip6_src_fib_lookup_one (lisp_gpe_main_t * lgm, u32 src_fib_index0, - ip6_address_t * addr0, u32 * src_adj_index0) -{ - /* if default route not hit in ip6 lookup */ - if (PREDICT_TRUE (src_fib_index0 != (u32) ~ 0)) - src_adj_index0[0] = ip6_src_fib_lookup (lgm, src_fib_index0, addr0); - else - src_adj_index0[0] = ~0; -} + ASSERT (0 != vec_len (rpaths)); -always_inline void -ip6_src_fib_lookup_two (lisp_gpe_main_t * lgm, u32 src_fib_index0, - u32 src_fib_index1, ip6_address_t * addr0, - ip6_address_t * addr1, u32 * src_adj_index0, - u32 * src_adj_index1) -{ - /* if default route not hit in ip6 lookup */ - if (PREDICT_TRUE - (src_fib_index0 != (u32) ~ 0 && src_fib_index1 != (u32) ~ 0)) - { - src_adj_index0[0] = ip6_src_fib_lookup (lgm, src_fib_index0, addr0); - src_adj_index1[0] = ip6_src_fib_lookup (lgm, src_fib_index1, addr1); - } - else - { - ip6_src_fib_lookup_one (lgm, src_fib_index0, addr0, src_adj_index0); - ip6_src_fib_lookup_one (lgm, src_fib_index1, addr1, src_adj_index1); - } + return (rpaths); } /** - * @brief IPv6 src lookup node. - * @node lgpe-ip6-lookup - * - * The LISP IPv6 source lookup dispatch node. + * @brief Add route to IP4 or IP6 SRC FIB. * - * This is the IPv6 source lookup dispatch node. It first looks up the - * adjacency hit in the main (destination) FIB and then uses its - * <code>rewrite_header.sw_if_index</code>to find the source FIB wherein - * the source IP is subsequently looked up. Data in the resulting adjacency - * is used to decide the next node (the lisp_gpe interface) and if a flow - * hash must be computed, when traffic can be load balanced over multiple - * tunnels. + * Adds a route to in the LISP SRC FIB for the tunnel. * - * @param[in] vm vlib_main_t corresponding to current thread. - * @param[in] node vlib_node_runtime_t data for this node. - * @param[in] frame vlib_frame_t whose contents should be dispatched. 
- * - * @return number of vectors in frame. + * @param[in] src_fib_index The index/ID of the SRC FIB + * @param[in] src_prefix Source IP prefix. + * @param[in] paths The paths from which to construct the + * load balance */ -always_inline uword -lgpe_ip6_lookup (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * from_frame) +void +ip_src_fib_add_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const lisp_fwd_path_t * paths) { - u32 n_left_from, next_index, *from, *to_next; - lisp_gpe_main_t *lgm = &lisp_gpe_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip6_header_t *ip0, *ip1; - u32 dst_adj_index0, src_adj_index0, src_fib_index0, dst_adj_index1, - src_adj_index1, src_fib_index1; - ip_adjacency_t *dst_adj0, *src_adj0, *dst_adj1, *src_adj1; - u32 next0, next1; - - next0 = next1 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* dst lookup was done by ip6 lookup */ - dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - dst_adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; - - dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0); - dst_adj1 = ip_get_adjacency (lgm->lm6, dst_adj_index1); - - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - src_fib_index1 = dst_adj1->rewrite_header.sw_if_index; - - ip6_src_fib_lookup_two (lgm, src_fib_index0, src_fib_index1, - &ip0->src_address, &ip1->src_address, - &src_adj_index0, &src_adj_index1); - - /* if a source fib exists */ - if (PREDICT_TRUE ((u32) ~ 0 != src_adj_index0 - && (u32) ~ 0 != src_adj_index1)) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1; + fib_prefix_t src_fib_prefix; + fib_route_path_t *rpaths; - src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); - src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1); + ip_prefix_to_fib_prefix (src_prefix, &src_fib_prefix); - next0 = src_adj0->explicit_fib_index; - next1 = src_adj1->explicit_fib_index; + rpaths = ip_src_fib_mk_paths (paths); - /* prepare buffer for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = - src_adj1->rewrite_header.sw_if_index; - - /* if multipath: 
saved_lookup_next_index is reused to store - * nb of sub-tunnels. If greater than 1, multipath is on. - * Note that flow hash should be 0 after ipx lookup! */ - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - - if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1)) - vnet_buffer (b1)->ip.flow_hash = - ip6_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT); - } - else - { - if (src_adj_index0 != (u32) ~ 0) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); - next0 = src_adj0->explicit_fib_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - } - else - { - next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - - if (src_adj_index1 != (u32) ~ 0) - { - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = src_adj_index1; - src_adj1 = ip_get_adjacency (lgm->lm6, src_adj_index1); - next1 = src_adj1->explicit_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = - src_adj1->rewrite_header.sw_if_index; - - if (PREDICT_TRUE (src_adj1->saved_lookup_next_index > 1)) - vnet_buffer (b1)->ip.flow_hash = - ip6_compute_flow_hash (ip1, IP_FLOW_HASH_DEFAULT); - } - else - { - next1 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - } - - /* mark the packets for CP lookup if needed */ - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0)) - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next1)) - vnet_buffer (b1)->lisp.overlay_afi = LISP_AFI_IP; - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, bi0, bi1, next0, - next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *b0; - ip6_header_t *ip0; - u32 bi0, 
dst_adj_index0, src_adj_index0, src_fib_index0; - u32 next0 = LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP; - ip_adjacency_t *dst_adj0, *src_adj0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - - /* dst lookup was done by ip6 lookup */ - dst_adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - dst_adj0 = ip_get_adjacency (lgm->lm6, dst_adj_index0); - src_fib_index0 = dst_adj0->rewrite_header.sw_if_index; - - /* do src lookup */ - ip6_src_fib_lookup_one (lgm, src_fib_index0, &ip0->src_address, - &src_adj_index0); - - /* if a source fib exists */ - if (PREDICT_TRUE (src_adj_index0 != (u32) ~ 0)) - { - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = src_adj_index0; - src_adj0 = ip_get_adjacency (lgm->lm6, src_adj_index0); - next0 = src_adj0->explicit_fib_index; - - /* prepare packet for lisp-gpe output node */ - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - src_adj0->rewrite_header.sw_if_index; - - /* if multipath: saved_lookup_next_index is reused to store - * nb of sub-tunnels. 
If greater than 1, multipath is on */ - if (PREDICT_TRUE (src_adj0->saved_lookup_next_index > 1)) - vnet_buffer (b0)->ip.flow_hash = - ip6_compute_flow_hash (ip0, IP_FLOW_HASH_DEFAULT); - } - else - { - next0 = LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP; - } - - /* mark the packets for CP lookup if needed */ - if (PREDICT_FALSE (LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP == next0)) - vnet_buffer (b0)->lisp.overlay_afi = LISP_AFI_IP; - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; + fib_table_entry_update (src_fib_index, + &src_fib_prefix, + FIB_SOURCE_LISP, FIB_ENTRY_FLAG_NONE, rpaths); + vec_free (rpaths); } -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (lgpe_ip6_lookup_node) = { - .function = lgpe_ip6_lookup, - .name = "lgpe-ip6-lookup", - .vector_size = sizeof (u32), - - .type = VLIB_NODE_TYPE_INTERNAL, - - .n_next_nodes = LGPE_IP6_LOOKUP_N_NEXT, - .next_nodes = { -#define _(sym,str) [LGPE_IP6_LOOKUP_NEXT_##sym] = str, - foreach_lgpe_ip6_lookup_next -#undef _ - }, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.c b/vnet/vnet/lisp-gpe/lisp_gpe.c index 579422b484b..f05c6a2028f 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.c +++ b/vnet/vnet/lisp-gpe/lisp_gpe.c @@ -19,435 +19,230 @@ */ #include <vnet/lisp-gpe/lisp_gpe.h> -#include <vppinfra/math.h> +#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_path_list.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/load_balance.h> /** LISP-GPE global state */ lisp_gpe_main_t lisp_gpe_main; /** - * @brief Compute IP-UDP-GPE sub-tunnel encap/rewrite header. - * - * @param[in] t Parent of the sub-tunnel. - * @param[in] st Sub-tunnel. - * @param[in] lp Local and remote locators used in the encap header. 
- * - * @return 0 on success. + * @brief A Pool of all LISP forwarding entries */ -static int -lisp_gpe_rewrite (lisp_gpe_tunnel_t * t, lisp_gpe_sub_tunnel_t * st, - locator_pair_t * lp) -{ - u8 *rw = 0; - lisp_gpe_header_t *lisp0; - int len; - - if (ip_addr_version (&lp->lcl_loc) == IP4) - { - ip4_header_t *ip0; - ip4_udp_lisp_gpe_header_t *h0; - len = sizeof (*h0); +static lisp_fwd_entry_t *lisp_fwd_entry_pool; - vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES); +/** + * DB of all forwarding entries. The Key is:{l-EID,r-EID,vni} + * where the EID encodes L2 or L3 + */ +static uword *lisp_gpe_fwd_entries; - h0 = (ip4_udp_lisp_gpe_header_t *) rw; +static void +create_fib_entries (lisp_fwd_entry_t * lfe) +{ + dpo_proto_t dproto; - /* Fixed portion of the (outer) ip4 header */ - ip0 = &h0->ip4; - ip0->ip_version_and_header_length = 0x45; - ip0->ttl = 254; - ip0->protocol = IP_PROTOCOL_UDP; + dproto = (ip_prefix_version (&lfe->key->rmt.ippref) == IP4 ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - /* we fix up the ip4 header length and checksum after-the-fact */ - ip_address_copy_addr (&ip0->src_address, &lp->lcl_loc); - ip_address_copy_addr (&ip0->dst_address, &lp->rmt_loc); - ip0->checksum = ip4_header_checksum (ip0); + lfe->src_fib_index = ip_dst_fib_add_route (lfe->eid_fib_index, + &lfe->key->rmt.ippref); - /* UDP header, randomize src port on something, maybe? */ - h0->udp.src_port = clib_host_to_net_u16 (4341); - h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); + if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) + { + dpo_id_t dpo = DPO_NULL; - /* LISP-gpe header */ - lisp0 = &h0->lisp; + switch (lfe->action) + { + case LISP_NO_ACTION: + /* TODO update timers? 
*/ + case LISP_FORWARD_NATIVE: + /* TODO check if route/next-hop for eid exists in fib and add + * more specific for the eid with the next-hop found */ + case LISP_SEND_MAP_REQUEST: + /* insert tunnel that always sends map-request */ + dpo_set (&dpo, DPO_LISP_CP, 0, dproto); + break; + case LISP_DROP: + /* for drop fwd entries, just add route, no need to add encap tunnel */ + dpo_copy (&dpo, drop_dpo_get (dproto)); + break; + } + ip_src_fib_add_route_w_dpo (lfe->src_fib_index, + &lfe->key->lcl.ippref, &dpo); + dpo_reset (&dpo); } else { - ip6_header_t *ip0; - ip6_udp_lisp_gpe_header_t *h0; - len = sizeof (*h0); - - vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES); - - h0 = (ip6_udp_lisp_gpe_header_t *) rw; - - /* Fixed portion of the (outer) ip6 header */ - ip0 = &h0->ip6; - ip0->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 (0x6 << 28); - ip0->hop_limit = 254; - ip0->protocol = IP_PROTOCOL_UDP; - - /* we fix up the ip6 header length after-the-fact */ - ip_address_copy_addr (&ip0->src_address, &lp->lcl_loc); - ip_address_copy_addr (&ip0->dst_address, &lp->rmt_loc); - - /* UDP header, randomize src port on something, maybe? */ - h0->udp.src_port = clib_host_to_net_u16 (4341); - h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); - - /* LISP-gpe header */ - lisp0 = &h0->lisp; + ip_src_fib_add_route (lfe->src_fib_index, + &lfe->key->lcl.ippref, lfe->paths); } - - lisp0->flags = t->flags; - lisp0->ver_res = t->ver_res; - lisp0->res = t->res; - lisp0->next_protocol = t->next_protocol; - lisp0->iid = clib_host_to_net_u32 (t->vni); - - st->is_ip4 = ip_addr_version (&lp->lcl_loc) == IP4; - st->rewrite = rw; - return 0; } -static int -weight_cmp (normalized_sub_tunnel_weights_t * a, - normalized_sub_tunnel_weights_t * b) +static void +delete_fib_entries (lisp_fwd_entry_t * lfe) { - int cmp = a->weight - b->weight; - return (cmp == 0 - ? a->sub_tunnel_index - b->sub_tunnel_index : (cmp > 0 ? 
-1 : 1)); + ip_src_dst_fib_del_route (lfe->src_fib_index, + &lfe->key->lcl.ippref, + lfe->eid_fib_index, &lfe->key->rmt.ippref); } -/** - * @brief Computes sub-tunnel load balancing vector. - * - * Algorithm is identical to that used for building unequal-cost multipath - * adjacencies. Saves normalized sub-tunnel weights and builds load-balancing - * vector consisting of list of sub-tunnel indexes replicated according to - * weight. - * - * @param[in] t Tunnel for which load balancing vector is computed. - */ static void -compute_sub_tunnels_balancing_vector (lisp_gpe_tunnel_t * t) +gid_to_dp_address (gid_address_t * g, dp_address_t * d) { - uword n_sts, i, n_nsts, n_nsts_left; - f64 sum_weight, norm, error, tolerance; - normalized_sub_tunnel_weights_t *nsts = 0, *stp; - lisp_gpe_sub_tunnel_t *sts = t->sub_tunnels; - u32 *st_lbv = 0; - - /* Accept 1% error */ - tolerance = .01; - - n_sts = vec_len (sts); - vec_validate (nsts, 2 * n_sts - 1); - - sum_weight = 0; - for (i = 0; i < n_sts; i++) + switch (gid_address_type (g)) { - /* Find total weight to normalize weights. */ - sum_weight += sts[i].weight; - - /* build normalized sub tunnels vector */ - nsts[i].weight = sts[i].weight; - nsts[i].sub_tunnel_index = i; - } - - n_nsts = n_sts; - if (n_sts == 1) - { - nsts[0].weight = 1; - _vec_len (nsts) = 1; - goto build_lbv; + case GID_ADDR_IP_PREFIX: + case GID_ADDR_SRC_DST: + ip_prefix_copy (&d->ippref, &gid_address_ippref (g)); + d->type = FID_ADDR_IP_PREF; + break; + case GID_ADDR_MAC: + default: + mac_copy (&d->mac, &gid_address_mac (g)); + d->type = FID_ADDR_MAC; + break; } +} - /* Sort sub-tunnels by weight */ - qsort (nsts, n_nsts, sizeof (u32), (void *) weight_cmp); +static lisp_fwd_entry_t * +find_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a, + lisp_gpe_fwd_entry_key_t * key) +{ + uword *p; - /* Save copies of all next hop weights to avoid being overwritten in loop below. 
*/ - for (i = 0; i < n_nsts; i++) - nsts[n_nsts + i].weight = nsts[i].weight; + memset (key, 0, sizeof (*key)); - /* Try larger and larger power of 2 sized blocks until we - find one where traffic flows to within 1% of specified weights. */ - for (n_nsts = max_pow2 (n_sts);; n_nsts *= 2) + if (GID_ADDR_IP_PREFIX == gid_address_type (&a->rmt_eid)) { - error = 0; - - norm = n_nsts / sum_weight; - n_nsts_left = n_nsts; - for (i = 0; i < n_sts; i++) - { - f64 nf = nsts[n_sts + i].weight * norm; - word n = flt_round_nearest (nf); - - n = n > n_nsts_left ? n_nsts_left : n; - n_nsts_left -= n; - error += fabs (nf - n); - nsts[i].weight = n; - } - - nsts[0].weight += n_nsts_left; - - /* Less than 5% average error per adjacency with this size adjacency block? */ - if (error <= tolerance * n_nsts) - { - /* Truncate any next hops with zero weight. */ - _vec_len (nsts) = i; - break; - } + /* + * the ip version of the source is not set to ip6 when the + * source is all zeros. force it. + */ + ip_prefix_version (&gid_address_ippref (&a->lcl_eid)) = + ip_prefix_version (&gid_address_ippref (&a->rmt_eid)); } -build_lbv: + gid_to_dp_address (&a->rmt_eid, &key->rmt); + gid_to_dp_address (&a->lcl_eid, &key->lcl); + key->vni = a->vni; - /* build load balancing vector */ - vec_foreach (stp, nsts) - { - for (i = 0; i < stp[0].weight; i++) - vec_add1 (st_lbv, stp[0].sub_tunnel_index); - } + p = hash_get_mem (lisp_gpe_fwd_entries, key); - t->sub_tunnels_lbv = st_lbv; - t->sub_tunnels_lbv_count = n_nsts; - t->norm_sub_tunnel_weights = nsts; + if (NULL != p) + { + return (pool_elt_at_index (lisp_fwd_entry_pool, p[0])); + } + return (NULL); } -/** Create sub-tunnels and load-balancing vector for all locator pairs - * associated to a tunnel.*/ -static void -create_sub_tunnels (lisp_gpe_main_t * lgm, lisp_gpe_tunnel_t * t) +static int +lisp_gpe_fwd_entry_path_sort (void *a1, void *a2) { - lisp_gpe_sub_tunnel_t st; - locator_pair_t *lp = 0; - int i; - - /* create sub-tunnels for all locator 
pairs */ - for (i = 0; i < vec_len (t->locator_pairs); i++) - { - lp = &t->locator_pairs[i]; - st.locator_pair_index = i; - st.parent_index = t - lgm->tunnels; - st.weight = lp->weight; - - /* compute rewrite for sub-tunnel */ - lisp_gpe_rewrite (t, &st, lp); - vec_add1 (t->sub_tunnels, st); - } + lisp_fwd_path_t *p1 = a1, *p2 = a2; - /* normalize weights and compute sub-tunnel load balancing vector */ - compute_sub_tunnels_balancing_vector (t); + return (p1->priority - p2->priority); } -#define foreach_copy_field \ -_(encap_fib_index) \ -_(decap_fib_index) \ -_(decap_next_index) \ -_(vni) \ -_(action) - /** - * @brief Create/delete IP encapsulated tunnel. + * @brief Add/Delete LISP IP forwarding entry. * - * Builds GPE tunnel for L2 or L3 packets and populates tunnel pool - * @ref lisp_gpe_tunnel_by_key in @ref lisp_gpe_main_t. + * creation of forwarding entries for IP LISP overlay: * - * @param[in] a Tunnel parameters. - * @param[in] is_l2 Flag indicating if encapsulated content is l2. - * @param[out] tun_index_res Tunnel index. + * @param[in] lgm Reference to @ref lisp_gpe_main_t. + * @param[in] a Parameters for building the forwarding entry. * * @return 0 on success. 
*/ static int -add_del_ip_tunnel (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, u8 is_l2, - u32 * tun_index_res) +add_ip_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) { - lisp_gpe_main_t *lgm = &lisp_gpe_main; - lisp_gpe_tunnel_t *t = 0; - lisp_gpe_tunnel_key_t key; - lisp_gpe_sub_tunnel_t *stp = 0; - uword *p; - - /* prepare tunnel key */ - memset (&key, 0, sizeof (key)); - - /* fill in the key's remote eid */ - if (!is_l2) - ip_prefix_copy (&key.rmt.ippref, &gid_address_ippref (&a->rmt_eid)); - else - mac_copy (&key.rmt.mac, &gid_address_mac (&a->rmt_eid)); - - key.vni = clib_host_to_net_u32 (a->vni); - - p = mhash_get (&lgm->lisp_gpe_tunnel_by_key, &key); - - if (a->is_add) - { - /* adding a tunnel: tunnel must not already exist */ - if (p) - return VNET_API_ERROR_INVALID_VALUE; - - if (a->decap_next_index >= LISP_GPE_INPUT_N_NEXT) - return VNET_API_ERROR_INVALID_DECAP_NEXT; + lisp_gpe_fwd_entry_key_t key; + lisp_fwd_entry_t *lfe; + fib_protocol_t fproto; - pool_get_aligned (lgm->tunnels, t, CLIB_CACHE_LINE_BYTES); - memset (t, 0, sizeof (*t)); + lfe = find_fwd_entry (lgm, a, &key); - /* copy from arg structure */ -#define _(x) t->x = a->x; - foreach_copy_field; -#undef _ + if (NULL != lfe) + /* don't support updates */ + return VNET_API_ERROR_INVALID_VALUE; - t->locator_pairs = vec_dup (a->locator_pairs); + pool_get (lisp_fwd_entry_pool, lfe); + memset (lfe, 0, sizeof (*lfe)); + lfe->key = clib_mem_alloc (sizeof (key)); + memcpy (lfe->key, &key, sizeof (key)); - /* if vni is non-default */ - if (a->vni) - t->flags = LISP_GPE_FLAGS_I; + hash_set_mem (lisp_gpe_fwd_entries, lfe->key, lfe - lisp_fwd_entry_pool); - /* work in lisp-gpe not legacy mode */ - t->flags |= LISP_GPE_FLAGS_P; + fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); - /* next proto */ - if (!is_l2) - t->next_protocol = ip_prefix_version (&key.rmt.ippref) == IP4 ? 
- LISP_GPE_NEXT_PROTO_IP4 : LISP_GPE_NEXT_PROTO_IP6; - else - t->next_protocol = LISP_GPE_NEXT_PROTO_ETHERNET; - - /* build sub-tunnels for lowest priority locator-pairs */ - if (!a->is_negative) - create_sub_tunnels (lgm, t); - - mhash_set (&lgm->lisp_gpe_tunnel_by_key, &key, t - lgm->tunnels, 0); + lfe->type = (a->is_negative ? + LISP_FWD_ENTRY_TYPE_NEGATIVE : LISP_FWD_ENTRY_TYPE_NORMAL); + lfe->eid_table_id = a->table_id; + lfe->eid_fib_index = fib_table_find_or_create_and_lock (fproto, + lfe->eid_table_id); - /* return tunnel index */ - if (tun_index_res) - tun_index_res[0] = t - lgm->tunnels; - } - else + if (LISP_FWD_ENTRY_TYPE_NEGATIVE != lfe->type) { - /* deleting a tunnel: tunnel must exist */ - if (!p) - { - clib_warning ("Tunnel for eid %U doesn't exist!", - format_gid_address, &a->rmt_eid); - return VNET_API_ERROR_NO_SUCH_ENTRY; - } - - t = pool_elt_at_index (lgm->tunnels, p[0]); + lisp_fwd_path_t *path; + u32 index; - mhash_unset (&lgm->lisp_gpe_tunnel_by_key, &key, 0); + vec_validate (lfe->paths, vec_len (a->locator_pairs) - 1); - vec_foreach (stp, t->sub_tunnels) + vec_foreach_index (index, a->locator_pairs) { - vec_free (stp->rewrite); + path = &lfe->paths[index]; + + path->priority = a->locator_pairs[index].priority; + path->weight = a->locator_pairs[index].weight; + + path->lisp_adj = + lisp_gpe_adjacency_find_or_create_and_lock (&a->locator_pairs + [index], + lfe->eid_table_id, + lfe->key->vni); } - vec_free (t->sub_tunnels); - vec_free (t->sub_tunnels_lbv); - vec_free (t->locator_pairs); - pool_put (lgm->tunnels, t); + vec_sort_with_function (lfe->paths, lisp_gpe_fwd_entry_path_sort); } - return 0; + create_fib_entries (lfe); + + return (0); } -/** - * @brief Build IP adjacency for LISP Source/Dest FIB. 
- * - * Because LISP forwarding does not follow typical IP forwarding path, the - * adjacency's fields are overloaded (i.e., hijacked) to carry LISP specific - * data concerning the lisp-gpe interface the packets hitting the adjacency - * should be sent to and the tunnel that should be used. - * - * @param[in] lgm Reference to @ref lisp_gpe_main_t. - * @param[out] adj Adjacency to be populated. - * @param[in] table_id VRF for adjacency. - * @param[in] vni Virtual Network identifier (tenant id). - * @param[in] tun_index Tunnel index. - * @param[in] n_sub_tun Number of sub-tunnels. - * @param[in] is_negative Flag to indicate if the adjacency is for a - * negative mapping. - * @param[in] action Action to be taken for negative mapping. - * @param[in] ip_ver IP version for the adjacency. - * - * @return 0 on success. - */ -static int -build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id, - u32 vni, u32 tun_index, u32 n_sub_tun, u8 is_negative, - u8 action, u8 ip_ver) +static void +del_ip_fwd_entry_i (lisp_fwd_entry_t * lfe) { - uword *lookup_next_index, *lgpe_sw_if_index, *lnip; + lisp_fwd_path_t *path; + fib_protocol_t fproto; - memset (adj, 0, sizeof (adj[0])); - adj->n_adj = 1; - /* fill in lookup_next_index with a 'legal' value to avoid problems */ - adj->lookup_next_index = (ip_ver == IP4) ? - lgm->ip4_lookup_next_lgpe_ip4_lookup : - lgm->ip6_lookup_next_lgpe_ip6_lookup; + vec_foreach (path, lfe->paths) + { + lisp_gpe_adjacency_unlock (path->lisp_adj); + } - /* positive mapping */ - if (!is_negative) - { - /* send packets that hit this adj to lisp-gpe interface output node in - * requested vrf. */ - lnip = (ip_ver == IP4) ? 
- lgm->lgpe_ip4_lookup_next_index_by_table_id : - lgm->lgpe_ip6_lookup_next_index_by_table_id; - lookup_next_index = hash_get (lnip, table_id); - lgpe_sw_if_index = hash_get (lgm->l3_ifaces.sw_if_index_by_vni, vni); - - /* the assumption is that the interface must've been created before - * programming the dp */ - ASSERT (lookup_next_index != 0 && lgpe_sw_if_index != 0); - - /* hijack explicit fib index to store lisp interface node index, - * if_address_index for the tunnel index and saved lookup next index - * for the number of sub tunnels */ - adj->explicit_fib_index = lookup_next_index[0]; - adj->if_address_index = tun_index; - adj->rewrite_header.sw_if_index = lgpe_sw_if_index[0]; - adj->saved_lookup_next_index = n_sub_tun; - } - /* negative mapping */ - else - { - adj->rewrite_header.sw_if_index = ~0; - adj->rewrite_header.next_index = ~0; - adj->if_address_index = tun_index; + delete_fib_entries (lfe); - switch (action) - { - case LISP_NO_ACTION: - /* TODO update timers? */ - case LISP_FORWARD_NATIVE: - /* TODO check if route/next-hop for eid exists in fib and add - * more specific for the eid with the next-hop found */ - case LISP_SEND_MAP_REQUEST: - /* insert tunnel that always sends map-request */ - adj->explicit_fib_index = (ip_ver == IP4) ? - LGPE_IP4_LOOKUP_NEXT_LISP_CP_LOOKUP : - LGPE_IP6_LOOKUP_NEXT_LISP_CP_LOOKUP; - break; - case LISP_DROP: - /* for drop fwd entries, just add route, no need to add encap tunnel */ - adj->explicit_fib_index = (ip_ver == IP4 ? - LGPE_IP4_LOOKUP_NEXT_DROP : - LGPE_IP6_LOOKUP_NEXT_DROP); - break; - default: - return -1; - } - } - return 0; + fproto = (IP4 == ip_prefix_version (&fid_addr_ippref (&lfe->key->rmt)) ? + FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6); + fib_table_unlock (lfe->eid_fib_index, fproto); + + hash_unset_mem (lisp_gpe_fwd_entries, lfe->key); + clib_mem_free (lfe->key); + pool_put (lisp_fwd_entry_pool, lfe); } /** * @brief Add/Delete LISP IP forwarding entry. 
* - * Coordinates the creation/removal of forwarding entries for IP LISP overlay: - * creates lisp-gpe tunnel, builds tunnel customized forwarding entry and - * injects new route in Source/Dest FIB. + * removal of forwarding entries for IP LISP overlay: * * @param[in] lgm Reference to @ref lisp_gpe_main_t. * @param[in] a Parameters for building the forwarding entry. @@ -455,63 +250,21 @@ build_ip_adjacency (lisp_gpe_main_t * lgm, ip_adjacency_t * adj, u32 table_id, * @return 0 on success. */ static int -add_del_ip_fwd_entry (lisp_gpe_main_t * lgm, - vnet_lisp_gpe_add_del_fwd_entry_args_t * a) +del_ip_fwd_entry (lisp_gpe_main_t * lgm, + vnet_lisp_gpe_add_del_fwd_entry_args_t * a) { - ip_adjacency_t adj, *adjp; - lisp_gpe_tunnel_t *t; - u32 rv, tun_index = ~0, n_sub_tuns = 0; - ip_prefix_t *rmt_pref, *lcl_pref; - u8 ip_ver; - - rmt_pref = &gid_address_ippref (&a->rmt_eid); - lcl_pref = &gid_address_ippref (&a->lcl_eid); - ip_ver = ip_prefix_version (rmt_pref); - - /* add/del tunnel to tunnels pool and prepares rewrite */ - if (0 != a->locator_pairs) - { - rv = add_del_ip_tunnel (a, 0 /* is_l2 */ , &tun_index); - if (rv) - { - clib_warning ("failed to build tunnel!"); - return rv; - } - if (a->is_add) - { - t = pool_elt_at_index (lgm->tunnels, tun_index); - n_sub_tuns = t->sub_tunnels_lbv_count; - } - } - - /* setup adjacency for eid */ - rv = build_ip_adjacency (lgm, &adj, a->table_id, a->vni, tun_index, - n_sub_tuns, a->is_negative, a->action, ip_ver); - - /* add/delete route for eid */ - rv |= ip_sd_fib_add_del_route (lgm, rmt_pref, lcl_pref, a->table_id, &adj, - a->is_add); - - if (rv) - { - clib_warning ("failed to insert route for tunnel!"); - return rv; - } + lisp_gpe_fwd_entry_key_t key; + lisp_fwd_entry_t *lfe; - /* check that everything worked */ - if (CLIB_DEBUG && a->is_add) - { - u32 adj_index; - adj_index = ip_sd_fib_get_route (lgm, rmt_pref, lcl_pref, a->table_id); - ASSERT (adj_index != 0); + lfe = find_fwd_entry (lgm, a, &key); - adjp = 
ip_get_adjacency ((ip_ver == IP4) ? lgm->lm4 : lgm->lm6, - adj_index); + if (NULL == lfe) + /* no such entry */ + return VNET_API_ERROR_INVALID_VALUE; - ASSERT (adjp != 0 && adjp->if_address_index == tun_index); - } + del_ip_fwd_entry_i (lfe); - return rv; + return (0); } static void @@ -536,7 +289,7 @@ make_mac_fib_key (BVT (clib_bihash_kv) * kv, u16 bd_index, u8 src_mac[6], * * @return index of mapping matching the lookup key. */ -u32 +index_t lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], u8 dst_mac[6]) { @@ -555,7 +308,7 @@ lisp_l2_fib_lookup (lisp_gpe_main_t * lgm, u16 bd_index, u8 src_mac[6], return value.value; } - return ~0; + return lisp_gpe_main.l2_lb_miss; } /** @@ -601,6 +354,12 @@ l2_fib_init (lisp_gpe_main_t * lgm) BV (clib_bihash_init) (&lgm->l2_fib, "l2 fib", 1 << max_log2 (L2_FIB_DEFAULT_HASH_NUM_BUCKETS), L2_FIB_DEFAULT_HASH_MEMORY_SIZE); + + /* + * the result from a 'miss' in a L2 Table + */ + lgm->l2_lb_miss = load_balance_create (1, DPO_PROTO_IP4, 0); + load_balance_set_bucket (lgm->l2_lb_miss, 0, drop_dpo_get (DPO_PROTO_IP4)); } /** @@ -618,27 +377,75 @@ static int add_del_l2_fwd_entry (lisp_gpe_main_t * lgm, vnet_lisp_gpe_add_del_fwd_entry_args_t * a) { - int rv; - u32 tun_index; - bd_main_t *bdm = &bd_main; - uword *bd_indexp; - - /* create tunnel */ - rv = add_del_ip_tunnel (a, 1 /* is_l2 */ , &tun_index); - if (rv) - return rv; - - bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); - if (!bd_indexp) - { - clib_warning ("bridge domain %d doesn't exist", a->bd_id); - return -1; - } - - /* add entry to l2 lisp fib */ - lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac (&a->lcl_eid), - gid_address_mac (&a->rmt_eid), tun_index, - a->is_add); + /* lisp_gpe_fwd_entry_key_t key; */ + /* lisp_fwd_entry_t *lfe; */ + /* fib_protocol_t fproto; */ + /* uword *bd_indexp; */ + + /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */ + /* if (!bd_indexp) */ + /* { */ + /* clib_warning ("bridge domain %d 
doesn't exist", a->bd_id); */ + /* return -1; */ + /* } */ + + /* lfe = find_fwd_entry(lgm, a, &key); */ + + /* if (NULL != lfe) */ + /* /\* don't support updates *\/ */ + /* return VNET_API_ERROR_INVALID_VALUE; */ + + /* int rv; */ + /* u32 tun_index; */ + /* fib_node_index_t old_path_list; */ + /* bd_main_t *bdm = &bd_main; */ + /* fib_route_path_t *rpaths; */ + /* lisp_gpe_tunnel_t *t; */ + /* const dpo_id_t *dpo; */ + /* index_t lbi; */ + + /* /\* create tunnel *\/ */ + /* rv = add_del_ip_tunnel (a, 1 /\* is_l2 *\/ , &tun_index, NULL); */ + /* if (rv) */ + /* return rv; */ + + /* bd_indexp = hash_get (bdm->bd_index_by_bd_id, a->bd_id); */ + /* if (!bd_indexp) */ + /* { */ + /* clib_warning ("bridge domain %d doesn't exist", a->bd_id); */ + /* return -1; */ + /* } */ + + /* t = pool_elt_at_index (lgm->tunnels, tun_index); */ + /* old_path_list = t->l2_path_list; */ + + /* if (LISP_NO_ACTION == t->action) */ + /* { */ + /* rpaths = lisp_gpe_mk_paths_for_sub_tunnels (t); */ + + /* t->l2_path_list = fib_path_list_create (FIB_PATH_LIST_FLAG_NONE, */ + /* rpaths); */ + + /* vec_free (rpaths); */ + /* fib_path_list_lock (t->l2_path_list); */ + + /* dpo = fib_path_list_contribute_forwarding (t->l2_path_list, */ + /* FIB_FORW_CHAIN_TYPE_UNICAST_IP); */ + /* lbi = dpo->dpoi_index; */ + /* } */ + /* else if (LISP_SEND_MAP_REQUEST == t->action) */ + /* { */ + /* lbi = lgm->l2_lb_cp_lkup; */ + /* } */ + /* else */ + /* { */ + /* lbi = lgm->l2_lb_miss; */ + /* } */ + /* fib_path_list_unlock (old_path_list); */ + + /* /\* add entry to l2 lisp fib *\/ */ + /* lisp_l2_fib_add_del_entry (lgm, bd_indexp[0], gid_address_mac (&a->lcl_eid), */ + /* gid_address_mac (&a->rmt_eid), lbi, a->is_add); */ return 0; } @@ -669,7 +476,11 @@ vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, switch (type) { case GID_ADDR_IP_PREFIX: - return add_del_ip_fwd_entry (lgm, a); + if (a->is_add) + return add_ip_fwd_entry (lgm, a); + else + return del_ip_fwd_entry (lgm, a); + 
break; case GID_ADDR_MAC: return add_del_l2_fwd_entry (lgm, a); default: @@ -807,103 +618,77 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = { - .path = "lisp gpe tunnel", - .short_help = "lisp gpe tunnel add/del vni <vni> vrf <vrf> [leid <leid>]" + .path = "lisp gpe entry", + .short_help = "lisp gpe entry add/del vni <vni> vrf <vrf> [leid <leid>]" "reid <reid> [loc-pair <lloc> <rloc> p <priority> w <weight>] " "[negative action <action>]", .function = lisp_gpe_add_del_fwd_entry_command_fn, }; /* *INDENT-ON* */ -/** Format LISP-GPE next indexes. */ static u8 * -format_decap_next (u8 * s, va_list * args) +format_lisp_fwd_path (u8 * s, va_list ap) { - u32 next_index = va_arg (*args, u32); + lisp_fwd_path_t *lfp = va_arg (ap, lisp_fwd_path_t *); - switch (next_index) - { - case LISP_GPE_INPUT_NEXT_DROP: - return format (s, "drop"); - case LISP_GPE_INPUT_NEXT_IP4_INPUT: - return format (s, "ip4"); - case LISP_GPE_INPUT_NEXT_IP6_INPUT: - return format (s, "ip6"); - default: - return format (s, "unknown %d", next_index); - } - return s; + s = format (s, "pirority:%d weight:%d ", lfp->priority, lfp->weight); + s = format (s, "adj:[%U]\n", + format_lisp_gpe_adjacency, + lisp_gpe_adjacency_get (lfp->lisp_adj), + LISP_GPE_ADJ_FORMAT_FLAG_NONE); + + return (s); } -/** Format LISP-GPE tunnel. 
*/ -u8 * -format_lisp_gpe_tunnel (u8 * s, va_list * args) +static u8 * +format_lisp_gpe_fwd_entry (u8 * s, va_list ap) { - lisp_gpe_tunnel_t *t = va_arg (*args, lisp_gpe_tunnel_t *); - lisp_gpe_main_t *lgm = vnet_lisp_gpe_get_main (); - locator_pair_t *lp = 0; - normalized_sub_tunnel_weights_t *nstw; - - s = - format (s, "tunnel %d vni %d (0x%x)\n", t - lgm->tunnels, t->vni, t->vni); - s = - format (s, " fibs: encap %d, decap %d decap next %U\n", - t->encap_fib_index, t->decap_fib_index, format_decap_next, - t->decap_next_index); - s = format (s, " lisp ver %d ", (t->ver_res >> 6)); - -#define _(n,v) if (t->flags & v) s = format (s, "%s-bit ", #n); - foreach_lisp_gpe_flag_bit; -#undef _ - - s = format (s, "next_protocol %d ver_res %x res %x\n", - t->next_protocol, t->ver_res, t->res); - - s = format (s, " locator-pairs:\n"); - vec_foreach (lp, t->locator_pairs) - { - s = format (s, " local: %U remote: %U weight %d\n", - format_ip_address, &lp->lcl_loc, format_ip_address, - &lp->rmt_loc, lp->weight); - } + lisp_fwd_entry_t *lfe = va_arg (ap, lisp_fwd_entry_t *); - s = format (s, " active sub-tunnels:\n"); - vec_foreach (nstw, t->norm_sub_tunnel_weights) - { - lp = vec_elt_at_index (t->locator_pairs, nstw->sub_tunnel_index); - s = format (s, " local: %U remote: %U weight %d\n", format_ip_address, - &lp->lcl_loc, format_ip_address, &lp->rmt_loc, nstw->weight); - } - return s; + s = format (s, "VNI:%d VRF:%d EID: %U -> %U", + lfe->key->vni, lfe->eid_table_id, + format_fid_address, &lfe->key->lcl, + format_fid_address, &lfe->key->rmt); + if (LISP_FWD_ENTRY_TYPE_NEGATIVE == lfe->type) + { + s = format (s, "\n Negative - action:%U", + format_negative_mapping_action, lfe->action); + } + else + { + lisp_fwd_path_t *path; + + s = format (s, "\n via:"); + vec_foreach (path, lfe->paths) + { + s = format (s, "\n %U", format_lisp_fwd_path, path); + } + } + + return (s); } -/** CLI command to show LISP-GPE tunnels. 
*/ static clib_error_t * -show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) +lisp_gpe_fwd_entry_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) { - lisp_gpe_main_t *lgm = &lisp_gpe_main; - lisp_gpe_tunnel_t *t; - - if (pool_elts (lgm->tunnels) == 0) - vlib_cli_output (vm, "No lisp-gpe tunnels configured..."); + lisp_fwd_entry_t *lfe; - /* *INDENT-OFF* */ - pool_foreach (t, lgm->tunnels, +/* *INDENT-OFF* */ + pool_foreach (lfe, lisp_fwd_entry_pool, ({ - vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, t); + vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe); })); - /* *INDENT-ON* */ +/* *INDENT-ON* */ - return 0; + return (NULL); } /* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) = -{ - .path = "show lisp gpe tunnel", - .function = show_lisp_gpe_tunnel_command_fn, +VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = { + .path = "show lisp gpe entry", + .short_help = "show lisp gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>", + .function = lisp_gpe_fwd_entry_show, }; /* *INDENT-ON* */ @@ -921,29 +706,9 @@ clib_error_t * vnet_lisp_gpe_enable_disable (vnet_lisp_gpe_enable_disable_args_t * a) { lisp_gpe_main_t *lgm = &lisp_gpe_main; - vnet_main_t *vnm = lgm->vnet_main; if (a->is_en) { - /* add lgpe_ip4_lookup as possible next_node for ip4 lookup */ - if (lgm->ip4_lookup_next_lgpe_ip4_lookup == ~0) - { - lgm->ip4_lookup_next_lgpe_ip4_lookup = - vlib_node_add_next (vnm->vlib_main, ip4_lookup_node.index, - lgpe_ip4_lookup_node.index); - } - /* add lgpe_ip6_lookup as possible next_node for ip6 lookup */ - if (lgm->ip6_lookup_next_lgpe_ip6_lookup == ~0) - { - lgm->ip6_lookup_next_lgpe_ip6_lookup = - vlib_node_add_next (vnm->vlib_main, ip6_lookup_node.index, - lgpe_ip6_lookup_node.index); - } - else - { - /* ask cp to re-add ifaces and defaults */ - } - lgm->is_en = 1; } else @@ -951,37 +716,17 @@ vnet_lisp_gpe_enable_disable 
(vnet_lisp_gpe_enable_disable_args_t * a) CLIB_UNUSED (uword * val); hash_pair_t *p; u32 *dp_tables = 0, *dp_table; - lisp_gpe_tunnel_key_t *tunnels = 0, *tunnel; - vnet_lisp_gpe_add_del_fwd_entry_args_t _at, *at = &_at; vnet_lisp_gpe_add_del_iface_args_t _ai, *ai = &_ai; + lisp_fwd_entry_t *lfe; - /* remove all tunnels */ - + /* remove all entries */ /* *INDENT-OFF* */ - mhash_foreach(tunnel, val, &lgm->lisp_gpe_tunnel_by_key, ({ - vec_add1(tunnels, tunnel[0]); + pool_foreach (lfe, lisp_fwd_entry_pool, + ({ + del_ip_fwd_entry_i (lfe); })); /* *INDENT-ON* */ - vec_foreach (tunnel, tunnels) - { - memset (at, 0, sizeof (at[0])); - at->is_add = 0; - if (tunnel->rmt.type == GID_ADDR_IP_PREFIX) - { - gid_address_type (&at->rmt_eid) = GID_ADDR_IP_PREFIX; - ip_prefix_copy (&gid_address_ippref (&at->rmt_eid), - &tunnel->rmt.ippref); - } - else - { - gid_address_type (&at->rmt_eid) = GID_ADDR_MAC; - mac_copy (&gid_address_mac (&at->rmt_eid), &tunnel->rmt.mac); - } - vnet_lisp_gpe_add_del_fwd_entry (at, 0); - } - vec_free (tunnels); - /* disable all l3 ifaces */ /* *INDENT-OFF* */ @@ -1109,6 +854,7 @@ format_vnet_lisp_gpe_status (u8 * s, va_list * args) return format (s, "%s", lgm->is_en ? "enabled" : "disabled"); } + /** LISP-GPE init function. 
*/ clib_error_t * lisp_gpe_init (vlib_main_t * vm) @@ -1128,11 +874,10 @@ lisp_gpe_init (vlib_main_t * vm) lgm->im6 = &ip6_main; lgm->lm4 = &ip4_main.lookup_main; lgm->lm6 = &ip6_main.lookup_main; - lgm->ip4_lookup_next_lgpe_ip4_lookup = ~0; - lgm->ip6_lookup_next_lgpe_ip6_lookup = ~0; - mhash_init (&lgm->lisp_gpe_tunnel_by_key, sizeof (uword), - sizeof (lisp_gpe_tunnel_key_t)); + lisp_gpe_fwd_entries = hash_create_mem (0, + sizeof (lisp_gpe_fwd_entry_key_t), + sizeof (uword)); l2_fib_init (lgm); diff --git a/vnet/vnet/lisp-gpe/lisp_gpe.h b/vnet/vnet/lisp-gpe/lisp_gpe.h index 4a8bdfe7f93..66009cc1947 100644 --- a/vnet/vnet/lisp-gpe/lisp_gpe.h +++ b/vnet/vnet/lisp-gpe/lisp_gpe.h @@ -30,6 +30,7 @@ #include <vnet/ip/udp.h> #include <vnet/lisp-cp/lisp_types.h> #include <vnet/lisp-gpe/lisp_gpe_packet.h> +#include <vnet/adj/adj_types.h> /** IP4-UDP-LISP encap header */ /* *INDENT-OFF* */ @@ -49,37 +50,6 @@ typedef CLIB_PACKED (struct { }) ip6_udp_lisp_gpe_header_t; /* *INDENT-ON* */ -/** LISP-GPE tunnel key */ -typedef struct -{ - union - { - struct - { - dp_address_t rmt; - dp_address_t lcl; - u32 vni; - }; - u8 as_u8[40]; - }; -} lisp_gpe_tunnel_key_t; - -typedef struct lisp_gpe_sub_tunnel -{ - /** Rewrite string. 
$$$$ embed vnet_rewrite header */ - u8 *rewrite; - u32 parent_index; - u32 locator_pair_index; - u8 weight; - u8 is_ip4; -} lisp_gpe_sub_tunnel_t; - -typedef struct nomalized_sub_tunnel -{ - u32 sub_tunnel_index; - u8 weight; -} normalized_sub_tunnel_weights_t; - /** LISP-GPE tunnel structure */ typedef struct { @@ -87,17 +57,7 @@ typedef struct locator_pair_t *locator_pairs; /** locator-pairs with best priority become sub-tunnels */ - lisp_gpe_sub_tunnel_t *sub_tunnels; - - /** sub-tunnels load balancing vector: contains list of sub-tunnel - * indexes replicated according to weight */ - u32 *sub_tunnels_lbv; - - /** number of entries in load balancing vector */ - u32 sub_tunnels_lbv_count; - - /** normalized sub tunnel weights */ - normalized_sub_tunnel_weights_t *norm_sub_tunnel_weights; + u32 *sub_tunnels; /** decap next index */ u32 decap_next_index; @@ -109,10 +69,16 @@ typedef struct u32 encap_fib_index; /* tunnel partner lookup here */ u32 decap_fib_index; /* inner IP lookup here */ + /** index of the source address lookup FIB */ + u32 src_fib_index; + /** vnet intfc hw/sw_if_index */ u32 hw_if_index; u32 sw_if_index; + /** L2 path-list */ + fib_node_index_t l2_path_list; + /** action for 'negative' tunnels */ u8 action; @@ -124,6 +90,112 @@ typedef struct u32 vni; } lisp_gpe_tunnel_t; +/** + * @brief A path on which to forward lisp traffic + */ +typedef struct lisp_fwd_path_t_ +{ + /** + * The adjacency constructed for the locator pair + */ + index_t lisp_adj; + + /** + * Priority. Only the paths with the best priority will be installed in FIB + */ + u8 priority; + + /** + * [UE]CMP weigt for the path + */ + u8 weight; + +} lisp_fwd_path_t; + +/** + * @brief A Forwarding entry can be 'normal' or 'negative' + * Negative implies we deliberately want to add a FIB entry for an EID + * that results in 'spcial' behaviour determined by an 'action'. + * @normal' means send it down some tunnels. 
+ */ +typedef enum lisp_fwd_entry_type_t_ +{ + LISP_FWD_ENTRY_TYPE_NORMAL, + LISP_FWD_ENTRY_TYPE_NEGATIVE, +} lisp_fwd_entry_type_t; + +typedef enum +{ + NO_ACTION, + FORWARD_NATIVE, + SEND_MAP_REQUEST, + DROP +} negative_fwd_actions_e; + +/** + * LISP-GPE fwd entry key + */ +typedef struct lisp_gpe_fwd_entry_key_t_ +{ + dp_address_t rmt; + dp_address_t lcl; + u32 vni; +} lisp_gpe_fwd_entry_key_t; + +/** + * @brief A LISP Forwarding Entry + * + * A forwarding entry is from a locai EID to a remote EID over a set of rloc pairs + */ +typedef struct lisp_fwd_entry_t_ +{ + /** + * The Entry's key: {lEID,r-EID,vni} + */ + lisp_gpe_fwd_entry_key_t *key; + + /** + * The VRF (in the case of L3) or Bridge-Domain (for L2) index + */ + union + { + u32 eid_table_id; + u32 eid_bd_index; + }; + + /** + * The forwarding entry type + */ + lisp_fwd_entry_type_t type; + + union + { + /** + * @brief When the type is 'normal' + * The RLOC pair that form the route's paths. i.e. where to send + * packets for this route. + */ + lisp_fwd_path_t *paths; + + /** + * @brief When the type is negative. The action to take. + */ + negative_fwd_actions_e action; + }; + + /** + * The FIB index for the overlay, i.e. the FIB in which the EIDs + * are present + */ + u32 eid_fib_index; + + /** + * The SRC-FIB index for created for anding source-route entries + */ + u32 src_fib_index; +} lisp_fwd_entry_t; + + #define foreach_lisp_gpe_ip_input_next \ _(DROP, "error-drop") \ _(IP4_INPUT, "ip4-input") \ @@ -147,30 +219,6 @@ typedef enum LISP_GPE_N_ERROR, } lisp_gpe_error_t; -/** IP4 source FIB. - * As a first step, reuse v4 fib. 
The goal of the typedef is - * to shield consumers from future updates that may result in the lisp ip4 fib - * diverging from ip4 fib - */ -typedef ip4_fib_t ip4_src_fib_t; - -/** IP6 source FIB */ -typedef struct ip6_src_fib -{ - BVT (clib_bihash) ip6_lookup_table; - - /** bitmap/vector of mask widths to search */ - uword *non_empty_dst_address_length_bitmap; - u8 *prefix_lengths_in_search_order; - ip6_address_t fib_masks[129]; - i32 dst_address_length_refcounts[129]; - - /** ip6 lookup table config parameters */ - u32 lookup_table_nbuckets; - uword lookup_table_size; -} ip6_src_fib_t; - -/** Tunnel lookup structure for L2 and L3 tunnels */ typedef struct tunnel_lookup { /** Lookup lisp-gpe interfaces by dp table (eg. vrf/bridge index) */ @@ -178,6 +226,8 @@ typedef struct tunnel_lookup /** lookup decap tunnel termination sw_if_index by vni and vice versa */ uword *sw_if_index_by_vni; + + // FIXME - Need this? uword *vni_by_sw_if_index; } tunnel_lookup_t; @@ -187,9 +237,6 @@ typedef struct lisp_gpe_main /** pool of encap tunnel instances */ lisp_gpe_tunnel_t *tunnels; - /** lookup tunnel by key */ - mhash_t lisp_gpe_tunnel_by_key; - /** Free vlib hw_if_indices */ u32 *free_tunnel_hw_if_indices; @@ -197,21 +244,8 @@ typedef struct lisp_gpe_main /* L3 data structures * ================== */ - - /** Pool of src fibs that are paired with dst fibs */ - ip4_src_fib_t *ip4_src_fibs; - ip6_src_fib_t *ip6_src_fibs; - tunnel_lookup_t l3_ifaces; - /** Lookup lgpe_ipX_lookup_next by vrf */ - uword *lgpe_ip4_lookup_next_index_by_table_id; - uword *lgpe_ip6_lookup_next_index_by_table_id; - - /** next node indexes that point ip4/6 lookup to lisp gpe ip lookup */ - u32 ip4_lookup_next_lgpe_ip4_lookup; - u32 ip6_lookup_next_lgpe_ip6_lookup; - /* L2 data structures * ================== */ @@ -220,6 +254,10 @@ typedef struct lisp_gpe_main tunnel_lookup_t l2_ifaces; + /** Load-balance for a miss in the table */ + index_t l2_lb_miss; + index_t l2_lb_cp_lkup; + /** convenience */ 
vlib_main_t *vlib_main; vnet_main_t *vnet_main; @@ -238,10 +276,10 @@ vnet_lisp_gpe_get_main () return &lisp_gpe_main; } -extern vlib_node_registration_t lgpe_ip4_lookup_node; -extern vlib_node_registration_t lgpe_ip6_lookup_node; + extern vlib_node_registration_t lisp_gpe_ip4_input_node; extern vlib_node_registration_t lisp_gpe_ip6_input_node; +extern vnet_hw_interface_class_t lisp_gpe_hw_class; u8 *format_lisp_gpe_header_with_length (u8 * s, va_list * args); @@ -291,7 +329,7 @@ typedef struct u8 is_negative; /** action for negative mappings */ - u8 action; + negative_fwd_actions_e action; /** local eid */ gid_address_t lcl_eid; @@ -332,13 +370,23 @@ int vnet_lisp_gpe_add_del_fwd_entry (vnet_lisp_gpe_add_del_fwd_entry_args_t * a, u32 * hw_if_indexp); -int -ip_sd_fib_add_del_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id, - ip_adjacency_t * add_adj, u8 is_add); -u32 -ip_sd_fib_get_route (lisp_gpe_main_t * lgm, ip_prefix_t * dst_prefix, - ip_prefix_t * src_prefix, u32 table_id); +extern void +ip_src_fib_add_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const lisp_fwd_path_t * paths); +extern void +ip_src_dst_fib_del_route (u32 src_fib_index, + const ip_prefix_t * src_prefix, + u32 dst_table_id, const ip_prefix_t * dst_prefix); +extern void +ip_src_fib_add_route_w_dpo (u32 src_fib_index, + const ip_prefix_t * src_prefix, + const dpo_id_t * src_dpo); +extern u32 +ip_dst_fib_add_route (u32 dst_table_id, const ip_prefix_t * dst_prefix); + +extern fib_route_path_t *lisp_gpe_mk_paths_for_sub_tunnels (lisp_gpe_tunnel_t + * t); #define foreach_lgpe_ip4_lookup_next \ _(DROP, "error-drop") \ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c new file mode 100644 index 00000000000..861f0dd38c0 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.c @@ -0,0 +1,437 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencys. + * + */ + +#include <vnet/dpo/dpo.h> +#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h> +#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> +#include <vnet/lisp-gpe/lisp_gpe_tunnel.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/adj/adj_midchain.h> + +/** + * Memory pool of all adjacencies + */ +static lisp_gpe_adjacency_t *lisp_adj_pool; + +/** + * Hash table of all adjacencies. key:{nh, itf} + * We never have an all zeros address since the interfaces are multi-access, + * therefore there is no ambiguity between a v4 and v6 next-hop, so we don't + * need to add the protocol to the key. 
+ */ +static +BVT (clib_bihash) + lisp_adj_db; + +#define LISP_ADJ_SET_KEY(_key, _itf, _nh) \ +{ \ + _key.key[0] = (_nh)->ip.v6.as_u64[0]; \ + _key.key[1] = (_nh)->ip.v6.as_u64[1]; \ + _key.key[2] = (_itf); \ +} + + static index_t lisp_adj_find (const ip_address_t * addr, u32 sw_if_index) +{ + BVT (clib_bihash_kv) kv; + + LISP_ADJ_SET_KEY (kv, sw_if_index, addr); + + if (BV (clib_bihash_search) (&lisp_adj_db, &kv, &kv) < 0) + { + return (INDEX_INVALID); + } + else + { + return (kv.value); + } +} + +static void +lisp_adj_insert (const ip_address_t * addr, u32 sw_if_index, index_t ai) +{ + BVT (clib_bihash_kv) kv; + + LISP_ADJ_SET_KEY (kv, sw_if_index, addr); + kv.value = ai; + + BV (clib_bihash_add_del) (&lisp_adj_db, &kv, 1); +} + +static void +lisp_adj_remove (const ip_address_t * addr, u32 sw_if_index) +{ + BVT (clib_bihash_kv) kv; + + LISP_ADJ_SET_KEY (kv, sw_if_index, addr); + + BV (clib_bihash_add_del) (&lisp_adj_db, &kv, 0); +} + +static lisp_gpe_adjacency_t * +lisp_gpe_adjacency_get_i (index_t lai) +{ + return (pool_elt_at_index (lisp_adj_pool, lai)); +} + +fib_forward_chain_type_t +lisp_gpe_adj_get_fib_chain_type (const lisp_gpe_adjacency_t * ladj) +{ + switch (ip_addr_version (&ladj->remote_rloc)) + { + case IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + default: + ASSERT (0); + break; + } + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +/** + * @brief Stack the tunnel's midchain on the IP forwarding chain of the via + */ +static void +lisp_gpe_adj_stack (lisp_gpe_adjacency_t * ladj) +{ + const lisp_gpe_tunnel_2_t *lgt; + dpo_id_t tmp = DPO_NULL; + fib_link_t linkt; + + lgt = lisp_gpe_tunnel_get (ladj->tunnel_index); + fib_entry_contribute_forwarding (lgt->fib_entry_index, + lisp_gpe_adj_get_fib_chain_type (ladj), + &tmp); + + FOR_EACH_FIB_IP_LINK (linkt) + { + adj_nbr_midchain_stack (ladj->adjs[linkt], &tmp); + } + dpo_reset (&tmp); +} + +static lisp_gpe_next_protocol_e 
+lisp_gpe_adj_proto_from_fib_link_type (fib_link_t linkt) +{ + switch (linkt) + { + case FIB_LINK_IP4: + return (LISP_GPE_INPUT_NEXT_IP4_INPUT); + case FIB_LINK_IP6: + return (LISP_GPE_INPUT_NEXT_IP6_INPUT); + default: + ASSERT (0); + } + return (LISP_GPE_INPUT_NEXT_DROP); +} + +index_t +lisp_gpe_adjacency_find_or_create_and_lock (const locator_pair_t * pair, + u32 overlay_table_id, u32 vni) +{ + const lisp_gpe_tunnel_2_t *lgt; + lisp_gpe_adjacency_t *ladj; + index_t lai, l3si; + + /* + * first find the L3 sub-interface that corresponds to the loacl-rloc and vni + */ + l3si = lisp_gpe_sub_interface_find_or_create_and_lock (&pair->lcl_loc, + overlay_table_id, + vni); + + /* + * find an existing or create a new adj + */ + lai = lisp_adj_find (&pair->rmt_loc, l3si); + + if (INDEX_INVALID == lai) + { + const lisp_gpe_sub_interface_t *l3s; + u8 *rewrite = NULL; + fib_link_t linkt; + fib_prefix_t nh; + + pool_get (lisp_adj_pool, ladj); + memset (ladj, 0, sizeof (*ladj)); + lai = (ladj - lisp_adj_pool); + + ladj->remote_rloc = pair->rmt_loc; + ladj->vni = vni; + /* transfer the lock to the adj */ + ladj->lisp_l3_sub_index = l3si; + + l3s = lisp_gpe_sub_interface_get (l3si); + ladj->sw_if_index = l3s->sw_if_index; + + /* if vni is non-default */ + if (ladj->vni) + ladj->flags = LISP_GPE_FLAGS_I; + + /* work in lisp-gpe not legacy mode */ + ladj->flags |= LISP_GPE_FLAGS_P; + + /* + * find the tunnel that will provide the underlying transport + * and hence the rewrite. + * The RLOC FIB index is default table - always. 
+ */ + ladj->tunnel_index = lisp_gpe_tunnel_find_or_create_and_lock (pair, 0); + + lgt = lisp_gpe_tunnel_get (ladj->tunnel_index); + + /* + * become of child of the RLOC FIB entry so we are updated when + * its reachability changes, allowing us to re-stack the midcahins + */ + ladj->fib_entry_child_index = fib_entry_child_add (lgt->fib_entry_index, + FIB_NODE_TYPE_LISP_ADJ, + lai); + ip_address_to_fib_prefix (&pair->rmt_loc, &nh); + + /* + * construct and stack the FIB midchain adjacencies + */ + FOR_EACH_FIB_IP_LINK (linkt) + { + ladj->adjs[linkt] = adj_nbr_add_or_lock (nh.fp_proto, + linkt, + &nh.fp_addr, + ladj->sw_if_index); + + rewrite = + lisp_gpe_tunnel_build_rewrite (lgt, ladj, + lisp_gpe_adj_proto_from_fib_link_type + (linkt)); + + adj_nbr_midchain_update_rewrite (ladj->adjs[linkt], + vnet_get_sup_hw_interface + (vnet_get_main (), + ladj->sw_if_index)->tx_node_index, + rewrite); + + vec_free (rewrite); + } + + lisp_gpe_adj_stack (ladj); + + lisp_adj_insert (&ladj->remote_rloc, ladj->lisp_l3_sub_index, lai); + } + else + { + /* unlock the interface from the find. */ + lisp_gpe_sub_interface_unlock (l3si); + ladj = lisp_gpe_adjacency_get_i (lai); + } + + ladj->locks++; + + return (lai); +} + +/** + * @brief Get a pointer to a tunnel from a pointer to a FIB node + */ +static lisp_gpe_adjacency_t * +lisp_gpe_adjacency_from_fib_node (const fib_node_t * node) +{ + return ((lisp_gpe_adjacency_t *) + ((char *) node - + STRUCT_OFFSET_OF (lisp_gpe_adjacency_t, fib_node))); +} + +static void +lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_t * ladj) +{ + /* + * no children so we are not counting locks. no-op. 
+ * at least not counting + */ + lisp_adj_remove (&ladj->remote_rloc, ladj->lisp_l3_sub_index); + + /* + * unlock the resources this adj holds + */ + lisp_gpe_tunnel_unlock (ladj->tunnel_index); + lisp_gpe_sub_interface_unlock (ladj->lisp_l3_sub_index); + + pool_put (lisp_adj_pool, ladj); +} + +void +lisp_gpe_adjacency_unlock (index_t lai) +{ + lisp_gpe_adjacency_t *ladj; + + ladj = lisp_gpe_adjacency_get_i (lai); + + ladj->locks--; + + if (0 == ladj->locks) + { + lisp_gpe_adjacency_last_lock_gone (ladj); + } +} + +const lisp_gpe_adjacency_t * +lisp_gpe_adjacency_get (index_t lai) +{ + return (lisp_gpe_adjacency_get_i (lai)); +} + + +/** + * @brief LISP GPE tunnel back walk + * + * The FIB entry through which this tunnel resolves has been updated. + * re-stack the midchain on the new forwarding. + */ +static fib_node_back_walk_rc_t +lisp_gpe_adjacency_back_walk (fib_node_t * node, + fib_node_back_walk_ctx_t * ctx) +{ + lisp_gpe_adj_stack (lisp_gpe_adjacency_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +static fib_node_t * +lisp_gpe_adjacency_get_fib_node (fib_node_index_t index) +{ + lisp_gpe_adjacency_t *ladj; + + ladj = pool_elt_at_index (lisp_adj_pool, index); + return (&ladj->fib_node); +} + +static void +lisp_gpe_adjacency_last_fib_lock_gone (fib_node_t * node) +{ + lisp_gpe_adjacency_last_lock_gone (lisp_gpe_adjacency_from_fib_node (node)); +} + +const static fib_node_vft_t lisp_gpe_tuennel_vft = { + .fnv_get = lisp_gpe_adjacency_get_fib_node, + .fnv_back_walk = lisp_gpe_adjacency_back_walk, + .fnv_last_lock = lisp_gpe_adjacency_last_fib_lock_gone, +}; + +u8 * +format_lisp_gpe_adjacency (u8 * s, va_list * args) +{ + lisp_gpe_adjacency_t *ladj = va_arg (*args, lisp_gpe_adjacency_t *); + lisp_gpe_adjacency_format_flags_t flags = + va_arg (args, lisp_gpe_adjacency_format_flags_t); + + if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL) + { + s = + format (s, "index %d locks:%d\n", ladj - lisp_adj_pool, ladj->locks); + } + + s = format (s, " 
vni: %d,", ladj->vni); + s = format (s, " remote-RLOC: %U,", format_ip_address, &ladj->remote_rloc); + + if (flags & LISP_GPE_ADJ_FORMAT_FLAG_DETAIL) + { + s = format (s, " %U\n", + format_lisp_gpe_sub_interface, + lisp_gpe_sub_interface_get (ladj->lisp_l3_sub_index)); + s = format (s, " %U\n", + format_lisp_gpe_tunnel, + lisp_gpe_tunnel_get (ladj->tunnel_index)); + s = format (s, " FIB adjacencies: IPV4:%d IPv6:%d\n", + ladj->adjs[FIB_LINK_IP4], ladj->adjs[FIB_LINK_IP6]); + } + else + { + s = format (s, " LISP L3 sub-interface index: %d,", + ladj->lisp_l3_sub_index); + s = format (s, " LISP tunnel index: %d", ladj->tunnel_index); + } + + + return (s); +} + +static clib_error_t * +lisp_gpe_adjacency_show (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + lisp_gpe_adjacency_t *ladj; + index_t index; + + if (pool_elts (lisp_adj_pool) == 0) + vlib_cli_output (vm, "No lisp-gpe Adjacencies"); + + if (unformat (input, "%d", &index)) + { + ladj = lisp_gpe_adjacency_get_i (index); + vlib_cli_output (vm, "%U", format_lisp_gpe_adjacency, ladj, + LISP_GPE_ADJ_FORMAT_FLAG_DETAIL); + } + else + { + /* *INDENT-OFF* */ + pool_foreach (ladj, lisp_adj_pool, + ({ + vlib_cli_output (vm, "[%d] %U\n", + ladj - lisp_adj_pool, + format_lisp_gpe_adjacency, ladj, + LISP_GPE_ADJ_FORMAT_FLAG_NONE); + })); + /* *INDENT-ON* */ + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) = +{ + .path = "show lisp gpe adjacency", + .function = lisp_gpe_adjacency_show, +}; +/* *INDENT-ON* */ + +#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256) +#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20) + +static clib_error_t * +lisp_gpe_adj_module_init (vlib_main_t * vm) +{ + BV (clib_bihash_init) (&lisp_adj_db, + "Adjacency Neighbour table", + LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS, + LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE); + + fib_node_register_type (FIB_NODE_TYPE_LISP_ADJ, &lisp_gpe_tuennel_vft); + return (NULL); +} + 
+VLIB_INIT_FUNCTION (lisp_gpe_adj_module_init) +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h new file mode 100644 index 00000000000..f6a66cddf0b --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_adjacency.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE adjacencys. + * + */ + +#ifndef LISP_GPE_ADJACENCY_H__ +#define LISP_GPE_ADJACENCY_H__ + +#include <vnet/fib/fib_node.h> +#include <vnet/lisp-gpe/lisp_gpe.h> + +/** + * @brief A LISP GPE Adjacency. + * + * A adjacency represents peer on an L3 sub-interface to which to send traffic. + * adjacencies are thus present in the EID space. + * The peer is identified by the key:{remote-rloc, sub-interface}, which is + * equivalent to the usal adjacency key {next-hop, interface}. So curiously + * the rloc address from the underlay is used as a next hop address in the overlay + * This is OK because: + * 1 - the RLOC is unique in the underlay AND there is only one underlay VRF per + * overlay + * 2 - the RLOC may overlap with an address in the overlay, but we do not create + * an adj-fib (i.e. 
a route in the overlay FIB for the rloc) + * + * + */ +typedef struct lisp_gpe_adjacency_t_ +{ + /** + * The LISP adj is a part of the FIB control plane graph. + */ + fib_node_t fib_node; + + /** + * remote RLOC. The adjacency's next-hop + */ + ip_address_t remote_rloc; + + /** + * The VNI. Used in combination with the local-rloc to get the sub-interface + */ + u32 vni; + + /** + * The number of locks/reference counts on the adjacency. + */ + u32 locks; + + /** + * The index of the LISP L3 subinterface + */ + u32 lisp_l3_sub_index; + + /** + * The SW IF index of the sub-interface this adjacency uses. + * Cached for convenience from the LISP L3 sub-interface + */ + u32 sw_if_index; + + /** + * The index of the LISP GPE tunnel that provides the transport + * in the underlay. + */ + u32 tunnel_index; + + /** + * Per-link-type FIB adjacencies contributed. + * These will be used as a result of a FIB lookup. + */ + adj_index_t adjs[FIB_LINK_NUM]; + + /** + * This adjacency is a child of the FIB entry to reach the RLOC. + * This is so when the reachability of that RLOC changes, we can restack + * the FIB adjacnecies. 
+ */ + u32 fib_entry_child_index; + + /** + * LISP header fields in HOST byte order + */ + u8 flags; + u8 ver_res; + u8 res; + u8 next_protocol; + +} lisp_gpe_adjacency_t; + +extern index_t lisp_gpe_adjacency_find_or_create_and_lock (const + locator_pair_t * + pair, + u32 rloc_fib_index, + u32 vni); + +extern void lisp_gpe_adjacency_unlock (index_t l3si); + +extern const lisp_gpe_adjacency_t *lisp_gpe_adjacency_get (index_t l3si); + +/** + * @brief Flags for displaying the adjacency + */ +typedef enum lisp_gpe_adjacency_format_flags_t_ +{ + LISP_GPE_ADJ_FORMAT_FLAG_NONE, + LISP_GPE_ADJ_FORMAT_FLAG_DETAIL, +} lisp_gpe_adjacency_format_flags_t; + +extern u8 *format_lisp_gpe_adjacency (u8 * s, va_list * args); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c new file mode 100644 index 00000000000..220802b17c7 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.c @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief LISP sub-interfaces. 
+ * + */ +#include <vnet/lisp-gpe/lisp_gpe_sub_interface.h> +#include <vnet/fib/fib_table.h> +#include <vnet/interface.h> + +/** + * @brief Pool of all l3-sub-interfaces + */ +static lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_pool; + +/** + * A DB of all LISP L3 sub-interfaces. The key is:{VNI,l-RLOC} + */ +static uword *lisp_gpe_sub_interfaces; + +/** + * A DB of all VNET L3 sub-interfaces. The key is:{VNI,l-RLOC} + * Used in the data-plane for interface lookup on decap. + */ +uword *lisp_gpe_sub_interfaces_sw_if_index; + +/** + * The next available sub-interface ID. FIXME + */ +static u32 lisp_gpe_sub_interface_id; + + +static index_t +lisp_gpe_sub_interface_db_find (const ip_address_t * lrloc, u32 vni) +{ + uword *p; + + lisp_gpe_sub_interface_key_t key = { + .local_rloc = *lrloc, + .vni = clib_host_to_net_u32 (vni), + }; + + p = hash_get_mem (lisp_gpe_sub_interfaces, &key); + + if (NULL == p) + return (INDEX_INVALID); + else + return (p[0]); +} + +static void +lisp_gpe_sub_interface_db_insert (const lisp_gpe_sub_interface_t * l3s) +{ + hash_set_mem (lisp_gpe_sub_interfaces, + &l3s->key, l3s - lisp_gpe_sub_interface_pool); + hash_set_mem (lisp_gpe_sub_interfaces_sw_if_index, + &l3s->key, l3s->sw_if_index); +} + +static void +lisp_gpe_sub_interface_db_remove (const lisp_gpe_sub_interface_t * l3s) +{ + hash_unset_mem (lisp_gpe_sub_interfaces, &l3s->key); + hash_unset_mem (lisp_gpe_sub_interfaces_sw_if_index, &l3s->key); +} + +lisp_gpe_sub_interface_t * +lisp_gpe_sub_interface_get_i (index_t l3si) +{ + return (pool_elt_at_index (lisp_gpe_sub_interface_pool, l3si)); +} + +static void +lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id) +{ + fib_node_index_t fib_index; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); + ASSERT (FIB_NODE_INDEX_INVALID != fib_index); + + vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index); + ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + // FIXME. 
enable When we get an adj + ip4_sw_interface_enable_disable (sw_if_index, 1); + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id); + ASSERT (FIB_NODE_INDEX_INVALID != fib_index); + + vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index); + ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index; + // FIXME. enable When we get an adj + ip6_sw_interface_enable_disable (sw_if_index, 1); +} + +static void +lisp_gpe_sub_interface_unset_table (u32 sw_if_index, u32 table_id) +{ + ip4_main.fib_index_by_sw_if_index[sw_if_index] = 0; + ip4_sw_interface_enable_disable (sw_if_index, 0); + + ip6_main.fib_index_by_sw_if_index[sw_if_index] = 0; + ip6_sw_interface_enable_disable (sw_if_index, 0); +} + +index_t +lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc, + u32 overlay_table_id, u32 vni) +{ + lisp_gpe_sub_interface_t *l3s; + lisp_gpe_main_t *lgm = &lisp_gpe_main; + index_t l3si; + + l3si = lisp_gpe_sub_interface_db_find (lrloc, vni); + + if (INDEX_INVALID == l3si) + { + vnet_hw_interface_t *hi; + clib_error_t *error; + u32 sub_sw_if_index; + uword *p; + + /* + * find the main interface from the VNI + */ + p = hash_get (lgm->l3_ifaces.sw_if_index_by_vni, vni); + + if (NULL == p) + return (INDEX_INVALID); + + hi = vnet_get_hw_interface (vnet_get_main (), p[0]); + + if (NULL == hi) + return (INDEX_INVALID); + + vnet_sw_interface_t sub_itf_template = { + .type = VNET_SW_INTERFACE_TYPE_SUB, + .sup_sw_if_index = hi->sw_if_index, + .sub.id = lisp_gpe_sub_interface_id++, + }; + + error = vnet_create_sw_interface (vnet_get_main (), + &sub_itf_template, &sub_sw_if_index); + + if (NULL != error) + return (INDEX_INVALID); + + pool_get (lisp_gpe_sub_interface_pool, l3s); + memset (l3s, 0, sizeof (*l3s)); + l3s->key = clib_mem_alloc (sizeof (*l3s->key)); + memset (l3s->key, 0, sizeof (*l3s->key)); + + l3s->key->local_rloc = *lrloc; + l3s->key->vni = clib_host_to_net_u32 (vni); + l3s->main_sw_if_index = hi->sw_if_index; + 
l3s->sw_if_index = sub_sw_if_index; + l3s->eid_table_id = overlay_table_id; + + l3si = (l3s - lisp_gpe_sub_interface_pool); + + lisp_gpe_sub_interface_set_table (l3s->sw_if_index, l3s->eid_table_id); + vnet_sw_interface_set_flags (vnet_get_main (), + l3s->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + lisp_gpe_sub_interface_db_insert (l3s); + } + else + { + l3s = lisp_gpe_sub_interface_get_i (l3si); + } + + l3s->locks++; + + return (l3si); +} + +void +lisp_gpe_sub_interface_unlock (index_t l3si) +{ + lisp_gpe_sub_interface_t *l3s; + + l3s = lisp_gpe_sub_interface_get_i (l3si); + + l3s->locks--; + + if (0 == l3s->locks) + { + lisp_gpe_sub_interface_unset_table (l3s->sw_if_index, + l3s->eid_table_id); + + vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0); + vnet_delete_sub_interface (l3s->sw_if_index); + + lisp_gpe_sub_interface_db_remove (l3s); + + clib_mem_free (l3s->key); + pool_put (lisp_gpe_sub_interface_pool, l3s); + } +} + +const lisp_gpe_sub_interface_t * +lisp_gpe_sub_interface_get (index_t l3si) +{ + return (lisp_gpe_sub_interface_get_i (l3si)); +} + +u8 * +format_lisp_gpe_sub_interface (u8 * s, va_list ap) +{ + lisp_gpe_sub_interface_t *l3s = va_arg (ap, lisp_gpe_sub_interface_t *); + vnet_main_t *vnm = vnet_get_main (); + + s = format (s, "%=16U", + format_vnet_sw_interface_name, + vnm, vnet_get_sw_interface (vnm, l3s->sw_if_index)); + s = format (s, "%=10d", clib_net_to_host_u32 (l3s->key->vni)); + s = format (s, "%=12d", l3s->sw_if_index); + s = format (s, "%U", format_ip_address, &l3s->key->local_rloc); + + return (s); +} + +/** CLI command to show LISP-GPE interfaces. 
*/ +static clib_error_t * +lisp_gpe_sub_interface_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + lisp_gpe_sub_interface_t *l3s; + + vlib_cli_output (vm, "%=16s%=10s%=12s%s", "Name", "VNI", "SW IF Index", + "local RLOC"); + + /* *INDENT-OFF* */ + pool_foreach (l3s, lisp_gpe_sub_interface_pool, + ({ + vlib_cli_output (vm, "%U", format_lisp_gpe_sub_interface, l3s); + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lisp_gpe_sub_interface_command) = { + .path = "show lisp gpe sub-interface", + .short_help = "show lisp gpe sub-interface", + .function = lisp_gpe_sub_interface_show, +}; +/* *INDENT-ON* */ + +static clib_error_t * +lisp_gpe_sub_interface_module_init (vlib_main_t * vm) +{ + lisp_gpe_sub_interfaces = + hash_create_mem (0, + sizeof (lisp_gpe_sub_interface_key_t), sizeof (uword)); + lisp_gpe_sub_interfaces_sw_if_index = + hash_create_mem (0, + sizeof (lisp_gpe_sub_interface_key_t), sizeof (uword)); + + return (NULL); +} + +VLIB_INIT_FUNCTION (lisp_gpe_sub_interface_module_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h new file mode 100644 index 00000000000..ad942f415d1 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_sub_interface.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief LISP sub-interfaces.
+ *
+ */
+
+#ifndef __LISP_GPE_SUB_INTERFACE_H__
+#define __LISP_GPE_SUB_INTERFACE_H__
+
+#include <vnet/lisp-gpe/lisp_gpe.h>
+
+/**
+ * A Key for lookup in the L3 sub-interface DB
+ */
+typedef struct lisp_gpe_sub_interface_key_t_
+{
+  /**
+   * The local-RLOC. This is the interface's 'source' address.
+   */
+  ip_address_t local_rloc;
+
+  /**
+   * The VNI. In network byte order!
+   */
+  u32 vni;
+} lisp_gpe_sub_interface_key_t;
+
+/**
+ * @brief A LISP L3 sub-interface
+ *
+ * A LISP sub-interface is a multi-access interface, whose local address is a
+ * single local-RLOC. Adjacencies that form on this sub-interface represent
+ * remote RLOCs.
+ * This is analogous to an ethernet interface.
+ * As with all interface types it can only be present in one VRF, hence a
+ * LISP sub-interface is per-local-rloc and per-VNI.
+ */
+typedef struct lisp_gpe_sub_interface_t_
+{
+  /**
+   * The interface's key in the DB; rloc & vni.
+   * The key is allocated from the heap so it can be used in the hash-table;
+   * if it were part of the pool object it would be subject to realloc,
+   * which would invalidate the pointer held by the hash-table.
+   */
+  lisp_gpe_sub_interface_key_t *key;
+
+  /**
+   * The Table-ID in the overlay that this interface is bound to.
+   */
+  u32 eid_table_id;
+
+  /**
+   * A reference counting lock on the number of users of this interface.
+   * When this count drops to 0 the interface is deleted.
+   */
+  u32 locks;
+
+  /**
+   * The SW if index assigned to this sub-interface
+   */
+  u32 sw_if_index;
+
+  /**
+   * The SW IF index assigned to the main interface of which this is a sub.
+ */ + u32 main_sw_if_index; +} lisp_gpe_sub_interface_t; + +extern index_t lisp_gpe_sub_interface_find_or_create_and_lock (const + ip_address_t * + lrloc, + u32 + eid_table_id, + u32 vni); + +extern u8 *format_lisp_gpe_sub_interface (u8 * s, va_list ap); + +extern void lisp_gpe_sub_interface_unlock (index_t itf); + +extern const lisp_gpe_sub_interface_t *lisp_gpe_sub_interface_get (index_t + itf); + +/** + * A DB of all L3 sub-interfaces. The key is:{VNI,l-RLOC} + */ +extern uword *lisp_gpe_sub_interfaces_sw_if_index; + +/** + * @brief + * Get a VNET L3 interface matching the local-RLOC and VNI + * Called from the data-plane + */ +always_inline u32 +lisp_gpe_sub_interface_find_ip6 (const ip6_address_t * addr, u32 vni) +{ + lisp_gpe_sub_interface_key_t key; + const uword *p; + + key.local_rloc.ip.v6.as_u64[0] = addr->as_u64[0]; + key.local_rloc.ip.v6.as_u64[1] = addr->as_u64[1]; + key.local_rloc.version = IP6; + key.vni = vni; + + p = hash_get_mem (&lisp_gpe_sub_interfaces_sw_if_index, &key); + + if (NULL != p) + return p[0]; + + return (INDEX_INVALID); +} + +/** + * @brief + * Get a VNET L3 interface matching the local-RLOC and VNI + * Called from the data-plane + */ +always_inline index_t +lisp_gpe_sub_interface_find_ip4 (const ip4_address_t * addr, u32 vni) +{ + lisp_gpe_sub_interface_key_t key; + const uword *p; + + key.local_rloc.ip.v4.as_u32 = addr->as_u32; + key.local_rloc.version = IP4; + key.vni = vni; + + p = hash_get_mem (&lisp_gpe_sub_interfaces_sw_if_index, &key); + + if (NULL != p) + return p[0]; + + return (INDEX_INVALID); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ + +#endif diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c new file mode 100644 index 00000000000..0aecc0a1aa4 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels. + * + */ +#include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-gpe/lisp_gpe_tunnel.h> +#include <vnet/lisp-gpe/lisp_gpe_adjacency.h> + +#include <vnet/fib/fib_table.h> + +/** + * @brief Pool of all LISP tunnels + */ +static lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_pool; + +/** + * @brief a DB of all tunnels + */ +static uword *lisp_gpe_tunnel_db; + +/** + * @brief Compute IP-UDP-GPE sub-tunnel encap/rewrite header. + * + * @param[in] t Parent of the sub-tunnel. + * @param[in] st Sub-tunnel. + * @param[in] lp Local and remote locators used in the encap header. + * + * @return 0 on success. 
+ */ +u8 * +lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt, + const lisp_gpe_adjacency_t * ladj, + lisp_gpe_next_protocol_e payload_proto) +{ + lisp_gpe_header_t *lisp0; + u8 *rw = 0; + int len; + + if (IP4 == ip_addr_version (&lgt->key->lcl)) + { + ip4_udp_lisp_gpe_header_t *h0; + ip4_header_t *ip0; + + len = sizeof (*h0); + + vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip4_udp_lisp_gpe_header_t *) rw; + + /* Fixed portion of the (outer) ip4 header */ + ip0 = &h0->ip4; + ip0->ip_version_and_header_length = 0x45; + ip0->ttl = 254; + ip0->protocol = IP_PROTOCOL_UDP; + + /* we fix up the ip4 header length and checksum after-the-fact */ + ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl); + ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt); + ip0->checksum = ip4_header_checksum (ip0); + + /* UDP header, randomize src port on something, maybe? */ + h0->udp.src_port = clib_host_to_net_u16 (4341); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); + + /* LISP-gpe header */ + lisp0 = &h0->lisp; + } + else + { + ip6_udp_lisp_gpe_header_t *h0; + ip6_header_t *ip0; + + len = sizeof (*h0); + + vec_validate_aligned (rw, len - 1, CLIB_CACHE_LINE_BYTES); + + h0 = (ip6_udp_lisp_gpe_header_t *) rw; + + /* Fixed portion of the (outer) ip6 header */ + ip0 = &h0->ip6; + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + ip0->hop_limit = 254; + ip0->protocol = IP_PROTOCOL_UDP; + + /* we fix up the ip6 header length after-the-fact */ + ip_address_copy_addr (&ip0->src_address, &lgt->key->lcl); + ip_address_copy_addr (&ip0->dst_address, &lgt->key->rmt); + + /* UDP header, randomize src port on something, maybe? 
*/ + h0->udp.src_port = clib_host_to_net_u16 (4341); + h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_lisp_gpe); + + /* LISP-gpe header */ + lisp0 = &h0->lisp; + } + + lisp0->flags = ladj->flags; + lisp0->ver_res = 0; + lisp0->res = 0; + lisp0->next_protocol = payload_proto; + lisp0->iid = clib_host_to_net_u32 (ladj->vni); + + return (rw); +} + +static lisp_gpe_tunnel_2_t * +lisp_gpe_tunnel_db_find (const lisp_gpe_tunnel_key_t * key) +{ + uword *p; + + p = hash_get_mem (lisp_gpe_tunnel_db, (void *) key); + + if (NULL != p) + { + return (pool_elt_at_index (lisp_gpe_tunnel_pool, p[0])); + } + return (NULL); +} + +lisp_gpe_tunnel_2_t * +lisp_gpe_tunnel_get_i (index_t lgti) +{ + return (pool_elt_at_index (lisp_gpe_tunnel_pool, lgti)); +} + +index_t +lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * pair, + u32 rloc_fib_index) +{ + lisp_gpe_tunnel_key_t key = { + .lcl = pair->lcl_loc, + .rmt = pair->rmt_loc, + .fib_index = rloc_fib_index, + }; + lisp_gpe_tunnel_2_t *lgt; + fib_prefix_t pfx; + + lgt = lisp_gpe_tunnel_db_find (&key); + + if (NULL == lgt) + { + pool_get (lisp_gpe_tunnel_pool, lgt); + memset (lgt, 0, sizeof (*lgt)); + + lgt->key = clib_mem_alloc (sizeof (*lgt->key)); + memset (lgt->key, 0, sizeof (*lgt->key)); + + lgt->key->rmt = pair->rmt_loc; + lgt->key->lcl = pair->lcl_loc; + lgt->key->fib_index = rloc_fib_index; + + /* + * source the FIB entry for the RLOC so we can track its forwarding + * chain + */ + ip_address_to_fib_prefix (&lgt->key->rmt, &pfx); + + lgt->fib_entry_index = fib_table_entry_special_add (rloc_fib_index, + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + + hash_set_mem (lisp_gpe_tunnel_db, &lgt->key, + (lgt - lisp_gpe_tunnel_pool)); + } + + lgt->locks++; + + return (lgt - lisp_gpe_tunnel_pool); +} + +void +lisp_gpe_tunnel_unlock (index_t lgti) +{ + lisp_gpe_tunnel_2_t *lgt; + + lgt = lisp_gpe_tunnel_get_i (lgti); + lgt->locks--; + + if (0 == lgt->locks) + { + hash_unset_mem (lisp_gpe_tunnel_db, 
&lgt->key); + clib_mem_free (lgt->key); + pool_put (lisp_gpe_tunnel_pool, lgt); + } +} + +const lisp_gpe_tunnel_2_t * +lisp_gpe_tunnel_get (index_t lgti) +{ + return (lisp_gpe_tunnel_get_i (lgti)); +} + +/** Format LISP-GPE tunnel. */ +u8 * +format_lisp_gpe_tunnel (u8 * s, va_list * args) +{ + lisp_gpe_tunnel_2_t *lgt = va_arg (*args, lisp_gpe_tunnel_2_t *); + + s = format (s, "tunnel %d\n", lgt - lisp_gpe_tunnel_pool); + s = format (s, " fib-index: %d, locks:%d \n", + lgt->key->fib_index, lgt->locks); + s = format (s, " lisp ver 0\n"); + + s = format (s, " locator-pair:\n"); + s = format (s, " local: %U remote: %U\n", + format_ip_address, &lgt->key->lcl, + format_ip_address, &lgt->key->rmt); + s = format (s, " RLOC FIB entry: %d\n", lgt->fib_entry_index); + + return s; +} + +/** + * CLI command to show LISP-GPE tunnels. + */ +static clib_error_t * +show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + lisp_gpe_tunnel_2_t *lgt; + index_t index; + + if (pool_elts (lisp_gpe_tunnel_pool) == 0) + vlib_cli_output (vm, "No lisp-gpe tunnels configured..."); + + if (unformat (input, "%d", &index)) + { + lgt = lisp_gpe_tunnel_get_i (index); + vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt); + } + else + { + /* *INDENT-OFF* */ + pool_foreach (lgt, lisp_gpe_tunnel_pool, + ({ + vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt); + })); + /* *INDENT-ON* */ + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) = +{ + .path = "show lisp gpe tunnel", + .function = show_lisp_gpe_tunnel_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +lisp_gpe_tunnel_module_init (vlib_main_t * vm) +{ + lisp_gpe_tunnel_db = hash_create_mem (0, + sizeof (lisp_gpe_fwd_entry_key_t), + sizeof (uword)); + + return (NULL); +} + +VLIB_INIT_FUNCTION (lisp_gpe_tunnel_module_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style 
"gnu") + * End: + */ diff --git a/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h new file mode 100644 index 00000000000..d417fa991a8 --- /dev/null +++ b/vnet/vnet/lisp-gpe/lisp_gpe_tunnel.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Common utility functions for IPv4, IPv6 and L2 LISP-GPE tunnels. + * + */ + +#ifndef LISP_GPE_TUNNEL_H__ +#define LISP_GPE_TUNNEL_H__ + +#include <vnet/lisp-gpe/lisp_gpe.h> +#include <vnet/lisp-gpe/lisp_gpe_packet.h> + +/** + * Forward declaration + */ +struct lisp_gpe_adjacency_t_; + +/** + * A Key for a tunnel + */ +typedef struct lisp_gpe_tunnel_key_t_ +{ + ip_address_t rmt; + ip_address_t lcl; + u32 fib_index; +} lisp_gpe_tunnel_key_t; + +/** + * @brief A LISP GPE Tunnel. + * + * A tunnel represents an associatation between a local and remote RLOC. + * As such it represents a unique LISP rewrite. + */ +typedef struct lisp_gpe_tunnel_2_t_ +{ + /** + * RLOC pair and rloc fib_index. This is the tunnel's key. 
+ */ + lisp_gpe_tunnel_key_t *key; + + /** + * number of reference counting locks + */ + u32 locks; + + /** + * the FIB entry through which the remote rloc is reachable + s */ + fib_node_index_t fib_entry_index; +} lisp_gpe_tunnel_2_t; + +extern index_t lisp_gpe_tunnel_find_or_create_and_lock (const locator_pair_t * + pair, + u32 rloc_fib_index); + +extern void lisp_gpe_tunnel_unlock (index_t lgti); + +extern const lisp_gpe_tunnel_2_t *lisp_gpe_tunnel_get (index_t lgti); + +extern u8 *lisp_gpe_tunnel_build_rewrite (const lisp_gpe_tunnel_2_t * lgt, + const struct lisp_gpe_adjacency_t_ + *ladj, + lisp_gpe_next_protocol_e + payload_proto); +extern u8 *format_lisp_gpe_tunnel (u8 * s, va_list * args); + +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c index 5b5bae54720..74a99057c90 100644 --- a/vnet/vnet/map/map.c +++ b/vnet/vnet/map/map.c @@ -15,6 +15,11 @@ * limitations under the License. 
*/ +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/adj/adj.h> +#include <vnet/map/map_dpo.h> + #include "map.h" #ifndef __SSE4_2__ @@ -159,15 +164,12 @@ map_create_domain (ip4_address_t * ip4_prefix, u8 psid_offset, u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags) { + u8 suffix_len, suffix_shift; map_main_t *mm = &map_main; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; + dpo_id_t dpo_v4 = DPO_NULL; + dpo_id_t dpo_v6 = DPO_NULL; + fib_node_index_t fei; map_domain_t *d; - ip_adjacency_t adj; - ip4_add_del_route_args_t args4; - ip6_add_del_route_args_t args6; - u8 suffix_len, suffix_shift; - uword *p; /* Sanity check on the src prefix length */ if (flags & MAP_DOMAIN_TRANSLATION) @@ -236,73 +238,82 @@ map_create_domain (ip4_address_t * ip4_prefix, d->psid_mask = (1 << d->psid_length) - 1; d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; - /* Init IP adjacency */ - memset (&adj, 0, sizeof (adj)); - adj.explicit_fib_index = ~0; - adj.lookup_next_index = - (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T : - IP_LOOKUP_NEXT_MAP; - p = (uword *) & adj.rewrite_data[0]; - *p = (uword) (*map_domain_index); + /* MAP data-plane object */ + if (d->flags & MAP_DOMAIN_TRANSLATION) + map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); + else + map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); + + /* Create ip4 route */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = d->ip4_prefix_len, + .fp_addr = { + .ip4 = d->ip4_prefix, + } + , + }; + fib_table_entry_special_dpo_add (0, &pfx, + FIB_SOURCE_MAP, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4); + dpo_reset (&dpo_v4); - if (ip4_get_route (im4, 0, 0, (u8 *) ip4_prefix, ip4_prefix_len)) + /* + * Multiple MAP domains may share same source IPv6 TEP. + * In this case the route will exist and be MAP sourced. 
+ * Find the adj (if any) already contributed and modify it + */ + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = d->ip6_src_len, + .fp_addr = { + .ip6 = d->ip6_src, + } + , + }; + fei = fib_table_lookup_exact_match (0, &pfx6); + + if (FIB_NODE_INDEX_INVALID != fei) { - clib_warning ("IPv4 route already defined: %U/%d", format_ip4_address, - ip4_prefix, ip4_prefix_len); - pool_put (mm->domains, d); - return -1; - } + dpo_id_t dpo = DPO_NULL; - /* Create ip4 adjacency */ - memset (&args4, 0, sizeof (args4)); - args4.table_index_or_table_id = 0; - args4.flags = IP4_ROUTE_FLAG_ADD; - args4.dst_address.as_u32 = ip4_prefix->as_u32; - args4.dst_address_length = ip4_prefix_len; + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_MAP, &dpo)) + { + /* + * modify the existing MAP to indicate it's shared + * skip to route add. + */ + const dpo_id_t *md_dpo; + map_dpo_t *md; - args4.adj_index = ~0; - args4.add_adj = &adj; - args4.n_add_adj = 1; - ip4_add_del_route (im4, &args4); + ASSERT (DPO_LOAD_BALANCE == dpo.dpoi_type); - /* Multiple MAP domains may share same source IPv6 TEP */ - u32 ai = ip6_get_route (im6, 0, 0, ip6_src, ip6_src_len); - if (ai > 0) - { - ip_lookup_main_t *lm6 = &ip6_main.lookup_main; - ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); - if (adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP && - adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP_T) - { - clib_warning ("BR source address already assigned: %U", - format_ip6_address, ip6_src); - pool_put (mm->domains, d); - return -1; - } - /* Shared source */ - p = (uword *) & adj6->rewrite_data[0]; - p[0] = ~0; + md_dpo = load_balance_get_bucket (dpo.dpoi_index, 0); + md = map_dpo_get (md_dpo->dpoi_index); - /* - * Add refcount, so we don't accidentially delete the route - * underneath someone - */ - p[1]++; + md->md_domain = ~0; + dpo_copy (&dpo_v6, md_dpo); + dpo_reset (&dpo); + + goto route_add; + } } + + if (d->flags & MAP_DOMAIN_TRANSLATION) + map_t_dpo_create (DPO_PROTO_IP6, 
*map_domain_index, &dpo_v6); else - { - /* Create ip6 adjacency. */ - memset (&args6, 0, sizeof (args6)); - args6.table_index_or_table_id = 0; - args6.flags = IP6_ROUTE_FLAG_ADD; - args6.dst_address.as_u64[0] = ip6_src->as_u64[0]; - args6.dst_address.as_u64[1] = ip6_src->as_u64[1]; - args6.dst_address_length = ip6_src_len; - args6.adj_index = ~0; - args6.add_adj = &adj; - args6.n_add_adj = 1; - ip6_add_del_route (im6, &args6); - } + map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); + +route_add: + /* + * Create ip6 route. This is a reference counted add. If the prefix + * already exists and is MAP sourced, it is now MAP source n+1 times + * and will need to be removed n+1 times. + */ + fib_table_entry_special_dpo_add (0, &pfx6, + FIB_SOURCE_MAP, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); + dpo_reset (&dpo_v6); /* Validate packet/byte counters */ map_domain_counter_lock (mm); @@ -332,12 +343,7 @@ int map_delete_domain (u32 map_domain_index) { map_main_t *mm = &map_main; - ip4_main_t *im4 = &ip4_main; - ip6_main_t *im6 = &ip6_main; map_domain_t *d; - ip_adjacency_t adj; - ip4_add_del_route_args_t args4; - ip6_add_del_route_args_t args6; if (pool_is_free_index (mm->domains, map_domain_index)) { @@ -348,47 +354,26 @@ map_delete_domain (u32 map_domain_index) d = pool_elt_at_index (mm->domains, map_domain_index); - memset (&adj, 0, sizeof (adj)); - adj.explicit_fib_index = ~0; - adj.lookup_next_index = - (d->flags & MAP_DOMAIN_TRANSLATION) ? 
IP_LOOKUP_NEXT_MAP_T : - IP_LOOKUP_NEXT_MAP; - - /* Delete ip4 adjacency */ - memset (&args4, 0, sizeof (args4)); - args4.table_index_or_table_id = 0; - args4.flags = IP4_ROUTE_FLAG_DEL; - args4.dst_address.as_u32 = d->ip4_prefix.as_u32; - args4.dst_address_length = d->ip4_prefix_len; - args4.adj_index = 0; - args4.add_adj = &adj; - args4.n_add_adj = 0; - ip4_add_del_route (im4, &args4); - - /* Delete ip6 adjacency */ - u32 ai = ip6_get_route (im6, 0, 0, &d->ip6_src, d->ip6_src_len); - if (ai > 0) - { - ip_lookup_main_t *lm6 = &ip6_main.lookup_main; - ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); - - uword *p = (uword *) & adj6->rewrite_data[0]; - /* Delete route when no other domains use this source */ - if (p[1] == 0) - { - memset (&args6, 0, sizeof (args6)); - args6.table_index_or_table_id = 0; - args6.flags = IP6_ROUTE_FLAG_DEL; - args6.dst_address.as_u64[0] = d->ip6_src.as_u64[0]; - args6.dst_address.as_u64[1] = d->ip6_src.as_u64[1]; - args6.dst_address_length = d->ip6_src_len; - args6.adj_index = 0; - args6.add_adj = &adj; - args6.n_add_adj = 0; - ip6_add_del_route (im6, &args6); - } - p[1]--; - } + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = d->ip4_prefix_len, + .fp_addr = { + .ip4 = d->ip4_prefix, + } + , + }; + fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP); + + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = d->ip6_src_len, + .fp_addr = { + .ip6 = d->ip6_src, + } + , + }; + fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP); + /* Deleting rules */ if (d->rules) clib_mem_free (d->rules); @@ -448,17 +433,18 @@ static void map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6) { map_main_t *mm = &map_main; - ip4_main_t *im4 = &ip4_main; ip6_main_t *im6 = &ip6_main; if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) { - mm->adj6_index = ip6_fib_lookup_with_table (im6, 0, ip6); + // FIXME NOT an ADJ + mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6); clib_warning ("FIB lookup results in: 
%u", mm->adj6_index); } if (ip4->as_u32 != 0) { - mm->adj4_index = ip4_fib_lookup_with_table (im4, 0, ip4, 0); + // FIXME NOT an ADJ + mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4); clib_warning ("FIB lookup results in: %u", mm->adj4_index); } } @@ -2156,6 +2142,8 @@ map_init (vlib_main_t * vm) mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; map_ip6_reass_reinit (NULL, NULL); + map_dpo_module_init (); + return 0; } diff --git a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h index fb532291f8a..b76891b69b3 100644 --- a/vnet/vnet/map/map.h +++ b/vnet/vnet/map/map.h @@ -17,6 +17,11 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> #include <vlib/vlib.h> +#include <vnet/fib/fib_types.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj.h> +#include <vnet/map/map_dpo.h> +#include <vnet/dpo/load_balance.h> #define MAP_SKIP_IP6_LOOKUP 1 @@ -105,6 +110,9 @@ typedef struct u8 ip4_prefix_len; } map_domain_t; +_Static_assert ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES), + "MAP domain fits in one cacheline"); + #define MAP_REASS_INDEX_NONE ((u16)0xffff) /* @@ -381,16 +389,17 @@ map_get_ip4 (ip6_address_t *addr) * Get the MAP domain from an IPv4 lookup adjacency. */ static_always_inline map_domain_t * -ip4_map_get_domain (u32 adj_index, u32 *map_domain_index) +ip4_map_get_domain (u32 mdi, + u32 *map_domain_index) { map_main_t *mm = &map_main; - ip_lookup_main_t *lm = &ip4_main.lookup_main; - ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index); - ASSERT(adj); - uword *p = (uword *)adj->rewrite_data; - ASSERT(p); - *map_domain_index = p[0]; - return pool_elt_at_index(mm->domains, p[0]); + map_dpo_t *md; + + md = map_dpo_get(mdi); + + ASSERT(md); + *map_domain_index = md->md_domain; + return pool_elt_at_index(mm->domains, *map_domain_index); } /* @@ -399,36 +408,34 @@ ip4_map_get_domain (u32 adj_index, u32 *map_domain_index) * The IPv4 address is used otherwise. 
*/ static_always_inline map_domain_t * -ip6_map_get_domain (u32 adj_index, ip4_address_t *addr, +ip6_map_get_domain (u32 mdi, ip4_address_t *addr, u32 *map_domain_index, u8 *error) { map_main_t *mm = &map_main; - ip4_main_t *im4 = &ip4_main; - ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + map_dpo_t *md; /* * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) */ #ifdef MAP_NONSHARED_DOMAIN_ENABLED - ip_lookup_main_t *lm6 = &ip6_main.lookup_main; - ip_adjacency_t *adj = ip_get_adjacency(lm6, adj_index); - ASSERT(adj); - uword *p = (uword *)adj->rewrite_data; - ASSERT(p); - *map_domain_index = p[0]; - if (p[0] != ~0) - return pool_elt_at_index(mm->domains, p[0]); -#endif + md = map_dpo_get(mdi); - u32 ai = ip4_fib_lookup_with_table(im4, 0, addr, 0); - ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai); - if (PREDICT_TRUE(adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP || - adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP_T)) { - uword *p = (uword *)adj4->rewrite_data; - *map_domain_index = p[0]; + ASSERT(md); + *map_domain_index = md->md_domain; + if (*map_domain_index != ~0) return pool_elt_at_index(mm->domains, *map_domain_index); - } +#endif + + u32 lbi = ip4_fib_forwarding_lookup(0, addr); + const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0); + if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || + dpo->dpoi_type == map_t_dpo_type)) + { + md = map_dpo_get(dpo->dpoi_index); + *map_domain_index = md->md_domain; + return pool_elt_at_index(mm->domains, *map_domain_index); + } *error = MAP_ERROR_NO_DOMAIN; return NULL; } diff --git a/vnet/vnet/map/map_dpo.c b/vnet/vnet/map/map_dpo.c new file mode 100644 index 00000000000..df2b5fa4197 --- /dev/null +++ b/vnet/vnet/map/map_dpo.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/ip/ip.h> +#include <vnet/map/map_dpo.h> + +/** + * pool of all MPLS Label DPOs + */ +map_dpo_t *map_dpo_pool; + +/** + * The register MAP DPO type + */ +dpo_type_t map_dpo_type; +dpo_type_t map_t_dpo_type; + +static map_dpo_t * +map_dpo_alloc (void) +{ + map_dpo_t *md; + + pool_get_aligned(map_dpo_pool, md, CLIB_CACHE_LINE_BYTES); + memset(md, 0, sizeof(*md)); + + return (md); +} + +static index_t +map_dpo_get_index (map_dpo_t *md) +{ + return (md - map_dpo_pool); +} + +void +map_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo) +{ + map_dpo_t *md; + + md = map_dpo_alloc(); + md->md_domain = domain_index; + md->md_proto = dproto; + + dpo_set(dpo, + map_dpo_type, + dproto, + map_dpo_get_index(md)); +} + +void +map_t_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo) +{ + map_dpo_t *md; + + md = map_dpo_alloc(); + md->md_domain = domain_index; + md->md_proto = dproto; + + dpo_set(dpo, + map_t_dpo_type, + dproto, + map_dpo_get_index(md)); +} + + +u8* +format_map_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = va_arg (*args, u32); + map_dpo_t *md; + + md = map_dpo_get(index); + + return (format(s, "map:[%d]:%U domain:%d", + index, + format_dpo_proto, md->md_proto, + md->md_domain)); +} + +u8* +format_map_t_dpo (u8 *s, va_list *args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED(u32 indent) = 
va_arg (*args, u32); + map_dpo_t *md; + + md = map_dpo_get(index); + + return (format(s, "map-t:[%d]:%U domain:%d", + index, + format_dpo_proto, md->md_proto, + md->md_domain)); +} + + +static void +map_dpo_lock (dpo_id_t *dpo) +{ + map_dpo_t *md; + + md = map_dpo_get(dpo->dpoi_index); + + md->md_locks++; +} + +static void +map_dpo_unlock (dpo_id_t *dpo) +{ + map_dpo_t *md; + + md = map_dpo_get(dpo->dpoi_index); + + md->md_locks--; + + if (0 == md->md_locks) + { + pool_put(map_dpo_pool, md); + } +} + +const static dpo_vft_t md_vft = { + .dv_lock = map_dpo_lock, + .dv_unlock = map_dpo_unlock, + .dv_format = format_map_dpo, +}; + +const static char* const map_ip4_nodes[] = +{ + "ip4-map", + NULL, +}; +const static char* const map_ip6_nodes[] = +{ + "ip6-map", + NULL, +}; + +const static char* const * const map_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = map_ip4_nodes, + [DPO_PROTO_IP6] = map_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +const static dpo_vft_t md_t_vft = { + .dv_lock = map_dpo_lock, + .dv_unlock = map_dpo_unlock, + .dv_format = format_map_t_dpo, +}; + +const static char* const map_t_ip4_nodes[] = +{ + "ip4-map-t", + NULL, +}; +const static char* const map_t_ip6_nodes[] = +{ + "ip6-map-t", + NULL, +}; + +const static char* const * const map_t_nodes[DPO_PROTO_NUM] = +{ + [DPO_PROTO_IP4] = map_t_ip4_nodes, + [DPO_PROTO_IP6] = map_t_ip6_nodes, + [DPO_PROTO_MPLS] = NULL, +}; + +void +map_dpo_module_init (void) +{ + map_dpo_type = dpo_register_new_type(&md_vft, map_nodes); + map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes); +} diff --git a/vnet/vnet/map/map_dpo.h b/vnet/vnet/map/map_dpo.h new file mode 100644 index 00000000000..be510dbaea6 --- /dev/null +++ b/vnet/vnet/map/map_dpo.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MAP_DPO_H__ +#define __MAP_DPO_H__ + +#include <vnet/vnet.h> +#include <vnet/dpo/dpo.h> + +/** + * A representation of a MAP DPO + */ +typedef struct map_dpo_t +{ + /** + * The dat-plane protocol + */ + dpo_proto_t md_proto; + + /** + * the MAP domain index + */ + u32 md_domain; + + /** + * Number of locks/users of the label + */ + u16 md_locks; +} map_dpo_t; + +extern void map_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo); +extern void map_t_dpo_create (dpo_proto_t dproto, + u32 domain_index, + dpo_id_t *dpo); + +extern u8* format_map_dpo(u8 *s, va_list *args); + +/* + * Encapsulation violation for fast data-path access + */ +extern map_dpo_t *map_dpo_pool; +extern dpo_type_t map_dpo_type; +extern dpo_type_t map_t_dpo_type; + +static inline map_dpo_t * +map_dpo_get (index_t index) +{ + return (pool_elt_at_index(map_dpo_pool, index)); +} + +extern void map_dpo_module_init(void); + +#endif diff --git a/vnet/vnet/mcast/mcast_test.c b/vnet/vnet/mcast/mcast_test.c index 4561d7cdc00..be80c9fc982 100644 --- a/vnet/vnet/mcast/mcast_test.c +++ b/vnet/vnet/mcast/mcast_test.c @@ -40,91 +40,91 @@ mcast_test_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - u8 *rewrite_data; - mcast_test_main_t * mtm = &mcast_test_main; - mcast_main_t * mcm = mtm->mcast_main; - ip_adjacency_t adj; - u32 adj_index; - mcast_group_t * g; - mcast_group_member_t * member; - unformat_input_t _line_input, * line_input = &_line_input; - ip4_address_t dst_addr, zero; - ip4_main_t * im = &ip4_main; - 
ip_lookup_main_t * lm = &im->lookup_main; - - /* Get a line of input. */ - if (! unformat_user (input, unformat_line_input, line_input)) - return 0; - - pool_get (mcm->groups, g); - memset (g, 0, sizeof (*g)); - - while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) - { - vnet_hw_interface_t *hw; - u32 next, sw_if_index; - - if (unformat (line_input, "%U", unformat_vnet_sw_interface, - mtm->vnet_main, &sw_if_index)) - { - vec_add2 (g->members, member, 1); - member->tx_sw_if_index = sw_if_index; + /* u8 *rewrite_data; */ + /* mcast_test_main_t * mtm = &mcast_test_main; */ + /* mcast_main_t * mcm = mtm->mcast_main; */ + /* ip_adjacency_t adj; */ + /* u32 adj_index; */ + /* mcast_group_t * g; */ + /* mcast_group_member_t * member; */ + /* unformat_input_t _line_input, * line_input = &_line_input; */ + /* ip4_address_t dst_addr, zero; */ + /* ip4_main_t * im = &ip4_main; */ + /* ip_lookup_main_t * lm = &im->lookup_main; */ + + /* /\* Get a line of input. *\/ */ + /* if (! unformat_user (input, unformat_line_input, line_input)) */ + /* return 0; */ + + /* pool_get (mcm->groups, g); */ + /* memset (g, 0, sizeof (*g)); */ + + /* while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) */ + /* { */ + /* vnet_hw_interface_t *hw; */ + /* u32 next, sw_if_index; */ + + /* if (unformat (line_input, "%U", unformat_vnet_sw_interface, */ + /* mtm->vnet_main, &sw_if_index)) */ + /* { */ + /* vec_add2 (g->members, member, 1); */ + /* member->tx_sw_if_index = sw_if_index; */ - hw = vnet_get_sup_hw_interface (mtm->vnet_main, - sw_if_index); + /* hw = vnet_get_sup_hw_interface (mtm->vnet_main, */ + /* sw_if_index); */ - next = vlib_node_add_next (mtm->vlib_main, - mcast_prep_node.index, - hw->output_node_index); + /* next = vlib_node_add_next (mtm->vlib_main, */ + /* mcast_prep_node.index, */ + /* hw->output_node_index); */ - /* Required to be the same next index... 
*/ - vlib_node_add_next_with_slot (mtm->vlib_main, - mcast_recycle_node.index, - hw->output_node_index, next); - member->prep_and_recycle_node_next_index = next; - } - else - { - return unformat_parse_error (line_input); - } - } - - if (vec_len (g->members) == 0) - { - pool_put (mcm->groups, g); - vlib_cli_output (vm, "no group members specified"); - return 0; - } - - - adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - adj.mcast_group_index = g - mcm->groups; - rewrite_data = format (0, "abcdefg"); - - vnet_rewrite_for_tunnel - (mtm->vnet_main, - (u32)~0, /* tx_sw_if_index, we dont know yet */ - ip4_rewrite_node.index, - mcast_prep_node.index, - &adj.rewrite_header, - rewrite_data, vec_len(rewrite_data)); - - ip_add_adjacency (lm, &adj, 1 /* one adj */, - &adj_index); + /* /\* Required to be the same next index... *\/ */ + /* vlib_node_add_next_with_slot (mtm->vlib_main, */ + /* mcast_recycle_node.index, */ + /* hw->output_node_index, next); */ + /* member->prep_and_recycle_node_next_index = next; */ + /* } */ + /* else */ + /* { */ + /* return unformat_parse_error (line_input); */ + /* } */ + /* } */ + + /* if (vec_len (g->members) == 0) */ + /* { */ + /* pool_put (mcm->groups, g); */ + /* vlib_cli_output (vm, "no group members specified"); */ + /* return 0; */ + /* } */ + + + /* adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; */ + /* adj.mcast_group_index = g - mcm->groups; */ + /* rewrite_data = format (0, "abcdefg"); */ + + /* vnet_rewrite_for_tunnel */ + /* (mtm->vnet_main, */ + /* (u32)~0, /\* tx_sw_if_index, we dont know yet *\/ */ + /* ip4_rewrite_node.index, */ + /* mcast_prep_node.index, */ + /* &adj.rewrite_header, */ + /* rewrite_data, vec_len(rewrite_data)); */ + + /* ip_add_adjacency (lm, &adj, 1 /\* one adj *\/, */ + /* &adj_index); */ - dst_addr.as_u32 = clib_host_to_net_u32 (0x0a000002); - zero.as_u32 = 0; - - ip4_add_del_route_next_hop (im, - IP4_ROUTE_FLAG_ADD, - &dst_addr, - 24 /* mask width */, - &zero /* no next hop */, + /* dst_addr.as_u32 = 
clib_host_to_net_u32 (0x0a000002); */ + /* zero.as_u32 = 0; */ + + /* ip4_add_del_route_next_hop (im, */ + /* IP4_ROUTE_FLAG_ADD, */ + /* &dst_addr, */ + /* 24 /\* mask width *\/, */ + /* &zero /\* no next hop *\/, */ - 0, // next hop sw if index - 1, // weight - adj_index, - 0 /* explicit fib 0 */); + /* 0, // next hop sw if index */ + /* 1, // weight */ + /* adj_index, */ + /* 0 /\* explicit fib 0 *\/); */ return 0; } diff --git a/vnet/vnet/misc.c b/vnet/vnet/misc.c index c0729f73c16..4c8c4cad5a7 100644 --- a/vnet/vnet/misc.c +++ b/vnet/vnet/misc.c @@ -38,6 +38,7 @@ */ #include <vnet/vnet.h> +#include <vnet/ip/ip.h> vnet_main_t vnet_main; @@ -79,6 +80,9 @@ vnet_main_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, vnet_interface_init))) return error; + if ((error = vlib_call_init_function (vm, fib_module_init))) + return error; + if ((error = vlib_call_init_function (vm, ip_main_init))) return error; @@ -88,6 +92,9 @@ vnet_main_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, ip6_lookup_init))) return error; + if ((error = vlib_call_init_function (vm, mpls_init))) + return error; + vnm->vlib_main = vm; hw_if_index = vnet_register_interface @@ -98,6 +105,11 @@ vnet_main_init (vlib_main_t * vm) vnm->local_interface_hw_if_index = hw_if_index; vnm->local_interface_sw_if_index = hw->sw_if_index; + /* the local interface is used as an input interface when decapping from + * an IPSEC tunnel. so it needs to be IP enabled */ + ip4_sw_interface_enable_disable (hw->sw_if_index, 1); + ip6_sw_interface_enable_disable (hw->sw_if_index, 1); + return 0; } diff --git a/vnet/vnet/mpls-gre/node.c b/vnet/vnet/mpls-gre/node.c deleted file mode 100644 index 474e2e2a9a4..00000000000 --- a/vnet/vnet/mpls-gre/node.c +++ /dev/null @@ -1,363 +0,0 @@ -/* - * node.c: mpls-o-gre decap processing - * - * Copyright (c) 2012-2014 Cisco and/or its affiliates. 
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vlib/vlib.h> -#include <vnet/pg/pg.h> -#include <vnet/mpls-gre/mpls.h> - -typedef struct { - u32 next_index; - u32 decap_index; - u32 tx_fib_index; - u32 label_host_byte_order; -} mpls_rx_trace_t; - -u8 * format_mpls_rx_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - mpls_rx_trace_t * t = va_arg (*args, mpls_rx_trace_t *); - char * next_name; - - next_name = "BUG!"; - -#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; - foreach_mpls_input_next; -#undef _ - - s = format (s, "MPLS: next %s, lookup fib index %d, decap index %d\n", - next_name, t->next_index, t->tx_fib_index, t->decap_index); - if (t->decap_index != ~0) - { - s = format (s, " label %d", - vnet_mpls_uc_get_label(t->label_host_byte_order)); - } - return s; -} - -vlib_node_registration_t mpls_input_node; - -typedef struct { - u32 last_label; - u32 last_inner_fib_index; - u32 last_outer_fib_index; - mpls_main_t * mpls_main; -} mpls_input_runtime_t; - -static inline uword -mpls_input_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_mpls_o_gre) -{ - u32 n_left_from, next_index, * from, * to_next; - ip4_main_t * im = &ip4_main; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - mpls_input_runtime_t * rt; - mpls_main_t * 
mm; - - rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); - mm = rt->mpls_main; - /* - * Force an initial lookup every time, in case the control-plane - * changed the label->FIB mapping. - */ - rt->last_label = ~0; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, - to_next, n_left_to_next); - -#if 0 - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - mpls_unicast_header_t * h0, * h1; - int li0, li1; - u64 key0, key1; - u32 label0, label1; - u32 next0, next1; - uword * p0, * p1; - u32 fib_index0, fib_index1; - - /* Prefetch next iteration. */ - { - vlib_buffer_t * p2, * p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* $$$$$ dual loop me */ - - vlib_buffer_advance (b0, sizeof (*h0)); - vlib_buffer_advance (b1, sizeof (*h1)); - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t * b0; - mpls_unicast_header_t * h0; - u32 label0; - u32 next0; - u64 key0; - uword * p0; - u32 rx_fib_index0; - mpls_decap_t *d0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - h0 = vlib_buffer_get_current (b0); - - if (is_mpls_o_gre) - { - rx_fib_index0 = vec_elt (im->fib_index_by_sw_if_index, - 
vnet_buffer(b0)->sw_if_index[VLIB_RX]); - } - else - { -#if 0 - /* If separate RX numbering spaces are required... */ - rx_fib_index0 = vec_elt (mm->fib_index_by_sw_if_index, - vnet_buffer(b0)->sw_if_index[VLIB_RX]); -#endif - rx_fib_index0 = 0; - } - - next0 = ~0; - d0 = 0; - - /* - * Expect the control-plane team to squeal like pigs. - * If they don't program a decap label entry for each - * and every label in the stack, packets go into the trash... - */ - - do - { - label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); - /* TTL expired? */ - if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) - { - next0 = MPLS_INPUT_NEXT_DROP; - b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; - break; - } - - key0 = ((u64)rx_fib_index0<<32) - | ((u64)vnet_mpls_uc_get_label (label0)<<12) - | ((u64)vnet_mpls_uc_get_s (label0)<<8); - - /* - * The architecture crew claims that we won't need - * separate ip4, ip6, mpls-o-ethernet label numbering - * spaces. Use the low 8 key bits as a discriminator. - */ - - p0 = hash_get (mm->mpls_decap_by_rx_fib_and_label, key0); - if (p0 == 0) - { - next0 = MPLS_INPUT_NEXT_DROP; - b0->error = node->errors[MPLS_ERROR_BAD_LABEL]; - break; - } - d0 = pool_elt_at_index (mm->decaps, p0[0]); - next0 = d0->next_index; - vnet_buffer(b0)->sw_if_index[VLIB_TX] = d0->tx_fib_index; - vlib_buffer_advance (b0, sizeof (*h0)); - h0 = vlib_buffer_get_current (b0); - } while (!vnet_mpls_uc_get_s(label0)); - - if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) - { - mpls_rx_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->next_index = next0; - tr->decap_index = d0 ? 
d0 - mm->decaps : ~0; - tr->tx_fib_index = vnet_buffer(b0)->sw_if_index[VLIB_TX]; - tr->label_host_byte_order = label0; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, mpls_input_node.index, - MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); - return from_frame->n_vectors; -} - -static uword -mpls_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return mpls_input_inline (vm, node, from_frame, 1 /* is mpls-o-gre */); -} - -static char * mpls_error_strings[] = { -#define mpls_error(n,s) s, -#include "error.def" -#undef mpls_error -}; - -VLIB_REGISTER_NODE (mpls_input_node) = { - .function = mpls_input, - .name = "mpls-gre-input", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof(mpls_input_runtime_t), - - .n_errors = MPLS_N_ERROR, - .error_strings = mpls_error_strings, - - .n_next_nodes = MPLS_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, - foreach_mpls_input_next -#undef _ - }, - - .format_buffer = format_mpls_gre_header_with_length, - .format_trace = format_mpls_rx_trace, - .unformat_buffer = unformat_mpls_gre_header, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input) - -static uword -mpls_ethernet_input (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) -{ - return mpls_input_inline (vm, node, from_frame, 0 /* is mpls-o-gre */); -} - - -VLIB_REGISTER_NODE (mpls_ethernet_input_node) = { - .function = mpls_ethernet_input, - .name = "mpls-ethernet-input", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - - .runtime_data_bytes = sizeof(mpls_input_runtime_t), - - .n_errors = MPLS_N_ERROR, - .error_strings = mpls_error_strings, - - .n_next_nodes = MPLS_INPUT_N_NEXT, - .next_nodes = { -#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, - foreach_mpls_input_next -#undef _ - }, - - .format_buffer = format_mpls_eth_header_with_length, - .format_trace = format_mpls_rx_trace, - .unformat_buffer = unformat_mpls_gre_header, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (mpls_ethernet_input_node, mpls_ethernet_input) - -static void -mpls_setup_nodes (vlib_main_t * vm) -{ - vlib_node_t * n = vlib_get_node (vm, mpls_input_node.index); - pg_node_t * pn = pg_get_node (mpls_input_node.index); - mpls_input_runtime_t * rt; - - n->format_buffer = format_mpls_gre_header_with_length; - n->unformat_buffer = unformat_mpls_gre_header; - pn->unformat_edit = unformat_pg_mpls_header; - - rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); - rt->last_label = (u32) ~0; - rt->last_inner_fib_index = 0; - rt->last_outer_fib_index = 0; - rt->mpls_main = &mpls_main; - - n = vlib_get_node (vm, mpls_ethernet_input_node.index); - - n->format_buffer = format_mpls_eth_header_with_length; - - n->unformat_buffer = 0; /* unformat_mpls_ethernet_header; */ - - rt = vlib_node_get_runtime_data (vm, mpls_ethernet_input_node.index); - rt->last_label = (u32) ~0; - rt->last_inner_fib_index = 0; - rt->last_outer_fib_index = 0; - rt->mpls_main = &mpls_main; - - ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, - mpls_ethernet_input_node.index); -} - -static clib_error_t * mpls_input_init (vlib_main_t * vm) -{ - clib_error_t * error; - - error = vlib_call_init_function (vm, mpls_init); - if (error) - clib_error_report (error); - - mpls_setup_nodes (vm); - - return 0; -} - -VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/vnet/vnet/mpls-gre/packet.h b/vnet/vnet/mpls-gre/packet.h deleted file mode 100644 index baa01818f09..00000000000 --- a/vnet/vnet/mpls-gre/packet.h +++ 
/dev/null @@ -1,49 +0,0 @@ -#ifndef included_vnet_mpls_packet_h -#define included_vnet_mpls_packet_h - -/* - * MPLS packet format - * - * Copyright (c) 2012 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -typedef struct { - /* Label: top 20 bits [in network byte order] */ - /* Experimental: 3 bits ... */ - /* S (bottom of label stack): 1 bit */ - /* TTL: 8 bits */ - u32 label_exp_s_ttl; -} mpls_unicast_header_t; - -static inline u32 vnet_mpls_uc_get_label (u32 label_exp_s_ttl) -{ - return (label_exp_s_ttl>>12); -} - -static inline u32 vnet_mpls_uc_get_exp (u32 label_exp_s_ttl) -{ - return ((label_exp_s_ttl>>9) & 0x7); -} - -static inline u32 vnet_mpls_uc_get_s (u32 label_exp_s_ttl) -{ - return ((label_exp_s_ttl>>8) & 0x1); -} - -static inline u32 vnet_mpls_uc_get_ttl (u32 label_exp_s_ttl) -{ - return (label_exp_s_ttl & 0xff); -} - -#endif /* included_vnet_mpls_packet_h */ diff --git a/vnet/vnet/mpls-gre/error.def b/vnet/vnet/mpls/error.def index 424ab50a030..de8b9665dfb 100644 --- a/vnet/vnet/mpls-gre/error.def +++ b/vnet/vnet/mpls/error.def @@ -26,3 +26,6 @@ mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set") mpls_error (BAD_LABEL, "invalid FIB id in label") mpls_error (NOT_IP4, "non-ip4 packets dropped") mpls_error (DISALLOWED_FIB, "disallowed FIB id") +mpls_error (NOT_ENABLED, "MPLS not enabled") +mpls_error (DROP, "MPLS DROP DPO") +mpls_error (PUNT, "MPLS PUNT DPO") diff --git a/vnet/vnet/mpls-gre/interface.c 
b/vnet/vnet/mpls/interface.c index dd61a803f4c..9ef4c293494 100644 --- a/vnet/vnet/mpls-gre/interface.c +++ b/vnet/vnet/mpls/interface.c @@ -18,7 +18,10 @@ #include <vnet/vnet.h> #include <vnet/pg/pg.h> #include <vnet/gre/gre.h> -#include <vnet/mpls-gre/mpls.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/adj/adj_midchain.h> +#include <vnet/dpo/classify_dpo.h> static uword mpls_gre_set_rewrite (vnet_main_t * vnm, u32 sw_if_index, @@ -525,24 +528,23 @@ VNET_HW_INTERFACE_CLASS (mpls_eth_hw_interface_class) = { .set_rewrite = mpls_eth_set_rewrite, }; -#define foreach_mpls_post_rewrite_next \ - _ (IP4_LOOKUP, "ip4-lookup") - -typedef enum { -#define _(s,n) MPLS_POST_REWRITE_NEXT_##s, - foreach_mpls_post_rewrite_next -#undef _ - MPLS_POST_REWRITE_N_NEXT, -} mpls_post_rewrite_next_t; - +/** + * A conversion of DPO next object tpyes to VLIB graph next nodes from + * the mpls_post_rewrite node + */ +static const int dpo_next_2_mpls_post_rewrite[DPO_LAST] = { + [DPO_LOAD_BALANCE] = IP_LOOKUP_NEXT_LOAD_BALANCE, +}; static uword mpls_post_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { + ip4_main_t * im = &ip4_main; + ip_lookup_main_t * lm = &im->lookup_main; u32 n_left_from, next_index, * from, * to_next; - u16 old_l0 = 0, old_l1 = 0; + u16 old_l0 = 0; //, old_l1 = 0; from = vlib_frame_vector_args (from_frame); n_left_from = from_frame->n_vectors; @@ -556,78 +558,103 @@ mpls_post_rewrite (vlib_main_t * vm, vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t * b0, * b1; - ip4_header_t * ip0, * ip1; - u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; - u32 next1 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; - u16 new_l0, new_l1; - ip_csum_t sum0, sum1; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t * p2, * p3; + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* u32 bi0, bi1; */ + /* vlib_buffer_t * b0, * b1; */ + /* ip4_header_t * ip0, * ip1; */ + /* u32 next0; */ + /* u32 next1; */ + /* u16 new_l0, new_l1, adj_index0, adj_index1; */ + /* ip_csum_t sum0, sum1; */ + /* ip_adjacency_t *adj0, *adj1; */ + + /* /\* Prefetch next iteration. *\/ */ + /* { */ + /* vlib_buffer_t * p2, * p3; */ + + /* p2 = vlib_get_buffer (vm, from[2]); */ + /* p3 = vlib_get_buffer (vm, from[3]); */ + + /* vlib_prefetch_buffer_header (p2, LOAD); */ + /* vlib_prefetch_buffer_header (p3, LOAD); */ + + /* CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ + /* CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); */ + /* } */ + + /* bi0 = from[0]; */ + /* bi1 = from[1]; */ + /* to_next[0] = bi0; */ + /* to_next[1] = bi1; */ + /* from += 2; */ + /* to_next += 2; */ + /* n_left_to_next -= 2; */ + /* n_left_from -= 2; */ + + + /* b0 = vlib_get_buffer (vm, bi0); */ + /* b1 = vlib_get_buffer (vm, bi1); */ + /* ip0 = vlib_buffer_get_current (b0); */ + /* ip1 = vlib_buffer_get_current (b1); */ + + /* /\* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] *\/ */ - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); + /* /\* set the GRE (outer) ip packet length, fix the bloody checksum *\/ */ + /* sum0 = ip0->checksum; */ + /* sum1 = ip1->checksum; */ - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); + /* /\* old_l0, old_l1 always 0, see the rewrite setup *\/ */ + /* new_l0 = */ + /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); */ + /* new_l1 = */ + /* clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); */ + + /* sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, */ + /* length /\* changed member *\/); */ + /* sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, */ + /* length /\* changed member *\/); */ + /* ip0->checksum = ip_csum_fold (sum0); */ 
+ /* ip1->checksum = ip_csum_fold (sum1); */ + /* ip0->length = new_l0; */ + /* ip1->length = new_l1; */ - CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD); - } + /* /\* replace the TX adj in the packet with the next in the chain *\/ */ + /* adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; */ + /* adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; */ - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + /* ASSERT(adj0->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ + /* ASSERT(adj1->sub_type.midchain.adj_index != ADJ_INDEX_INVALID); */ - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* Note: the tunnel rewrite sets up sw_if_index[VLIB_TX] */ + /* adj_index0 = adj0->sub_type.midchain.adj_index; */ + /* adj_index1 = adj1->sub_type.midchain.adj_index; */ - /* set the GRE (outer) ip packet length, fix the bloody checksum */ - sum0 = ip0->checksum; - sum1 = ip1->checksum; + /* vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; */ + /* vnet_buffer (b1)->ip.adj_index[VLIB_TX] = adj_index1; */ - /* old_l0, old_l1 always 0, see the rewrite setup */ - new_l0 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)); - new_l1 = - clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b1)); - - sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t, - length /* changed member */); - sum1 = ip_csum_update (sum1, old_l1, new_l1, ip4_header_t, - length /* changed member */); - ip0->checksum = ip_csum_fold (sum0); - ip1->checksum = ip_csum_fold (sum1); - ip0->length = new_l0; - ip1->length = new_l1; + /* /\* get the next adj in the chain to determine the next graph node *\/ */ + /* adj0 = 
ip_get_adjacency (lm, adj_index0); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + + /* next0 = adj0->lookup_next_index; */ + /* next1 = adj1->lookup_next_index; */ + + /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ + /* to_next, n_left_to_next, */ + /* bi0, bi1, next0, next1); */ + /* } */ - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - while (n_left_from > 0 && n_left_to_next > 0) { + ip_adjacency_t * adj0; u32 bi0; vlib_buffer_t * b0; ip4_header_t * ip0; - u32 next0 = MPLS_POST_REWRITE_NEXT_IP4_LOOKUP; - u16 new_l0; + u32 next0; + u16 new_l0, adj_index0; ip_csum_t sum0; bi0 = from[0]; @@ -653,6 +680,20 @@ mpls_post_rewrite (vlib_main_t * vm, ip0->checksum = ip_csum_fold (sum0); ip0->length = new_l0; + /* replace the TX adj in the packet with the next in the chain */ + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + + ASSERT(adj_index0); + + adj0 = ip_get_adjacency (lm, adj_index0); + ASSERT(adj0->sub_type.midchain.next_dpo.dpoi_index != ADJ_INDEX_INVALID); + adj_index0 = adj0->sub_type.midchain.next_dpo.dpoi_index; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = adj_index0; + + /* get the next adj in the chain to determine the next graph node */ + ASSERT(0); + next0 = 0; //adj0->sub_type.midchain.next_dpo.dpoi_next; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); @@ -673,12 +714,8 @@ VLIB_REGISTER_NODE (mpls_post_rewrite_node) = { .runtime_data_bytes = 0, - .n_next_nodes = MPLS_POST_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [MPLS_POST_REWRITE_NEXT_##s] = n, - foreach_mpls_post_rewrite_next -#undef _ - }, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, }; VLIB_NODE_FUNCTION_MULTIARCH (mpls_post_rewrite_node, mpls_post_rewrite) @@ -725,237 +762,512 @@ static u8 * mpls_gre_rewrite (mpls_main_t *mm, mpls_gre_tunnel_t * t) return (rewrite_data); } -int vnet_mpls_gre_add_del_tunnel 
(ip4_address_t *src, - ip4_address_t *dst, - ip4_address_t *intfc, - u32 mask_width, - u32 inner_fib_id, u32 outer_fib_id, - u32 * tunnel_sw_if_index, - u8 l2_only, - u8 is_add) +u8 +mpls_sw_interface_is_enabled (u32 sw_if_index) { - ip4_main_t * im = &ip4_main; - ip_lookup_main_t * lm = &im->lookup_main; - mpls_main_t * mm = &mpls_main; - vnet_main_t * vnm = vnet_get_main(); - ip4_address_t zero; - mpls_gre_tunnel_t *tp; - int need_route_add_del = 1; - u32 inner_fib_index = 0; - u32 outer_fib_index = 0; - ip_adjacency_t adj; - u32 adj_index; - u8 * rewrite_data; - int found_tunnel = 0; - mpls_encap_t * e = 0; - u32 hw_if_index = ~0; - vnet_hw_interface_t * hi; - u32 slot; - u32 dummy; - - zero.as_u32 = 0; - - /* No questions, no answers */ - if (tunnel_sw_if_index == 0) - tunnel_sw_if_index = &dummy; + mpls_main_t * mm = &mpls_main; - *tunnel_sw_if_index = ~0; + if (vec_len(mm->mpls_enabled_by_sw_if_index) < sw_if_index) + return (0); - if (inner_fib_id != (u32)~0) + return (mm->mpls_enabled_by_sw_if_index[sw_if_index]); +} + +void +mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable) +{ + mpls_interface_state_change_callback_t *callback; + vlib_main_t * vm = vlib_get_main(); + ip_config_main_t * cm = &mm->rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + u32 lookup_feature_index; + fib_node_index_t lfib_index; + u32 ci; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) { - uword * p; - - p = hash_get (im->fib_index_by_table_id, inner_fib_id); - if (! 
p) - return VNET_API_ERROR_NO_SUCH_INNER_FIB; - inner_fib_index = p[0]; - } + if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; - if (outer_fib_id != 0) + lfib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + vec_validate(mm->fib_index_by_sw_if_index, 0); + mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index; + } + else { - uword * p; - - p = hash_get (im->fib_index_by_table_id, outer_fib_id); - if (! p) - return VNET_API_ERROR_NO_SUCH_FIB; - outer_fib_index = p[0]; + ASSERT(mm->mpls_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index]) + return; + + fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index], + FIB_PROTOCOL_MPLS); } - /* suppress duplicate mpls interface generation. */ - pool_foreach (tp, mm->gre_tunnels, - ({ - /* - * If we have a tunnel which matches (src, dst, intfc/mask) - * AND the expected route is in the FIB, it's a dup + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + lookup_feature_index = mm->mpls_rx_feature_lookup; + + if (is_enable) + ci = vnet_config_add_feature (vm, vcm, + ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + ci = vnet_config_del_feature (vm, vcm, ci, + lookup_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + + /* + * notify all interested clients of the change of state. 
+ */ + vec_foreach(callback, mm->mpls_interface_state_change_callbacks) + { + (*callback)(sw_if_index, is_enable); + } +} + +static mpls_gre_tunnel_t * +mpls_gre_tunnel_from_fib_node (fib_node_t *node) +{ +#if (CLIB_DEBUG > 0) + ASSERT(FIB_NODE_TYPE_MPLS_GRE_TUNNEL == node->fn_type); +#endif + return ((mpls_gre_tunnel_t*)node); +} + +/* + * mpls_gre_tunnel_stack + * + * 'stack' (resolve the recursion for) the tunnel's midchain adjacency + */ +static void +mpls_gre_tunnel_stack (mpls_gre_tunnel_t *mgt) +{ + /* + * find the adjacency that is contributed by the FIB entry + * that this tunnel resovles via, and use it as the next adj + * in the midchain */ - if (!memcmp (&tp->tunnel_src, src, sizeof (*src)) - && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) - && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) - && tp->inner_fib_index == inner_fib_index) - { - ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); - uword * hash = fib->adj_index_by_dst_address[mask_width]; - uword key = intfc->as_u32 & im->fib_masks[mask_width]; - uword *p = hash_get (hash, key); + adj_nbr_midchain_stack(mgt->adj_index, + fib_entry_contribute_ip_forwarding(mgt->fei)); +} - found_tunnel = 1; +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +mpls_gre_tunnel_back_walk (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + mpls_gre_tunnel_stack(mpls_gre_tunnel_from_fib_node(node)); - if (is_add) - { - /* A dup, and the route is in the fib. Done */ - if (p || l2_only) - return 1; - else - { - /* Reinstall the route (and other stuff) */ - e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, - dst->as_u32); - if (e == 0) - return VNET_API_ERROR_NO_SUCH_LABEL; - goto reinstall_it; - } - } - else - { - /* Delete, the route is already gone? 
*/ - if (!p) - need_route_add_del = 0; - goto add_del_route; - } + return (FIB_NODE_BACK_WALK_CONTINUE); +} - } - })); - - /* Delete, and we can't find the tunnel */ - if (is_add == 0 && found_tunnel == 0) - return VNET_API_ERROR_NO_SUCH_ENTRY; +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t* +mpls_gre_tunnel_fib_node_get (fib_node_index_t index) +{ + mpls_gre_tunnel_t * mgt; + mpls_main_t * mm; - e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32); - if (e == 0) - return VNET_API_ERROR_NO_SUCH_LABEL; + mm = &mpls_main; + mgt = pool_elt_at_index(mm->gre_tunnels, index); - pool_get(mm->gre_tunnels, tp); - memset (tp, 0, sizeof (*tp)); + return (&mgt->mgt_node); +} - if (vec_len (mm->free_gre_sw_if_indices) > 0) +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +mpls_gre_tunnel_last_lock_gone (fib_node_t *node) +{ + /* + * The MPLS GRE tunnel is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT(0); +} + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t mpls_gre_vft = { + .fnv_get = mpls_gre_tunnel_fib_node_get, + .fnv_last_lock = mpls_gre_tunnel_last_lock_gone, + .fnv_back_walk = mpls_gre_tunnel_back_walk, +}; + +static mpls_gre_tunnel_t * +mpls_gre_tunnel_find (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 inner_fib_index) +{ + mpls_main_t * mm = &mpls_main; + mpls_gre_tunnel_t *tp; + int found_tunnel = 0; + + /* suppress duplicate mpls interface generation. 
*/ + pool_foreach (tp, mm->gre_tunnels, + ({ + /* + * If we have a tunnel which matches (src, dst, intfc/mask) + * AND the expected route is in the FIB, it's a dup + */ + if (!memcmp (&tp->tunnel_src, src, sizeof (*src)) + && !memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) + && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) + && tp->inner_fib_index == inner_fib_index) + { + found_tunnel = 1; + goto found; + } + })); + +found: + if (found_tunnel) { - hw_if_index = - mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1]; - _vec_len (mm->free_gre_sw_if_indices) -= 1; - hi = vnet_get_hw_interface (vnm, hw_if_index); - hi->dev_instance = tp - mm->gre_tunnels; - hi->hw_instance = tp - mm->gre_tunnels; + return (tp); } - else + return (NULL); +} + +int mpls_gre_tunnel_add (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_index, + u32 outer_fib_index, + u32 * tunnel_sw_if_index, + u8 l2_only) +{ + mpls_main_t * mm = &mpls_main; + gre_main_t * gm = &gre_main; + vnet_main_t * vnm = vnet_get_main(); + mpls_gre_tunnel_t *tp; + ip_adjacency_t adj; + u8 * rewrite_data; + mpls_encap_t * e = 0; + u32 hw_if_index = ~0; + vnet_hw_interface_t * hi; + u32 slot; + const ip46_address_t zero_nh = { + .ip4.as_u32 = 0, + }; + + tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index); + + /* Add, duplicate */ + if (NULL != tp) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + e = mpls_encap_by_fib_and_dest (mm, inner_fib_index, dst->as_u32); + if (e == 0) + return VNET_API_ERROR_NO_SUCH_LABEL; + + pool_get(mm->gre_tunnels, tp); + memset (tp, 0, sizeof (*tp)); + fib_node_init(&tp->mgt_node, + FIB_NODE_TYPE_MPLS_GRE_TUNNEL); + + if (vec_len (mm->free_gre_sw_if_indices) > 0) { - hw_if_index = vnet_register_interface - (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels, - mpls_gre_hw_interface_class.index, - tp - mm->gre_tunnels); - hi = vnet_get_hw_interface (vnm, hw_if_index); + hw_if_index = + 
mm->free_gre_sw_if_indices[vec_len(mm->free_gre_sw_if_indices)-1]; + _vec_len (mm->free_gre_sw_if_indices) -= 1; + hi = vnet_get_hw_interface (vnm, hw_if_index); + hi->dev_instance = tp - mm->gre_tunnels; + hi->hw_instance = tp - mm->gre_tunnels; + } + else + { + hw_if_index = vnet_register_interface + (vnm, mpls_gre_device_class.index, tp - mm->gre_tunnels, + mpls_gre_hw_interface_class.index, + tp - mm->gre_tunnels); + hi = vnet_get_hw_interface (vnm, hw_if_index); + + /* ... to make the IP and L2 x-connect cases identical */ + slot = vlib_node_add_named_next_with_slot + (vnm->vlib_main, hi->tx_node_index, + "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + + ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + } + + *tunnel_sw_if_index = hi->sw_if_index; + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + vec_validate(ip4_main.fib_index_by_sw_if_index, *tunnel_sw_if_index); + ip4_main.fib_index_by_sw_if_index[*tunnel_sw_if_index] = outer_fib_index; + + tp->hw_if_index = hw_if_index; + + /* bind the MPLS and IPv4 FIBs to the interface and enable */ + vec_validate(mm->fib_index_by_sw_if_index, hi->sw_if_index); + mm->fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index; + mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 1); + ip4_main.fib_index_by_sw_if_index[hi->sw_if_index] = inner_fib_index; + ip4_sw_interface_enable_disable(hi->sw_if_index, 1); + + tp->tunnel_src.as_u32 = src->as_u32; + tp->tunnel_dst.as_u32 = dst->as_u32; + tp->intfc_address.as_u32 = intfc->as_u32; + tp->mask_width = mask_width; + tp->inner_fib_index = inner_fib_index; + tp->outer_fib_index = outer_fib_index; + tp->encap_index = e - mm->encaps; + tp->l2_only = l2_only; + + /* Add the tunnel to the hash table of all GRE tunnels */ + u64 key = (u64)src->as_u32 << 32 | (u64)dst->as_u32; + + ASSERT(NULL == hash_get (gm->tunnel_by_key, key)); + hash_set (gm->tunnel_by_key, key, tp - mm->gre_tunnels); + + /* Create the adjacency and add 
to v4 fib */ + memset(&adj, 0, sizeof (adj)); + adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; + + rewrite_data = mpls_gre_rewrite (mm, tp); + if (rewrite_data == 0) + { + if (*tunnel_sw_if_index != ~0) + { + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + } + pool_put (mm->gre_tunnels, tp); + return VNET_API_ERROR_NO_SUCH_LABEL; + } - /* ... to make the IP and L2 x-connect cases identical */ - slot = vlib_node_add_named_next_with_slot - (vnm->vlib_main, hi->tx_node_index, - "mpls-post-rewrite", MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + /* Save a copy of the rewrite data for L2 x-connect */ + vec_free (tp->rewrite_data); - ASSERT (slot == MPLS_GRE_OUTPUT_NEXT_POST_REWRITE); + tp->rewrite_data = rewrite_data; + + if (!l2_only) + { + /* + * source the FIB entry for the tunnel's destination + * and become a child thereof. The tunnel will then get poked + * when the forwarding for the entry updates, and the tunnel can + * re-stack accordingly + */ + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = *dst, + } + }; + + tp->fei = fib_table_entry_special_add(outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + tp->sibling_index = fib_entry_child_add(tp->fei, + FIB_NODE_TYPE_MPLS_GRE_TUNNEL, + tp - mm->gre_tunnels); + + /* + * create and update the midchain adj this tunnel sources. + * This is the adj the route we add below will resolve to. 
+ */ + tp->adj_index = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + FIB_LINK_IP4, + &zero_nh, + hi->sw_if_index); + + adj_nbr_midchain_update_rewrite(tp->adj_index, + mpls_post_rewrite_node.index, + rewrite_data); + mpls_gre_tunnel_stack(tp); + + /* + * Update the route for the tunnel's subnet to point through the tunnel + */ + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + }, + }; + + fib_table_entry_update_one_path(inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + &zero_nh, + hi->sw_if_index, + ~0, // invalid fib index + 1, + MPLS_LABEL_INVALID, + FIB_ROUTE_PATH_FLAG_NONE); } + + return 0; +} + +static int +mpls_gre_tunnel_del (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_index, + u32 outer_fib_index, + u32 * tunnel_sw_if_index, + u8 l2_only) +{ + mpls_main_t * mm = &mpls_main; + vnet_main_t * vnm = vnet_get_main(); + gre_main_t * gm = &gre_main; + mpls_gre_tunnel_t *tp; + vnet_hw_interface_t * hi; - *tunnel_sw_if_index = hi->sw_if_index; - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, - VNET_SW_INTERFACE_FLAG_ADMIN_UP); + tp = mpls_gre_tunnel_find(src,dst,intfc,inner_fib_index); - tp->hw_if_index = hw_if_index; - - reinstall_it: - tp->tunnel_src.as_u32 = src->as_u32; - tp->tunnel_dst.as_u32 = dst->as_u32; - tp->intfc_address.as_u32 = intfc->as_u32; - tp->mask_width = mask_width; - tp->inner_fib_index = inner_fib_index; - tp->outer_fib_index = outer_fib_index; - tp->encap_index = e - mm->encaps; - tp->l2_only = l2_only; + /* Delete, and we can't find the tunnel */ + if (NULL == tp) + return VNET_API_ERROR_NO_SUCH_ENTRY; - /* Create the adjacency and add to v4 fib */ - memset(&adj, 0, sizeof (adj)); - adj.explicit_fib_index = ~0; - adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; - - rewrite_data = mpls_gre_rewrite (mm, tp); - 
if (rewrite_data == 0) + hi = vnet_get_hw_interface (vnm, tp->hw_if_index); + + if (!l2_only) { - if (*tunnel_sw_if_index != ~0) - { - hi = vnet_get_hw_interface (vnm, tp->hw_if_index); - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, - 0 /* admin down */); - vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); - } - pool_put (mm->gre_tunnels, tp); - return VNET_API_ERROR_NO_SUCH_LABEL; + /* + * unsource the FIB entry for the tunnel's destination + */ + const fib_prefix_t tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = *dst, + } + }; + + fib_entry_child_remove(tp->fei, + tp->sibling_index); + fib_table_entry_special_remove(outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR); + tp->fei = FIB_NODE_INDEX_INVALID; + adj_unlock(tp->adj_index); + + /* + * unsource the route for the tunnel's subnet + */ + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + }, + }; + + fib_table_entry_delete(inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE); } - - /* Save a copy of the rewrite data for L2 x-connect */ - vec_free (tp->rewrite_data); - tp->rewrite_data = rewrite_data; + u64 key = ((u64)tp->tunnel_src.as_u32 << 32 | + (u64)tp->tunnel_src.as_u32); - vnet_rewrite_for_tunnel - (vnm, - outer_fib_index /* tx_sw_if_index, aka outer fib ID */, - ip4_rewrite_node.index, - mpls_post_rewrite_node.index, - &adj.rewrite_header, - rewrite_data, vec_len(rewrite_data)); - - if (!l2_only) - ip_add_adjacency (lm, &adj, 1 /* one adj */, - &adj_index); + hash_unset (gm->tunnel_by_key, key); + mpls_sw_interface_enable_disable(mm, hi->sw_if_index, 0); + ip4_sw_interface_enable_disable(hi->sw_if_index, 0); + + vnet_sw_interface_set_flags (vnm, hi->sw_if_index, + 0 /* admin down */); + vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); + vec_free (tp->rewrite_data); + fib_node_deinit(&tp->mgt_node); + pool_put (mm->gre_tunnels, tp); + + return 0; 
+} + +int +vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, + ip4_address_t *dst, + ip4_address_t *intfc, + u32 mask_width, + u32 inner_fib_id, u32 outer_fib_id, + u32 * tunnel_sw_if_index, + u8 l2_only, + u8 is_add) +{ + u32 inner_fib_index = 0; + u32 outer_fib_index = 0; + u32 dummy; + ip4_main_t * im = &ip4_main; - add_del_route: + /* No questions, no answers */ + if (NULL == tunnel_sw_if_index) + tunnel_sw_if_index = &dummy; - if (need_route_add_del && !l2_only) + *tunnel_sw_if_index = ~0; + + if (inner_fib_id != (u32)~0) { - if (is_add) - ip4_add_del_route_next_hop (im, - IP4_ROUTE_FLAG_ADD, - &tp->intfc_address, - tp->mask_width, - &zero /* no next hop */, - (u32)~0 /* next_hop_sw_if_index */, - 1 /* weight */, - adj_index, - tp->inner_fib_index); - else - { - ip4_add_del_route_args_t a; - memset (&a, 0, sizeof (a)); - - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = tp->inner_fib_index; - a.dst_address = tp->intfc_address; - a.dst_address_length = tp->mask_width; - a.adj_index = ~0; - - ip4_add_del_route (im, &a); - ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); - } + uword * p; + + p = hash_get (im->fib_index_by_table_id, inner_fib_id); + if (! p) + return VNET_API_ERROR_NO_SUCH_INNER_FIB; + inner_fib_index = p[0]; } - if (is_add == 0 && found_tunnel) + if (outer_fib_id != 0) { - hi = vnet_get_hw_interface (vnm, tp->hw_if_index); - vnet_sw_interface_set_flags (vnm, hi->sw_if_index, - 0 /* admin down */); - vec_add1 (mm->free_gre_sw_if_indices, tp->hw_if_index); - vec_free (tp->rewrite_data); - pool_put (mm->gre_tunnels, tp); + uword * p; + + p = hash_get (im->fib_index_by_table_id, outer_fib_id); + if (! 
p) + return VNET_API_ERROR_NO_SUCH_FIB; + outer_fib_index = p[0]; } - return 0; + if (is_add) + { + return (mpls_gre_tunnel_add(src,dst,intfc, mask_width, + inner_fib_index, + outer_fib_index, + tunnel_sw_if_index, + l2_only)); + } + else + { + return (mpls_gre_tunnel_del(src,dst,intfc, mask_width, + inner_fib_index, + outer_fib_index, + tunnel_sw_if_index, + l2_only)); + } } /* @@ -963,21 +1275,17 @@ int vnet_mpls_gre_add_del_tunnel (ip4_address_t *src, */ int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id) { - ip4_main_t * im = &ip4_main; mpls_main_t * mm = &mpls_main; vnet_main_t * vnm = mm->vnet_main; mpls_gre_tunnel_t *tp; u32 fib_index = 0; - uword * p; u32 * tunnels_to_delete = 0; vnet_hw_interface_t * hi; - ip4_fib_t * fib; int i; - p = hash_get (im->fib_index_by_table_id, fib_id); - if (! p) + fib_index = ip4_fib_index_from_table_id(fib_id); + if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_INNER_FIB; - fib_index = p[0]; pool_foreach (tp, mm->gre_tunnels, ({ @@ -985,28 +1293,40 @@ int vnet_mpls_gre_delete_fib_tunnels (u32 fib_id) vec_add1 (tunnels_to_delete, tp - mm->gre_tunnels); })); - fib = vec_elt_at_index (im->fibs, fib_index); - for (i = 0; i < vec_len(tunnels_to_delete); i++) { tp = pool_elt_at_index (mm->gre_tunnels, tunnels_to_delete[i]); - uword * hash = fib->adj_index_by_dst_address[tp->mask_width]; - uword key = tp->intfc_address.as_u32 & im->fib_masks[tp->mask_width]; - uword *p = hash_get (hash, key); - ip4_add_del_route_args_t a; /* Delete, the route if not already gone */ - if (p && !tp->l2_only) - { - memset (&a, 0, sizeof (a)); - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = tp->inner_fib_index; - a.dst_address = tp->intfc_address; - a.dst_address_length = tp->mask_width; - a.adj_index = ~0; - ip4_add_del_route (im, &a); - ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); - } + if (FIB_NODE_INDEX_INVALID != tp->fei && !tp->l2_only) + { + const fib_prefix_t 
tun_dst_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->tunnel_dst, + } + }; + + fib_entry_child_remove(tp->fei, + tp->sibling_index); + fib_table_entry_special_remove(tp->outer_fib_index, + &tun_dst_pfx, + FIB_SOURCE_RR); + tp->fei = FIB_NODE_INDEX_INVALID; + adj_unlock(tp->adj_index); + + const fib_prefix_t tun_sub_net_pfx = { + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = tp->intfc_address, + }, + }; + + fib_table_entry_delete(tp->inner_fib_index, + &tun_sub_net_pfx, + FIB_SOURCE_INTERFACE); + } hi = vnet_get_hw_interface (vnm, tp->hw_if_index); vnet_sw_interface_set_flags (vnm, hi->sw_if_index, @@ -1229,11 +1549,15 @@ VLIB_CLI_COMMAND (show_mpls_tunnel_command, static) = { .function = show_mpls_tunnel_command_fn, }; + /* force inclusion from application's main.c */ clib_error_t *mpls_interface_init (vlib_main_t *vm) { clib_error_t * error; + fib_node_register_type(FIB_NODE_TYPE_MPLS_GRE_TUNNEL, + &mpls_gre_vft); + if ((error = vlib_call_init_function (vm, mpls_policy_encap_init))) return error; @@ -1286,9 +1610,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, ip_lookup_main_t * lm = &im->lookup_main; mpls_main_t * mm = &mpls_main; vnet_main_t * vnm = vnet_get_main(); - ip4_address_t zero; mpls_eth_tunnel_t *tp; - int need_route_add_del = 1; u32 inner_fib_index = 0; ip_adjacency_t adj; u32 adj_index; @@ -1300,8 +1622,6 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, u32 slot; u32 dummy; - zero.as_u32 = 0; - if (tunnel_sw_if_index == 0) tunnel_sw_if_index = &dummy; @@ -1326,18 +1646,14 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, */ if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) - && tp->inner_fib_index == inner_fib_index) + && tp->inner_fib_index == inner_fib_index + && FIB_NODE_INDEX_INVALID != tp->fei) { - ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); - uword * hash = 
fib->adj_index_by_dst_address[mask_width]; - uword key = intfc->as_u32 & im->fib_masks[mask_width]; - uword *p = hash_get (hash, key); - found_tunnel = 1; if (is_add) { - if (p || l2_only) + if (l2_only) return 1; else { @@ -1351,9 +1667,7 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, } else { - /* Delete, the route is already gone? */ - if (!p) - need_route_add_del = 0; + /* Delete */ goto add_del_route; } @@ -1413,7 +1727,6 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, /* Create the adjacency and add to v4 fib */ memset(&adj, 0, sizeof (adj)); - adj.explicit_fib_index = ~0; adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE; rewrite_data = mpls_ethernet_rewrite (mm, tp); @@ -1465,33 +1778,26 @@ int vnet_mpls_ethernet_add_del_tunnel (u8 *dst, add_del_route: - if (need_route_add_del && !l2_only) + if (!l2_only) { + const fib_prefix_t pfx = { + .fp_addr = { + .ip4 = tp->intfc_address, + }, + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + }; if (is_add) - ip4_add_del_route_next_hop (im, - IP4_ROUTE_FLAG_ADD, - &tp->intfc_address, - tp->mask_width, - &zero /* no next hop */, - (u32)~0 /* next_hop_sw_if_index */, - 1 /* weight */, - adj_index, - tp->inner_fib_index); + tp->fei = fib_table_entry_special_add(tp->inner_fib_index, + &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + adj_index); else { - ip4_add_del_route_args_t a; - memset (&a, 0, sizeof (a)); - - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = tp->inner_fib_index; - a.dst_address = tp->intfc_address; - a.dst_address_length = tp->mask_width; - a.adj_index = ~0; - - ip4_add_del_route (im, &a); - ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); - } + fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API); + tp->fei = FIB_NODE_INDEX_INVALID; + } } if (is_add == 0 && found_tunnel) { @@ -1667,15 +1973,10 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, u8 is_add) { ip4_main_t * im = &ip4_main; - 
ip_lookup_main_t * lm = &im->lookup_main; mpls_main_t * mm = &mpls_main; vnet_main_t * vnm = vnet_get_main(); - ip4_address_t zero; mpls_eth_tunnel_t *tp; - int need_route_add_del = 1; u32 inner_fib_index = 0; - ip_adjacency_t adj; - u32 adj_index; int found_tunnel = 0; mpls_encap_t * e = 0; u32 hw_if_index = ~0; @@ -1683,8 +1984,6 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, u32 slot; u32 dummy; - zero.as_u32 = 0; - if (tunnel_sw_if_index == 0) tunnel_sw_if_index = &dummy; @@ -1709,18 +2008,14 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, */ if (!memcmp (&tp->tunnel_dst, dst, sizeof (*dst)) && !memcmp (&tp->intfc_address, intfc, sizeof (*intfc)) - && tp->inner_fib_index == inner_fib_index) + && tp->inner_fib_index == inner_fib_index + && FIB_NODE_INDEX_INVALID != tp->fei) { - ip4_fib_t * fib = vec_elt_at_index (im->fibs, inner_fib_index); - uword * hash = fib->adj_index_by_dst_address[mask_width]; - uword key = intfc->as_u32 & im->fib_masks[mask_width]; - uword *p = hash_get (hash, key); - found_tunnel = 1; if (is_add) { - if (p || l2_only) + if (l2_only) return 1; else { @@ -1729,9 +2024,7 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, } else { - /* Delete, the route is already gone? 
*/ - if (!p) - need_route_add_del = 0; + /* Delete */ goto add_del_route; } @@ -1784,49 +2077,44 @@ int vnet_mpls_ethernet_add_del_policy_tunnel (u8 *dst, tp->encap_index = e - mm->encaps; tp->tx_sw_if_index = tx_sw_if_index; tp->l2_only = l2_only; + tp->fei = FIB_NODE_INDEX_INVALID; if (new_tunnel_index) *new_tunnel_index = tp - mm->eth_tunnels; - /* Create the classify adjacency and add to v4 fib */ - memset(&adj, 0, sizeof (adj)); - adj.explicit_fib_index = ~0; - adj.lookup_next_index = IP_LOOKUP_NEXT_CLASSIFY; - adj.classify.table_index = classify_table_index; - - if (!l2_only) - ip_add_adjacency (lm, &adj, 1 /* one adj */, - &adj_index); - add_del_route: - if (need_route_add_del && !l2_only) + if (!l2_only) { + const fib_prefix_t pfx = { + .fp_addr = { + .ip4 = tp->intfc_address, + }, + .fp_len = tp->mask_width, + .fp_proto = FIB_PROTOCOL_IP4, + }; + dpo_id_t dpo = DPO_NULL; + if (is_add) - ip4_add_del_route_next_hop (im, - IP4_ROUTE_FLAG_ADD, - &tp->intfc_address, - tp->mask_width, - &zero /* no next hop */, - (u32)~0 /* next_hop_sw_if_index */, - 1 /* weight */, - adj_index, - tp->inner_fib_index); - else { - ip4_add_del_route_args_t a; - memset (&a, 0, sizeof (a)); - - a.flags = IP4_ROUTE_FLAG_FIB_INDEX | IP4_ROUTE_FLAG_DEL; - a.table_index_or_table_id = tp->inner_fib_index; - a.dst_address = tp->intfc_address; - a.dst_address_length = tp->mask_width; - a.adj_index = ~0; - - ip4_add_del_route (im, &a); - ip4_maybe_remap_adjacencies (im, tp->inner_fib_index, - IP4_ROUTE_FLAG_FIB_INDEX); + dpo_set(&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create(FIB_PROTOCOL_IP4, + classify_table_index)); + + tp->fei = fib_table_entry_special_dpo_add(tp->inner_fib_index, + &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); } + else + { + fib_table_entry_delete(tp->inner_fib_index, &pfx, FIB_SOURCE_API); + tp->fei = FIB_NODE_INDEX_INVALID; + } } if (is_add == 0 && found_tunnel) { @@ -1945,3 +2233,44 @@ VLIB_CLI_COMMAND 
(create_mpls_ethernet_policy_tunnel_command, static) = { " classify-table-index <nn>", .function = create_mpls_ethernet_policy_tunnel_command_fn, }; + +static clib_error_t * +mpls_interface_enable_disable (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + clib_error_t * error = 0; + u32 sw_if_index, enable; + + sw_if_index = ~0; + + if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "enable")) + enable = 1; + else if (unformat (input, "disable")) + enable = 0; + else + { + error = clib_error_return (0, "expected 'enable' or 'disable'", + format_unformat_error, input); + goto done; + } + + mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = { + .path = "set interface mpls", + .function = mpls_interface_enable_disable, + .short_help = "Enable/Disable an interface for MPLS forwarding", +}; diff --git a/vnet/vnet/mpls-gre/mpls.c b/vnet/vnet/mpls/mpls.c index d914b4c2b72..be5e882f1b3 100644 --- a/vnet/vnet/mpls-gre/mpls.c +++ b/vnet/vnet/mpls/mpls.c @@ -16,10 +16,86 @@ */ #include <vnet/vnet.h> -#include <vnet/mpls-gre/mpls.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/mpls_fib.h> + +const static char* mpls_eos_bit_names[] = MPLS_EOS_BITS; mpls_main_t mpls_main; +u8 * format_mpls_unicast_label (u8 * s, va_list * args) +{ + mpls_label_t label = va_arg (*args, mpls_label_t); + + switch (label) { + case MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IPV4_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_ROUTER_ALERT_LABEL: + s = format (s, "%s", MPLS_IETF_ROUTER_ALERT_STRING); + break; + case MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL: + s = format (s, "%s", 
MPLS_IETF_IPV6_EXPLICIT_NULL_STRING); + break; + case MPLS_IETF_IMPLICIT_NULL_LABEL: + s = format (s, "%s", MPLS_IETF_IMPLICIT_NULL_STRING); + break; + case MPLS_IETF_ELI_LABEL: + s = format (s, "%s", MPLS_IETF_ELI_STRING); + break; + case MPLS_IETF_GAL_LABEL: + s = format (s, "%s", MPLS_IETF_GAL_STRING); + break; + default: + s = format (s, "%d", label); + break; + } + return s; +} + +uword unformat_mpls_unicast_label (unformat_input_t * input, va_list * args) +{ + mpls_label_t *label = va_arg (*args, mpls_label_t*); + + if (unformat (input, MPLS_IETF_IPV4_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_IPV6_EXPLICIT_NULL_STRING)) + *label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL; + else if (unformat (input, MPLS_IETF_ROUTER_ALERT_STRING)) + *label = MPLS_IETF_ROUTER_ALERT_LABEL; + else if (unformat (input, MPLS_IETF_IMPLICIT_NULL_STRING)) + *label = MPLS_IETF_IMPLICIT_NULL_LABEL; + else if (unformat (input, "%d", label)) + ; + + return (1); +} + +u8 * format_mpls_eos_bit (u8 * s, va_list * args) +{ + mpls_eos_bit_t eb = va_arg (*args, mpls_eos_bit_t); + + ASSERT(eb <= MPLS_EOS); + + s = format(s, "%s", mpls_eos_bit_names[eb]); + + return (s); +} + +u8 * format_mpls_header (u8 * s, va_list * args) +{ + mpls_unicast_header_t hdr = va_arg (*args, mpls_unicast_header_t); + + return (format(s, "[%U:%d:%d:%U]", + format_mpls_unicast_label, + vnet_mpls_uc_get_label(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_ttl(hdr.label_exp_s_ttl), + vnet_mpls_uc_get_exp(hdr.label_exp_s_ttl), + format_mpls_eos_bit, + vnet_mpls_uc_get_s(hdr.label_exp_s_ttl))); +} + u8 * format_mpls_gre_tx_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); @@ -203,8 +279,9 @@ int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, /* Reformat label into mpls_unicast_header_t */ label_host_byte_order <<= 12; - if (i == vec_len(labels_host_byte_order) - 1) - label_host_byte_order |= 1<<8; /* S=1 */ + 
// FIXME NEOS AND EOS + //if (i == vec_len(labels_host_byte_order) - 1) + // label_host_byte_order |= 1<<8; /* S=1 */ label_host_byte_order |= 0xff; /* TTL=FF */ label_net_byte_order = clib_host_to_net_u32 (label_host_byte_order); h.label_exp_s_ttl = label_net_byte_order; @@ -385,7 +462,7 @@ int vnet_mpls_add_del_decap (u32 rx_fib_id, rx_fib_index = p[0]; /* L3 decap => transform fib ID to fib index */ - if (next_index == MPLS_INPUT_NEXT_IP4_INPUT) + if (next_index == MPLS_LOOKUP_NEXT_IP4_INPUT) { p = hash_get (im->fib_index_by_table_id, tx_fib_id); if (! p) @@ -437,12 +514,12 @@ unformat_mpls_gre_input_next (unformat_input_t * input, va_list * args) if (unformat (input, "lookup")) { - *result = MPLS_INPUT_NEXT_IP4_INPUT; + *result = MPLS_LOOKUP_NEXT_IP4_INPUT; rv = 1; } else if (unformat (input, "output")) { - *result = MPLS_INPUT_NEXT_L2_OUTPUT; + *result = MPLS_LOOKUP_NEXT_L2_OUTPUT; rv = 1; } return rv; @@ -614,10 +691,7 @@ show_mpls_fib_command_fn (vlib_main_t * vm, show_mpls_fib_t *records = 0; show_mpls_fib_t *s; mpls_main_t * mm = &mpls_main; - ip4_main_t * im = &ip4_main; - ip4_fib_t * rx_fib, * tx_fib; - u32 tx_table_id; - char *swif_tag; + ip4_fib_t * rx_fib; hash_foreach (key, value, mm->mpls_encap_by_fib_and_dest, ({ @@ -630,7 +704,6 @@ show_mpls_fib_command_fn (vlib_main_t * vm, if (!vec_len(records)) { vlib_cli_output (vm, "MPLS encap table empty"); - goto decap_table; } /* sort output by dst address within fib */ vec_sort_with_function (records, mpls_dest_cmp); @@ -639,65 +712,174 @@ show_mpls_fib_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "%=6s%=16s%=16s", "Table", "Dest address", "Labels"); vec_foreach (s, records) { - rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + rx_fib = ip4_fib_get (s->fib_index); vlib_cli_output (vm, "%=6d%=16U%=16U", rx_fib->table_id, format_ip4_address, &s->dest, format_mpls_encap_index, mm, s->entry_index); } - decap_table: - vec_reset_length(records); + vec_free(records); + return 0; +} - hash_foreach (key, 
value, mm->mpls_decap_by_rx_fib_and_label, - ({ - vec_add2 (records, s, 1); - s->fib_index = (u32)(key>>32); - s->entry_index = (u32) value; - s->label = ((u32) key)>>12; - s->s_bit = (key & (1<<8)) != 0; - })); - - if (!vec_len(records)) - { - vlib_cli_output (vm, "MPLS decap table empty"); - goto out; - } +VLIB_CLI_COMMAND (show_mpls_fib_command, static) = { + .path = "show mpls encap", + .short_help = "show mpls encap", + .function = show_mpls_fib_command_fn, +}; - vec_sort_with_function (records, mpls_label_cmp); +static clib_error_t * +vnet_mpls_local_label (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, * line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + clib_error_t * error = 0; + u32 table_id, is_del, is_ip; + fib_prefix_t pfx; + mpls_label_t local_label; + mpls_eos_bit_t eos; + + is_ip = 0; + table_id = 0; + eos = MPLS_EOS; + + /* Get a line of input. */ + if (! unformat_user (input, unformat_line_input, line_input)) + return 0; - vlib_cli_output (vm, "MPLS decap table"); - vlib_cli_output (vm, "%=10s%=15s%=6s%=6s", "RX Table", "TX Table/Intfc", - "Label", "S-bit"); - vec_foreach (s, records) + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { - mpls_decap_t * d; - d = pool_elt_at_index (mm->decaps, s->entry_index); - if (d->next_index == MPLS_INPUT_NEXT_IP4_INPUT) - { - tx_fib = vec_elt_at_index (im->fibs, d->tx_fib_index); - tx_table_id = tx_fib->table_id; - swif_tag = " "; - } + memset(&rpath, 0, sizeof(rpath)); + memset(&pfx, 0, sizeof(pfx)); + + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "eos")) + eos = MPLS_EOS; + else if (unformat (line_input, "non-eos")) + eos = MPLS_NON_EOS; + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, + &pfx.fp_addr.ip4, + &pfx.fp_len)) + { + pfx.fp_proto = 
FIB_PROTOCOL_IP4; + is_ip = 1; + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, + &pfx.fp_addr.ip6, + &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + is_ip = 1; + } + else if (unformat (line_input, "%d", &local_label)) + ; + else if (unformat (line_input, + "ip4-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "ip6-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP6; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } + else if (unformat (line_input, + "mpls-lookup-in-table %d", + &rpath.frp_fib_index)) + { + rpath.frp_label = MPLS_LABEL_INVALID; + rpath.frp_proto = FIB_PROTOCOL_IP4; + rpath.frp_sw_if_index = FIB_NODE_INDEX_INVALID; + vec_add1(rpaths, rpath); + } else - { - tx_table_id = d->tx_fib_index; - swif_tag = "(i) "; - } - rx_fib = vec_elt_at_index (im->fibs, s->fib_index); + { + error = clib_error_return (0, "unkown input: %U", + format_unformat_error, input); + goto done; + } - vlib_cli_output (vm, "%=10d%=10d%=5s%=6d%=6d", rx_fib->table_id, - tx_table_id, swif_tag, s->label, s->s_bit); } - out: - vec_free(records); - return 0; + if (is_ip) + { + u32 fib_index = fib_table_find(pfx.fp_proto, table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, "%U table-id %d does not exist", + format_fib_protocol, pfx.fp_proto, table_id); + goto done; + } + + if (is_del) + { + fib_table_entry_local_label_remove(fib_index, &pfx, local_label); + } + else + { + fib_table_entry_local_label_add(fib_index, &pfx, local_label); + } + } + else + { + fib_node_index_t lfe, fib_index; + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = local_label, + .fp_eos = eos, + }; + + fib_index = 
mpls_fib_index_from_table_id(table_id); + + if (FIB_NODE_INDEX_INVALID == fib_index) + { + error = clib_error_return (0, "MPLS table-id %d does not exist", + table_id); + goto done; + } + + lfe = fib_table_entry_path_add2(fib_index, + &prefix, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + rpaths); + + if (FIB_NODE_INDEX_INVALID == lfe) + { + error = clib_error_return (0, "Failed to create %U-%U in MPLS table-id %d", + format_mpls_unicast_label, local_label, + format_mpls_eos_bit, eos, + table_id); + goto done; + } + } + +done: + return error; } -VLIB_CLI_COMMAND (show_mpls_fib_command, static) = { - .path = "show mpls fib", - .short_help = "show mpls fib", - .function = show_mpls_fib_command_fn, +VLIB_CLI_COMMAND (mpls_local_label_command, static) = { + .path = "mpls local-label", + .function = vnet_mpls_local_label, + .short_help = "Create/Delete MPL local labels", }; int mpls_fib_reset_labels (u32 fib_id) @@ -764,7 +946,6 @@ static clib_error_t * mpls_init (vlib_main_t * vm) mpls_main_t * mm = &mpls_main; clib_error_t * error; - memset (mm, 0, sizeof (mm[0])); mm->vlib_main = vm; mm->vnet_main = vnet_get_main(); diff --git a/vnet/vnet/mpls-gre/mpls.h b/vnet/vnet/mpls/mpls.h index d8ffca22793..2aeae49df43 100644 --- a/vnet/vnet/mpls-gre/mpls.h +++ b/vnet/vnet/mpls/mpls.h @@ -17,9 +17,12 @@ #include <vnet/vnet.h> #include <vnet/gre/gre.h> -#include <vnet/mpls-gre/packet.h> +#include <vnet/mpls/packet.h> +#include <vnet/mpls/mpls_types.h> #include <vnet/ip/ip4_packet.h> #include <vnet/ethernet/ethernet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/adj/adj.h> typedef CLIB_PACKED (struct { ip4_header_t ip4; /* 20 bytes */ @@ -31,7 +34,7 @@ extern vnet_hw_interface_class_t mpls_gre_hw_interface_class; typedef enum { #define mpls_error(n,s) MPLS_ERROR_##n, -#include <vnet/mpls-gre/error.def> +#include <vnet/mpls/error.def> #undef mpls_error MPLS_N_ERROR, } mpls_gre_error_t; @@ -42,6 +45,7 @@ typedef enum { */ typedef struct { + fib_node_t mgt_node; ip4_address_t 
tunnel_src; ip4_address_t tunnel_dst; ip4_address_t intfc_address; @@ -52,6 +56,9 @@ typedef struct { u32 hw_if_index; /* L2 x-connect capable tunnel intfc */ u8 * rewrite_data; u8 l2_only; + fib_node_index_t fei; /* FIB Entry index for the tunnel's destination */ + adj_index_t adj_index; /* The midchain adj this tunnel creates */ + u32 sibling_index; } mpls_gre_tunnel_t; typedef struct { @@ -64,6 +71,7 @@ typedef struct { u32 hw_if_index; u8 * rewrite_data; u8 l2_only; + fib_node_index_t fei; } mpls_eth_tunnel_t; typedef struct { @@ -78,7 +86,53 @@ typedef struct { u32 next_index; /* e.g. ip4/6-input, l2-input */ } mpls_decap_t; +#define MPLS_FIB_DEFAULT_TABLE_ID 0 + +/** + * Type exposure is to allow the DP fast/inlined access + */ +#define MPLS_FIB_KEY_SIZE 21 +#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1)) + +typedef struct mpls_fib_t_ +{ + /** + * A hash table of entries. 21 bit key + * Hash table for reduced memory footprint + */ + uword * mf_entries; + + /** + * The load-balance indeices keyed by 21 bit label+eos bit. + * A flat array for maximum lookup performace. + */ + index_t mf_lbs[MPLS_FIB_DB_SIZE]; +} mpls_fib_t; + +/** + * @brief Definition of a callback for receiving MPLS interface state change + * notifications + */ +typedef void (*mpls_interface_state_change_callback_t)(u32 sw_if_index, + u32 is_enable); + typedef struct { + /* MPLS FIB index for each software interface */ + u32 *fib_index_by_sw_if_index; + + /** A pool of all the MPLS FIBs */ + struct fib_table_t_ *fibs; + + /** A hash table to lookup the mpls_fib by table ID */ + uword *fib_index_by_table_id; + + /* rx/tx interface/feature configuration. */ + ip_config_main_t rx_config_mains, tx_config_main; + + /* Built-in unicast feature path indices, see ip_feature_init_cast(...) 
*/ + u32 mpls_rx_feature_lookup; + u32 mpls_rx_feature_not_enabled; + /* pool of gre tunnel instances */ mpls_gre_tunnel_t *gre_tunnels; u32 * free_gre_sw_if_indices; @@ -99,23 +153,53 @@ typedef struct { u32 ip4_classify_mpls_policy_encap_next_index; u32 ip6_classify_mpls_policy_encap_next_index; + /* feature path configuration lists */ + vnet_ip_feature_registration_t * next_feature; + + /* Save feature results for show command */ + char **feature_nodes; + + /* IP4 enabled count by software interface */ + u8 * mpls_enabled_by_sw_if_index; + + /* Functions to call when MPLS state on an interface changes. */ + mpls_interface_state_change_callback_t * mpls_interface_state_change_callbacks; + /* convenience */ vlib_main_t * vlib_main; vnet_main_t * vnet_main; } mpls_main_t; -mpls_main_t mpls_main; +extern mpls_main_t mpls_main; + +#define VNET_MPLS_FEATURE_INIT(x,...) \ + __VA_ARGS__ vnet_ip_feature_registration_t uc_##x; \ +static void __vnet_add_feature_registration_uc_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vnet_add_feature_registration_uc_##x (void) \ +{ \ + mpls_main_t * mm = &mpls_main; \ + uc_##x.next = mm->next_feature; \ + mm->next_feature = &uc_##x; \ +} \ +__VA_ARGS__ vnet_ip_feature_registration_t uc_##x + +extern clib_error_t * mpls_feature_init(vlib_main_t * vm); format_function_t format_mpls_protocol; -format_function_t format_mpls_header; -format_function_t format_mpls_header_with_length; format_function_t format_mpls_gre_header_with_length; format_function_t format_mpls_eth_header_with_length; -format_function_t format_mpls_unicast_label; format_function_t format_mpls_encap_index; +format_function_t format_mpls_eos_bit; +format_function_t format_mpls_unicast_header_net_byte_order; +format_function_t format_mpls_unicast_label; +format_function_t format_mpls_header; + extern vlib_node_registration_t mpls_input_node; extern vlib_node_registration_t mpls_policy_encap_node; +extern vlib_node_registration_t mpls_output_node; 
+extern vlib_node_registration_t mpls_midchain_node; extern vnet_device_class_t mpls_gre_device_class; @@ -126,6 +210,7 @@ unformat_function_t unformat_mpls_protocol_net_byte_order; unformat_function_t unformat_mpls_label_net_byte_order; unformat_function_t unformat_mpls_gre_header; unformat_function_t unformat_pg_mpls_gre_header; +unformat_function_t unformat_mpls_unicast_label; /* Parse mpls header. */ unformat_function_t unformat_mpls_header; @@ -135,6 +220,12 @@ unformat_function_t unformat_pg_mpls_header; #define MPLS_GRE_OUTPUT_NEXT_LOOKUP 1 #define MPLS_GRE_OUTPUT_NEXT_DROP VNET_INTERFACE_TX_NEXT_DROP +void mpls_sw_interface_enable_disable (mpls_main_t * mm, + u32 sw_if_index, + u8 is_enable); + +u8 mpls_sw_interface_is_enabled (u32 sw_if_index); + mpls_encap_t * mpls_encap_by_fib_and_dest (mpls_main_t * mm, u32 rx_fib, u32 dst_address); @@ -176,6 +267,7 @@ int vnet_mpls_add_del_encap (ip4_address_t *dest, u32 fib_id, int vnet_mpls_policy_tunnel_add_rewrite (mpls_main_t * mm, mpls_encap_t * e, u32 policy_tunnel_index); + typedef struct { u32 lookup_miss; @@ -198,8 +290,7 @@ u8 * format_mpls_gre_header (u8 * s, va_list * args); #define foreach_mpls_input_next \ _(DROP, "error-drop") \ -_(IP4_INPUT, "ip4-input") \ -_(L2_OUTPUT, "l2-output") +_(LOOKUP, "mpls-lookup") typedef enum { #define _(s,n) MPLS_INPUT_NEXT_##s, @@ -208,6 +299,28 @@ typedef enum { MPLS_INPUT_N_NEXT, } mpls_input_next_t; +#define foreach_mpls_lookup_next \ +_(DROP, "error-drop") \ +_(IP4_INPUT, "ip4-input") \ +_(L2_OUTPUT, "l2-output") + +// FIXME remove. 
+typedef enum { +#define _(s,n) MPLS_LOOKUP_NEXT_##s, + foreach_mpls_lookup_next +#undef _ + MPLS_LOOKUP_N_NEXT, +} mpls_lookup_next_t; + +#define foreach_mpls_output_next \ +_(DROP, "error-drop") + +typedef enum { +#define _(s,n) MPLS_OUTPUT_NEXT_##s, + foreach_mpls_output_next +#undef _ + MPLS_OUTPUT_N_NEXT, +} mpls_output_next_t; typedef struct { u32 lookup_miss; diff --git a/vnet/vnet/mpls/mpls_features.c b/vnet/vnet/mpls/mpls_features.c new file mode 100644 index 00000000000..d3a726afd04 --- /dev/null +++ b/vnet/vnet/mpls/mpls_features.c @@ -0,0 +1,254 @@ +/* + * mpls_features.c: MPLS input and output features + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/mpls/mpls.h> + +always_inline uword +mpls_terminate (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int error_code) +{ + u32 * buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, + buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + mpls_input_node.index, + error_code); + + return n_packets; +} + +static uword +mpls_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_PUNT)); +} + +VLIB_REGISTER_NODE (mpls_punt_node) = { + .function = mpls_punt, + .name = "mpls-punt", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_punt_node, mpls_punt) + +static uword +mpls_drop (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_DROP)); +} + +VLIB_REGISTER_NODE (mpls_drop_node) = { + .function = mpls_drop, + .name = "mpls-drop", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_drop_node, mpls_drop) + +static uword +mpls_not_enabled (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return (mpls_terminate(vm, node, frame, MPLS_ERROR_NOT_ENABLED)); +} + +VLIB_REGISTER_NODE (mpls_not_enabled_node) = { + .function = mpls_not_enabled, + .name = "mpls-not-enabled", + .vector_size = sizeof (u32), + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_not_enabled_node, mpls_not_enabled) + +VNET_MPLS_FEATURE_INIT (mpls_lookup, static) = { + .node_name = "mpls-lookup", + .runs_before = ORDER_CONSTRAINTS {"mpls-not-enabled", 0}, + .feature_index = &mpls_main.mpls_rx_feature_lookup, +}; + +VNET_MPLS_FEATURE_INIT (mpls_not_enabled, static) = { + 
.node_name = "mpls-not-enabled", + .runs_before = ORDER_CONSTRAINTS {0}, /* not before any other features */ + .feature_index = &mpls_main.mpls_rx_feature_not_enabled, +}; + +static char * feature_start_nodes[] = +{ + "mpls-input", +}; + +clib_error_t * +mpls_feature_init (vlib_main_t * vm) +{ + ip_config_main_t * cm = &mpls_main.rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + + return (ip_feature_init_cast (vm, cm, vcm, + feature_start_nodes, + ARRAY_LEN(feature_start_nodes), + VNET_IP_RX_UNICAST_FEAT, + VNET_L3_PACKET_TYPE_MPLS_UNICAST)); +} + +static clib_error_t * +mpls_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, + u32 is_add) +{ + vlib_main_t * vm = vnm->vlib_main; + mpls_main_t * mm = &mpls_main; + ip_config_main_t * cm = &mm->rx_config_mains; + vnet_config_main_t * vcm = &cm->config_main; + u32 drop_feature_index; + u32 ci; + + vec_validate_init_empty (mm->mpls_enabled_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (mm->fib_index_by_sw_if_index, sw_if_index, 0); + vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0); + ci = cm->config_index_by_sw_if_index[sw_if_index]; + + drop_feature_index = mm->mpls_rx_feature_not_enabled; + + if (is_add) + ci = vnet_config_add_feature (vm, vcm, ci, + drop_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + else + { + ci = vnet_config_del_feature (vm, vcm, ci, + drop_feature_index, + /* config data */ 0, + /* # bytes of config data */ 0); + mm->mpls_enabled_by_sw_if_index[sw_if_index] = 0;; + } + + cm->config_index_by_sw_if_index[sw_if_index] = ci; + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (mpls_sw_interface_add_del); + +static clib_error_t * +show_mpls_features_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + mpls_main_t * mm = &mpls_main; + int i; + char ** features; + + vlib_cli_output (vm, "Available MPLS feature nodes"); + + do { + features = 
mm->feature_nodes; + for (i = 0; i < vec_len(features); i++) + vlib_cli_output (vm, " %s\n", features[i]); + } while(0); + + return 0; +} + +VLIB_CLI_COMMAND (show_ip_features_command, static) = { + .path = "show mpls features", + .short_help = "show mpls features", + .function = show_mpls_features_command_fn, +}; + +static clib_error_t * +show_mpls_interface_features_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t * vnm = vnet_get_main(); + mpls_main_t * mm = &mpls_main; + + ip_config_main_t * cm; + vnet_config_main_t * vcm; + vnet_config_t * cfg; + u32 cfg_index; + vnet_config_feature_t * feat; + vlib_node_t * n; + u32 sw_if_index; + u32 node_index; + u32 current_config_index; + int i; + + if (! unformat (input, "%U", unformat_vnet_sw_interface, + vnm, &sw_if_index)) + return clib_error_return (0, "Interface not specified..."); + + vlib_cli_output (vm, "MPLS feature paths configured on %U...", + format_vnet_sw_if_index_name, vnm, sw_if_index); + + cm = &mm->rx_config_mains; + vcm = &cm->config_main; + + current_config_index = vec_elt (cm->config_index_by_sw_if_index, + sw_if_index); + + ASSERT(current_config_index + < vec_len (vcm->config_pool_index_by_user_index)); + + cfg_index = + vcm->config_pool_index_by_user_index[current_config_index]; + cfg = pool_elt_at_index (vcm->config_pool, cfg_index); + + for (i = 0; i < vec_len(cfg->features); i++) + { + feat = cfg->features + i; + node_index = feat->node_index; + n = vlib_get_node (vm, node_index); + vlib_cli_output (vm, " %v", n->name); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_mpls_interface_features_command, static) = { + .path = "show mpls interface features", + .short_help = "show mpls interface features <intfc>", + .function = show_mpls_interface_features_command_fn, +}; + diff --git a/vnet/vnet/mpls/mpls_lookup.c b/vnet/vnet/mpls/mpls_lookup.c new file mode 100644 index 00000000000..31ad68c4bc6 --- /dev/null +++ b/vnet/vnet/mpls/mpls_lookup.c @@ 
-0,0 +1,278 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/dpo/load_balance.h> + +vlib_node_registration_t mpls_lookup_node; + +typedef struct { + u32 next_index; + u32 lb_index; + u32 lfib_index; + u32 label_net_byte_order; +} mpls_lookup_trace_t; + +static u8 * +format_mpls_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *); + + s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d " + "label %d eos %d", + t->next_index, t->lfib_index, t->lb_index, + vnet_mpls_uc_get_label( + clib_net_to_host_u32(t->label_net_byte_order)), + vnet_mpls_uc_get_s(t->label_net_byte_order)); + return s; +} + +/* + * Compute flow hash. + * We'll use it to select which adjacency to use for this flow. And other things. 
+ */ +always_inline u32 +mpls_compute_flow_hash (const mpls_unicast_header_t * hdr, + flow_hash_config_t flow_hash_config) +{ + // FIXME + return (vnet_mpls_uc_get_label(hdr->label_exp_s_ttl)); +} + +static inline uword +mpls_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, next_index, * from, * to_next; + mpls_main_t * mm = &mpls_main; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 lbi0, next0, lfib_index0, bi0, hash_c0; + const mpls_unicast_header_t * h0; + const load_balance_t *lb0; + const dpo_id_t *dpo0; + vlib_buffer_t * b0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + + lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index, + vnet_buffer(b0)->sw_if_index[VLIB_RX]); + + lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0); + lb0 = load_balance_get(lbi0); + + hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0; + if (PREDICT_FALSE(lb0->lb_n_buckets > 1)) + { + hash_c0 = vnet_buffer (b0)->ip.flow_hash = + mpls_compute_flow_hash(h0, lb0->lb_hash_config); + } + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + dpo0 = load_balance_get_bucket_i(lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, b0)); + + /* + * pop the label that was just used in the lookup + */ + 
vlib_buffer_advance(b0, sizeof(*h0)); + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_lookup_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->lb_index = lbi0; + tr->lfib_index = lfib_index0; + tr->label_net_byte_order = h0->label_exp_s_ttl; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_lookup_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_lookup_node) = { + .function = mpls_lookup, + .name = "mpls-lookup", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .sibling_of = "ip4-lookup", + + .format_buffer = format_mpls_gre_header_with_length, + .format_trace = format_mpls_lookup_trace, + .unformat_buffer = unformat_mpls_gre_header, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_lookup_node, mpls_lookup) + +typedef struct { + u32 next_index; + u32 lb_index; +} mpls_load_balance_trace_t; + +static u8 * +format_mpls_load_balance_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *); + + s = format (s, "MPLS: next [%d], LB index %d ", + t->next_index, t->lb_index); + return s; +} + +always_inline uword +mpls_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, * from, * to_next; + ip_lookup_next_t next; + u32 cpu_index = os_get_cpu_number(); 
+ + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + + + while (n_left_from > 0 && n_left_to_next > 0) + { + const mpls_unicast_header_t *hdr0; + const load_balance_t *lb0; + u32 pi0, lbi0, hc0, next0; + const dpo_id_t *dpo0; + vlib_buffer_t * p0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + hdr0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get(lbi0); + hc0 = lb0->lb_hash_config; + vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(hdr0, hc0); + + dpo0 = load_balance_get_bucket_i(lb0, + vnet_buffer(p0)->ip.flow_hash & + (lb0->lb_n_buckets_minus_1)); + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, cpu_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, + to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (mpls_load_balance_node) = { + .function = mpls_load_balance, + .name = "mpls-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "mpls-lookup", + + .format_trace = format_mpls_load_balance_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_load_balance_node, mpls_load_balance) diff --git a/vnet/vnet/mpls/mpls_output.c b/vnet/vnet/mpls/mpls_output.c new file mode 100644 index 00000000000..932fcb8d0bd --- /dev/null +++ b/vnet/vnet/mpls/mpls_output.c @@ -0,0 +1,343 @@ +/* + * mpls_output.c: MPLS Adj rewrite + * + 
* Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[64 - 1*sizeof(u32)]; +} mpls_output_trace_t; + +static u8 * +format_mpls_output_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *); + vnet_main_t * vnm = vnet_get_main(); + uword indent = format_get_indent (s); + + s = format (s, "adj-idx %d : %U flow hash: 0x%08x", + t->adj_index, + format_ip_adjacency, vnm, t->adj_index, FORMAT_IP_ADJACENCY_NONE, + t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + vnm, t->adj_index, + t->packet_data, sizeof (t->packet_data)); + return s; +} + +static inline uword +mpls_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next, cpu_index; + vlib_node_runtime_t * error_node; + + cpu_index = os_get_cpu_number(); + error_node = vlib_node_get_runtime (vm, mpls_output_node.index); + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t * adj0; + mpls_unicast_header_t *hdr0; + vlib_buffer_t * p0; + u32 pi0, rw_len0, adj_index0, next0, error0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + /* We should never rewrite a pkt using the MISS adjacency */ + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + hdr0 = vlib_buffer_get_current (p0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], hdr0, + sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + + if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t))) + vlib_increment_combined_counter + (&adjacency_counters, + cpu_index, adj_index0, + /* packet increment */ 0, + /* byte increment */ rw_len0-sizeof(ethernet_header_t)); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) + > adj0[0].rewrite_header.max_l3_packet_bytes + ? 
IP4_ERROR_MTU_EXCEEDED + : IP4_ERROR_NONE); + + p0->error = error_node->errors[error0]; + + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + if (PREDICT_TRUE(error0 == IP4_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + adj0[0].rewrite_header.sw_if_index; + next0 = adj0[0].rewrite_header.next_index; + } + else + { + next0 = MPLS_OUTPUT_NEXT_DROP; + } + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_output_trace_t *tr = vlib_add_trace (vm, node, + p0, sizeof (*tr)); + tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX]; + tr->flow_hash = vnet_buffer(p0)->ip.flow_hash; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_output_node.index, + MPLS_ERROR_PKTS_ENCAP, + from_frame->n_vectors); + + return from_frame->n_vectors; +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +static inline uword +mpls_output (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (mpls_output_node) = { + .function = mpls_output, + .name = "mpls-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_OUTPUT_NEXT_##s] = n, + foreach_mpls_output_next +#undef _ + }, + + .format_trace = format_mpls_output_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_output_node, mpls_output) + +static inline uword +mpls_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return (mpls_output_inline(vm, node, from_frame)); +} + +VLIB_REGISTER_NODE (mpls_midchain_node) = { + .function = mpls_output, + .name = "mpls-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_mpls_output_trace, + + .sibling_of = "mpls-output", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_midchain_node, mpls_midchain) + +/** + * @brief Next index values from the MPLS incomplete adj node + */ +#define foreach_mpls_adj_incomplete_next \ +_(DROP, "error-drop") \ +_(IP4, "ip4-arp") \ +_(IP6, "ip6-discover-neighbor") + +typedef enum { +#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s, + foreach_mpls_adj_incomplete_next +#undef _ + MPLS_ADJ_INCOMPLETE_N_NEXT, +} mpls_adj_incomplete_next_t; + +/** + * @brief A struct to hold tracing information for the MPLS label imposition + * node. + */ +typedef struct mpls_adj_incomplete_trace_t_ +{ + u32 next; +} mpls_adj_incomplete_trace_t; + + +/** + * @brief Graph node for incomplete MPLS adjacency. + * This node will push traffic to either the v4-arp or v6-nd node + * based on the next-hop proto of the adj. + * We pay a cost for this 'routing' node, but an incomplete adj is the + * exception case. 
+ */ +static inline uword +mpls_adj_incomplete (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, * from, * to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, next0, adj_index0; + ip_adjacency_t * adj0; + vlib_buffer_t * p0; + + pi0 = to_next[0] = from[0]; + p0 = vlib_get_buffer (vm, pi0); + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + ASSERT(adj_index0); + + adj0 = adj_get(adj_index0); + + if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto)) + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4; + } + else + { + next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_adj_incomplete_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->next = next0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static u8 * +format_mpls_adj_incomplete_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_adj_incomplete_trace_t * t; + uword indent; + + t = va_arg (*args, mpls_adj_incomplete_trace_t *); + indent = format_get_indent (s); + + s = format (s, "%Unext:%d", + format_white_space, indent, + t->next); + return (s); +} + +VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = { + .function = mpls_adj_incomplete, + .name = "mpls-adj-incomplete", + .format_trace = format_mpls_adj_incomplete_trace, + /* Takes a vector of 
packets. */ + .vector_size = sizeof (u32), + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n, + foreach_mpls_adj_incomplete_next +#undef _ + }, + + .format_trace = format_mpls_output_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_adj_incomplete_node, + mpls_adj_incomplete) diff --git a/vnet/vnet/mpls/mpls_types.h b/vnet/vnet/mpls/mpls_types.h new file mode 100644 index 00000000000..d7c629df832 --- /dev/null +++ b/vnet/vnet/mpls/mpls_types.h @@ -0,0 +1,39 @@ +#ifndef __MPLS_TYPES_H__ +#define __MPLS_TYPES_H__ + +#define MPLS_IETF_MIN_LABEL 0x00000 +#define MPLS_IETF_MAX_LABEL 0xfffff + +#define MPLS_IETF_MIN_RESERVED_LABEL 0x00000 +#define MPLS_IETF_MAX_RESERVED_LABEL 0x0000f + +#define MPLS_IETF_MIN_UNRES_LABEL 0x00010 +#define MPLS_IETF_MAX_UNRES_LABEL 0xfffff + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL 0x00000 +#define MPLS_IETF_ROUTER_ALERT_LABEL 0x00001 +#define MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL 0x00002 +#define MPLS_IETF_IMPLICIT_NULL_LABEL 0x00003 +#define MPLS_IETF_ELI_LABEL 0x00007 +#define MPLS_IETF_GAL_LABEL 0x0000D + +#define MPLS_IETF_IPV4_EXPLICIT_NULL_STRING "ip4-explicit-null" +#define MPLS_IETF_IPV4_EXPLICIT_NULL_BRIEF_STRING "e-nul" +#define MPLS_IETF_IMPLICIT_NULL_STRING "implicit-null" +#define MPLS_IETF_IMPLICIT_NULL_BRIEF_STRING "i-nul" +#define MPLS_IETF_ROUTER_ALERT_STRING "router-alert" +#define MPLS_IETF_ROUTER_ALERT_BRIEF_STRING "r-alt" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_STRING "ipv6-explicit-null" +#define MPLS_IETF_IPV6_EXPLICIT_NULL_BRIEF_STRING "v6enl" +#define MPLS_IETF_ELI_STRING "entropy-label-indicator" +#define MPLS_IETF_ELI_BRIEF_STRING "eli" +#define MPLS_IETF_GAL_STRING "gal" +#define MPLS_IETF_GAL_BRIEF_STRING "gal" + +#define MPLS_LABEL_INVALID (MPLS_IETF_MAX_LABEL+1) + +#define MPLS_LABEL_IS_REAL(_lbl) \ + (((_lbl) > MPLS_IETF_MIN_UNRES_LABEL) && \ + ((_lbl) <= 
MPLS_IETF_MAX_UNRES_LABEL)) + +#endif diff --git a/vnet/vnet/mpls/node.c b/vnet/vnet/mpls/node.c new file mode 100644 index 00000000000..6801cc7b3ae --- /dev/null +++ b/vnet/vnet/mpls/node.c @@ -0,0 +1,223 @@ +/* + * node.c: mpls-o-gre decap processing + * + * Copyright (c) 2012-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/pg/pg.h> +#include <vnet/mpls/mpls.h> + +typedef struct { + u32 next_index; + u32 label_host_byte_order; +} mpls_input_trace_t; + +static u8 * +format_mpls_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *); + char * next_name; + + next_name = "BUG!"; + +#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b; + foreach_mpls_input_next; +#undef _ + + s = format (s, "MPLS: next %s[%d] label %d ttl %d", + next_name, t->next_index, + vnet_mpls_uc_get_label(t->label_host_byte_order), + vnet_mpls_uc_get_ttl(t->label_host_byte_order)); + + return s; +} + +vlib_node_registration_t mpls_input_node; + +typedef struct { + u32 last_label; + u32 last_inner_fib_index; + u32 last_outer_fib_index; + mpls_main_t * mpls_main; +} mpls_input_runtime_t; + +static inline uword +mpls_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 
n_left_from, next_index, * from, * to_next; + mpls_input_runtime_t * rt; + mpls_main_t * mm; + u32 cpu_index = os_get_cpu_number(); + vlib_simple_counter_main_t * cm; + vnet_main_t * vnm = vnet_get_main(); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + mm = rt->mpls_main; + /* + * Force an initial lookup every time, in case the control-plane + * changed the label->FIB mapping. + */ + rt->last_label = ~0; + + next_index = node->cached_next_index; + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_MPLS); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t * b0; + mpls_unicast_header_t * h0; + u32 label0; + u32 next0; + ip_config_main_t * cm0; + u32 sw_if_index0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + h0 = vlib_buffer_get_current (b0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + cm0 = &mm->rx_config_mains; + b0->current_config_index = vec_elt (cm0->config_index_by_sw_if_index, + sw_if_index0); + + label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl); + /* TTL expired? 
*/ + if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0)) + { + next0 = MPLS_INPUT_NEXT_DROP; + b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED]; + } + else + { + vnet_get_config_data (&cm0->config_main, + &b0->current_config_index, + &next0, + /* # bytes of config data */ 0); + vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1); + } + + if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) + { + mpls_input_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->next_index = next0; + tr->label_host_byte_order = label0; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, mpls_input_node.index, + MPLS_ERROR_PKTS_DECAP, from_frame->n_vectors); + return from_frame->n_vectors; +} + +static uword +mpls_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return mpls_input_inline (vm, node, from_frame); +} + +static char * mpls_error_strings[] = { +#define mpls_error(n,s) s, +#include "error.def" +#undef mpls_error +}; + +VLIB_REGISTER_NODE (mpls_input_node) = { + .function = mpls_input, + .name = "mpls-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + + .runtime_data_bytes = sizeof(mpls_input_runtime_t), + + .n_errors = MPLS_N_ERROR, + .error_strings = mpls_error_strings, + + .n_next_nodes = MPLS_INPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [MPLS_INPUT_NEXT_##s] = n, + foreach_mpls_input_next +#undef _ + }, + + .format_buffer = format_mpls_unicast_header_net_byte_order, + .format_trace = format_mpls_input_trace, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (mpls_input_node, mpls_input) + +static void +mpls_setup_nodes (vlib_main_t * vm) +{ + mpls_input_runtime_t * rt; + pg_node_t * pn; + + pn = pg_get_node (mpls_input_node.index); + pn->unformat_edit = unformat_pg_mpls_header; + + rt = vlib_node_get_runtime_data (vm, mpls_input_node.index); + rt->last_label = (u32) ~0; + rt->last_inner_fib_index = 0; + rt->last_outer_fib_index = 0; + rt->mpls_main = &mpls_main; + + ethernet_register_input_type (vm, ETHERNET_TYPE_MPLS_UNICAST, + mpls_input_node.index); +} + +static clib_error_t * mpls_input_init (vlib_main_t * vm) +{ + clib_error_t * error; + + error = vlib_call_init_function (vm, mpls_init); + if (error) + clib_error_report (error); + + mpls_setup_nodes (vm); + + return (mpls_feature_init(vm)); +} + +VLIB_INIT_FUNCTION (mpls_input_init); diff --git a/vnet/vnet/mpls/packet.h b/vnet/vnet/mpls/packet.h new file mode 100644 index 00000000000..bc67445be89 --- /dev/null +++ b/vnet/vnet/mpls/packet.h @@ -0,0 +1,125 @@ +#ifndef included_vnet_mpls_packet_h +#define included_vnet_mpls_packet_h + +/* + * MPLS packet format + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A label value only, i.e. 20bits. + */ +typedef u32 mpls_label_t; + +typedef struct { + /* Label: top 20 bits [in network byte order] */ + /* Experimental: 3 bits ... */ + /* S (bottom of label stack): 1 bit */ + /* TTL: 8 bits */ + mpls_label_t label_exp_s_ttl; +} mpls_unicast_header_t; + +typedef enum mpls_eos_bit_t_ +{ + MPLS_NON_EOS = 0, + MPLS_EOS = 1, +} mpls_eos_bit_t; + +#define MPLS_EOS_BITS { \ + [MPLS_NON_EOS] = "neos", \ + [MPLS_EOS] = "eos", \ +} + +#define FOR_EACH_MPLS_EOS_BIT(_eos) \ + for (_eos = MPLS_NON_EOS; _eos <= MPLS_EOS; _eos++) + +#define MPLS_ENTRY_LABEL_OFFSET 0 +#define MPLS_ENTRY_LABEL_SHIFT 12 +#define MPLS_ENTRY_LABEL_MASK 0x000fffff +#define MPLS_ENTRY_LABEL_BITS \ + (MPLS_ENTRY_LABEL_MASK << MPLS_ENTRY_LABEL_SHIFT) + +#define MPLS_ENTRY_EXP_OFFSET 2 /* byte offset to EXP bits */ +#define MPLS_ENTRY_EXP_SHIFT 9 +#define MPLS_ENTRY_EXP_MASK 0x07 +#define MPLS_ENTRY_EXP(mpls) \ + (((mpls)>>MPLS_ENTRY_EXP_SHIFT) & MPLS_ENTRY_EXP_MASK) +#define MPLS_ENTRY_EXP_BITS \ + (MPLS_ENTRY_EXP_MASK << MPLS_ENTRY_EXP_SHIFT) + +#define MPLS_ENTRY_EOS_OFFSET 2 /* byte offset to EOS bit */ +#define MPLS_ENTRY_EOS_SHIFT 8 +#define MPLS_ENTRY_EOS_MASK 0x01 /* EOS bit in its byte */ +#define MPLS_ENTRY_EOS(mpls) \ + (((mpls) >> MPLS_ENTRY_EOS_SHIFT) & MPLS_ENTRY_EOS_MASK) +#define MPLS_ENTRY_EOS_BIT (MPLS_ENTRY_EOS_MASK << MPLS_ENTRY_EOS_SHIFT) + +#define MPLS_ENTRY_TTL_OFFSET 3 /* byte offset to ttl field */ +#define MPLS_ENTRY_TTL_SHIFT 0 +#define MPLS_ENTRY_TTL_MASK 0xff +#define MPLS_ENTRY_TTL(mpls) \ + (((mpls) >> 
MPLS_ENTRY_TTL_SHIFT) & MPLS_ENTRY_TTL_MASK) +#define MPLS_ENTRY_TTL_BITS \ + (MPLS_ENTRY_TTL_MASK << MPLS_ENTRY_TTL_SHIFT) + +static inline u32 vnet_mpls_uc_get_label (mpls_label_t label_exp_s_ttl) +{ + return (label_exp_s_ttl>>MPLS_ENTRY_LABEL_SHIFT); +} + +static inline u32 vnet_mpls_uc_get_exp (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EXP(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_s (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_EOS(label_exp_s_ttl)); +} + +static inline u32 vnet_mpls_uc_get_ttl (mpls_label_t label_exp_s_ttl) +{ + return (MPLS_ENTRY_TTL(label_exp_s_ttl)); +} + +static inline void vnet_mpls_uc_set_label (mpls_label_t *label_exp_s_ttl, + u32 value) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_LABEL_BITS)) | + ((value & MPLS_ENTRY_LABEL_MASK) << MPLS_ENTRY_LABEL_SHIFT)); +} + +static inline void vnet_mpls_uc_set_exp (mpls_label_t *label_exp_s_ttl, + u32 exp) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EXP_BITS)) | + ((exp & MPLS_ENTRY_EXP_MASK) << MPLS_ENTRY_EXP_SHIFT)); +} + +static inline void vnet_mpls_uc_set_s (mpls_label_t *label_exp_s_ttl, + u32 eos) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_EOS_BIT)) | + ((eos & MPLS_ENTRY_EOS_MASK) << MPLS_ENTRY_EOS_SHIFT)); +} + +static inline void vnet_mpls_uc_set_ttl (mpls_label_t *label_exp_s_ttl, + u32 ttl) +{ + *label_exp_s_ttl = (((*label_exp_s_ttl) & ~(MPLS_ENTRY_TTL_BITS)) | + ((ttl & MPLS_ENTRY_TTL_MASK))); +} + +#endif /* included_vnet_mpls_packet_h */ diff --git a/vnet/vnet/mpls-gre/pg.c b/vnet/vnet/mpls/pg.c index 6b6a1017c58..f04b53075d3 100644 --- a/vnet/vnet/mpls-gre/pg.c +++ b/vnet/vnet/mpls/pg.c @@ -18,7 +18,7 @@ #include <vlib/vlib.h> #include <vnet/pg/pg.h> #include <vnet/gre/gre.h> -#include <vnet/mpls-gre/mpls.h> +#include <vnet/mpls/mpls.h> typedef struct { pg_edit_t label; diff --git a/vnet/vnet/mpls-gre/policy_encap.c b/vnet/vnet/mpls/policy_encap.c index 0ea051f56ec..278e8e6d7ce 100644 --- 
a/vnet/vnet/mpls-gre/policy_encap.c +++ b/vnet/vnet/mpls/policy_encap.c @@ -17,7 +17,7 @@ #include <vlib/vlib.h> #include <vnet/pg/pg.h> -#include <vnet/mpls-gre/mpls.h> +#include <vnet/mpls/mpls.h> typedef struct { u32 next_index; diff --git a/vnet/vnet/pg/stream.c b/vnet/vnet/pg/stream.c index 9f7e9e8df05..b66fb742ab4 100644 --- a/vnet/vnet/pg/stream.c +++ b/vnet/vnet/pg/stream.c @@ -40,6 +40,8 @@ #include <vnet/vnet.h> #include <vnet/pg/pg.h> #include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/mpls/mpls.h> /* Mark stream active or inactive. */ void @@ -186,6 +188,10 @@ pg_interface_add_or_get (pg_main_t * pg, uword if_id) pi->sw_if_index = hi->sw_if_index; hash_set (pg->if_index_by_if_id, if_id, i); + + ip4_sw_interface_enable_disable (pi->hw_if_index, 1); + ip6_sw_interface_enable_disable (pi->hw_if_index, 1); + mpls_sw_interface_enable_disable (&mpls_main, pi->hw_if_index, 1); } return i; diff --git a/vnet/vnet/rewrite.c b/vnet/vnet/rewrite.c index 0dcec408424..42d0688a5cc 100644 --- a/vnet/vnet/rewrite.c +++ b/vnet/vnet/rewrite.c @@ -70,27 +70,25 @@ format_vnet_rewrite (u8 * s, va_list * args) vlib_main_t *vm = va_arg (*args, vlib_main_t *); vnet_rewrite_header_t *rw = va_arg (*args, vnet_rewrite_header_t *); u32 max_data_bytes = va_arg (*args, u32); + CLIB_UNUSED (uword indent) = va_arg (*args, u32); vnet_main_t *vnm = vnet_get_main (); vlib_node_t *next; - uword indent; next = vlib_get_next_node (vm, rw->node_index, rw->next_index); - indent = format_get_indent (s); - if (rw->sw_if_index != ~0) { vnet_sw_interface_t *si; si = vnet_get_sw_interface (vnm, rw->sw_if_index); - s = format (s, "%U", format_vnet_sw_interface_name, vnm, si); + s = format (s, "%U: ", format_vnet_sw_interface_name, vnm, si); } else - s = format (s, "%v", next->name); + s = format (s, "%v: ", next->name); /* Format rewrite string. */ if (rw->data_bytes > 0) - s = format (s, "\n%U%U", - format_white_space, indent, + + s = format (s, "%U", next->format_buffer ? 
next->format_buffer : format_hex_bytes, rw->data + max_data_bytes - rw->data_bytes, rw->data_bytes); diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c index 9c2d591102d..086cbe965b6 100644 --- a/vnet/vnet/sr/sr.c +++ b/vnet/vnet/sr/sr.c @@ -22,6 +22,8 @@ */ #include <vnet/vnet.h> #include <vnet/sr/sr.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/dpo.h> #include <openssl/hmac.h> @@ -29,6 +31,11 @@ ip6_sr_main_t sr_main; static vlib_node_registration_t sr_local_node; /** + * @brief Dynamically added SR DPO type + */ +static dpo_type_t sr_dpo_type; + +/** * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines * * @param sm ip6_sr_main_t * @@ -319,16 +326,12 @@ format_sr_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *); - ip6_main_t *im = &ip6_main; ip6_sr_main_t *sm = &sr_main; ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); ip6_fib_t *rx_fib, *tx_fib; - rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); - - tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); + rx_fib = ip6_fib_get (tun->rx_fib_index); + tx_fib = ip6_fib_get (tun->tx_fib_index); s = format (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n" @@ -733,38 +736,18 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) u32 dst_address_length, u32 rx_table_id) { - ip6_add_del_route_args_t a; - ip6_address_t dst_address; - ip6_fib_t *fib; - ip6_main_t *im6 = &ip6_main; - BVT (clib_bihash_kv) kv, value; - - fib = find_ip6_fib_by_table_index_or_id (im6, rx_table_id, - IP6_ROUTE_FLAG_TABLE_ID); - memset (&a, 0, sizeof (a)); - a.flags |= IP4_ROUTE_FLAG_DEL; - a.dst_address_length = dst_address_length; - - dst_address = *dst_address_arg; - - ip6_address_mask (&dst_address, 
&im6->fib_masks[dst_address_length]); - - kv.key[0] = dst_address.as_u64[0]; - kv.key[1] = dst_address.as_u64[1]; - kv.key[2] = ((u64) ((fib - im6->fibs)) << 32) | dst_address_length; - - if (BV (clib_bihash_search) (&im6->ip6_lookup_table, &kv, &value) < 0) - { - clib_warning ("%U/%d not in FIB", - format_ip6_address, &a.dst_address, a.dst_address_length); - return -10; - } + fib_prefix_t pfx = { + .fp_len = dst_address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = *dst_address_arg, + } + }; - a.adj_index = value.value; - a.dst_address = dst_address; + fib_table_entry_delete (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + rx_table_id), + &pfx, FIB_SOURCE_SR); - ip6_add_del_route (im6, &a); - ip6_maybe_remap_adjacencies (im6, rx_table_id, IP6_ROUTE_FLAG_TABLE_ID); return 0; } @@ -837,23 +820,20 @@ int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) { ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; ip6_sr_tunnel_key_t key; ip6_sr_tunnel_t *t; uword *p, *n; ip6_sr_header_t *h = 0; u32 header_length; ip6_address_t *addrp, *this_address; - ip_adjacency_t adj, *ap, *add_adj = 0; - u32 adj_index; ip6_sr_main_t *sm = &sr_main; u8 *key_copy; u32 rx_fib_index, tx_fib_index; - ip6_add_del_route_args_t aa; u32 hmac_key_index_u32; u8 hmac_key_index = 0; ip6_sr_policy_t *pt; int i; + dpo_id_t dpo = DPO_NULL; /* Make sure that the rx FIB exists */ p = hash_get (im->fib_index_by_table_id, a->rx_table_id); @@ -1057,15 +1037,6 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t)); hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels); - memset (&adj, 0, sizeof (adj)); - - /* Create an adjacency and add to v6 fib */ - adj.lookup_next_index = sm->ip6_lookup_sr_next_index; - adj.explicit_fib_index = ~0; - - ap = ip_add_adjacency (lm, &adj, 1 /* one adj */ , - &adj_index); - /* * Stick the tunnel index into the rewrite header. 
* @@ -1077,22 +1048,20 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain * at some point... */ - ap->rewrite_header.sw_if_index = t - sm->tunnels; - - vec_add1 (add_adj, ap[0]); - - clib_memcpy (aa.dst_address.as_u8, a->dst_address, - sizeof (aa.dst_address.as_u8)); - aa.dst_address_length = a->dst_mask_width; + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - aa.flags = (a->is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD); - aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX; - aa.table_index_or_table_id = rx_fib_index; - aa.add_adj = add_adj; - aa.adj_index = adj_index; - aa.n_add_adj = 1; - ip6_add_del_route (im, &aa); - vec_free (add_adj); + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = a->dst_mask_width, + .fp_addr = { + .ip6 = *a->dst_address, + } + }; + fib_table_entry_special_dpo_add (rx_fib_index, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); if (a->policy_name) { @@ -1126,6 +1095,48 @@ ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) } /** + * @brief no-op lock function. + * The lifetime of the SR entry is managed by the control plane + */ +static void +sr_dpo_lock (dpo_id_t * dpo) +{ +} + +/** + * @brief no-op unlock function. + * The lifetime of the SR entry is managed by the control plane + */ +static void +sr_dpo_unlock (dpo_id_t * dpo) +{ +} + +u8 * +format_sr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: tunnel:[%d]", index)); +} + +const static dpo_vft_t sr_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_dpo, +}; + +const static char *const sr_ip6_nodes[] = { + "sr-rewrite", + NULL, +}; + +const static char *const *const sr_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_ip6_nodes, +}; + +/** * @brief CLI parser for Add or Delete a Segment Routing tunnel. 
* * @param vm vlib_main_t * @@ -1315,16 +1326,12 @@ VLIB_CLI_COMMAND (sr_tunnel_command, static) = { void ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t) { - ip6_main_t *im = &ip6_main; ip6_sr_main_t *sm = &sr_main; ip6_fib_t *rx_fib, *tx_fib; ip6_sr_policy_t *pt; - rx_fib = find_ip6_fib_by_table_index_or_id (im, t->rx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); - - tx_fib = find_ip6_fib_by_table_index_or_id (im, t->tx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); + rx_fib = ip6_fib_get (t->rx_fib_index); + tx_fib = ip6_fib_get (t->tx_fib_index); if (t->name) vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name); @@ -1678,13 +1685,8 @@ int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) { uword *p; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; ip6_sr_tunnel_t *t; - ip_adjacency_t adj, *ap, *add_adj = 0; - u32 adj_index; ip6_sr_main_t *sm = &sr_main; - ip6_add_del_route_args_t aa; ip6_sr_policy_t *pt; if (a->is_del) @@ -1714,16 +1716,6 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]); - /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */ - memset (&adj, 0, sizeof (adj)); - - /* Create an adjacency and add to v6 fib */ - adj.lookup_next_index = sm->ip6_lookup_sr_replicate_index; - adj.explicit_fib_index = ~0; - - ap = ip_add_adjacency (lm, &adj, 1 /* one adj */ , - &adj_index); - /* * Stick the tunnel index into the rewrite header. * @@ -1735,22 +1727,23 @@ ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain * at some point... 
*/ - ap->rewrite_header.sw_if_index = t - sm->tunnels; - - vec_add1 (add_adj, ap[0]); + dpo_id_t dpo = DPO_NULL; - memcpy (aa.dst_address.as_u8, a->multicast_address, - sizeof (aa.dst_address.as_u8)); - aa.dst_address_length = 128; + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - aa.flags = (a->is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD); - aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX; - aa.table_index_or_table_id = t->rx_fib_index; - aa.add_adj = add_adj; - aa.adj_index = adj_index; - aa.n_add_adj = 1; - ip6_add_del_route (im, &aa); - vec_free (add_adj); + /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *a->multicast_address, + } + }; + fib_table_entry_special_dpo_add (t->rx_fib_index, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); u8 *mcast_copy = 0; mcast_copy = vec_new (ip6_address_t, 1); @@ -2224,10 +2217,6 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); ASSERT (ip6_rewrite_node); - /* Add a disposition to ip6_lookup for the sr rewrite node */ - sm->ip6_lookup_sr_next_index = - vlib_node_add_next (vm, ip6_lookup_node->index, sr_rewrite_node.index); - #if DPDK > 0 /* Cannot run replicate without DPDK */ /* Add a disposition to sr_replicate for the sr multicast replicate node */ sm->ip6_lookup_sr_replicate_index = @@ -2244,6 +2233,8 @@ VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) sm->md = (void *) EVP_get_digestbyname ("sha1"); sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); + sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes); + return error; } @@ -2884,41 +2875,48 @@ static clib_error_t * set_ip6_sr_rewrite_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - ip6_address_t a; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = 
&im->lookup_main; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + }; u32 fib_index = 0; u32 fib_id = 0; u32 adj_index; - uword *p; ip_adjacency_t *adj; vnet_hw_interface_t *hi; u32 sw_if_index; ip6_sr_main_t *sm = &sr_main; vnet_main_t *vnm = vnet_get_main (); + fib_node_index_t fei; - if (!unformat (input, "%U", unformat_ip6_address, &a)) + if (!unformat (input, "%U", unformat_ip6_address, &pfx.fp_addr.ip6)) return clib_error_return (0, "ip6 address missing in '%U'", format_unformat_error, input); if (unformat (input, "rx-table-id %d", &fib_id)) { - p = hash_get (im->fib_index_by_table_id, fib_id); - if (p == 0) - return clib_error_return (0, "fib-id %d not found"); - fib_index = p[0]; + fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_id); + if (fib_index == ~0) + return clib_error_return (0, "fib-id %d not found", fib_id); } - adj_index = ip6_fib_lookup_with_table (im, fib_index, &a); + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + return clib_error_return (0, "no match for %U", + format_ip6_address, &pfx.fp_addr.ip6); + + adj_index = fib_entry_get_adj_for_source (fei, FIB_SOURCE_SR); - if (adj_index == lm->miss_adj_index) - return clib_error_return (0, "no match for %U", format_ip6_address, &a); + if (ADJ_INDEX_INVALID == adj_index) + return clib_error_return (0, "%U not SR sourced", + format_ip6_address, &pfx.fp_addr.ip6); - adj = ip_get_adjacency (lm, adj_index); + adj = adj_get (adj_index); if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) return clib_error_return (0, "%U unresolved (not a rewrite adj)", - format_ip6_address, &a); + format_ip6_address, &pfx.fp_addr.ip6); adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index; diff --git a/vnet/vnet/sr/sr.h b/vnet/vnet/sr/sr.h index bd8fa8ebdc0..610b36996f3 100644 --- a/vnet/vnet/sr/sr.h +++ b/vnet/vnet/sr/sr.h @@ -199,9 +199,6 @@ typedef struct /** multicast address to policy mapping */ uword 
*policy_index_by_multicast_address; - /** ip6-lookup next index for imposition FIB entries */ - u32 ip6_lookup_sr_next_index; - /** hmac key id by shared secret */ uword *hmac_key_by_shared_secret; diff --git a/vnet/vnet/sr/sr_replicate.c b/vnet/vnet/sr/sr_replicate.c index 37c39442efd..5fd9ef04b0f 100644 --- a/vnet/vnet/sr/sr_replicate.c +++ b/vnet/vnet/sr/sr_replicate.c @@ -32,6 +32,7 @@ #include <vnet/devices/dpdk/dpdk.h> #include <vnet/dpdk_replication.h> #include <vnet/ip/ip.h> +#include <vnet/fib/ip6_fib.h> #include <vppinfra/hash.h> #include <vppinfra/error.h> @@ -76,16 +77,12 @@ format_sr_replicate_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *); - ip6_main_t *im = &ip6_main; ip6_sr_main_t *sm = &sr_main; ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); ip6_fib_t *rx_fib, *tx_fib; - rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); - - tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index, - IP6_ROUTE_FLAG_FIB_INDEX); + rx_fib = ip6_fib_get (tun->rx_fib_index); + tx_fib = ip6_fib_get (tun->tx_fib_index); s = format (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n" diff --git a/vnet/vnet/vxlan-gpe/vxlan_gpe.c b/vnet/vnet/vxlan-gpe/vxlan_gpe.c index a2b8978241a..fae481c36bb 100644 --- a/vnet/vnet/vxlan-gpe/vxlan_gpe.c +++ b/vnet/vnet/vxlan-gpe/vxlan_gpe.c @@ -18,6 +18,7 @@ * */ #include <vnet/vxlan-gpe/vxlan_gpe.h> +#include <vnet/fib/fib.h> #include <vnet/ip/format.h> vxlan_gpe_main_t vxlan_gpe_main; @@ -419,56 +420,6 @@ int vnet_vxlan_gpe_add_del_tunnel return 0; } -/** - * @brief Find the IPv4 FIB index from the FIB ID - * - * @param fib_id - * - * @return fib_index - * - */ -static u32 fib4_index_from_fib_id (u32 fib_id) -{ - ip4_main_t * im = &ip4_main; - uword * p; - - p = hash_get 
(im->fib_index_by_table_id, fib_id); - if (!p) - return ~0; - - return p[0]; -} - -/** - * @brief Find the IPv4 FIB index from the FIB ID - * - * @param fib_id - * - * @return fib_index - * - */ -static u32 fib6_index_from_fib_id (u32 fib_id) -{ - ip6_main_t * im = &ip6_main; - uword * p; - - p = hash_get (im->fib_index_by_table_id, fib_id); - if (!p) - return ~0; - - return p[0]; -} - -/** - * @brief CLI function for Add/Del of IPv4/IPv6 VXLAN GPE tunnel - * - * @param *vm - * @param *input - * @param *cmd - * - * @return error - * - */ static clib_error_t * vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -525,20 +476,19 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm, else if (unformat (line_input, "encap-vrf-id %d", &tmp)) { if (ipv6_set) - encap_fib_index = fib6_index_from_fib_id (tmp); + encap_fib_index = ip6_fib_index_from_table_id (tmp); else - encap_fib_index = fib4_index_from_fib_id (tmp); + encap_fib_index = ip4_fib_index_from_table_id (tmp); if (encap_fib_index == ~0) return clib_error_return (0, "nonexistent encap fib id %d", tmp); } else if (unformat (line_input, "decap-vrf-id %d", &tmp)) { - if (ipv6_set) - decap_fib_index = fib6_index_from_fib_id (tmp); + decap_fib_index = ip6_fib_index_from_table_id (tmp); else - decap_fib_index = fib4_index_from_fib_id (tmp); + decap_fib_index = ip4_fib_index_from_table_id (tmp); if (decap_fib_index == ~0) return clib_error_return (0, "nonexistent decap fib id %d", tmp); diff --git a/vnet/vnet/vxlan/vxlan.c b/vnet/vnet/vxlan/vxlan.c index 32ad7533e58..da359a8d1bb 100644 --- a/vnet/vnet/vxlan/vxlan.c +++ b/vnet/vnet/vxlan/vxlan.c @@ -348,11 +348,13 @@ int vnet_vxlan_add_del_tunnel vnet_sw_interface_set_flags (vnm, sw_if_index, VNET_SW_INTERFACE_FLAG_ADMIN_UP); if (!a->is_ip6) { - vec_validate (im4->fib_index_by_sw_if_index, sw_if_index); - im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index; + vec_validate (im4->fib_index_by_sw_if_index, sw_if_index); + 
im4->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index; + ip4_sw_interface_enable_disable(sw_if_index, 1); } else { vec_validate (im6->fib_index_by_sw_if_index, sw_if_index); im6->fib_index_by_sw_if_index[sw_if_index] = t->encap_fib_index; + ip6_sw_interface_enable_disable(sw_if_index, 1); } } else @@ -375,13 +377,16 @@ int vnet_vxlan_add_del_tunnel = L2OUTPUT_NEXT_DEL_TUNNEL; if (!a->is_ip6) - hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64); + { + hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64); + ip4_sw_interface_enable_disable(sw_if_index, 1); + } else { hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6); clib_mem_free (t->key6); + ip6_sw_interface_enable_disable(sw_if_index, 1); } - vec_free (t->rewrite); pool_put (vxm->tunnels, t); } |