diff options
Diffstat (limited to 'src/vnet/fib')
48 files changed, 25560 insertions, 0 deletions
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c new file mode 100644 index 00000000000..413f93e893c --- /dev/null +++ b/src/vnet/fib/fib.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_path.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_path_list.h> +/* fib_module_init: init function for the FIB module, registered below via VLIB_INIT_FUNCTION. Calls the dpo and adj init functions, then the FIB entry, entry-src, path, path-list and walk module initialisers in that order. Returns the error from dpo_module_init or adj_module_init if either reports one, otherwise NULL. */ +static clib_error_t * +fib_module_init (vlib_main_t * vm) +{ + clib_error_t * error; +/* Call the dpo and adj init functions first; stop and propagate the error if either fails (presumably the FIB sub-modules depend on them - confirm). */ + if ((error = vlib_call_init_function (vm, dpo_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, adj_module_init))) + return (error); + + fib_entry_module_init(); + fib_entry_src_module_init(); + fib_path_module_init(); + fib_path_list_module_init(); + fib_walk_module_init(); + + return (NULL); +} + +VLIB_INIT_FUNCTION (fib_module_init); diff --git a/src/vnet/fib/fib.h b/src/vnet/fib/fib.h new file mode 100644 index 00000000000..7cf1d136935 --- /dev/null +++ b/src/vnet/fib/fib.h @@ -0,0 +1,652 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * \brief + * An IP v4/6 independent FIB. + * + * The main functions provided by the FIB are as follows; + * + * - source priorities + * + * A route can be added to the FIB by more than one entity or source. Sources + * include, but are not limited to, API, CLI, LISP, MAP, etc (for the full list + * see fib_entry.h). Each source provides the forwarding information (FI) that + * it has determined as required for that route. Since each source determines the + * FI using different best path and loop prevention algorithms, it is not + * correct for the FI of multiple sources to be combined. Instead the FIB must + * choose to use the FI from only one source. This choice is based on a static + * priority assignment. For example; + * If a prefix is added as a result of interface configuration: + * set interface address 192.168.1.1/24 GigE0 + * and then it is also added from the CLI + * ip route 192.168.1.1/32 via 2.2.2.2/32 + * then the 'interface' source will prevail, and the route will remain as + * 'local'. + * The requirement of the FIB is to always install the FI from the winning + * source and thus to maintain the FI added by losing sources so it can be + * installed should the winning source be withdrawn. + * + * - adj-fib maintenance + * + * When ARP or ND discover a neighbour on a link an adjacency forms for the + * address of that neighbour. It is also required to insert, in the + * appropriate FIB table, corresponding to the VRF for the link, an entry for + * that neighbour. This entry is often referred to as an adj-fib.
Adj-fibs + * have a dedicated source; 'ADJ'. + * The priority of the ADJ source is lower than most. This is so the following + * config; + * set interface address 192.168.1.1/32 GigE0 + * ip arp 192.168.1.2 GigE0 dead.dead.dead + * ip route add 192.168.1.2 via 10.10.10.10 GigE1 + * will forward traffic for 192.168.1.2 via GigE1. That is the route added + * by the control plane is favoured over the adjacency discovered by ARP. + * The control plane, with its associated authentication, is considered the + * authoritative source. + * To counter the nefarious addition of adj-fib, through the nefarious injection + * of adjacencies, the FIB is also required to ensure that only adj-fibs whose + * less specific covering prefix is connected are installed in forwarding. This + * requires the use of 'cover tracking', where a route maintains a dependency + * relationship with the route that is its less specific cover. When this cover + * changes (i.e. there is a new covering route) or the forwarding information + * of the cover changes, then the covered route is notified. + * + * Overlapping sub-nets are not supported, so no adj-fib has multiple paths. + * The control plane is expected to remove a prefix configured for an interface + * before the interface changes VRF. + * So while the following config is accepted: + * set interface address 192.168.1.1/32 GigE0 + * ip arp 192.168.1.2 GigE0 dead.dead.dead + * set interface ip table GigE0 2 + * it does not result in the desired behaviour. + * + * - attached export. + * + * Further to adj-fib maintenance above consider the following config: + * set interface address 192.168.1.1/24 GigE0 + * ip route add table 2 192.168.1.0/24 GigE0 + * Traffic destined for 192.168.1.2 in table 2 will generate an ARP request + * on GigE0. However, since GigE0 is in table 0, all adj-fibs will be added in + * FIB 0. Hence all hosts in the sub-net are unreachable from table 2. To resolve + * this, all adj-fib and local prefixes are exported (i.e. 
copied) from the + * 'export' table 0, to the 'import' table 2. There can be many import tables + * for a single export table. + * + * - recursive route resolution + * + * A recursive route is of the form: + * 1.1.1.1/32 via 10.10.10.10 + * i.e. a route for which no egress interface is provided. In order to forward + * traffic to 1.1.1.1/32 the FIB must therefore first determine how to forward + * traffic to 10.10.10.10/32. This is recursive resolution. + * Recursive resolution, just like normal resolution, proceeds via a longest + * prefix match for the 'via-address' 10.10.10.10. Note it is only possible + * to add routes via an address (i.e. a /32 or /128) not via a shorter mask + * prefix. There is no use case for the latter. + * Since recursive resolution proceeds via a longest prefix match, the entry + * in the FIB that will resolve the recursive route, termed the via-entry, may + * change as other routes are added to the FIB. Consider the recursive + * route shown above, and this non-recursive route: + * 10.10.10.0/24 via 192.168.16.1 GigE0 + * The entry for 10.10.10.0/24 is thus the resolving via-entry. If this entry is + * modified, to say; + * 10.10.10.0/24 via 192.16.1.3 GigE0 + * Then packet for 1.1.1.1/32 must also be sent to the new next-hop. + * Now consider the addition of; + * 10.10.10.0/28 via 192.168.16.2 GigE0 + * The more specific /28 is a better longest prefix match and thus becomes the + * via-entry. Removal of the /28 means the resolution will revert to the /24. + * The tracking to the changes in recursive resolution is the requirement of + * the FIB. When the forwarding information of the via-entry changes a back-walk + * is used to update dependent recursive routes. When new routes are added to + * the table the cover tracking feature provides the necessary notifications to + * the via-entry routes. + * The adjacency constructed for 1.1.1.1/32 will be a recursive adjacency + * whose next adjacency will be contributed from the via-entry. 
Maintaining + * the validity of this recursive adjacency is a requirement of the FIB. + * + * - recursive loop avoidance + * + * Consider this set of routes: + * 1.1.1.1/32 via 2.2.2.2 + * 2.2.2.2/32 via 3.3.3.3 + * 3.3.3.3/32 via 1.1.1.1 + * this is termed a recursion loop - all of the routes in the loop are + * unresolved in so far as they do not have a resolving adjacency, but each + * is resolved because the via-entry is known. It is important here to note + * the distinction between the control-plane objects and the data-plane objects + * (more details in the implementation section). The control plane objects must + * allow the loop to form (i.e. the graph becomes cyclic), however, the + * data-plane absolutely must not allow the loop to form, otherwise the packet + * would loop indefinitely and never egress the device - meltdown would follow. + * The control plane must allow the loop to form, because when the loop breaks, + * all members of the loop need to be updated. Forming the loop allows the + * dependencies to be correctly setup to allow this to happen. + * There is no limit to the depth of recursion supported by VPP so: + * 9.9.9.100/32 via 9.9.9.99 + * 9.9.9.99/32 via 9.9.9.98 + * 9.9.9.98/32 via 9.9.9.97 + * ... turtles, turtles, turtles ... + * 9.9.9.1/32 via 10.10.10.10 Gig0 + * is supported to as many layers of turtles is desired, however, when + * back-walking a graph (in this case from 9.9.9.1/32 up toward 9.9.9.100/32) + * a FIB needs to differentiate the case where the recursion is deep versus + * the case where the recursion is looped. A simple method, employed by VPP FIB, + * is to limit the number of steps. VPP FIB limit is 16. Typical BGP scenarios + * in the wild do not exceed 3 (BGP Inter-AS option C). + * + * - Fast Convergence + * + * After a network topology change, the 'convergence' time, is the time taken + * for the router to complete a transition to forward traffic using the new + * topology. 
The convergence time is therefore a summation of the time to; + * - detect the failure. + * - calculate the new 'best path' information + * - download the new best paths to the data-plane. + * - install those best paths in data-plane forwarding. + * The last two points are of relevance to VPP architecture. The download API is + * binary and batch, details are not discussed here. There is no HW component to + * programme, installation time is bounded by the memory allocation and table + * lookup and insert access times. + * + * 'Fast' convergence refers to a set of technologies that a FIB can employ to + * completely or partially restore forwarding whilst the convergence actions + * listed above are ongoing. Fast convergence technologies are further + * sub-divided into Prefix Independent Convergence (PIC) and Loop Free + * Alternate path Fast re-route (LFA-FRR or sometimes called IP-FRR) which + * affect recursive and non-recursive routes respectively. + * + * LFA-FRR + * + * Consider the network topology below: + * + * C + * / \ + * X -- A --- B - Y + * | | + * D F + * \ / + * E + * + * All links are equal cost, traffic is passing from X to Y. The best path is + * X-A-B-Y. There are two alternative paths, one via C and one via E. An + * alternate path is considered to be loop free if no other router on that path + * would forward the traffic back to the sender. Consider router C, its best + * path to Y is via B, so if A were to send traffic destined to Y to C, then C + * would forward that traffic to B - this is a loop-free alternate path. In + * contrast consider router D. D's shortest path to Y is via A, so if A were to + * send traffic destined to Y via D, then D would send it back to A; this is + * not a loop-free alternate path. There are several points of note; + * - we are considering the pre-failure routing topology + * - any equal-cost multi-path between A and B is also a LFA path.
+ * - in order for A to calculate LFA paths it must be aware of the best-path + * to Y from the perspective of D. These calculations are thus limited to + * routing protocols that have a full view of the network topology, i.e. + * link-state DB protocols like OSPF or an SDN controller. LFA protected + * prefixes are thus non-recursive. + * + * LFA is specified as a 1 to 1 redundancy; a primary path has only one LFA + * (a.k.a. backup) path. To my knowledge this limitation is one of complexity + * in the calculation of and capacity planning using a 1-n redundancy. + * + * In the event that the link A-B fails, the alternate path via C can be used. + * In order to provide 'fast' failover in the event of a failure, the control + * plane will download both the primary and the backup path to the FIB. It is + * then a requirement of the FIB to perform the failover (a.k.a cutover) from + * the primary to the backup path as quickly as possible, and particularly + * without any other control-plane intervention. The expectation is cutover is + * less than 50 milli-seconds - a value allegedly from the VOIP QoS. Note that + * cutover time still includes the fault detection time, which in a virtualised + * environment could be the dominant factor. Failure detection can be either a + * link down, which will affect multiple paths on a multi-access interface, or + * via a specific path heartbeat (i.e. BFD). + * At this time VPP does not support LFA, that is it does not support the + * installation of a primary and backup path[s] for a route. However, it does + * support ECMP, and VPP FIB is designed to quickly remove failed paths from + * the ECMP set, however, it does not insert shared objects specific to the + * protected resource into the forwarding object graph, since this would incur + * a forwarding/performance cost. Failover time is thus route number dependent. + * Details are provided in the implementation section below.
+ * + * PIC + * + * PIC refers to the concept that the convergence time should be independent of + * the number of prefixes/routes that are affected by the failure. PIC is + * therefore most appropriate when considering networks with large number of + * prefixes, i.e. BGP networks and thus recursive prefixes. There are several + * flavours of PIC covering different locations of protection and failure + * scenarios. An outline is given below, see the literature for more details: + * + * Y/16 - CE1 -- PE1---\ + * | \ P1---\ + * | \ PE3 -- CE3 - X/16 + * | - P2---/ + * Y/16 - CE2 -- PE2---/ + * + * CE = customer edge, PE = provider edge. external-BGP runs between customer + * and provider, internal-BGP runs between provider and provider. + * + * 1) iBGP PIC-core: consider traffic from CE1 to X/16 via CE3. On PE1 there + * are routes; + * X/16 (and hundreds of thousands of others like it) + * via PE3 + * and + * PE3/32 (its loopback address) + * via 10.0.0.1 Link0 (this is P1) + * via 10.1.1.1 Link1 (this is P2) + * the failure is the loss of link0 or link1 + * As in all PIC scenarios, in order to provide prefix independent convergence + * it must be that the route for X/16 (and all other routes via PE3) do not + * need to be updated in the FIB. The FIB therefore needs to update a single + * object that is shared by all routes - once this shared object is updated, + * then all routes using it will be instantly updated to use the new forwarding + * information. In this case the shared object is the resolving route via PE3. + * Once the route via PE3 is updated via IGP (OSPF) convergence, then all + * recursive routes that resolve through it are also updated. VPP FIB + * implements this scenario via a recursive-adjacency. The X/16 and its sibling + * routes share a recursive-adjacency that links to/points at/stacks on the + * normal adjacency contributed by the route for PE3.
Once this shared + * recursive adj is re-linked then all routes are switched to using the new + * forwarding information. This is shown below; + * + * pre-failure; + * X/16 --> R-ADJ-1 --> ADJ-1-PE3 (multi-path via P1 and P2) + * + * post-failure: + * X/16 --> R-ADJ-1 --> ADJ-2-PE3 (single path via P1) + * + * note that R-ADJ-1 (the recursive adj) remains in the forwarding graph, + * therefore X/16 (and all its siblings) is not updated. + * X/16 and its siblings share the recursive adj since they share the same + * path-list. It is the path-list object that contributes the recursive-adj + * (see next section for more details) + * + * + * 2) iBGP PIC-edge; Traffic from CE3 to Y/16. On PE3 there are routes; + * Y/16 (and hundreds of thousands of others like it) + * via PE1 + * via PE2 + * and + * PE1/32 (PE1's loopback address) + * via 10.0.2.2 Link0 (this is P1) + * PE2/32 (PE2's loopback address) + * via 10.0.3.3 Link1 (this is P2) + * + * the failure is the loss of reachability to PE2. this could be either the + * loss of the link P2-PE2 or the loss of the node PE2. This is detected either + * by the withdrawal of the PE2's loopback route or by some form of failure + * detection (i.e. BFD). + * VPP FIB again provides PIC via the use of the shared recursive-adj. Y/16 and + * its siblings will again share a path-list for the list {PE1,PE2}, this + * path-list will contribute a multi-path-recursive-adj, i.e. a multi-path-adj + * with each choice therein being another adj; + * + * Y/16 -> RM-ADJ --> ADJ1 (for PE1) + * --> ADJ2 (for PE2) + * + * when the route for PE2 is withdrawn then the multi-path-recursive-adjacency + * is updated to be; + * + * Y/16 --> RM-ADJ --> ADJ1 (for PE1) + * --> ADJ1 (for PE1) + * + * that is both choices in the ECMP set are the same and thus all traffic is + * forwarded to PE1. Eventually the control plane will download a route update + * for Y/16 to be via PE1 only.
At that time the situation will be: + * + * Y/16 -> R-ADJ --> ADJ1 (for PE1) + * + * In the scenario above we assumed that PE1 and PE2 are ECMP for Y/16. eBGP + * PIC core is also specified for the case where one PE is primary and the other + * backup - VPP FIB does not support that case at this time. + * + * 3) eBGP PIC Edge; Traffic from CE3 to Y/16. On PE1 there are routes; + * Y/16 (and hundreds of thousands of others like it) + * via CE1 (primary) + * via PE2 (backup) + * and + * CE1 (this is an adj-fib) + * via 11.0.0.1 Link0 (this is CE1) << this is an adj-fib + * PE2 (PE2's loopback address) + * via 10.0.5.5 Link1 (this is link PE1-PE2) + * the failure is the loss of link0 to CE1. The failure can be detected by FIB + * either as a link down event or by the control plane withdrawing the connected + * prefix on the link0 (say 10.0.5.4/30). The latter works because the resolving + * entry is an adj-fib, so removing the connected will withdraw the adj-fib, and + * hence the recursive path becomes unresolved. The former is faster, + * particularly in the case of Inter-AS option A where there are many VLAN + * sub-interfaces on the PE-CE link, one for each VRF, and so the control plane + * must remove the connected prefix for each sub-interface to trigger PIC in + * each VRF. Note though that total PIC cutover time will depend on VRF scale + * with either trigger. + * Primary and backup paths in this eBGP PIC-edge scenario are calculated by + * BGP. Each peer is configured to always advertise its best external path to + * its iBGP peers. Backup paths therefore send traffic from the PE back into the + * core to an alternate PE. A PE may have multiple external paths, i.e.
multiple + * directly connected CEs, it may also have multiple backup PEs, however there + * is no correlation between the two, so unlike LFA-FRR, the redundancy model is + * N-M; N primary paths are backed-up by M backup paths - only when all primary + * paths fail, then the cutover is performed onto the M backup paths. Note that + * PE2 must be suitably configured to forward traffic on its external path that + * was received from PE1. VPP FIB does not support external-internal-BGP (eiBGP) + * load-balancing. + * + * As with LFA-FRR the use of primary and backup paths is not currently + * supported, however, the use of a recursive-multi-path-adj, and a suitably + * constrained hashing algorithm to choose from the primary or backup path sets, + * would again provide the necessary shared object and hence the prefix scale + * independent cutover. + * + * Astute readers will recognise that both of the eBGP PIC scenarios refer only + * to a BGP free core. + * + * Fast convergence implementation options come in two flavours: + * 1) Insert switches into the data-path. The switch represents the protected + * resource. If the switch is 'on' the primary path is taken, otherwise + * the backup path is taken. Testing the switch in the data-path comes with + * an associated performance cost. A given packet may encounter more than + * one protected resource as it is forwarded. This approach minimises + * cutover times as packets will be forwarded on the backup path as soon + * as the protected resource is detected to be down and the single switch + * is tripped. However, it comes at a performance cost, which increases + * with each shared resource a packet encounters in the data-path. + * This approach is thus best suited to LFA-FRR where the protected routes + * are non-recursive (i.e. encounter few shared resources) and the + * expectation on cutover times is more stringent (<50msecs). + * 2) Update shared objects. 
Identify objects in the data-path, that are + * required to be present whether or not fast convergence is required (i.e. + * adjacencies) that can be shared by multiple routes. Create a dependency + * between these objects at the protected resource. When the protected + * resource fails, each of the shared objects is updated in a way that all + * users of it see a consistent change. This approach incurs no performance + * penalty as the data-path structure is unchanged, however, the cutover + * times are longer as more work is required when the resource fails. This + * scheme is thus more appropriate to recursive prefixes (where the packet + * will encounter multiple protected resources) and to fast-convergence + * technologies where the cutover times are less stringent (i.e. PIC). + * + * Implementation: + * --------------- + * + * Due to the requirements outlined above, not all routes known to FIB + * (e.g. adj-fibs) are installed in forwarding. However, should circumstances + * change, those routes will need to be added. This adds the requirement that + * a FIB maintains two tables per-VRF, per-AF (where a 'table' is indexed by + * prefix); the forwarding and non-forwarding tables. + * + * For DP speed in VPP we want the lookup in the forwarding table to directly + * result in the ADJ. So the two tables; one contains all the routes (a + * lookup therein yields a fib_entry_t), the other contains only the forwarding + * routes (a lookup therein yields an ip_adjacency_t). The latter is used by the + * DP. + * This trades memory for forwarding performance. A good trade-off in VPP's + * expected operating environments. + * + * Note these tables are keyed only by the prefix (and since there are two + * per-VRF, implicitly by the VRF too). The key for an adjacency is the + * tuple:{next-hop, address (and its AF), interface, link/ether-type}.
+ * consider this curious, but allowed, config; + * + * set int ip addr 10.0.0.1/24 Gig0 + * set ip arp Gig0 10.0.0.2 dead.dead.dead + * # a host in that sub-net is routed via a better next hop (say it avoids a + * # big L2 domain) + * ip route add 10.0.0.2 Gig1 192.168.1.1 + * # this recursive should go via Gig1 + * ip route add 1.1.1.1/32 via 10.0.0.2 + * # this non-recursive should go via Gig0 + * ip route add 2.2.2.2/32 via Gig0 10.0.0.2 + * + * for the last route, the lookup for the path (via {Gig0, 10.0.0.2}) in the + * prefix table would not yield the correct result. To fix this we need a + * separate table for the adjacencies. + * + * - FIB data structures; + * + * fib_entry_t: + * - a representation of a route. + * - has a prefix. + * - it maintains an array of path-lists that have been contributed by the + * different sources + * - install an adjacency in the forwarding table contributed by the best + * source's path-list. + * + * fib_path_list_t: + * - a list of paths + * - path-lists may be shared between FIB entries. The path-lists are thus + * kept in a DB. The key is the combined description of the paths. We share + * path-lists when it will aid convergence to do so. Adding path-lists to + * this DB that are never shared, or are not shared by prefixes that are + * not subject to PIC, will increase the size of the DB unnecessarily and + * may lead to increased search times due to hash collisions. + * - the path-list contributes the appropriate adj for the entry in the + * forwarding table. The adj can be 'normal', multi-path or recursive, + * depending on the number of paths and their types. + * - since path-lists are shared there is only one instance of the multi-path + * adj that they [may] create. As such multi-path adjacencies do not need a + * separate DB. + * The path-list with recursive paths and the recursive adjacency that it + * contributes forms the backbone of the fast convergence architecture (as + * described previously). 
+ * + * fib_path_t: + * - a description of how to forward the traffic (i.e. via {Gig1, K}). + * - the path describes the intent on how to forward. This differs from how + * the path resolves. I.e. it might not be resolved at all (since the + * interface is deleted or down). + * - paths have different types, most notably recursive or non-recursive. + * - a fib_path_t will contribute the appropriate adjacency object. It is from + * these contributions that the DP graph/chain for the route is built. + * - if the path is recursive and a recursion loop is detected, then the path + * will contribute the special DROP adjacency. This way, whilst the control + * plane graph is looped, the data-plane graph is not. + * + * we build a graph of these objects; + * + * fib_entry_t -> fib_path_list_t -> fib_path_t -> ... + * + * for recursive paths: + * + * fib_path_t -> fib_entry_t -> .... + * + * for non-recursive paths + * + * fib_path_t -> ip_adjacency_t -> interface + * + * These objects, which constitute the 'control plane' part of the FIB are used + * to represent the resolution of a route. As a whole this is referred to as the + * control plane graph. There is a separate DP graph to represent the forwarding + * of a packet. In the DP graph each object represents an action that is applied + * to a packet as it traverses the graph. For example, a lookup of an IP address + * in the forwarding table could result in the following graph: + * + * recursive-adj --> multi-path-adj --> interface_A + * --> interface_B + * + * A packet traversing this FIB DP graph would thus also traverse a VPP node + * graph of: + * + * ipX_recursive --> ipX_rewrite --> interface_A_tx --> etc + * + * The taxonomy of objects in a FIB graph is as follows, consider; + * + * A --> + * B --> D + * C --> + * + * Where A,B and C are (for example) routes that resolve through D. + * parent; D is the parent of A, B, and C. + * children: A, B, and C are children of D.
+ * sibling: A, B and C are siblings of one another. + * + * All shared objects in the FIB are reference counted. Users of these objects + * are thus expected to use the add_lock/unlock semantics (as one would + * normally use malloc/free). + * + * WALKS + * + * It is necessary to walk/traverse the graph forwards (entry to interface) to + * perform a collapse or build a recursive adj and backwards (interface + * to entry) to perform updates, i.e. when interface state changes or when + * recursive route resolution updates occur. + * A forward walk follows simply by navigating an object's parent pointer to + * access its parent object. For objects with multiple parents (e.g. a + * path-list), each parent is walked in turn. + * To support back-walks direct dependencies are maintained between objects, + * i.e. in the relationship, {A, B, C} --> D, then object D will maintain a list + * of 'pointers' to its children {A, B, C}. Bare C-language pointers are not + * allowed, so a pointer is described in terms of an object type (i.e. entry, + * path-list, etc) and index - this allows the object to be retrieved from the + * appropriate pool. A list is maintained to achieve fast convergence at scale. + * When there are millions of recursive prefixes, it is very inefficient to + * blindly walk the tables looking for entries that were affected by a given + * topology change. The lowest hanging fruit when optimising is to remove + * actions that are not required, so all back-walks only traverse objects that + * are directly affected by the change. + * + * PIC Core and fast-reroute rely on FIB reacting quickly to an interface + * state change to update the multi-path-adjacencies that use this interface. + * An example graph is shown below: + * + * E_a --> + * E_b --> PL_2 --> P_a --> Interface_A + * ... --> P_c -\ + * E_k --> \ + * Interface_K + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ...
--> P_f --> Interface_F + * E_z --> + * + * E = fib_entry_t + * PL = fib_path_list_t + * P = fib_path_t + * The subscripts are arbitrary and serve only to distinguish object instances. + * This CP graph results in the following DP graph: + * + * M-ADJ-2 --> Interface_A + * \ + * -> Interface_K + * / + * M-ADJ-1 --> Interface_F + * + * M-ADJ = multi-path-adjacency. + * + * When interface K goes down a back-walk is started over its dependants in the + * control plane graph. This back-walk will reach PL_1 and PL_2 and result in + * the calculation of new adjacencies that have interface K removed. The walk + * will continue to the entry objects and thus the forwarding table is updated + * for each prefix with the new adjacency. The DP graph then becomes: + * + * ADJ-3 --> Interface_A + * + * ADJ-4 --> Interface_F + * + * The eBGP PIC scenarios described above relied on the update of a path-list's + * recursive-adjacency to provide the shared point of cutover. This is shown + * below + * + * E_a --> + * E_b --> PL_2 --> P_a --> E_44 --> PL_a --> P_b --> Interface_A + * ... --> P_c -\ + * E_k --> \ + * \ + * E_1 --> PL_k -> P_k --> Interface_K + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ... --> P_f --> E_55 --> PL_e --> P_e --> Interface_E + * E_z --> + * + * The failure scenario is the removal of entry E_1 and thus the paths P_c and + * P_d become unresolved. To achieve PIC the two shared recursive path-lists, + * PL_1 and PL_2 must be updated to remove E_1 from the recursive-multi-path- + * adjacencies that they contribute, before any entry E_a to E_z is updated. + * This means that as the update propagates backwards (right to left) in the + * graph it must do so breadth first not depth first. Note this approach leads + * to convergence times that are dependent on the number of path-lists and so + * the number of combinations of egress PEs - this is desirable as this + * scale is considerably lower than the number of prefixes.
+ * + * If we consider another section of the graph that is similar to the one + * shown above where there is another prefix E_2 in a similar position to E_1 + * and so also has many dependent children. It is reasonable to expect that a + * particular network failure may simultaneously render E_1 and E_2 unreachable. + * This means that the update to withdraw E_2 is downloaded immediately after the + * update to withdraw E_1. It is a requirement on the FIB to not spend large + * amounts of time in a back-walk whilst processing the update for E_1, i.e. the + * back-walk must not reach as far as E_a and its siblings. Therefore, after the + * back-walk has traversed one generation (breadth first) to update all the + * path-lists it should be suspended/back-grounded and further updates allowed + * to be handled. Once the update queue is empty, the suspended walks can be + * resumed. Note that in the case that multiple updates affect the same entry + * (say E_1) then this will trigger multiple similar walks, these are merged, + * so each child is updated only once. + * In the presence of more layers of recursion PIC is still a desirable + * feature. Consider an extension to the diagram above, where more recursive + * routes (E_100 -> E_200) are added as children of E_a: + * + * E_100 --> + * E_101 --> PL_3 --> P_j-\ + * ... \ + * E_199 --> E_a --> + * E_b --> PL_2 --> P_a --> E_44 --> ...etc.. + * ... --> P_c -\ + * E_k \ + * E_1 --> ...etc.. + * / + * E_l --> / + * E_m --> PL_1 --> P_d -/ + * ... --> P_e --> E_55 --> ...etc.. + * E_z --> + * + * To achieve PIC for the routes E_100->E_199, PL_3 needs to be updated before + * E_b -> E_z, a breadth first traversal at each level would not achieve this. + * Instead the walk must proceed intelligently. Children on PL_2 are sorted so + * those Entry objects that themselves have children appear first in the list, + * those without later.
When an entry object is walked that has children, a + * walk of its children is pushed to the front background queue. The back + * ground queue is a priority queue. As the breadth first traversal proceeds + * across the dependent entry object E_a to E_k, when the first entry that does + * not have children is reached (E_b), the walk is suspended and placed at the + * back of the queue. Following this prioritisation method shared path-list + * updates are performed before all non-resolving entry objects. + * The CPU/core/thread that handles the updates is the same thread that handles + * the back-walks. Handling updates has a higher priority than making walk + * progress, so a walk is required to be interruptable/suspendable when new + * updates are available. + * !!! TODO - this section describes how walks should be not how they are !!! + * + * In the diagram above E_100 is an IP route, however, VPP has no restrictions + * on the type of object that can be a dependent of a FIB entry. Children of + * a FIB entry can be (and are) GRE & VXLAN tunnels endpoints, L2VPN LSPs etc. + * By including all object types into the graph and extending the back-walk, we + * can thus deliver fast convergence to technologies that overlay on an IP + * network. + * + * If having read all the above carefully you are still thinking; 'i don't need + * all this %&$* i have a route only I know about and I just need to jam it in', + * then fib_table_entry_special_add() is your only friend. + */ + +#ifndef __FIB_H__ +#define __FIB_H__ + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> + +#endif diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h new file mode 100644 index 00000000000..f82753170db --- /dev/null +++ b/src/vnet/fib/fib_api.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_API_H__ +#define __FIB_API_H__ + +/* + * Route add/delete pre-check. + * NOTE(review): the definition is not in this header; from the signature, + * fib_index and next_hop_fib_index look like out-parameters - confirm. + */ +int +add_del_route_check (fib_protocol_t table_proto, + u32 table_id, + u32 next_hop_sw_if_index, + fib_protocol_t next_hop_table_proto, + u32 next_hop_table_id, + u8 create_missing_tables, + u32 * fib_index, u32 * next_hop_fib_index); +/* + * Route add/delete handler. + * NOTE(review): next_hop_fib_index is declared u8 here, whereas fib_index is + * u32 and add_del_route_check() reports the next-hop FIB index through a + * u32 pointer. A FIB index above 255 would be truncated when passed in - + * confirm whether u32 was intended (the definition must change in step). + */ +int +add_del_route_t_handler (u8 is_multipath, + u8 is_add, + u8 is_drop, + u8 is_unreach, + u8 is_prohibit, + u8 is_local, + u8 is_classify, + u32 classify_table_index, + u8 is_resolve_host, + u8 is_resolve_attached, + u32 fib_index, + const fib_prefix_t * prefix, + u8 next_hop_proto_is_ip4, + const ip46_address_t * next_hop, + u32 next_hop_sw_if_index, + u8 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t next_hop_via_label, + mpls_label_t * next_hop_out_label_stack); + +void +copy_fib_next_hop (fib_route_path_encode_t * api_rpath, + void * fp_arg); + +#endif /* __FIB_API_H__ */ diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c new file mode 100644 index 00000000000..c389ea43feb --- /dev/null +++ b/src/vnet/fib/fib_attached_export.c @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> + +#include <vnet/fib/fib_attached_export.h> +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_entry_delegate.h> + +/** + * A description of the need to import routes from the export table + */ +typedef struct fib_ae_import_t_ +{ + /** + * The entry in the epxort table that this importer + * is importing covereds from + */ + fib_node_index_t faei_export_entry; + + /** + * The attached entry in the import table + */ + fib_node_index_t faei_import_entry; + /** + * the sibling index on the cover + */ + u32 faei_export_sibling; + + /** + * The index of the exporter tracker. 
Not set if the + * export entry is not valid for export + */ + fib_node_index_t faei_exporter; + + /** + * A vector/list of imported entry indices + */ + fib_node_index_t *faei_importeds; + + /** + * The FIB index and prefix we are tracking + */ + fib_node_index_t faei_export_fib; + fib_prefix_t faei_prefix; + + /** + * The FIB index we are importing into + */ + fib_node_index_t faei_import_fib; +} fib_ae_import_t; + +/** + * A description of the need to export routes to one or more import tables + */ +typedef struct fib_ae_export_t_ { + /** + * The vector/list of import tracker indices + */ + fib_node_index_t *faee_importers; + + /** + * The connected entry this export is acting on behalf of + */ + fib_node_index_t faee_ei; + + /** + * Reference counting locks + */ + u32 faee_locks; +} fib_ae_export_t; + +/* + * memory pools for the importers and exporters + */ +static fib_ae_import_t *fib_ae_import_pool; +static fib_ae_export_t *fib_ae_export_pool; +/* + * Return the export tracker for the entry 'connected', with one more lock + * taken on it. The tracker is found through the entry's ATTACHED_EXPORT + * delegate; if the entry has no such delegate, the delegate and a zeroed + * tracker are created and linked via the delegate's fd_index. + */ +static fib_ae_export_t * +fib_entry_ae_add_or_lock (fib_node_index_t connected) +{ + fib_entry_delegate_t *fed; + fib_ae_export_t *export; + fib_entry_t *entry; + + entry = fib_entry_get(connected); + fed = fib_entry_delegate_get(entry, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + + if (NULL == fed) + { + fed = fib_entry_delegate_find_or_add(entry, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + pool_get(fib_ae_export_pool, export); + memset(export, 0, sizeof(*export)); + + fed->fd_index = (export - fib_ae_export_pool); + export->faee_ei = connected; + } + else + { + export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index); + } + + export->faee_locks++; + + return (export); +} +/* + * If 'entry_index' is one of the entries this importer has imported, + * withdraw the FIB_SOURCE_AE source for its prefix from the import FIB, + * drop the lock held on the entry and delete it from the importeds vector. + * Entries that were never imported are ignored. + */ +static void +fib_entry_import_remove (fib_ae_import_t *import, + fib_node_index_t entry_index) +{ + fib_prefix_t prefix; + u32 index; + + /* + * find the index in the vector of the entry we are removing + */ + index = vec_search(import->faei_importeds, entry_index); + + if (index < vec_len(import->faei_importeds)) + { + /* + * this is an entry
that was previsouly imported + */ + fib_entry_get_prefix(entry_index, &prefix); + + fib_table_entry_special_remove(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE); + + fib_entry_unlock(entry_index); + vec_del1(import->faei_importeds, index); + } +} + +static void +fib_entry_import_add (fib_ae_import_t *import, + fib_node_index_t entry_index) +{ + fib_node_index_t *existing; + fib_prefix_t prefix; + + /* + * ensure we only add the exported entry once, since + * sourcing prefixes in the table is reference counted + */ + vec_foreach(existing, import->faei_importeds) + { + if (*existing == entry_index) + { + return; + } + } + + /* + * this is the first time this export entry has been imported + * Add it to the import FIB and to the list of importeds + */ + fib_entry_get_prefix(entry_index, &prefix); + + /* + * don't import entries that have the same prefix the import entry + */ + if (0 != fib_prefix_cmp(&prefix, + &import->faei_prefix)) + { + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(entry_index); + + if (dpo_id_is_valid(dpo)) + { + fib_table_entry_special_dpo_add(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE, + (fib_entry_get_flags(entry_index) | + FIB_ENTRY_FLAG_EXCLUSIVE), + load_balance_get_bucket(dpo->dpoi_index, 0)); + + fib_entry_lock(entry_index); + vec_add1(import->faei_importeds, entry_index); + } + /* + * else + * the entry currently has no valid forwarding. 
when it + * does it will export itself + */ + } +} + +/** + * Call back when walking a connected prefix's covered prefixes for import + */ +static int +fib_entry_covered_walk_import (fib_entry_t *cover, + fib_node_index_t covered, + void *ctx) +{ + fib_ae_import_t *import = ctx; + + fib_entry_import_add(import, covered); + + return (0); +} + +/* + * fib_ae_export_import_add + * + * Add an importer to a connected entry + */ +static void +fib_ae_export_import_add (fib_ae_export_t *export, + fib_ae_import_t *import) +{ + fib_entry_t *entry; + + import->faei_exporter = (export - fib_ae_export_pool); + entry = fib_entry_get(export->faee_ei); + + fib_entry_cover_walk(entry, + fib_entry_covered_walk_import, + import); +} + +/** + * \brief Create an import tracker for fib_entry (the attached entry in the + * import table) against the FIB export_fib. If the export FIB holds an exact + * match that is attached, the entries it covers are imported. The entry found + * in the export FIB (exact match, else the result of fib_table_lookup) is + * tracked, and the tracker is recorded on fib_entry as its ATTACHED_IMPORT delegate. + */ +void +fib_attached_export_import (fib_entry_t *fib_entry, + fib_node_index_t export_fib) +{ + fib_entry_delegate_t *fed; + fib_ae_import_t *import; + + pool_get(fib_ae_import_pool, import); + + /* + * pool_get() hands back memory with unspecified contents; zero it (as is + * done for the exporter) so faei_importeds starts as an empty vector, and + * mark the tracker as having no exporter until + * fib_ae_export_import_add() says otherwise. + * fib_attached_export_purge() reads both fields. + */ + memset(import, 0, sizeof(*import)); + import->faei_exporter = FIB_NODE_INDEX_INVALID; + + import->faei_import_fib = fib_entry->fe_fib_index; + import->faei_export_fib = export_fib; + import->faei_prefix = fib_entry->fe_prefix; + import->faei_import_entry = fib_entry_get_index(fib_entry); + import->faei_export_sibling = ~0; + + /* + * do an exact match in the export table + */ + import->faei_export_entry = + fib_table_lookup_exact_match(import->faei_export_fib, + &import->faei_prefix); + + if (FIB_NODE_INDEX_INVALID == import->faei_export_entry) + { + /* + * no exact matching entry in the export table. can't be good. + * track the next best thing + */ + import->faei_export_entry = + fib_table_lookup(import->faei_export_fib, + &import->faei_prefix); + import->faei_exporter = FIB_NODE_INDEX_INVALID; + } + else + { + /* + * found the entry in the export table. import the + * prefixes that it covers.
+ * only if the prefix found in the export FIB really is + * attached do we want to import its covered + */ + if (FIB_ENTRY_FLAG_ATTACHED & + fib_entry_get_flags_i(fib_entry_get(import->faei_export_entry))) + { + fib_ae_export_t *export; + + export = fib_entry_ae_add_or_lock(import->faei_export_entry); + vec_add1(export->faee_importers, (import - fib_ae_import_pool)); + fib_ae_export_import_add(export, import); + } + } + + /* + * track the entry in the export table so we can update appropriately + * when it changes. + * Importing the covered prefixes adds entries to the import FIB, which may + * have grown (and so moved) the entry pool; re-fetch the entry from the + * index saved above rather than trusting the pointer we were passed. + */ + fib_entry = fib_entry_get(import->faei_import_entry); + import->faei_export_sibling = + fib_entry_cover_track(fib_entry_get(import->faei_export_entry), + import->faei_import_entry); + + fed = fib_entry_delegate_find_or_add(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); + fed->fd_index = (import - fib_ae_import_pool); +} + +/** + * \brief All the imported entries need to be purged + */ +void +fib_attached_export_purge (fib_entry_t *fib_entry) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); + + if (NULL != fed) + { + fib_node_index_t *import_index; + fib_entry_t *export_entry; + fib_ae_import_t *import; + fib_ae_export_t *export; + + import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index); + + /* + * remove each imported entry + */ + vec_foreach(import_index, import->faei_importeds) + { + fib_prefix_t prefix; + + fib_entry_get_prefix(*import_index, &prefix); + + fib_table_entry_delete(import->faei_import_fib, + &prefix, + FIB_SOURCE_AE); + fib_entry_unlock(*import_index); + } + vec_free(import->faei_importeds); + + /* + * stop tracking the export entry + */ + if (~0 != import->faei_export_sibling) + { + fib_entry_cover_untrack(fib_entry_get(import->faei_export_entry), + import->faei_export_sibling); + } + import->faei_export_sibling = ~0; + + /* + * remove this import tracker from the export's list, + * if it is attached to one. It won't be in the case the tracked + * export entry is not an attached exact match.
+ */ + if (FIB_NODE_INDEX_INVALID != import->faei_exporter) + { + fib_entry_delegate_t *fed; + + export_entry = fib_entry_get(import->faei_export_entry); + + fed = fib_entry_delegate_get(export_entry, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + ASSERT(NULL != fed); + + export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index); + + u32 index = vec_search(export->faee_importers, + (import - fib_ae_import_pool)); + + ASSERT(index < vec_len(export->faee_importers)); + vec_del1(export->faee_importers, index); + + /* + * free the exporter if there are no longer importers + */ + if (0 == --export->faee_locks) + { + pool_put(fib_ae_export_pool, export); + fib_entry_delegate_remove(export_entry, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + } + } + + /* + * free the import tracker + */ + pool_put(fib_ae_import_pool, import); + fib_entry_delegate_remove(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); + } +} + +void +fib_attached_export_covered_added (fib_entry_t *cover, + fib_node_index_t covered) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(cover, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + + if (NULL != fed) + { + /* + * the covering prefix is exporting to other tables + */ + fib_node_index_t *import_index; + fib_ae_import_t *import; + fib_ae_export_t *export; + + export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index); + + /* + * export the covered entry to each of the importers + */ + vec_foreach(import_index, export->faee_importers) + { + import = pool_elt_at_index(fib_ae_import_pool, *import_index); + + fib_entry_import_add(import, covered); + } + } +} + +void +fib_attached_export_covered_removed (fib_entry_t *cover, + fib_node_index_t covered) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(cover, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + + if (NULL != fed) + { + /* + * the covering prefix is exporting to other tables + */ + fib_node_index_t *import_index; + fib_ae_import_t *import; + fib_ae_export_t *export; + + export = 
pool_elt_at_index(fib_ae_export_pool, fed->fd_index); + + /* + * remove the covered entry from each of the importers + */ + vec_foreach(import_index, export->faee_importers) + { + import = pool_elt_at_index(fib_ae_import_pool, *import_index); + + fib_entry_import_remove(import, covered); + } + } +} +/* + * Common handling for a change to, or update of, the tracked cover: + * if fib_entry has an ATTACHED_IMPORT delegate, purge everything it has + * imported and run the import again against the same export FIB. + * Entries without that delegate are left alone. + */ +static void +fib_attached_export_cover_modified_i (fib_entry_t *fib_entry) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); + + if (NULL != fed) + { + fib_ae_import_t *import; + u32 export_fib; + + /* + * save the temporaries we need from the existing import + * since it will be toast after the purge. + */ + import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index); + export_fib = import->faei_export_fib; + + /* + * keep it simple. purge anything that was previously imported. + * then re-evaluate the need to import. + */ + fib_attached_export_purge(fib_entry); + fib_attached_export_import(fib_entry, export_fib); + } +} + +/** + * \brief If this entry is tracking a cover (in another table) + * then that cover has changed. re-evaluate import. + */ +void +fib_attached_export_cover_change (fib_entry_t *fib_entry) +{ + fib_attached_export_cover_modified_i(fib_entry); +} + +/** + * \brief If this entry is tracking a cover (in another table) + * then that cover has been updated. re-evaluate import.
+ */ +void +fib_attached_export_cover_update (fib_entry_t *fib_entry) +{ + fib_attached_export_cover_modified_i(fib_entry); +} + +/** + * \brief Append a description of the entry's import tracker to the string s. + * Nothing is appended if the entry has no ATTACHED_IMPORT delegate. + * Returns the (possibly extended) string. + */ +u8* +fib_ae_import_format (fib_entry_t *fib_entry, + u8* s) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_IMPORT); + + if (NULL != fed) + { + fib_node_index_t *index; + fib_ae_import_t *import; + + import = pool_elt_at_index(fib_ae_import_pool, fed->fd_index); + + s = format(s, "\n Attached-Import:%d:[", (import - fib_ae_import_pool)); + s = format(s, "export-prefix:%U ", format_fib_prefix, &import->faei_prefix); + s = format(s, "export-entry:%d ", import->faei_export_entry); + s = format(s, "export-sibling:%d ", import->faei_export_sibling); + s = format(s, "exporter:%d ", import->faei_exporter); + s = format(s, "export-fib:%d ", import->faei_export_fib); + + s = format(s, "import-entry:%d ", import->faei_import_entry); + s = format(s, "import-fib:%d ", import->faei_import_fib); + + s = format(s, "importeds:["); + vec_foreach(index, import->faei_importeds) + { + s = format(s, "%d, ", *index); + } + s = format(s, "]]"); + } + + return (s); +} + +/** + * \brief Append a description of the entry's export tracker (the export + * entry index and the list of importer indices) to the string s. + * Nothing is appended if the entry has no ATTACHED_EXPORT delegate. + * Returns the (possibly extended) string. + */ +u8* +fib_ae_export_format (fib_entry_t *fib_entry, + u8* s) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(fib_entry, + FIB_ENTRY_DELEGATE_ATTACHED_EXPORT); + + if (NULL != fed) + { + fib_node_index_t *index; + fib_ae_export_t *export; + + /* + * the export tracker's pool index is stored in the delegate's + * fd_index (see fib_entry_ae_add_or_lock), so read it back from there + */ + export = pool_elt_at_index(fib_ae_export_pool, fed->fd_index); + + s = format(s, "\n Attached-Export:%d:[", (export - fib_ae_export_pool)); + s = format(s, "export-entry:%d ", export->faee_ei); + + s = format(s, "importers:["); + vec_foreach(index, export->faee_importers) + { + s = format(s, "%d, ", *index); + } + s = format(s, "]]"); + } + return (s); +} diff --git a/src/vnet/fib/fib_attached_export.h b/src/vnet/fib/fib_attached_export.h new file mode 100644 index 00000000000..fa28a6e13b8 --- /dev/null +++ b/src/vnet/fib/fib_attached_export.h @@ -0,0 +1,57 @@ +/* + * Copyright (c)
2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * FIB attached export + * + * what's it all about? + * say one does this: + * set int ip table Gig0 2 + * set int ip addr Gig0 10.0.0.1/24 + * Gig0 is in table 2 with a connected address. + * Now we add a route matching said connected in a different table + * ip route add table 3 10.0.0.0/24 via Gig0 + * How do we expect traffic in table 3 to be forwarded? Clearly out of + * Gig0. It's an attached route, hence we are saying that we can ARP for + * hosts in the attached subnet. and we can. but the replies to any ARP + * requests we send will be received on Gig0, and since Gig0 is in table 2, it will install + * the adj-fibs in table 2. So traffic in table 3 will never hit an adj-fib + * and hence will always hit the glean, and thus be effectively dropped. + * How do we fix this? Attached Export !! All more specific entries in table 2 + * that track and are covered by the connected are automatically exported into + * table 3. Now table 3 also has adj-fibs (and the local) so traffic to hosts + * is restored.
+ */ + +#ifndef __FIB_ATTACHED_EXPORT_H__ +#define __FIB_ATTACHED_EXPORT_H__ + +#include <vnet/fib/fib_types.h> + +extern void fib_attached_export_import(fib_entry_t *fib_entry, + fib_node_index_t export_fib); + +extern void fib_attached_export_purge(fib_entry_t *fib_entry); + +extern void fib_attached_export_covered_added(fib_entry_t *cover, + fib_node_index_t covered); +extern void fib_attached_export_covered_removed(fib_entry_t *cover, + fib_node_index_t covered); +extern void fib_attached_export_cover_change(fib_entry_t *fib_entry); +extern void fib_attached_export_cover_update(fib_entry_t *fib_entry); + +extern u8* fib_ae_import_format(fib_entry_t *fib_entry, u8*s); +extern u8* fib_ae_export_format(fib_entry_t *fib_entry, u8*s); + +#endif diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c new file mode 100644 index 00000000000..24b506379ac --- /dev/null +++ b/src/vnet/fib/fib_entry.c @@ -0,0 +1,1503 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/ip/lookup.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/fib_attached_export.h> +#include <vnet/fib/fib_path_ext.h> + +/* + * Array of strings/names for the FIB sources + */ +static const char *fib_source_names[] = FIB_SOURCES; +static const char *fib_attribute_names[] = FIB_ENTRY_ATTRIBUTES; + +/* + * Pool for all fib_entries + */ +static fib_entry_t *fib_entry_pool; + +fib_entry_t * +fib_entry_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_entry_pool, index)); +} + +static fib_node_t * +fib_entry_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_entry_get(index)); +} + +fib_node_index_t +fib_entry_get_index (const fib_entry_t * fib_entry) +{ + return (fib_entry - fib_entry_pool); +} + +static fib_protocol_t +fib_entry_get_proto (const fib_entry_t * fib_entry) +{ + return (fib_entry->fe_prefix.fp_proto); +} + +fib_forward_chain_type_t +fib_entry_get_default_chain_type (const fib_entry_t *fib_entry) +{ + switch (fib_entry->fe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_PROTOCOL_MPLS: + if (MPLS_EOS == fib_entry->fe_prefix.fp_eos) + /* + * If the entry being asked is a eos-MPLS label entry, + * then use the payload-protocol field, that we stashed there + * for just this purpose + */ + return (fib_forw_chain_type_from_dpo_proto( + fib_entry->fe_prefix.fp_payload_proto)); + else + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } + + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +u8 * +format_fib_entry (u8 * s, va_list * args) +{ + fib_forward_chain_type_t fct; + 
fib_entry_attribute_t attr; + fib_path_ext_t *path_ext; + fib_entry_t *fib_entry; + fib_entry_src_t *src; + fib_node_index_t fei; + fib_source_t source; + u32 n_covered; + int level; + + fei = va_arg (*args, fib_node_index_t); + level = va_arg (*args, int); + fib_entry = fib_entry_get(fei); + + s = format (s, "%U", format_fib_prefix, &fib_entry->fe_prefix); + + if (level >= FIB_ENTRY_FORMAT_DETAIL) + { + s = format (s, " fib:%d", fib_entry->fe_fib_index); + s = format (s, " index:%d", fib_entry_get_index(fib_entry)); + s = format (s, " locks:%d", fib_entry->fe_node.fn_locks); + + FOR_EACH_SRC_ADDED(fib_entry, src, source, + ({ + s = format (s, "\n src:%s ", + fib_source_names[source]); + s = fib_entry_src_format(fib_entry, source, s); + s = format (s, " refs:%d ", src->fes_ref_count); + if (FIB_ENTRY_FLAG_NONE != src->fes_entry_flags) { + s = format(s, "flags:"); + FOR_EACH_FIB_ATTRIBUTE(attr) { + if ((1<<attr) & src->fes_entry_flags) { + s = format (s, "%s,", fib_attribute_names[attr]); + } + } + } + s = format (s, "\n"); + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + s = fib_path_list_format(src->fes_pl, s); + } + if (NULL != src->fes_path_exts) + { + s = format(s, " Extensions:"); + vec_foreach(path_ext, src->fes_path_exts) + { + s = format(s, "\n %U", format_fib_path_ext, path_ext); + } + } + })); + + n_covered = fib_entry_cover_get_size(fib_entry); + if (n_covered > 0) { + s = format(s, "\n tracking %d covered: ", n_covered); + s = fib_entry_cover_list_format(fib_entry, s); + } + s = fib_ae_import_format(fib_entry, s); + s = fib_ae_export_format(fib_entry, s); + + s = format (s, "\n forwarding: "); + } + else + { + s = format (s, "\n"); + } + + fct = fib_entry_get_default_chain_type(fib_entry); + + if (!dpo_id_is_valid(&fib_entry->fe_lb)) + { + s = format (s, " UNRESOLVED\n"); + return (s); + } + else + { + s = format(s, " %U-chain\n %U", + format_fib_forw_chain_type, fct, + format_dpo_id, + &fib_entry->fe_lb, + 2); + s = format(s, "\n"); + + if (level >= 
FIB_ENTRY_FORMAT_DETAIL2) + { + fib_entry_delegate_type_t fdt; + fib_entry_delegate_t *fed; + + FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + { + s = format(s, " %U-chain\n %U", + format_fib_forw_chain_type, + fib_entry_delegate_type_to_chain_type(fdt), + format_dpo_id, &fed->fd_dpo, 2); + s = format(s, "\n"); + }); + } + } + + if (level >= FIB_ENTRY_FORMAT_DETAIL2) + { + s = format(s, "\nchildren:"); + s = fib_node_children_format(fib_entry->fe_node.fn_children, s); + } + + return (s); +} + +static fib_entry_t* +fib_entry_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_ENTRY == node->fn_type); +#endif + return ((fib_entry_t*)node); +} + +static void +fib_entry_last_lock_gone (fib_node_t *node) +{ + fib_entry_delegate_type_t fdt; + fib_entry_delegate_t *fed; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_from_fib_node(node); + + FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + { + dpo_reset(&fed->fd_dpo); + fib_entry_delegate_remove(fib_entry, fdt); + }); + + FIB_ENTRY_DBG(fib_entry, "last-lock"); + + fib_node_deinit(&fib_entry->fe_node); + // FIXME -RR Backwalk + + ASSERT(0 == vec_len(fib_entry->fe_delegates)); + vec_free(fib_entry->fe_delegates); + pool_put(fib_entry_pool, fib_entry); +} + +static fib_entry_src_t* +fib_entry_get_best_src_i (const fib_entry_t *fib_entry) +{ + fib_entry_src_t *bsrc; + + /* + * the enum of sources is deliberately arranged in priority order + */ + if (0 == vec_len(fib_entry->fe_srcs)) + { + bsrc = NULL; + } + else + { + bsrc = vec_elt_at_index(fib_entry->fe_srcs, 0); + } + + return (bsrc); +} + +static fib_source_t +fib_entry_src_get_source (const fib_entry_src_t *esrc) +{ + if (NULL != esrc) + { + return (esrc->fes_src); + } + return (FIB_SOURCE_MAX); +} + +static fib_entry_flag_t +fib_entry_src_get_flags (const fib_entry_src_t *esrc) +{ + if (NULL != esrc) + { + return (esrc->fes_entry_flags); + } + return (FIB_ENTRY_FLAG_NONE); +} + +fib_entry_flag_t +fib_entry_get_flags (fib_node_index_t 
fib_entry_index) +{ + return (fib_entry_get_flags_i(fib_entry_get(fib_entry_index))); +} + +/* + * fib_entry_back_walk_notify + * + * A back walk has reach this entry. + */ +static fib_node_back_walk_rc_t +fib_entry_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_from_fib_node(node); + + if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason || + FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_get_best_source( + fib_entry_get_index(fib_entry))); + } + + /* + * all other walk types can be reclassifed to a re-evaluate to + * all recursive dependents. + * By reclassifying we ensure that should any of these walk types meet + * they can be merged. + */ + ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + + /* + * ... and nothing is forced sync from now on. + */ + ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC; + + /* + * propagate the backwalk further if we haven't already reached the + * maximum depth. 
+ */ + fib_walk_sync(FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry), + ctx); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * @brief Report memory usage: the entry pool itself, then the number of + * entry sources and of path-extensions summed over every entry in the pool. + */ +static void +fib_entry_show_memory (void) +{ + u32 n_srcs = 0, n_exts = 0; + fib_entry_src_t *esrc; + fib_entry_t *entry; + + fib_show_memory_usage("Entry", + pool_elts(fib_entry_pool), + pool_len(fib_entry_pool), + sizeof(fib_entry_t)); + + pool_foreach(entry, fib_entry_pool, + ({ + n_srcs += vec_len(entry->fe_srcs); + vec_foreach(esrc, entry->fe_srcs) + { + n_exts += vec_len(esrc->fes_path_exts); + } + })); + + fib_show_memory_usage("Entry Source", + n_srcs, n_srcs, sizeof(fib_entry_src_t)); + fib_show_memory_usage("Entry Path-Extensions", + n_exts, n_exts, + sizeof(fib_path_ext_t)); +} + +/* + * The FIB entry's graph node virtual function table + */ +static const fib_node_vft_t fib_entry_vft = { + .fnv_get = fib_entry_get_node, + .fnv_last_lock = fib_entry_last_lock_gone, + .fnv_back_walk = fib_entry_back_walk_notify, + .fnv_mem_show = fib_entry_show_memory, +}; + +/** + * @brief Contribute the set of Adjacencies that this entry forwards with + * to build the uRPF list of its children + * + * @param entry_index The index of the FIB entry + * @param urpf The uRPF list index, passed on to the entry's parent + * path-list (fe_parent) via fib_path_list_contribute_urpf() + */ +void +fib_entry_contribute_urpf (fib_node_index_t entry_index, + index_t urpf) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(entry_index); + + /* + * this function returns void, so call the path-list rather than + * 'return' its result + */ + fib_path_list_contribute_urpf(fib_entry->fe_parent, urpf); +} + +/* + * fib_entry_contribute_forwarding + * + * Get and lock the forwarding information (DPO) contributed by the FIB entry.
+ */ +void +fib_entry_contribute_forwarding (fib_node_index_t fib_entry_index, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + fib_entry_delegate_t *fed; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + if (fct == fib_entry_get_default_chain_type(fib_entry)) + { + dpo_copy(dpo, &fib_entry->fe_lb); + } + else + { + fed = fib_entry_delegate_get(fib_entry, + fib_entry_chain_type_to_delegate_type(fct)); + + if (NULL == fed) + { + fed = fib_entry_delegate_find_or_add( + fib_entry, + fib_entry_chain_type_to_delegate_type(fct)); + /* + * on-demand create eos/non-eos. + * There is no on-demand delete because: + * - memory versus complexity & reliability: + * leaving unrequired [n]eos LB arounds wastes memory, cleaning + * then up on the right trigger is more code. i favour the latter. + */ + fib_entry_src_mk_lb(fib_entry, + fib_entry_get_best_src_i(fib_entry), + fct, + &fed->fd_dpo); + } + + dpo_copy(dpo, &fed->fd_dpo); + } +} + +const dpo_id_t * +fib_entry_contribute_ip_forwarding (fib_node_index_t fib_entry_index) +{ + fib_forward_chain_type_t fct; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + fct = fib_entry_get_default_chain_type(fib_entry); + + ASSERT((fct == FIB_FORW_CHAIN_TYPE_UNICAST_IP4 || + fct == FIB_FORW_CHAIN_TYPE_UNICAST_IP6)); + + return (&fib_entry->fe_lb); +} + +adj_index_t +fib_entry_get_adj (fib_node_index_t fib_entry_index) +{ + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(fib_entry_index); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + + if (dpo_is_adj(dpo)) + { + return (dpo->dpoi_index); + } + return (ADJ_INDEX_INVALID); +} + +fib_node_index_t +fib_entry_get_path_list (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (fib_entry->fe_parent); +} + +u32 +fib_entry_child_add (fib_node_index_t fib_entry_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + return 
(fib_node_child_add(FIB_NODE_TYPE_ENTRY, + fib_entry_index, + child_type, + child_index)); +}; + +void +fib_entry_child_remove (fib_node_index_t fib_entry_index, + u32 sibling_index) +{ + fib_node_child_remove(FIB_NODE_TYPE_ENTRY, + fib_entry_index, + sibling_index); + + if (0 == fib_node_get_n_children(FIB_NODE_TYPE_ENTRY, + fib_entry_index)) + { + /* + * if there are no children left then there is no reason to keep + * the non-default forwarding chains. those chains are built only + * because the children want them. + */ + fib_entry_delegate_type_t fdt; + fib_entry_delegate_t *fed; + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + { + dpo_reset(&fed->fd_dpo); + fib_entry_delegate_remove(fib_entry, fdt); + }); + } +} + +static fib_entry_t * +fib_entry_alloc (u32 fib_index, + const fib_prefix_t *prefix, + fib_node_index_t *fib_entry_index) +{ + fib_entry_t *fib_entry; + fib_prefix_t *fep; + + pool_get(fib_entry_pool, fib_entry); + memset(fib_entry, 0, sizeof(*fib_entry)); + + fib_node_init(&fib_entry->fe_node, + FIB_NODE_TYPE_ENTRY); + + fib_entry->fe_fib_index = fib_index; + + /* + * the one time we need to update the const prefix is when + * the entry is first created + */ + fep = (fib_prefix_t*)&(fib_entry->fe_prefix); + *fep = *prefix; + + if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto) + { + fep->fp_len = 21; + if (MPLS_NON_EOS == fep->fp_eos) + { + fep->fp_payload_proto = DPO_PROTO_MPLS; + } + ASSERT(DPO_PROTO_NONE != fib_entry->fe_prefix.fp_payload_proto); + } + + dpo_reset(&fib_entry->fe_lb); + + *fib_entry_index = fib_entry_get_index(fib_entry); + + FIB_ENTRY_DBG(fib_entry, "alloc"); + + return (fib_entry); +} + +static void +fib_entry_post_flag_update_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + /* + * handle changes to attached export for import entries + */ + int is_import = (FIB_ENTRY_FLAG_IMPORT & 
fib_entry_get_flags_i(fib_entry)); + int was_import = (FIB_ENTRY_FLAG_IMPORT & old_flags); + + if (!was_import && is_import) + { + /* + * transition from not exported to exported + */ + + /* + * there is an assumption here that the entry resolves via only + * one interface and that it is the cross VRF interface. + */ + u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent); + + fib_attached_export_import(fib_entry, + fib_table_get_index_for_sw_if_index( + fib_entry_get_proto(fib_entry), + sw_if_index)); + } + else if (was_import && !is_import) + { + /* + * transition from exported to not exported + */ + fib_attached_export_purge(fib_entry); + } + /* + * else + * no change. nothing to do. + */ + + /* + * handle changes to attached export for export entries + */ + int is_attached = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(fib_entry)); + int was_attached = (FIB_ENTRY_FLAG_ATTACHED & old_flags); + + if (!was_attached && is_attached) + { + /* + * transition to attached. time to export + */ + // FIXME + } + // else FIXME +} + +static void +fib_entry_post_install_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + fib_entry_post_flag_update_actions(fib_entry, source, old_flags); + fib_entry_src_action_installed(fib_entry, source); +} + +fib_node_index_t +fib_entry_create (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths) +{ + fib_node_index_t fib_entry_index; + fib_entry_t *fib_entry; + + ASSERT(0 < vec_len(paths)); + + fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index); + + /* + * since this is a new entry create, we don't need to check for winning + * sources - there is only one. 
+ */ + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + fib_entry_src_action_path_swap(fib_entry, + source, + flags, + paths); + /* + * handle possible realloc's by refetching the pointer + */ + fib_entry = fib_entry_get(fib_entry_index); + fib_entry_src_action_activate(fib_entry, source); + + fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE); + + return (fib_entry_index); +} + +fib_node_index_t +fib_entry_create_special (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_entry_t *fib_entry; + + /* + * create and initiliase the new enty + */ + fib_entry = fib_entry_alloc(fib_index, prefix, &fib_entry_index); + + /* + * create the path-list + */ + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo); + fib_entry_src_action_activate(fib_entry, source); + + fib_entry_post_install_actions(fib_entry, source, FIB_ENTRY_FLAG_NONE); + + return (fib_entry_index); +} + +static void +fib_entry_post_update_actions (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t old_flags) +{ + /* + * backwalk to children to inform then of the change to forwarding. + */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_get_index(fib_entry), &bw_ctx); + + /* + * then inform any covered prefixes + */ + fib_entry_cover_update_notify(fib_entry); + + fib_entry_post_install_actions(fib_entry, source, old_flags); +} + +static void +fib_entry_source_change (fib_entry_t *fib_entry, + fib_source_t best_source, + fib_source_t new_source, + fib_entry_flag_t old_flags) +{ + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. 
+ */ + if (new_source < best_source) + { + /* + * we have a new winning source. + */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, new_source); + } + else if (new_source > best_source) + { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. + */ + fib_entry_src_action_deactivate(fib_entry, new_source); + fib_entry_src_action_activate(fib_entry, new_source); + } + + fib_entry_post_update_actions(fib_entry, new_source, old_flags); +} + +void +fib_entry_special_add (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry = fib_entry_src_action_add(fib_entry, source, flags, dpo); + fib_entry_source_change(fib_entry, best_source, source, bflags); +} + +void +fib_entry_special_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry = fib_entry_src_action_update(fib_entry, source, flags, dpo); + fib_entry_source_change(fib_entry, best_source, source, bflags); +} + + +void +fib_entry_path_add (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t 
flags, + const fib_route_path_t *rpath) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + ASSERT(1 == vec_len(rpath)); + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry = fib_entry_src_action_path_add(fib_entry, source, flags, rpath); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source) + { + /* + * we have a new winning source. + */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, source); + } + else if (source > best_source) + { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + + fib_entry_post_update_actions(fib_entry, source, bflags); +} + +/* + * fib_entry_path_remove + * + * remove a path from the entry. + * return the fib_entry's index if it is still present, INVALID otherwise. 
+ */ +fib_entry_src_flag_t +fib_entry_path_remove (fib_node_index_t fib_entry_index, + fib_source_t source, + const fib_route_path_t *rpath) +{ + fib_entry_src_flag_t sflag; + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + ASSERT(1 == vec_len(rpath)); + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + sflag = fib_entry_src_action_path_remove(fib_entry, source, rpath); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source ) + { + /* + * Que! removing a path from a source that is better than the + * one this entry is using. + */ + ASSERT(0); + } + else if (source > best_source ) + { + /* + * the source is not the best. nothing to do. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + /* + * removing a path from the path-list we were using. + */ + if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag)) + { + /* + * the last path from the source was removed. + * fallback to lower source + */ + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + + if (FIB_SOURCE_MAX == best_source) { + /* + * no more sources left. this entry is toast. 
+ */ + fib_entry_src_action_uninstall(fib_entry); + fib_entry_post_flag_update_actions(fib_entry, source, bflags); + + return (FIB_ENTRY_SRC_FLAG_NONE); + } + else + { + fib_entry_src_action_activate(fib_entry, best_source); + source = best_source; + } + } + else + { + /* + * re-install the new forwarding information + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + } + + fib_entry_post_update_actions(fib_entry, source, bflags); + + /* + * still have sources + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); +} + +/* + * fib_entry_special_remove + * + * remove a special source from the entry. + * return the fib_entry's index if it is still present, INVALID otherwise. + */ +fib_entry_src_flag_t +fib_entry_special_remove (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_src_flag_t sflag; + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + sflag = fib_entry_src_action_remove(fib_entry, source); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source ) + { + /* + * Que! removing a path from a source that is better than the + * one this entry is using. This can only mean it is a source + * this prefix does not have. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else if (source > best_source ) { + /* + * the source is not the best. nothing to do. + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + if (!(FIB_ENTRY_SRC_FLAG_ADDED & sflag)) + { + /* + * the source was removed. use the next best. 
+ */ + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + + if (FIB_SOURCE_MAX == best_source) { + /* + * no more sources left. this entry is toast. + */ + fib_entry_src_action_uninstall(fib_entry); + fib_entry_post_flag_update_actions(fib_entry, source, bflags); + + return (FIB_ENTRY_SRC_FLAG_NONE); + } + else + { + fib_entry_src_action_activate(fib_entry, best_source); + source = best_source; + } + } + else + { + /* + * re-install the new forwarding information + */ + fib_entry_src_action_reactivate(fib_entry, source); + } + } + + fib_entry_post_update_actions(fib_entry, source, bflags); + + /* + * still have sources + */ + return (FIB_ENTRY_SRC_FLAG_ADDED); +} + +/** + * fib_entry_delete + * + * The source is withdrawing all the paths it provided + */ +fib_entry_src_flag_t +fib_entry_delete (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + return (fib_entry_special_remove(fib_entry_index, source)); +} + +/** + * fib_entry_update + * + * The source has provided a new set of paths that will replace the old. + */ +void +fib_entry_update (fib_node_index_t fib_entry_index, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *paths) +{ + fib_source_t best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(fib_entry_index); + ASSERT(NULL != fib_entry); + + bsrc = fib_entry_get_best_src_i(fib_entry); + best_source = fib_entry_src_get_source(bsrc); + bflags = fib_entry_src_get_flags(bsrc); + + fib_entry_src_action_path_swap(fib_entry, + source, + flags, + paths); + /* + * handle possible realloc's by refetching the pointer + */ + fib_entry = fib_entry_get(fib_entry_index); + + /* + * if the path list for the source passed is invalid, + * then we need to create a new one. else we are updating + * an existing. + */ + if (source < best_source) + { + /* + * we have a new winning source. 
+ */ + fib_entry_src_action_deactivate(fib_entry, best_source); + fib_entry_src_action_activate(fib_entry, source); + } + else if (source > best_source) { + /* + * the new source loses. nothing to do here. + * the data from the source is saved in the path-list created + */ + return; + } + else + { + /* + * the new source is one this entry already has. + * But the path-list was updated, which will contribute new forwarding, + * so install it. + */ + fib_entry_src_action_deactivate(fib_entry, source); + fib_entry_src_action_activate(fib_entry, source); + } + + fib_entry_post_update_actions(fib_entry, source, bflags); +} + + +/* + * fib_entry_cover_changed + * + * this entry is tracking its cover and that cover has changed. + */ +void +fib_entry_cover_changed (fib_node_index_t fib_entry_index) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_source_t source, best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + u32 index; + + bflags = FIB_ENTRY_FLAG_NONE; + best_source = FIB_SOURCE_FIRST; + fib_entry = fib_entry_get(fib_entry_index); + + fib_attached_export_cover_change(fib_entry); + + /* + * propagate the notificuation to each of the added sources + */ + index = 0; + FOR_EACH_SRC_ADDED(fib_entry, esrc, source, + ({ + if (0 == index) + { + /* + * only the best source gets to set the back walk flags + */ + res = fib_entry_src_action_cover_change(fib_entry, source); + bflags = fib_entry_src_get_flags(esrc); + best_source = fib_entry_src_get_source(esrc); + } + else + { + fib_entry_src_action_cover_change(fib_entry, source); + } + index++; + })); + + if (res.install) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_src_get_source( + fib_entry_get_best_src_i(fib_entry))); + fib_entry_post_install_actions(fib_entry, best_source, bflags); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } + + if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason) + { + 
/* + * time for walkies fido. + */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = res.bw_reason, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx); + } +} + +/* + * fib_entry_cover_updated + * + * this entry is tracking its cover and that cover has been updated + * (i.e. its forwarding information has changed). + */ +void +fib_entry_cover_updated (fib_node_index_t fib_entry_index) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_source_t source, best_source; + fib_entry_flag_t bflags; + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + u32 index; + + bflags = FIB_ENTRY_FLAG_NONE; + best_source = FIB_SOURCE_FIRST; + fib_entry = fib_entry_get(fib_entry_index); + + fib_attached_export_cover_update(fib_entry); + + /* + * propagate the notificuation to each of the added sources + */ + index = 0; + FOR_EACH_SRC_ADDED(fib_entry, esrc, source, + ({ + if (0 == index) + { + /* + * only the best source gets to set the back walk flags + */ + res = fib_entry_src_action_cover_update(fib_entry, source); + bflags = fib_entry_src_get_flags(esrc); + best_source = fib_entry_src_get_source(esrc); + } + else + { + fib_entry_src_action_cover_update(fib_entry, source); + } + index++; + })); + + if (res.install) + { + fib_entry_src_action_reactivate(fib_entry, + fib_entry_src_get_source( + fib_entry_get_best_src_i(fib_entry))); + fib_entry_post_install_actions(fib_entry, best_source, bflags); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } + + if (FIB_NODE_BW_REASON_FLAG_NONE != res.bw_reason) + { + /* + * time for walkies fido. 
+ */ + fib_node_back_walk_ctx_t bw_ctx = { + .fnbw_reason = res.bw_reason, + }; + + fib_walk_sync(FIB_NODE_TYPE_ENTRY, fib_entry_index, &bw_ctx); + } +} + +int +fib_entry_recursive_loop_detect (fib_node_index_t entry_index, + fib_node_index_t **entry_indicies) +{ + fib_entry_t *fib_entry; + int was_looped, is_looped; + + fib_entry = fib_entry_get(entry_index); + + if (FIB_NODE_INDEX_INVALID != fib_entry->fe_parent) + { + fib_node_index_t *entries = *entry_indicies; + + vec_add1(entries, entry_index); + was_looped = fib_path_list_is_looped(fib_entry->fe_parent); + is_looped = fib_path_list_recursive_loop_detect(fib_entry->fe_parent, + &entries); + + *entry_indicies = entries; + + if (!!was_looped != !!is_looped) + { + /* + * re-evaluate all the entry's forwarding + * NOTE: this is an inplace modify + */ + fib_entry_delegate_type_t fdt; + fib_entry_delegate_t *fed; + + FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + { + fib_entry_src_mk_lb(fib_entry, + fib_entry_get_best_src_i(fib_entry), + fib_entry_delegate_type_to_chain_type(fdt), + &fed->fd_dpo); + }); + } + } + else + { + /* + * the entry is currently not linked to a path-list. this happens + * when it is this entry that is re-linking path-lists and has thus + * broken the loop + */ + is_looped = 0; + } + + return (is_looped); +} + +u32 +fib_entry_get_resolving_interface (fib_node_index_t entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(entry_index); + + return (fib_path_list_get_resolving_interface(fib_entry->fe_parent)); +} + +fib_source_t +fib_entry_get_best_source (fib_node_index_t entry_index) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *bsrc; + + fib_entry = fib_entry_get(entry_index); + + bsrc = fib_entry_get_best_src_i(fib_entry); + return (fib_entry_src_get_source(bsrc)); +} + +static int +fib_ip4_address_compare (const ip4_address_t * a1, + const ip4_address_t * a2) +{ + /* + * IP addresses are unsiged ints. 
the return value here needs to be signed + * a simple subtraction won't cut it. + * If the addresses are the same, the sort order is undefiend, so phoey. + */ + return ((clib_net_to_host_u32(a1->data_u32) > + clib_net_to_host_u32(a2->data_u32) ) ? + 1 : -1); +} + +static int +fib_ip6_address_compare (const ip6_address_t * a1, + const ip6_address_t * a2) +{ + int i; + for (i = 0; i < ARRAY_LEN (a1->as_u16); i++) + { + int cmp = (clib_net_to_host_u16 (a1->as_u16[i]) - + clib_net_to_host_u16 (a2->as_u16[i])); + if (cmp != 0) + return cmp; + } + return 0; +} + +static int +fib_entry_cmp (fib_node_index_t fib_entry_index1, + fib_node_index_t fib_entry_index2) +{ + fib_entry_t *fib_entry1, *fib_entry2; + int cmp = 0; + + fib_entry1 = fib_entry_get(fib_entry_index1); + fib_entry2 = fib_entry_get(fib_entry_index2); + + switch (fib_entry1->fe_prefix.fp_proto) + { + case FIB_PROTOCOL_IP4: + cmp = fib_ip4_address_compare(&fib_entry1->fe_prefix.fp_addr.ip4, + &fib_entry2->fe_prefix.fp_addr.ip4); + break; + case FIB_PROTOCOL_IP6: + cmp = fib_ip6_address_compare(&fib_entry1->fe_prefix.fp_addr.ip6, + &fib_entry2->fe_prefix.fp_addr.ip6); + break; + case FIB_PROTOCOL_MPLS: + cmp = (fib_entry1->fe_prefix.fp_label - fib_entry2->fe_prefix.fp_label); + + if (0 == cmp) + { + cmp = (fib_entry1->fe_prefix.fp_eos - fib_entry2->fe_prefix.fp_eos); + } + break; + } + + if (0 == cmp) { + cmp = (fib_entry1->fe_prefix.fp_len - fib_entry2->fe_prefix.fp_len); + } + return (cmp); +} + +int +fib_entry_cmp_for_sort (void *i1, void *i2) +{ + fib_node_index_t *fib_entry_index1 = i1, *fib_entry_index2 = i2; + + return (fib_entry_cmp(*fib_entry_index1, + *fib_entry_index2)); +} + +void +fib_entry_lock (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + fib_node_lock(&fib_entry->fe_node); +} + +void +fib_entry_unlock (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + 
fib_node_unlock(&fib_entry->fe_node); +} + +void +fib_entry_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_ENTRY, &fib_entry_vft); +} + +void +fib_entry_encode (fib_node_index_t fib_entry_index, + fib_route_path_encode_t **api_rpaths) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + fib_path_list_walk(fib_entry->fe_parent, fib_path_encode, api_rpaths); +} + +void +fib_entry_get_prefix (fib_node_index_t fib_entry_index, + fib_prefix_t *pfx) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + *pfx = fib_entry->fe_prefix; +} + +u32 +fib_entry_get_fib_index (fib_node_index_t fib_entry_index) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (fib_entry->fe_fib_index); +} + +u32 +fib_entry_pool_size (void) +{ + return (pool_elts(fib_entry_pool)); +} + +static clib_error_t * +show_fib_entry_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_node_index_t fei; + + if (unformat (input, "%d", &fei)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(fib_entry_pool, fei)) + { + vlib_cli_output (vm, "%d@%U", + fei, + format_fib_entry, fei, + FIB_ENTRY_FORMAT_DETAIL2); + } + else + { + vlib_cli_output (vm, "entry %d invalid", fei); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB Entries:"); + pool_foreach_index(fei, fib_entry_pool, + ({ + vlib_cli_output (vm, "%d@%U", + fei, + format_fib_entry, fei, + FIB_ENTRY_FORMAT_BRIEF); + })); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_entry, static) = { + .path = "show fib entry", + .function = show_fib_entry_command, + .short_help = "show fib entry", +}; diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h new file mode 100644 index 00000000000..44a5f2e6d7f --- /dev/null +++ b/src/vnet/fib/fib_entry.h @@ -0,0 +1,530 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_ENTRY_H__ +#define __FIB_ENTRY_H__ + +#include <vnet/fib/fib_node.h> +#include <vnet/fib/fib_entry_delegate.h> +#include <vnet/adj/adj.h> +#include <vnet/ip/ip.h> +#include <vnet/dpo/dpo.h> + +/** + * The different sources that can create a route. + * The sources are defined here the thier relative priority order. + * The lower the value the higher the priority + */ +typedef enum fib_source_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_SOURCE_FIRST, + /** + * Special sources. These are for entries that are added to all + * FIBs by default, and should never be over-ridden (hence they + * are the highest priority) + */ + FIB_SOURCE_SPECIAL = FIB_SOURCE_FIRST, + /** + * Classify. A route that links directly to a classify adj + */ + FIB_SOURCE_CLASSIFY, + /** + * Route added as a result of interface configuration. + * this will also come from the API/CLI, but the distinction is + * that is from confiiguration on an interface, not a 'ip route' command + */ + FIB_SOURCE_INTERFACE, + /** + * A high priority source a plugin can use + */ + FIB_SOURCE_PLUGIN_HI, + /** + * From the control plane API + */ + FIB_SOURCE_API, + /** + * From the CLI. 
+ */ + FIB_SOURCE_CLI, + /** + * LISP + */ + FIB_SOURCE_LISP, + /** + * SRv6 + */ + FIB_SOURCE_SR, + /** + * IPv[46] Mapping + */ + FIB_SOURCE_MAP, + /** + * SIXRD + */ + FIB_SOURCE_SIXRD, + /** + * DHCP + */ + FIB_SOURCE_DHCP, + /** + * Adjacency source. + * routes created as a result of ARP/ND entries. This is lower priority + * then the API/CLI. This is on purpose. trust me. + */ + FIB_SOURCE_ADJ, + /** + * MPLS label. The prefix has been assigned a local label. This source + * never provides forwarding information, instead it acts as a place-holder + * so the association of label to prefix can be maintained + */ + FIB_SOURCE_MPLS, + /** + * Attached Export source. + * routes created as a result of attahced export. routes thus sourced + * will be present in the export tables + */ + FIB_SOURCE_AE, + /** + * Recursive resolution source. + * Used to install an entry that is the resolution traget of another. + */ + FIB_SOURCE_RR, + /** + * uRPF bypass/exemption. + * Used to install an entry that is exempt from the loose uRPF check + */ + FIB_SOURCE_URPF_EXEMPT, + /** + * The default route source. + * The default route is always added to the FIB table (like the + * special sources) but we need to be able to over-ride it with + * 'ip route' sources when provided + */ + FIB_SOURCE_DEFAULT_ROUTE, + /** + * Marker. add new entries before this one. 
+ */ + FIB_SOURCE_LAST = FIB_SOURCE_DEFAULT_ROUTE, +} __attribute__ ((packed)) fib_source_t; + +STATIC_ASSERT (sizeof(fib_source_t) == 1, + "FIB too many sources"); + +/** + * The maximum number of sources + */ +#define FIB_SOURCE_MAX (FIB_SOURCE_LAST+1) + +#define FIB_SOURCES { \ + [FIB_SOURCE_SPECIAL] = "special", \ + [FIB_SOURCE_INTERFACE] = "interface", \ + [FIB_SOURCE_API] = "API", \ + [FIB_SOURCE_CLI] = "CLI", \ + [FIB_SOURCE_ADJ] = "adjacency", \ + [FIB_SOURCE_MAP] = "MAP", \ + [FIB_SOURCE_SR] = "SR", \ + [FIB_SOURCE_SIXRD] = "SixRD", \ + [FIB_SOURCE_LISP] = "LISP", \ + [FIB_SOURCE_CLASSIFY] = "classify", \ + [FIB_SOURCE_DHCP] = "DHCP", \ + [FIB_SOURCE_RR] = "recursive-resolution", \ + [FIB_SOURCE_AE] = "attached_export", \ + [FIB_SOURCE_MPLS] = "mpls", \ + [FIB_SOURCE_URPF_EXEMPT] = "urpf-exempt", \ + [FIB_SOURCE_DEFAULT_ROUTE] = "default-route", \ +} + +#define FOR_EACH_FIB_SOURCE(_item) \ + for (_item = FIB_SOURCE_FIRST; _item < FIB_SOURCE_MAX; _item++) + +/** + * The different sources that can create a route. + * The sources are defined here the thier relative priority order. + * The lower the value the higher the priority + */ +typedef enum fib_entry_attribute_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_ENTRY_ATTRIBUTE_FIRST, + /** + * Connected. The prefix is configured on an interface. + */ + FIB_ENTRY_ATTRIBUTE_CONNECTED = FIB_ENTRY_ATTRIBUTE_FIRST, + /** + * Attached. The prefix is attached to an interface. + */ + FIB_ENTRY_ATTRIBUTE_ATTACHED, + /** + * The route is an explicit drop. + */ + FIB_ENTRY_ATTRIBUTE_DROP, + /** + * The route is exclusive. The client creating the route is + * providing an exclusive adjacency. + */ + FIB_ENTRY_ATTRIBUTE_EXCLUSIVE, + /** + * The route is attached cross tables and thus imports covered + * prefixes from the other table. + */ + FIB_ENTRY_ATTRIBUTE_IMPORT, + /** + * The prefix/address is local to this device + */ + FIB_ENTRY_ATTRIBUTE_LOCAL, + /** + * Marker. add new entries before this one. 
+ */ + FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_LOCAL, +} fib_entry_attribute_t; + +/** + * The maximum number of sources + */ +#define FIB_ENTRY_ATTRIBUTE_MAX (FIB_ENTRY_ATTRIBUTE_LAST+1) + +#define FIB_ENTRY_ATTRIBUTES { \ + [FIB_ENTRY_ATTRIBUTE_CONNECTED] = "connected", \ + [FIB_ENTRY_ATTRIBUTE_ATTACHED] = "attached", \ + [FIB_ENTRY_ATTRIBUTE_IMPORT] = "import", \ + [FIB_ENTRY_ATTRIBUTE_DROP] = "drop", \ + [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_ENTRY_ATTRIBUTE_LOCAL] = "local", \ +} + +#define FOR_EACH_FIB_ATTRIBUTE(_item) \ + for (_item = FIB_ENTRY_ATTRIBUTE_FIRST; \ + _item < FIB_ENTRY_ATTRIBUTE_MAX; \ + _item++) + +typedef enum fib_entry_flag_t_ { + FIB_ENTRY_FLAG_NONE = 0, + FIB_ENTRY_FLAG_CONNECTED = (1 << FIB_ENTRY_ATTRIBUTE_CONNECTED), + FIB_ENTRY_FLAG_ATTACHED = (1 << FIB_ENTRY_ATTRIBUTE_ATTACHED), + FIB_ENTRY_FLAG_DROP = (1 << FIB_ENTRY_ATTRIBUTE_DROP), + FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE), + FIB_ENTRY_FLAG_LOCAL = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL), + FIB_ENTRY_FLAG_IMPORT = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT), +} fib_entry_flag_t; + +/** + * Flags for the source data + */ +typedef enum fib_entry_src_attribute_t_ { + /** + * Marker. Add new values after this one. + */ + FIB_ENTRY_SRC_ATTRIBUTE_FIRST, + /** + * the source has been added to the entry + */ + FIB_ENTRY_SRC_ATTRIBUTE_ADDED = FIB_ENTRY_SRC_ATTRIBUTE_FIRST, + /** + * the source is active/best + */ + FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE, + /** + * Marker. add new entries before this one. 
+ */ + FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE, +} fib_entry_src_attribute_t; + +#define FIB_ENTRY_SRC_ATTRIBUTE_MAX (FIB_ENTRY_SRC_ATTRIBUTE_LAST+1) + +#define FIB_ENTRY_SRC_ATTRIBUTES { \ + [FIB_ENTRY_SRC_ATTRIBUTE_ADDED] = "added", \ + [FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \ +} + +typedef enum fib_entry_src_flag_t_ { + FIB_ENTRY_SRC_FLAG_NONE = 0, + FIB_ENTRY_SRC_FLAG_ADDED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ADDED), + FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE), +} __attribute__ ((packed)) fib_entry_src_flag_t; + +/* + * Keep the size of the flags field to 2 bytes, so it + * can be placed next to the 2 bytes reference count + */ +STATIC_ASSERT (sizeof(fib_entry_src_flag_t) <= 2, + "FIB entry flags field size too big"); + +/** + * Information related to the source of a FIB entry + */ +typedef struct fib_entry_src_t_ { + /** + * A vector of path extensions + */ + struct fib_path_ext_t_ *fes_path_exts; + + /** + * The path-list created by the source + */ + fib_node_index_t fes_pl; + /** + * Which source this info block is for + */ + fib_source_t fes_src; + /** + * Flags on the source + */ + fib_entry_src_flag_t fes_flags; + + /** + * 1 bytes ref count. This is not the number of users of the Entry + * (which is itself not large, due to path-list sharing), but the number + * of times a given source has been added. 
Which is even fewer + */ + u8 fes_ref_count; + + /** + * Flags the source contributes to the entry + */ + fib_entry_flag_t fes_entry_flags; + + /** + * Source specific info + */ + union { + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesr_cover; + /** + * This source's index in the cover's list + */ + u32 fesr_sibling; + } rr; + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesa_cover; + /** + * This source's index in the cover's list + */ + u32 fesa_sibling; + } adj; + struct { + /** + * the index of the FIB entry that is the covering entry + */ + fib_node_index_t fesi_cover; + /** + * This source's index in the cover's list + */ + u32 fesi_sibling; + } interface; + struct { + /** + * This MPLS local label associated with the prefix. + */ + mpls_label_t fesm_label; + + /** + * the indicies of the LFIB entries created + */ + fib_node_index_t fesm_lfes[2]; + } mpls; + struct { + /** + * The source FIB index. + */ + fib_node_index_t fesl_fib_index; + } lisp; + }; +} fib_entry_src_t; + +/** + * An entry in a FIB table. + * + * This entry represents a route added to the FIB that is stored + * in one of the FIB tables. + */ +typedef struct fib_entry_t_ { + /** + * Base class. The entry's node representation in the graph. + */ + fib_node_t fe_node; + /** + * The prefix of the route. this is const just to be sure. + * It is the entry's key/identity and so should never change. + */ + const fib_prefix_t fe_prefix; + /** + * The index of the FIB table this entry is in + */ + u32 fe_fib_index; + /** + * The load-balance used for forwarding. + * + * We don't share the EOS and non-EOS even in case when they could be + * because: + * - complexity & reliability v. memory + * determining the conditions where sharing is possible is non-trivial. 
     *  - separate LBs means we can get the EOS bit right in the MPLS label DPO
     *    and so save a few clock cycles in the DP imposition node since we can
     *    paint the header straight on without the need to check the packet
     *    type to derive the EOS bit value.
     */
    dpo_id_t fe_lb; // [FIB_FORW_CHAIN_MPLS_NUM];
    /**
     * Vector of source infos.
     * Most entries will only have 1 source. So we optimise for memory usage,
     * which is preferable since we have many entries.
     */
    fib_entry_src_t *fe_srcs;
    /**
     * the path-list for which this entry is a child. This is also the path-list
     * that is contributing forwarding for this entry.
     */
    fib_node_index_t fe_parent;
    /**
     * index of this entry in the parent's child list.
     * This is set when this entry is added as a child, but can also
     * be changed by the parent as it manages its list.
     */
    u32 fe_sibling;

    /**
     * A vector of delegates.
     */
    fib_entry_delegate_t *fe_delegates;
} fib_entry_t;

/**
 * Iterate _item over every entry flag value, from FIB_ENTRY_FLAG_FIRST up to
 * (but not including) FIB_ENTRY_FLAG_MAX. The statement that follows the macro
 * is the loop body.
 */
#define FOR_EACH_FIB_ENTRY_FLAG(_item)                                  \
    for (_item = FIB_ENTRY_FLAG_FIRST; _item < FIB_ENTRY_FLAG_MAX; _item++)

/*
 * Level-of-detail selectors for format_fib_entry.
 * NOTE(review): presumably passed through the va_list of format_fib_entry;
 * the formatter is not visible here - confirm against fib_entry.c.
 */
#define FIB_ENTRY_FORMAT_BRIEF   (0x0)
#define FIB_ENTRY_FORMAT_DETAIL  (0x1)
#define FIB_ENTRY_FORMAT_DETAIL2 (0x2)

extern u8 *format_fib_entry (u8 * s, va_list * args);

extern fib_node_index_t fib_entry_create_special(u32 fib_index,
                                                 const fib_prefix_t *prefix,
                                                 fib_source_t source,
                                                 fib_entry_flag_t flags,
                                                 const dpo_id_t *dpo);

extern fib_node_index_t fib_entry_create (u32 fib_index,
                                          const fib_prefix_t *prefix,
                                          fib_source_t source,
                                          fib_entry_flag_t flags,
                                          const fib_route_path_t *paths);
extern void fib_entry_update (fib_node_index_t fib_entry_index,
                              fib_source_t source,
                              fib_entry_flag_t flags,
                              const fib_route_path_t *paths);

extern void fib_entry_path_add(fib_node_index_t fib_entry_index,
                               fib_source_t source,
                               fib_entry_flag_t flags,
                               const fib_route_path_t *rpath);
extern void fib_entry_special_add(fib_node_index_t fib_entry_index,
                                  fib_source_t source,
                                  fib_entry_flag_t flags,
                                  const dpo_id_t *dpo);
extern void fib_entry_special_update(fib_node_index_t fib_entry_index,
                                     fib_source_t source,
                                     fib_entry_flag_t flags,
                                     const dpo_id_t *dpo);
extern fib_entry_src_flag_t fib_entry_special_remove(fib_node_index_t fib_entry_index,
                                                     fib_source_t source);

extern fib_entry_src_flag_t fib_entry_path_remove(fib_node_index_t fib_entry_index,
                                                  fib_source_t source,
                                                  const fib_route_path_t *rpath);
extern fib_entry_src_flag_t fib_entry_delete(fib_node_index_t fib_entry_index,
                                             fib_source_t source);

extern void fib_entry_contribute_urpf(fib_node_index_t path_index,
                                      index_t urpf);
extern void fib_entry_contribute_forwarding(
    fib_node_index_t fib_entry_index,
    fib_forward_chain_type_t type,
    dpo_id_t *dpo);
extern const dpo_id_t * fib_entry_contribute_ip_forwarding(
    fib_node_index_t fib_entry_index);
extern adj_index_t fib_entry_get_adj_for_source(
    fib_node_index_t fib_entry_index,
    fib_source_t source);
/* NOTE(review): 'const' on a by-value int return has no effect. */
extern const int fib_entry_get_dpo_for_source (
    fib_node_index_t fib_entry_index,
    fib_source_t source,
    dpo_id_t *dpo);

extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);

extern int fib_entry_cmp_for_sort(void *i1, void *i2);

extern void fib_entry_cover_changed(fib_node_index_t fib_entry);
extern void fib_entry_cover_updated(fib_node_index_t fib_entry);
extern int fib_entry_recursive_loop_detect(fib_node_index_t entry_index,
                                           fib_node_index_t **entry_indicies);

extern void fib_entry_lock(fib_node_index_t fib_entry_index);
extern void fib_entry_unlock(fib_node_index_t fib_entry_index);

extern u32 fib_entry_child_add(fib_node_index_t fib_entry_index,
                               fib_node_type_t type,
                               fib_node_index_t child_index);
extern void fib_entry_child_remove(fib_node_index_t fib_entry_index,
                                   u32 sibling_index);
extern u32 fib_entry_get_resolving_interface(fib_node_index_t fib_entry_index);
extern u32 fib_entry_get_resolving_interface_for_source(
    fib_node_index_t fib_entry_index,
    fib_source_t source);

extern void fib_entry_encode(fib_node_index_t fib_entry_index,
                             fib_route_path_encode_t **api_rpaths);
extern void fib_entry_get_prefix(fib_node_index_t fib_entry_index,
                                 fib_prefix_t *pfx);
extern u32 fib_entry_get_fib_index(fib_node_index_t fib_entry_index);
extern void fib_entry_set_source_data(fib_node_index_t fib_entry_index,
                                      fib_source_t source,
                                      const void *data);
extern const void* fib_entry_get_source_data(fib_node_index_t fib_entry_index,
                                             fib_source_t source);

extern fib_entry_flag_t fib_entry_get_flags(fib_node_index_t fib_entry_index);
extern fib_entry_flag_t fib_entry_get_flags_for_source(
    fib_node_index_t fib_entry_index,
    fib_source_t source);
extern fib_source_t fib_entry_get_best_source(fib_node_index_t fib_entry_index);
/*
 * Non-zero when the entry has a source-info record for 'source'
 * (implemented in fib_entry_src.c as a search of the entry's fe_srcs vector).
 */
extern int fib_entry_is_sourced(fib_node_index_t fib_entry_index,
                                fib_source_t source);

extern fib_node_index_t fib_entry_get_path_list(fib_node_index_t fib_entry_index);

extern void fib_entry_module_init(void);

/*
 * unsafe... beware the raw pointer.
 */
extern fib_node_index_t fib_entry_get_index(const fib_entry_t * fib_entry);
extern fib_entry_t * fib_entry_get(fib_node_index_t fib_entry_index);

/*
 * for testing purposes.
 */
extern u32 fib_entry_pool_size(void);

#endif
diff --git a/src/vnet/fib/fib_entry_cover.c b/src/vnet/fib/fib_entry_cover.c new file mode 100644 index 00000000000..147c5daa4fd --- /dev/null +++ b/src/vnet/fib/fib_entry_cover.c @@ -0,0 +1,225 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_node_list.h> + +u32 +fib_entry_cover_track (fib_entry_t* cover, + fib_node_index_t covered) +{ + fib_entry_delegate_t *fed; + + FIB_ENTRY_DBG(cover, "cover-track %d", covered); + + ASSERT(fib_entry_get_index(cover) != covered); + + fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED); + + if (NULL == fed) + { + fed = fib_entry_delegate_find_or_add(cover, FIB_ENTRY_DELEGATE_COVERED); + fed->fd_list = fib_node_list_create(); + } + + return (fib_node_list_push_front(fed->fd_list, + 0, FIB_NODE_TYPE_ENTRY, + covered)); +} + +void +fib_entry_cover_untrack (fib_entry_t* cover, + u32 tracked_index) +{ + fib_entry_delegate_t *fed; + + FIB_ENTRY_DBG(cover, "cover-untrack @ %d", tracked_index); + + fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED); + + if (NULL == fed) + return; + + fib_node_list_remove(fed->fd_list, tracked_index); + + if (0 == fib_node_list_get_size(fed->fd_list)) + { + fib_node_list_destroy(&fed->fd_list); + fib_entry_delegate_remove(cover, FIB_ENTRY_DELEGATE_COVERED); + } +} + +/** + * Internal struct to hold user supplied paraneters for the cover walk + */ +typedef struct fib_enty_cover_walk_ctx_t_ { + fib_entry_t *cover; + fib_entry_covered_walk_t walk; + void *ctx; +} fib_enty_cover_walk_ctx_t; + +static int +fib_entry_cover_walk_node_ptr (fib_node_ptr_t *depend, + void *args) +{ + fib_enty_cover_walk_ctx_t *ctx = args; + + ctx->walk(ctx->cover, depend->fnp_index, ctx->ctx); + + /* continue */ + return 
(1); +} + +void +fib_entry_cover_walk (fib_entry_t *cover, + fib_entry_covered_walk_t walk, + void *args) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED); + + if (NULL == fed) + return; + + fib_enty_cover_walk_ctx_t ctx = { + .cover = cover, + .walk = walk, + .ctx = args, + }; + + fib_node_list_walk(fed->fd_list, + fib_entry_cover_walk_node_ptr, + &ctx); +} + +u32 +fib_entry_cover_get_size (fib_entry_t *cover) +{ + fib_entry_delegate_t *fed; + + fed = fib_entry_delegate_get(cover, FIB_ENTRY_DELEGATE_COVERED); + + if (NULL == fed) + return (0); + + return (fib_node_list_get_size(fed->fd_list)); +} + +typedef struct fib_entry_cover_list_format_ctx_t_ { + u8 *s; +} fib_entry_cover_list_format_ctx_t; + +static int +fib_entry_covered_list_format_one (fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_entry_cover_list_format_ctx_t * ctx = args; + + ctx->s = format(ctx->s, "%d, ", covered); + + /* continue */ + return (1); +} + +u8* +fib_entry_cover_list_format (fib_entry_t *fib_entry, + u8 *s) +{ + fib_entry_cover_list_format_ctx_t ctx = { + .s = s, + }; + + fib_entry_cover_walk(fib_entry, + fib_entry_covered_list_format_one, + &ctx); + + return (ctx.s); +} + +static int +fib_entry_cover_change_one (fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_node_index_t new_cover; + + /* + * The 3 entries involved here are: + * cover - the least specific. It will cover both the others + * new_cover - the enty just inserted below the cover + * covered - the entry that was tracking the cover. + * + * The checks below are to determine if new_cover is a cover for covered. + */ + new_cover = pointer_to_uword(args); + + if (FIB_NODE_INDEX_INVALID == new_cover) + { + /* + * nothing has been inserted, which implies the cover was removed. + * 'cover' is thus the new cover. 
+ */ + fib_entry_cover_changed(covered); + } + else if (new_cover != covered) + { + fib_prefix_t pfx_covered, pfx_new_cover; + + fib_entry_get_prefix(covered, &pfx_covered); + fib_entry_get_prefix(new_cover, &pfx_new_cover); + + if (fib_prefix_is_cover(&pfx_new_cover, &pfx_covered)) + { + fib_entry_cover_changed(covered); + } + } + /* continue */ + return (1); +} + +void +fib_entry_cover_change_notify (fib_node_index_t cover_index, + fib_node_index_t covered) +{ + fib_entry_t *cover; + + cover = fib_entry_get(cover_index); + + fib_entry_cover_walk(cover, + fib_entry_cover_change_one, + uword_to_pointer(covered, void*)); +} + +static int +fib_entry_cover_update_one (fib_entry_t *cover, + fib_node_index_t covered, + void *args) +{ + fib_entry_cover_updated(covered); + + /* continue */ + return (1); +} + +void +fib_entry_cover_update_notify (fib_entry_t *fib_entry) +{ + fib_entry_cover_walk(fib_entry, + fib_entry_cover_update_one, + NULL); +} diff --git a/src/vnet/fib/fib_entry_cover.h b/src/vnet/fib/fib_entry_cover.h new file mode 100644 index 00000000000..fbbbc211dc9 --- /dev/null +++ b/src/vnet/fib/fib_entry_cover.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

#ifndef __FIB_ENTRY_COVER_H__
#define __FIB_ENTRY_COVER_H__

#include "fib_entry.h"

/**
 * callback function used when walking the covered entries.
 *
 * @param cover   the entry being walked
 * @param covered index of one entry that is tracking the cover
 * @param ctx     the opaque context given to fib_entry_cover_walk
 *
 * The walk in fib_entry_cover.c does not consult the return value; the
 * callbacks defined there return 1 ("continue").
 */
typedef int (*fib_entry_covered_walk_t)(fib_entry_t *cover,
                                        fib_node_index_t covered,
                                        void *ctx);

/**
 * Add 'covered' to the list of entries tracking 'cover'.
 * @return the handle to pass to fib_entry_cover_untrack
 */
extern u32 fib_entry_cover_track(fib_entry_t *cover,
                                 fib_node_index_t covered);

/**
 * Remove the tracker identified by 'tracked_index' (as returned by
 * fib_entry_cover_track) from 'cover'.
 */
extern void fib_entry_cover_untrack(fib_entry_t *cover,
                                    u32 tracked_index);

/**
 * Invoke 'walk' for each entry tracking 'cover'.
 */
extern void fib_entry_cover_walk(fib_entry_t *cover,
                                 fib_entry_covered_walk_t walk,
                                 void *ctx);

/**
 * Notify the entries tracking a cover that the cover changed / was updated.
 */
extern void fib_entry_cover_change_notify(fib_node_index_t cover_index,
                                          fib_node_index_t covered_index);
extern void fib_entry_cover_update_notify(fib_entry_t *cover);

/**
 * @return the number of entries tracking 'cover'
 */
extern u32 fib_entry_cover_get_size(fib_entry_t *cover);

/**
 * Append the indices of the entries tracking 'fib_entry' to 's'.
 */
extern u8* fib_entry_cover_list_format(fib_entry_t *fib_entry,
                                       u8 *s);

#endif
diff --git a/src/vnet/fib/fib_entry_delegate.c b/src/vnet/fib/fib_entry_delegate.c new file mode 100644 index 00000000000..a0d45f970b3 --- /dev/null +++ b/src/vnet/fib/fib_entry_delegate.c @@ -0,0 +1,149 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ + +#include <vnet/fib/fib_entry_delegate.h> +#include <vnet/fib/fib_entry.h> + +static fib_entry_delegate_t * +fib_entry_delegate_find_i (const fib_entry_t *fib_entry, + fib_entry_delegate_type_t type, + u32 *index) +{ + fib_entry_delegate_t *delegate; + int ii; + + ii = 0; + vec_foreach(delegate, fib_entry->fe_delegates) + { + if (delegate->fd_type == type) + { + if (NULL != index) + *index = ii; + + return (delegate); + } + else + { + ii++; + } + } + + return (NULL); +} + +fib_entry_delegate_t * +fib_entry_delegate_get (const fib_entry_t *fib_entry, + fib_entry_delegate_type_t type) +{ + return (fib_entry_delegate_find_i(fib_entry, type, NULL)); +} + +void +fib_entry_delegate_remove (fib_entry_t *fib_entry, + fib_entry_delegate_type_t type) +{ + fib_entry_delegate_t *fed; + u32 index = ~0; + + fed = fib_entry_delegate_find_i(fib_entry, type, &index); + + ASSERT(NULL != fed); + + vec_del1(fib_entry->fe_delegates, index); +} + +static int +fib_entry_delegate_cmp_for_sort (void * v1, + void * v2) +{ + fib_entry_delegate_t *delegate1 = v1, *delegate2 = v2; + + return (delegate1->fd_type - delegate2->fd_type); +} + +static void +fib_entry_delegate_init (fib_entry_t *fib_entry, + fib_entry_delegate_type_t type) + +{ + fib_entry_delegate_t delegate = { + .fd_entry_index = fib_entry_get_index(fib_entry), + .fd_type = type, + }; + + vec_add1(fib_entry->fe_delegates, delegate); + vec_sort_with_function(fib_entry->fe_delegates, + fib_entry_delegate_cmp_for_sort); +} + +fib_entry_delegate_t * +fib_entry_delegate_find_or_add (fib_entry_t *fib_entry, + fib_entry_delegate_type_t fdt) +{ + fib_entry_delegate_t *delegate; + + delegate = fib_entry_delegate_get(fib_entry, fdt); + + if (NULL == delegate) + { + fib_entry_delegate_init(fib_entry, fdt); + } + + return (fib_entry_delegate_get(fib_entry, fdt)); +} + +fib_entry_delegate_type_t +fib_entry_chain_type_to_delegate_type (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + return 
(FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6); + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS); + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + return (FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS); + case FIB_FORW_CHAIN_TYPE_ETHERNET: + return (FIB_ENTRY_DELEGATE_CHAIN_ETHERNET); + } + ASSERT(0); + return (FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4); +} + +fib_forward_chain_type_t +fib_entry_delegate_type_to_chain_type (fib_entry_delegate_type_t fdt) +{ + switch (fdt) + { + case FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS: + return (FIB_FORW_CHAIN_TYPE_MPLS_EOS); + case FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS: + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + case FIB_ENTRY_DELEGATE_CHAIN_ETHERNET: + return (FIB_FORW_CHAIN_TYPE_ETHERNET); + case FIB_ENTRY_DELEGATE_COVERED: + case FIB_ENTRY_DELEGATE_ATTACHED_IMPORT: + case FIB_ENTRY_DELEGATE_ATTACHED_EXPORT: + break; + } + ASSERT(0); + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} diff --git a/src/vnet/fib/fib_entry_delegate.h b/src/vnet/fib/fib_entry_delegate.h new file mode 100644 index 00000000000..6d3a6549f32 --- /dev/null +++ b/src/vnet/fib/fib_entry_delegate.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __FIB_ENTRY_DELEGATE_T__
#define __FIB_ENTRY_DELEGATE_T__

#include <vnet/fib/fib_node.h>

/**
 * Delegate types
 */
typedef enum fib_entry_delegate_type_t_ {
    /**
     * Forwarding chain types:
     * for the vast majority of FIB entries only one chain is required - the
     * one that forwards traffic matching the fib_entry_t's fib_prefix_t. For those
     * fib_entry_t that are a resolution target for other fib_entry_t's they will also
     * need the chain to provide forwarding for those children. We store these additional
     * chains in delegates to save memory in the common case.
     *
     * Each chain delegate type has the same numeric value as the
     * corresponding forwarding chain type.
     */
    FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4 = FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
    FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP6 = FIB_FORW_CHAIN_TYPE_UNICAST_IP6,
    FIB_ENTRY_DELEGATE_CHAIN_MPLS_EOS = FIB_FORW_CHAIN_TYPE_MPLS_EOS,
    FIB_ENTRY_DELEGATE_CHAIN_MPLS_NON_EOS = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
    FIB_ENTRY_DELEGATE_CHAIN_ETHERNET = FIB_FORW_CHAIN_TYPE_ETHERNET,
    /**
     * Dependency list of covered entries.
     * these are more specific entries that are interested in changes
     * to their respective cover
     */
    FIB_ENTRY_DELEGATE_COVERED,
    /**
     * Attached import/export functionality
     */
    FIB_ENTRY_DELEGATE_ATTACHED_IMPORT,
    FIB_ENTRY_DELEGATE_ATTACHED_EXPORT,
} fib_entry_delegate_type_t;

/**
 * Run _body once for each forwarding-chain delegate present on _entry.
 *
 * _fdt is stepped through the chain delegate types (unicast IPv4 up to and
 * including ethernet); for each type the entry has a delegate for, _fed is
 * set to that delegate before _body is executed. _fdt and _fed are
 * variables supplied by the caller.
 */
#define FOR_EACH_DELEGATE_CHAIN(_entry, _fdt, _fed, _body)    \
{                                                             \
    for (_fdt = FIB_ENTRY_DELEGATE_CHAIN_UNICAST_IP4;         \
         _fdt <= FIB_ENTRY_DELEGATE_CHAIN_ETHERNET;           \
         _fdt++)                                              \
    {                                                         \
        _fed = fib_entry_delegate_get(_entry, _fdt);          \
        if (NULL != _fed) {                                   \
            _body;                                            \
        }                                                     \
    }                                                         \
}

/**
 * A Delegate is a means to implement the Delegation design pattern; the extension of an
 * object's functionality through the composition of, and delegation to, other objects.
 * These 'other' objects are delegates. Delegates are thus attached to other FIB objects
 * to extend their functionality.
 */
typedef struct fib_entry_delegate_t_
{
    /**
     * The FIB entry object to which the delegate is attached
     */
    fib_node_index_t fd_entry_index;

    /**
     * The delegate type
     */
    fib_entry_delegate_type_t fd_type;

    /**
     * A union of data for the different delegate types
     * These delegates are stored in a sparse vector on the entry, so they
     * must all be of the same size. We could use indirection here for all types,
     * i.e. store an index, that's ok for large delegates, like the attached export
     * but for the chain delegates it's excessive
     */
    union
    {
        /**
         * Valid for the forwarding chain delegates. The LB that is built.
         */
        dpo_id_t fd_dpo;

        /**
         * Valid for the attached import cases. An index of the importer/exporter
         */
        fib_node_index_t fd_index;

        /**
         * For the cover tracking. The node list;
         */
        fib_node_list_t fd_list;
    };
} fib_entry_delegate_t;

struct fib_entry_t_;

/**
 * Remove the delegate of the given type from the entry.
 */
extern void fib_entry_delegate_remove(struct fib_entry_t_ *fib_entry,
                                      fib_entry_delegate_type_t type);

/**
 * Get the entry's delegate of the given type, creating it if it does not
 * already exist.
 */
extern fib_entry_delegate_t *fib_entry_delegate_find_or_add(struct fib_entry_t_ *fib_entry,
                                                            fib_entry_delegate_type_t fdt);
/**
 * Get the entry's delegate of the given type, or NULL if it has none.
 */
extern fib_entry_delegate_t *fib_entry_delegate_get(const struct fib_entry_t_ *fib_entry,
                                                    fib_entry_delegate_type_t type);

/**
 * Conversions between forwarding chain types and the delegate types that
 * hold those chains.
 */
extern fib_forward_chain_type_t fib_entry_delegate_type_to_chain_type(
    fib_entry_delegate_type_t type);

extern fib_entry_delegate_type_t fib_entry_chain_type_to_delegate_type(
    fib_forward_chain_type_t type);

#endif
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c new file mode 100644 index 00000000000..060fac941d2 --- /dev/null +++ b/src/vnet/fib/fib_entry_src.c @@ -0,0 +1,1456 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_path_ext.h> +#include <vnet/fib/fib_urpf_list.h> + +/* + * per-source type vft + */ +static fib_entry_src_vft_t fib_entry_src_vft[FIB_SOURCE_MAX]; + +static fib_protocol_t +fib_entry_get_proto (const fib_entry_t * fib_entry) +{ + return (fib_entry->fe_prefix.fp_proto); +} + +void +fib_entry_src_register (fib_source_t source, + const fib_entry_src_vft_t *vft) +{ + fib_entry_src_vft[source] = *vft; +} + +static int +fib_entry_src_cmp_for_sort (void * v1, + void * v2) +{ + fib_entry_src_t *esrc1 = v1, *esrc2 = v2; + + return (esrc1->fes_src - esrc2->fes_src); +} + +void +fib_entry_src_action_init (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_entry_src_t esrc = { + .fes_pl = FIB_NODE_INDEX_INVALID, + .fes_flags = FIB_ENTRY_SRC_FLAG_NONE, + .fes_src = source, + }; + + if (NULL != fib_entry_src_vft[source].fesv_init) + { + fib_entry_src_vft[source].fesv_init(&esrc); + } + + vec_add1(fib_entry->fe_srcs, esrc); + vec_sort_with_function(fib_entry->fe_srcs, + fib_entry_src_cmp_for_sort); +} + +static fib_entry_src_t * +fib_entry_src_find (const fib_entry_t *fib_entry, + fib_source_t source, + u32 *index) + +{ + fib_entry_src_t *esrc; + int ii; + + ii = 0; + vec_foreach(esrc, fib_entry->fe_srcs) + { + if (esrc->fes_src == source) + { + if (NULL != index) + { + *index = ii; + } + return (esrc); + } + else + { + 
ii++; + } + } + + return (NULL); +} + +int +fib_entry_is_sourced (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + + fib_entry = fib_entry_get(fib_entry_index); + + return (NULL != fib_entry_src_find(fib_entry, source, NULL)); +} + +static fib_entry_src_t * +fib_entry_src_find_or_create (fib_entry_t *fib_entry, + fib_source_t source, + u32 *index) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL == esrc) + { + fib_entry_src_action_init(fib_entry, source); + } + + return (fib_entry_src_find(fib_entry, source, NULL)); +} + +void +fib_entry_src_action_deinit (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_entry_src_t *esrc; + u32 index = ~0; + + esrc = fib_entry_src_find(fib_entry, source, &index); + + ASSERT(NULL != esrc); + + if (NULL != fib_entry_src_vft[source].fesv_deinit) + { + fib_entry_src_vft[source].fesv_deinit(esrc); + } + + vec_free(esrc->fes_path_exts); + vec_del1(fib_entry->fe_srcs, index); +} + +fib_entry_src_cover_res_t +fib_entry_src_action_cover_change (fib_entry_t *fib_entry, + fib_source_t source) +{ + if (NULL != fib_entry_src_vft[source].fesv_cover_change) + { + return (fib_entry_src_vft[source].fesv_cover_change( + fib_entry_src_find(fib_entry, source, NULL), + fib_entry)); + } + + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + return (res); +} + +fib_entry_src_cover_res_t +fib_entry_src_action_cover_update (fib_entry_t *fib_entry, + fib_source_t source) +{ + if (NULL != fib_entry_src_vft[source].fesv_cover_update) + { + return (fib_entry_src_vft[source].fesv_cover_update( + fib_entry_src_find(fib_entry, source, NULL), + fib_entry)); + } + + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + return (res); +} + +typedef struct fib_entry_src_collect_forwarding_ctx_t_ +{ + load_balance_path_t * next_hops; + const fib_entry_t *fib_entry; + const 
fib_entry_src_t *esrc; + fib_forward_chain_type_t fct; + int is_recursive; +} fib_entry_src_collect_forwarding_ctx_t; + +/** + * @brief Determine whether this FIB entry should use a load-balance MAP + * to support PIC edge fast convergence + */ +load_balance_flags_t +fib_entry_calc_lb_flags (fib_entry_src_collect_forwarding_ctx_t *ctx) +{ + /** + * We'll use a LB map is the path-list has recursive paths. + * recursive paths implies BGP, and hence scale. + */ + if (ctx->is_recursive) + { + return (LOAD_BALANCE_FLAG_USES_MAP); + } + return (LOAD_BALANCE_FLAG_NONE); +} + +static int +fib_entry_src_valid_out_label (mpls_label_t label) +{ + return ((MPLS_LABEL_IS_REAL(label) || + MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL == label || + MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL == label || + MPLS_IETF_IMPLICIT_NULL_LABEL == label)); +} + +/** + * @brief Turn the chain type requested by the client into the one they + * really wanted + */ +fib_forward_chain_type_t +fib_entry_chain_type_fixup (const fib_entry_t *entry, + fib_forward_chain_type_t fct) +{ + ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS == fct); + + /* + * The EOS chain is a tricky since one cannot know the adjacency + * to link to without knowing what the packets payload protocol + * will be once the label is popped. + */ + fib_forward_chain_type_t dfct; + + dfct = fib_entry_get_default_chain_type(entry); + + if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct) + { + /* + * If the entry being asked is a eos-MPLS label entry, + * then use the payload-protocol field, that we stashed there + * for just this purpose + */ + return (fib_forw_chain_type_from_dpo_proto( + entry->fe_prefix.fp_payload_proto)); + } + /* + * else give them what this entry would be by default. i.e. if it's a v6 + * entry, then the label its local labelled should be carrying v6 traffic. + * If it's a non-EOS label entry, then there are more labels and we want + * a non-eos chain. 
+ */ + return (dfct); +} + +static int +fib_entry_src_collect_forwarding (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *arg) +{ + fib_entry_src_collect_forwarding_ctx_t *ctx; + fib_path_ext_t *path_ext; + + ctx = arg; + + /* + * if the path is not resolved, don't include it. + */ + if (!fib_path_is_resolved(path_index)) + { + return (!0); + } + + if (fib_path_is_recursive(path_index)) + { + ctx->is_recursive = 1; + } + + /* + * get the matching path-extension for the path being visited. + */ + vec_foreach(path_ext, ctx->esrc->fes_path_exts) + { + if (path_ext->fpe_path_index == path_index) + break; + } + + if (NULL != path_ext && + path_ext->fpe_path_index == path_index && + fib_entry_src_valid_out_label(path_ext->fpe_label_stack[0])) + { + /* + * found a matching extension. stack it to obtain the forwarding + * info for this path. + */ + ctx->next_hops = fib_path_ext_stack(path_ext, ctx->fib_entry, ctx->fct, ctx->next_hops); + } + else + { + load_balance_path_t *nh; + + /* + * no extension => no out-going label for this path. that's OK + * in the case of an IP or EOS chain, but not for non-EOS + */ + switch (ctx->fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + /* + * EOS traffic with no label to stack, we need the IP Adj + */ + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo); + + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + if (fib_path_is_exclusive(path_index) || + fib_path_is_deag(path_index)) + { + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &nh->path_dpo); + } + break; + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + { + /* + * no label. we need a chain based on the payload. fixup. 
+ */ + vec_add2(ctx->next_hops, nh, 1); + + nh->path_index = path_index; + nh->path_weight = fib_path_get_weight(path_index); + fib_path_contribute_forwarding(path_index, + fib_entry_chain_type_fixup(ctx->fib_entry, + ctx->fct), + &nh->path_dpo); + + break; + } + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; + } + } + + return (!0); +} + +void +fib_entry_src_mk_lb (fib_entry_t *fib_entry, + const fib_entry_src_t *esrc, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb) +{ + dpo_proto_t lb_proto; + + /* + * If the entry has path extensions then we construct a load-balance + * by stacking the extensions on the forwarding chains of the paths. + * Otherwise we use the load-balance of the path-list + */ + fib_entry_src_collect_forwarding_ctx_t ctx = { + .esrc = esrc, + .fib_entry = fib_entry, + .next_hops = NULL, + .is_recursive = 0, + .fct = fct, + }; + + lb_proto = fib_proto_to_dpo(fib_entry->fe_prefix.fp_proto); + + fib_path_list_walk(esrc->fes_pl, + fib_entry_src_collect_forwarding, + &ctx); + + if (esrc->fes_entry_flags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + /* + * the client provided the DPO that the entry should link to. + * all entries must link to a LB, so if it is an LB already + * then we can use it. 
+ */ + if ((1 == vec_len(ctx.next_hops)) && + (DPO_LOAD_BALANCE == ctx.next_hops[0].path_dpo.dpoi_type)) + { + dpo_copy(dpo_lb, &ctx.next_hops[0].path_dpo); + dpo_reset(&ctx.next_hops[0].path_dpo); + return; + } + } + + if (!dpo_id_is_valid(dpo_lb)) + { + /* + * first time create + */ + flow_hash_config_t fhc; + + fhc = fib_table_get_flow_hash_config(fib_entry->fe_fib_index, + dpo_proto_to_fib(lb_proto)); + dpo_set(dpo_lb, + DPO_LOAD_BALANCE, + lb_proto, + load_balance_create(0, lb_proto, fhc)); + } + + load_balance_multipath_update(dpo_lb, + ctx.next_hops, + fib_entry_calc_lb_flags(&ctx)); + vec_free(ctx.next_hops); + + /* + * if this entry is sourced by the uRPF-exempt source then we + * append the always present local0 interface (index 0) to the + * uRPF list so it is not empty. that way packets pass the loose check. + */ + index_t ui = fib_path_list_get_urpf(esrc->fes_pl); + + if (fib_entry_is_sourced(fib_entry_get_index(fib_entry), + FIB_SOURCE_URPF_EXEMPT) && + (0 == fib_urpf_check_size(ui))) + { + /* + * The uRPF list we get from the path-list is shared by all + * other users of the list, but the uRPF exemption applies + * only to this prefix. So we need our own list. + */ + ui = fib_urpf_list_alloc_and_lock(); + fib_urpf_list_append(ui, 0); + fib_urpf_list_bake(ui); + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + fib_urpf_list_unlock(ui); + } + else + { + load_balance_set_urpf(dpo_lb->dpoi_index, ui); + } +} + +void +fib_entry_src_action_install (fib_entry_t *fib_entry, + fib_source_t source) +{ + /* + * Install the forwarding chain for the given source into the forwarding + * tables + */ + fib_forward_chain_type_t fct; + fib_entry_src_t *esrc; + int insert; + + fct = fib_entry_get_default_chain_type(fib_entry); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + /* + * Every entry has its own load-balance object. All changes to the entry's + * forwarding result in an inplace modify of the load-balance. 
This means + * the load-balance object only needs to be added to the forwarding + * DB once, when it is created. + */ + insert = !dpo_id_is_valid(&fib_entry->fe_lb); + + fib_entry_src_mk_lb(fib_entry, esrc, fct, &fib_entry->fe_lb); + + ASSERT(dpo_id_is_valid(&fib_entry->fe_lb)); + FIB_ENTRY_DBG(fib_entry, "install: %d", fib_entry->fe_lb); + + /* + * insert the adj into the data-plane forwarding trie + */ + if (insert) + { + fib_table_fwding_dpo_update(fib_entry->fe_fib_index, + &fib_entry->fe_prefix, + &fib_entry->fe_lb); + } + + /* + * if any of the other chain types are already created they will need + * updating too + */ + fib_entry_delegate_type_t fdt; + fib_entry_delegate_t *fed; + + FOR_EACH_DELEGATE_CHAIN(fib_entry, fdt, fed, + { + fib_entry_src_mk_lb(fib_entry, esrc, + fib_entry_delegate_type_to_chain_type(fdt), + &fed->fd_dpo); + }); +} + +void +fib_entry_src_action_uninstall (fib_entry_t *fib_entry) +{ + /* + * uninstall the forwarding chain from the forwarding tables + */ + FIB_ENTRY_DBG(fib_entry, "uninstall: %d", + fib_entry->fe_adj_index); + + if (dpo_id_is_valid(&fib_entry->fe_lb)) + { + fib_table_fwding_dpo_remove( + fib_entry->fe_fib_index, + &fib_entry->fe_prefix, + &fib_entry->fe_lb); + + dpo_reset(&fib_entry->fe_lb); + } +} + +static void +fib_entry_recursive_loop_detect_i (fib_node_index_t path_list_index) +{ + fib_node_index_t *entries = NULL; + + fib_path_list_recursive_loop_detect(path_list_index, &entries); + + vec_free(entries); +} + +void +fib_entry_src_action_activate (fib_entry_t *fib_entry, + fib_source_t source) + +{ + int houston_we_are_go_for_install; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(!(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE)); + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED); + + esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ACTIVE; + + if (NULL != fib_entry_src_vft[source].fesv_activate) + { + houston_we_are_go_for_install = + fib_entry_src_vft[source].fesv_activate(esrc, 
fib_entry); + } + else + { + /* + * the source is not providing an activate function, we'll assume + * therefore it has no objection to installing the entry + */ + houston_we_are_go_for_install = !0; + } + + /* + * link to the path-list provided by the source, and go check + * if that forms any loops in the graph. + */ + fib_entry->fe_parent = esrc->fes_pl; + fib_entry->fe_sibling = + fib_path_list_child_add(fib_entry->fe_parent, + FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry)); + + fib_entry_recursive_loop_detect_i(fib_entry->fe_parent); + + FIB_ENTRY_DBG(fib_entry, "activate: %d", + fib_entry->fe_parent); + + if (0 != houston_we_are_go_for_install) + { + fib_entry_src_action_install(fib_entry, source); + } + else + { + fib_entry_src_action_uninstall(fib_entry); + } +} + +void +fib_entry_src_action_deactivate (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_node_index_t path_list_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE); + + if (NULL != fib_entry_src_vft[source].fesv_deactivate) + { + fib_entry_src_vft[source].fesv_deactivate(esrc, fib_entry); + } + + esrc->fes_flags &= ~FIB_ENTRY_SRC_FLAG_ACTIVE; + + FIB_ENTRY_DBG(fib_entry, "deactivate: %d", fib_entry->fe_parent); + + /* + * un-link from an old path-list. Check for any loops this will clear + */ + path_list_index = fib_entry->fe_parent; + fib_entry->fe_parent = FIB_NODE_INDEX_INVALID; + + fib_entry_recursive_loop_detect_i(path_list_index); + + /* + * this will unlock the path-list, so it may be invalid thereafter. 
+ */ + fib_path_list_child_remove(path_list_index, fib_entry->fe_sibling); + fib_entry->fe_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_action_fwd_update (const fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_entry_src_t *esrc; + + vec_foreach(esrc, fib_entry->fe_srcs) + { + if (NULL != fib_entry_src_vft[esrc->fes_src].fesv_fwd_update) + { + fib_entry_src_vft[esrc->fes_src].fesv_fwd_update(esrc, + fib_entry, + source); + } + } +} + +void +fib_entry_src_action_reactivate (fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_node_index_t path_list_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE); + + FIB_ENTRY_DBG(fib_entry, "reactivate: %d to %d", + fib_entry->fe_parent, + esrc->fes_pl); + + if (fib_entry->fe_parent != esrc->fes_pl) + { + /* + * un-link from an old path-list. Check for any loops this will clear + */ + path_list_index = fib_entry->fe_parent; + fib_entry->fe_parent = FIB_NODE_INDEX_INVALID; + + /* + * temporary lock so it doesn't get deleted when this entry is no + * longer a child. + */ + fib_path_list_lock(path_list_index); + + /* + * this entry is no longer a child. after unlinking check if any loops + * were broken + */ + fib_path_list_child_remove(path_list_index, + fib_entry->fe_sibling); + + fib_entry_recursive_loop_detect_i(path_list_index); + + /* + * link to the path-list provided by the source, and go check + * if that forms any loops in the graph. 
+ */ + fib_entry->fe_parent = esrc->fes_pl; + fib_entry->fe_sibling = + fib_path_list_child_add(fib_entry->fe_parent, + FIB_NODE_TYPE_ENTRY, + fib_entry_get_index(fib_entry)); + + fib_entry_recursive_loop_detect_i(fib_entry->fe_parent); + fib_path_list_unlock(path_list_index); + } + fib_entry_src_action_install(fib_entry, source); + fib_entry_src_action_fwd_update(fib_entry, source); +} + +void +fib_entry_src_action_installed (const fib_entry_t *fib_entry, + fib_source_t source) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != fib_entry_src_vft[source].fesv_installed) + { + fib_entry_src_vft[source].fesv_installed(esrc, + fib_entry); + } + + fib_entry_src_action_fwd_update(fib_entry, source); +} + +/* + * fib_entry_src_action_add + * + * Adding a source can result in a new fib_entry being created, which + * can in turn mean the pool is realloc'd and thus the entry passed as + * an argument is also realloc'd + * @return the original entry + */ +fib_entry_t * +fib_entry_src_action_add (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find_or_create(fib_entry, source, NULL); + + esrc->fes_ref_count++; + + if (1 != esrc->fes_ref_count) + { + /* + * we only want to add the source on the 0->1 transition + */ + return (fib_entry); + } + + esrc->fes_entry_flags = flags; + + /* + * save variable so we can recover from a fib_entry realloc. 
+ */ + fib_entry_index = fib_entry_get_index(fib_entry); + + if (NULL != fib_entry_src_vft[source].fesv_add) + { + fib_entry_src_vft[source].fesv_add(esrc, + fib_entry, + flags, + fib_entry_get_proto(fib_entry), + dpo); + } + + fib_entry = fib_entry_get(fib_entry_index); + + esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED; + + fib_path_list_lock(esrc->fes_pl); + + /* + * the source owns a lock on the entry + */ + fib_entry_lock(fib_entry_get_index(fib_entry)); + + return (fib_entry); +} + +/* + * fib_entry_src_action_update + * + * Adding a source can result in a new fib_entry being created, which + * can in turn mean the pool is realloc'd and thus the entry passed as + * an argument is also realloc'd + * @return the original entry + */ +fib_entry_t * +fib_entry_src_action_update (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index, old_path_list_index; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find_or_create(fib_entry, source, NULL); + + if (NULL == esrc) + return (fib_entry_src_action_add(fib_entry, source, flags, dpo)); + + old_path_list_index = esrc->fes_pl; + esrc->fes_entry_flags = flags; + + /* + * save variable so we can recover from a fib_entry realloc. 
+ */ + fib_entry_index = fib_entry_get_index(fib_entry); + + if (NULL != fib_entry_src_vft[source].fesv_add) + { + fib_entry_src_vft[source].fesv_add(esrc, + fib_entry, + flags, + fib_entry_get_proto(fib_entry), + dpo); + } + + fib_entry = fib_entry_get(fib_entry_index); + + esrc->fes_flags |= FIB_ENTRY_SRC_FLAG_ADDED; + + fib_path_list_lock(esrc->fes_pl); + fib_path_list_unlock(old_path_list_index); + + return (fib_entry); +} + + +fib_entry_src_flag_t +fib_entry_src_action_remove (fib_entry_t *fib_entry, + fib_source_t source) + +{ + fib_node_index_t old_path_list; + fib_entry_src_flag_t sflags; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL == esrc) + return (FIB_ENTRY_SRC_FLAG_ACTIVE); + + esrc->fes_ref_count--; + sflags = esrc->fes_flags; + + if (0 != esrc->fes_ref_count) + { + /* + * only remove the source on the 1->0 transition + */ + return (sflags); + } + + if (esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ACTIVE) + { + fib_entry_src_action_deactivate(fib_entry, source); + } + + old_path_list = esrc->fes_pl; + + if (NULL != fib_entry_src_vft[source].fesv_remove) + { + fib_entry_src_vft[source].fesv_remove(esrc); + } + + fib_path_list_unlock(old_path_list); + fib_entry_unlock(fib_entry_get_index(fib_entry)); + + sflags &= ~FIB_ENTRY_SRC_FLAG_ADDED; + fib_entry_src_action_deinit(fib_entry, source); + + return (sflags); +} + +static inline int +fib_route_recurses_via_self (const fib_prefix_t *prefix, + const fib_route_path_t *rpath) +{ + /* + * not all zeros next hop && + * is recursive path && + * nexthop is same as the route's address + */ + return ((!ip46_address_is_zero(&rpath->frp_addr)) && + (~0 == rpath->frp_sw_if_index) && + (0 == ip46_address_cmp(&rpath->frp_addr, &prefix->fp_addr))); + +} + +/* + * fib_route_attached_cross_table + * + * Return true if the route is attached via an interface that + * is not in the same table as the route + */ +static inline int +fib_route_attached_cross_table (const fib_entry_t 
*fib_entry, + const fib_route_path_t *rpath) +{ + /* + * - All zeros next-hop + * - a valid interface + * - entry's fib index not equal to interface's index + */ + if (ip46_address_is_zero(&rpath->frp_addr) && + (~0 != rpath->frp_sw_if_index) && + (fib_entry->fe_fib_index != + fib_table_get_index_for_sw_if_index(fib_entry_get_proto(fib_entry), + rpath->frp_sw_if_index))) + { + return (!0); + } + return (0); +} + +/* + * fib_path_is_attached + * + * Return true if the path is attached; that is, it has an all zeros + * next-hop and a valid interface + */ +static inline int +fib_path_is_attached (const fib_route_path_t *rpath) +{ + /* + * - All zeros next-hop + * - a valid interface + */ + if (ip46_address_is_zero(&rpath->frp_addr) && + (~0 != rpath->frp_sw_if_index)) + { + return (!0); + } + return (0); +} + +fib_path_list_flags_t +fib_entry_src_flags_2_path_list_flags (fib_entry_flag_t eflags) +{ + fib_path_list_flags_t plf = FIB_PATH_LIST_FLAG_NONE; + + if (eflags & FIB_ENTRY_FLAG_DROP) + { + plf |= FIB_PATH_LIST_FLAG_DROP; + } + if (eflags & FIB_ENTRY_FLAG_LOCAL) + { + plf |= FIB_PATH_LIST_FLAG_LOCAL; + } + if (eflags & FIB_ENTRY_FLAG_EXCLUSIVE) + { + plf |= FIB_PATH_LIST_FLAG_EXCLUSIVE; + } + + return (plf); +} + +static void +fib_entry_flags_update (const fib_entry_t *fib_entry, + const fib_route_path_t *rpath, + fib_path_list_flags_t *pl_flags, + fib_entry_src_t *esrc) +{ + /* + * don't allow the addition of a recursive looped path for prefix + * via itself. + */ + if (fib_route_recurses_via_self(&fib_entry->fe_prefix, rpath)) + { + /* + * force the install of a drop path-list. + * we want the entry to have some path-list, mainly so + * the dodgy path can be removed when the source stops playing + * silly buggers. 
+ */ + *pl_flags |= FIB_PATH_LIST_FLAG_DROP; + } + else + { + *pl_flags &= ~FIB_PATH_LIST_FLAG_DROP; + } + + if ((esrc->fes_src == FIB_SOURCE_API) || + (esrc->fes_src == FIB_SOURCE_CLI)) + { + if (fib_path_is_attached(rpath)) + { + esrc->fes_entry_flags |= FIB_ENTRY_FLAG_ATTACHED; + } + else + { + esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_ATTACHED; + } + } + if (fib_route_attached_cross_table(fib_entry, rpath)) + { + esrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT; + } + else + { + esrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT; + } +} + +/* + * fib_entry_src_path_ext_append + * + * append a path extension to the entry's list + */ +static void +fib_entry_src_path_ext_append (fib_entry_src_t *esrc, + const fib_route_path_t *rpath) +{ + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t *path_ext; + + vec_add2(esrc->fes_path_exts, path_ext, 1); + + fib_path_ext_init(path_ext, esrc->fes_pl, rpath); + } +} + +/* + * fib_entry_src_path_ext_insert + * + * insert, sorted, a path extension to the entry's list. + * It's not strictly necessary to sort the path extensions, since each + * extension has the path index to which it resolves. However, by being + * sorted the load-balance produced has a deterministic order, not an order + * based on the sequence of extension additions. this is a considerable benefit. 
+ */ +static void +fib_entry_src_path_ext_insert (fib_entry_src_t *esrc, + const fib_route_path_t *rpath) +{ + if (0 == vec_len(esrc->fes_path_exts)) + return (fib_entry_src_path_ext_append(esrc, rpath)); + + if (NULL != rpath->frp_label_stack) + { + fib_path_ext_t path_ext; + int i = 0; + + fib_path_ext_init(&path_ext, esrc->fes_pl, rpath); + + while (i < vec_len(esrc->fes_path_exts) && + (fib_path_ext_cmp(&esrc->fes_path_exts[i], rpath) < 0)) + { + i++; + } + + vec_insert_elts(esrc->fes_path_exts, &path_ext, 1, i); + } +} + +/* + * fib_entry_src_action_path_add + * + * Adding a source can result in a new fib_entry being created, which + * can in turn mean the pool is realloc'd and thus the entry passed as + * an argument is also realloc'd + * @return the entry + */ +fib_entry_t* +fib_entry_src_action_path_add (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpath) +{ + fib_node_index_t old_path_list, fib_entry_index; + fib_path_list_flags_t pl_flags; + fib_path_ext_t *path_ext; + fib_entry_src_t *esrc; + + /* + * save variable so we can recover from a fib_entry realloc. + */ + fib_entry_index = fib_entry_get_index(fib_entry); + + esrc = fib_entry_src_find(fib_entry, source, NULL); + if (NULL == esrc) + { + fib_entry = + fib_entry_src_action_add(fib_entry, + source, + flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + esrc = fib_entry_src_find(fib_entry, source, NULL); + } + + /* + * we are no doubt modifying a path-list. If the path-list + * is shared, and hence not modifiable, then the index returned + * will be for a different path-list. This FIB entry needs + * to maintain its lock appropriately. 
+ */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_add); + + pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry)); + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + + fib_entry_src_vft[source].fesv_path_add(esrc, fib_entry, pl_flags, rpath); + fib_entry = fib_entry_get(fib_entry_index); + + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + fib_path_ext_resolve(path_ext, esrc->fes_pl); + } + /* + * if the path has a label we need to add a path extension + */ + fib_entry_src_path_ext_insert(esrc, rpath); + + fib_path_list_lock(esrc->fes_pl); + fib_path_list_unlock(old_path_list); + + return (fib_entry); +} + +/* + * fib_entry_src_action_path_swap + * + * The source is providing new paths to replace the old ones. + * Adding a source can result in a new fib_entry being created, which + * can in turn mean the pool is realloc'd and thus the entry passed as + * an argument is also realloc'd + * @return the entry + */ +fib_entry_t* +fib_entry_src_action_path_swap (fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t old_path_list, fib_entry_index; + fib_path_list_flags_t pl_flags; + const fib_route_path_t *rpath; + fib_path_ext_t *path_ext; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + /* + * save variable so we can recover from a fib_entry realloc. + */ + fib_entry_index = fib_entry_get_index(fib_entry); + + if (NULL == esrc) + { + fib_entry = fib_entry_src_action_add(fib_entry, + source, + flags, + drop_dpo_get( + fib_proto_to_dpo( + fib_entry_get_proto(fib_entry)))); + esrc = fib_entry_src_find(fib_entry, source, NULL); + } + + /* + * swapping paths may create a new path-list (or may use an existing shared) + * but we are certainly getting a different one. 
This FIB entry needs + * to maintain its lock appropriately. + */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_swap); + + pl_flags = fib_entry_src_flags_2_path_list_flags(flags); + + vec_foreach(rpath, rpaths) + { + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + } + + fib_entry_src_vft[source].fesv_path_swap(esrc, + fib_entry, + pl_flags, + rpaths); + + vec_foreach(path_ext, esrc->fes_path_exts) + { + vec_free(path_ext->fpe_label_stack); + } + vec_free(esrc->fes_path_exts); + + vec_foreach(rpath, rpaths) + { + fib_entry_src_path_ext_append(esrc, rpath); + } + + fib_entry = fib_entry_get(fib_entry_index); + + fib_path_list_lock(esrc->fes_pl); + fib_path_list_unlock(old_path_list); + + return (fib_entry); +} + +fib_entry_src_flag_t +fib_entry_src_action_path_remove (fib_entry_t *fib_entry, + fib_source_t source, + const fib_route_path_t *rpath) +{ + fib_path_list_flags_t pl_flags; + fib_node_index_t old_path_list; + fib_path_ext_t *path_ext; + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + ASSERT(NULL != esrc); + ASSERT(esrc->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED); + + /* + * we are no doubt modifying a path-list. If the path-list + * is shared, and hence not modifiable, then the index returned + * will be for a different path-list. This FIB entry needs + * to maintain its lock appropriately. + */ + old_path_list = esrc->fes_pl; + + ASSERT(NULL != fib_entry_src_vft[source].fesv_path_remove); + + pl_flags = fib_entry_src_flags_2_path_list_flags(fib_entry_get_flags_i(fib_entry)); + fib_entry_flags_update(fib_entry, rpath, &pl_flags, esrc); + + fib_entry_src_vft[source].fesv_path_remove(esrc, pl_flags, rpath); + /* + * find the matching path extension and remove it + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + if (!fib_path_ext_cmp(path_ext, rpath)) + { + /* + * delete the element moving the remaining elements down 1 position. + * this preserves the sorted order. 
+ */ + vec_free(path_ext->fpe_label_stack); + vec_delete(esrc->fes_path_exts, 1, (path_ext - esrc->fes_path_exts)); + break; + } + } + /* + * re-resolve all the path-extensions with the new path-list + */ + vec_foreach(path_ext, esrc->fes_path_exts) + { + fib_path_ext_resolve(path_ext, esrc->fes_pl); + } + + /* + * lock the new path-list, unlock the old if it had one + */ + fib_path_list_unlock(old_path_list); + + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) { + fib_path_list_lock(esrc->fes_pl); + return (FIB_ENTRY_SRC_FLAG_ADDED); + } + else + { + /* + * no more paths left from this source + */ + fib_entry_src_action_remove(fib_entry, source); + return (FIB_ENTRY_SRC_FLAG_NONE); + } +} + +u8* +fib_entry_src_format (fib_entry_t *fib_entry, + fib_source_t source, + u8* s) +{ + fib_entry_src_t *esrc; + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != fib_entry_src_vft[source].fesv_format) + { + return (fib_entry_src_vft[source].fesv_format(esrc, s)); + } + return (s); +} + +adj_index_t +fib_entry_get_adj_for_source (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return (ADJ_INDEX_INVALID); + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) + { + return (fib_path_list_get_adj( + esrc->fes_pl, + fib_entry_get_default_chain_type(fib_entry))); + } + } + return (ADJ_INDEX_INVALID); +} + +const int +fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index, + fib_source_t source, + dpo_id_t *dpo) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return (0); + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) + { + 
fib_path_list_contribute_forwarding( + esrc->fes_pl, + fib_entry_get_default_chain_type(fib_entry), + dpo); + + return (dpo_id_is_valid(dpo)); + } + } + return (0); +} + +u32 +fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(entry_index); + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + if (FIB_NODE_INDEX_INVALID != esrc->fes_pl) + { + return (fib_path_list_get_resolving_interface(esrc->fes_pl)); + } + } + return (~0); +} + +fib_entry_flag_t +fib_entry_get_flags_for_source (fib_node_index_t entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(entry_index); + + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc) + { + return (esrc->fes_entry_flags); + } + + return (FIB_ENTRY_FLAG_NONE); +} + +fib_entry_flag_t +fib_entry_get_flags_i (const fib_entry_t *fib_entry) +{ + fib_entry_flag_t flags; + + /* + * the vector of sources is deliberately arranged in priority order + */ + if (0 == vec_len(fib_entry->fe_srcs)) + { + flags = FIB_ENTRY_FLAG_NONE; + } + else + { + fib_entry_src_t *esrc; + + esrc = vec_elt_at_index(fib_entry->fe_srcs, 0); + flags = esrc->fes_entry_flags; + } + + return (flags); +} + +void +fib_entry_set_source_data (fib_node_index_t fib_entry_index, + fib_source_t source, + const void *data) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(fib_entry_index); + esrc = fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc && + NULL != fib_entry_src_vft[source].fesv_set_data) + { + fib_entry_src_vft[source].fesv_set_data(esrc, fib_entry, data); + } +} + +const void* +fib_entry_get_source_data (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_entry_t *fib_entry; + fib_entry_src_t *esrc; + + fib_entry = fib_entry_get(fib_entry_index); + esrc = 
fib_entry_src_find(fib_entry, source, NULL); + + if (NULL != esrc && + NULL != fib_entry_src_vft[source].fesv_get_data) + { + return (fib_entry_src_vft[source].fesv_get_data(esrc, fib_entry)); + } + return (NULL); +} + +void +fib_entry_src_module_init (void) +{ + fib_entry_src_rr_register(); + fib_entry_src_interface_register(); + fib_entry_src_default_route_register(); + fib_entry_src_special_register(); + fib_entry_src_api_register(); + fib_entry_src_adj_register(); + fib_entry_src_mpls_register(); + fib_entry_src_lisp_register(); +} diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h new file mode 100644 index 00000000000..640c174db47 --- /dev/null +++ b/src/vnet/fib/fib_entry_src.h @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_ENTRY_SRC_H__ +#define __FIB_ENTRY_SRC_H__ + +#include "fib_entry.h" +#include "fib_path_list.h" +#include "fib_internal.h" + +/** + * Debug macro + */ +#ifdef FIB_DEBUG +#define FIB_ENTRY_DBG(_e, _fmt, _args...) \ +{ \ + u8*__tmp = NULL; \ + __tmp = format(__tmp, "e:[%d:%U", \ + fib_entry_get_index(_e), \ + format_ip46_address, \ + &_e->fe_prefix.fp_addr, \ + IP46_TYPE_ANY); \ + __tmp = format(__tmp, "/%d]:", \ + _e->fe_prefix.fp_len); \ + __tmp = format(__tmp, _fmt, ##_args); \ + clib_warning("%s", __tmp); \ + vec_free(__tmp); \ +} +#else +#define FIB_ENTRY_DBG(_e, _fmt, _args...) 
+#endif + +/** + * Source initialisation Function + */ +typedef void (*fib_entry_src_init_t)(fib_entry_src_t *src); + +/** + * Source deinitialisation Function + */ +typedef void (*fib_entry_src_deinit_t)(fib_entry_src_t *src); + +/** + * Source activation. Called when the source is the new best source on the entry. + * Return non-zero if the entry can now install, 0 otherwise + */ +typedef int (*fib_entry_src_activate_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Source Deactivate. + * Called when the source is no longer the best source on the entry + */ +typedef void (*fib_entry_src_deactivate_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Source Add. + * Called when the source is added to the entry + */ +typedef void (*fib_entry_src_add_t)(fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo); + +/** + * Source Remove. Called when the source is removed from the entry + */ +typedef void (*fib_entry_src_remove_t)(fib_entry_src_t *src); + +/** + * Result from a cover update/change + */ +typedef struct fib_entry_src_cover_res_t_ { + u16 install; + fib_node_bw_reason_flag_t bw_reason; +} fib_entry_src_cover_res_t; + +/** + * Cover changed. the source should re-evaluate its cover. + */ +typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_change_t)( + fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Cover updated. The cover the source has, has updated (i.e. its forwarding) + * the source may need to re-evaluate. + */ +typedef fib_entry_src_cover_res_t (*fib_entry_src_cover_update_t)( + fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Forwarding updated. Notification that the forwarding information for the + * entry has been updated. This notification is sent to all sources, not just + * the active best. + */ +typedef void (*fib_entry_src_fwd_update_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_source_t best_source); + +/** + * Installed. 
Notification that the source is now installed as + * the entry's forwarding source. + */ +typedef void (*fib_entry_src_installed_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Format. Append the source's state to the string s and return it + */ +typedef u8* (*fib_entry_src_format_t)(fib_entry_src_t *src, + u8* s); + +/** + * Source path add + * the source is adding a new path + */ +typedef void (*fib_entry_src_path_add_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Source path remove + * the source is removing a path + */ +typedef void (*fib_entry_src_path_remove_t)(fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Source path replace/swap + * the source is providing a new set of paths + */ +typedef void (*fib_entry_src_path_swap_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *path); + +/** + * Set source specific opaque data + */ +typedef void (*fib_entry_src_set_data_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry, + const void *data); + +/** + * Get source specific opaque data + */ +typedef const void* (*fib_entry_src_get_data_t)(fib_entry_src_t *src, + const fib_entry_t *fib_entry); + +/** + * Virtual function table each FIB entry source will register + */ +typedef struct fib_entry_src_vft_t_ { + fib_entry_src_init_t fesv_init; + fib_entry_src_deinit_t fesv_deinit; + fib_entry_src_activate_t fesv_activate; + fib_entry_src_deactivate_t fesv_deactivate; + fib_entry_src_add_t fesv_add; + fib_entry_src_remove_t fesv_remove; + fib_entry_src_path_swap_t fesv_path_swap; + fib_entry_src_path_add_t fesv_path_add; + fib_entry_src_path_remove_t fesv_path_remove; + fib_entry_src_cover_change_t fesv_cover_change; + fib_entry_src_cover_update_t fesv_cover_update; + fib_entry_src_format_t fesv_format; + fib_entry_src_installed_t fesv_installed; + fib_entry_src_fwd_update_t fesv_fwd_update; + 
fib_entry_src_get_data_t fesv_get_data; + fib_entry_src_set_data_t fesv_set_data; +} fib_entry_src_vft_t; + +#define FOR_EACH_SRC_ADDED(_entry, _src, _source, action) \ +{ \ + vec_foreach(_src, _entry->fe_srcs) \ + { \ + if (_src->fes_flags & FIB_ENTRY_SRC_FLAG_ADDED) { \ + _source = _src->fes_src; \ + do { \ + action; \ + } while(0); \ + } \ + } \ +} + +extern u8* fib_entry_src_format(fib_entry_t *entry, + fib_source_t source, + u8* s); + +extern void fib_entry_src_register(fib_source_t source, + const fib_entry_src_vft_t *vft); + +extern void fib_entry_src_action_init(fib_entry_t *entry, + fib_source_t source); + +extern void fib_entry_src_action_deinit(fib_entry_t *fib_entry, + fib_source_t source); + +extern fib_entry_src_cover_res_t fib_entry_src_action_cover_change( + fib_entry_t *entry, + fib_source_t source); + +extern fib_entry_src_cover_res_t fib_entry_src_action_cover_update( + fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_activate(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_deactivate(fib_entry_t *fib_entry, + fib_source_t source); +extern void fib_entry_src_action_reactivate(fib_entry_t *fib_entry, + fib_source_t source); + +extern fib_entry_t* fib_entry_src_action_add(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); +extern fib_entry_t* fib_entry_src_action_update(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); + +extern fib_entry_src_flag_t fib_entry_src_action_remove(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_install(fib_entry_t *fib_entry, + fib_source_t source); + +extern void fib_entry_src_action_uninstall(fib_entry_t *fib_entry); + +extern fib_entry_t* fib_entry_src_action_path_add(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *path); + +extern fib_entry_t* 
fib_entry_src_action_path_swap(fib_entry_t *fib_entry, + fib_source_t source, + fib_entry_flag_t flags, + const fib_route_path_t *path); + +extern fib_entry_src_flag_t fib_entry_src_action_path_remove(fib_entry_t *fib_entry, + fib_source_t source, + const fib_route_path_t *path); + +extern void fib_entry_src_action_installed(const fib_entry_t *fib_entry, + fib_source_t source); + +extern fib_forward_chain_type_t fib_entry_get_default_chain_type( + const fib_entry_t *fib_entry); +extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry); +extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags( + fib_entry_flag_t eflags); + +extern fib_forward_chain_type_t fib_entry_chain_type_fixup(const fib_entry_t *entry, + fib_forward_chain_type_t fct); + +extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry, + const fib_entry_src_t *esrc, + fib_forward_chain_type_t fct, + dpo_id_t *dpo_lb); + + +/* + * Per-source registration. declared here so we save a separate .h file for each + */ +extern void fib_entry_src_default_register(void); +extern void fib_entry_src_rr_register(void); +extern void fib_entry_src_interface_register(void); +extern void fib_entry_src_default_route_register(void); +extern void fib_entry_src_special_register(void); +extern void fib_entry_src_api_register(void); +extern void fib_entry_src_adj_register(void); +extern void fib_entry_src_mpls_register(void); +extern void fib_entry_src_lisp_register(void); + +extern void fib_entry_src_module_init(void); + +#endif diff --git a/src/vnet/fib/fib_entry_src_adj.c b/src/vnet/fib/fib_entry_src_adj.c new file mode 100644 index 00000000000..64f82a73e07 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_adj.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" +#include "fib_table.h" +#include "fib_entry_cover.h" +#include "fib_attached_export.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_adj_init (fib_entry_src_t *src) +{ + src->adj.fesa_cover = FIB_NODE_INDEX_INVALID; + src->adj.fesa_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_adj_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create(pl_flags, paths); +} + +static void +fib_entry_src_adj_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + + +/* + * Source activate. + * Called when the source is the new best source on the entry + */ +static int +fib_entry_src_adj_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * find the covering prefix. become a dependent thereof. + * there should always be a cover, though it may be the default route. 
+ */ + src->adj.fesa_cover = fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix); + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + ASSERT(fib_entry_get_index(fib_entry) != src->adj.fesa_cover); + + cover = fib_entry_get(src->adj.fesa_cover); + + ASSERT(cover != fib_entry); + + src->adj.fesa_sibling = + fib_entry_cover_track(cover, + fib_entry_get_index(fib_entry)); + + /* + * if the cover is attached then this adj source entry can install, + * via the adj. otherwise install a drop. + * This prevents ARP/ND entries on interface X that do not belong + * on X's subnet from being added to the FIB. To do so would allow + * nefarious gratuitous ARP requests to attract traffic to the sender. + * + * and yes, I really do mean attached and not connected. + * this abomination; + * ip route add 10.0.0.0/24 Eth0 + * is attached. and we want adj-fibs to install on Eth0. + */ + return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)); +} + +/* + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_adj_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * remove the dependency on the covering entry + */ + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + cover = fib_entry_get(src->adj.fesa_cover); + + fib_entry_cover_untrack(cover, src->adj.fesa_sibling); + + /* + * tell the cover this entry no longer needs exporting + */ + fib_attached_export_covered_removed(cover, fib_entry_get_index(fib_entry)); + + src->adj.fesa_cover = FIB_NODE_INDEX_INVALID; +} + +static u8* +fib_entry_src_adj_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->adj.fesa_cover)); +} + +static void +fib_entry_src_adj_installed (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * The adj source now rules! 
poke our cover to get exported + */ + fib_entry_t *cover; + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + cover = fib_entry_get(src->adj.fesa_cover); + + fib_attached_export_covered_added(cover, + fib_entry_get_index(fib_entry)); +} + +static fib_entry_src_cover_res_t +fib_entry_src_adj_cover_change (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + + fib_entry_src_adj_deactivate(src, fib_entry); + + res.install = fib_entry_src_adj_activate(src, fib_entry); + + if (res.install) { + /* + * ADJ fib can install + */ + res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + } + + return (res); +} + +/* + * fib_entry_src_adj_cover_update + */ +static fib_entry_src_cover_res_t +fib_entry_src_adj_cover_update (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * the cover has updated, i.e. its forwarding or flags + * have changed. don't deactivate/activate here, since this + * prefix is updated during the covers walk. 
+ */ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_entry_t *cover; + + ASSERT(FIB_NODE_INDEX_INVALID != src->adj.fesa_cover); + + cover = fib_entry_get(src->adj.fesa_cover); + + res.install = (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)); + + return (res); +} + +const static fib_entry_src_vft_t adj_src_vft = { + .fesv_init = fib_entry_src_adj_init, + .fesv_path_swap = fib_entry_src_adj_path_swap, + .fesv_remove = fib_entry_src_adj_remove, + .fesv_activate = fib_entry_src_adj_activate, + .fesv_deactivate = fib_entry_src_adj_deactivate, + .fesv_format = fib_entry_src_adj_format, + .fesv_installed = fib_entry_src_adj_installed, + .fesv_cover_change = fib_entry_src_adj_cover_change, + .fesv_cover_update = fib_entry_src_adj_cover_update, +}; + +void +fib_entry_src_adj_register (void) +{ + fib_entry_src_register(FIB_SOURCE_ADJ, &adj_src_vft); +} diff --git a/src/vnet/fib/fib_entry_src_api.c b/src/vnet/fib/fib_entry_src_api.c new file mode 100644 index 00000000000..edc8a47bc17 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_api.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_api_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_api_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_api_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); +} + +static void +fib_entry_src_api_path_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID == src->fes_pl) + { + src->fes_pl = + fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths); + } + else + { + src->fes_pl = + fib_path_list_copy_and_path_add(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_api_path_remove (fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + src->fes_pl = + fib_path_list_copy_and_path_remove(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_api_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + if (FIB_ENTRY_FLAG_NONE != flags) + { + src->fes_pl = fib_path_list_create_special( + proto, + fib_entry_src_flags_2_path_list_flags(flags), + dpo); + } +} + +static void +fib_entry_src_api_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +const static fib_entry_src_vft_t api_src_vft = { + .fesv_init = fib_entry_src_api_init, + .fesv_deinit = fib_entry_src_api_deinit, + .fesv_add = fib_entry_src_api_add, + .fesv_remove = fib_entry_src_api_remove, + .fesv_path_add = 
fib_entry_src_api_path_add, + .fesv_path_swap = fib_entry_src_api_path_swap, + .fesv_path_remove = fib_entry_src_api_path_remove, +}; + +void +fib_entry_src_api_register (void) +{ + fib_entry_src_register(FIB_SOURCE_PLUGIN_HI, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_API, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_CLI, &api_src_vft); + fib_entry_src_register(FIB_SOURCE_DHCP, &api_src_vft); +} diff --git a/src/vnet/fib/fib_entry_src_default.c b/src/vnet/fib/fib_entry_src_default.c new file mode 100644 index 00000000000..9846cf56e64 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_default.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_default_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_default_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_cover_change (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_default_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_default_path_add (fib_entry_src_t *src, + fib_protocol_t proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight) +{ +} + +static void +fib_entry_src_default_path_remove (fib_entry_src_t *src, + fib_protocol_t proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight) +{ +} + + +/* + * Source activate. + * Called when the source is teh new longer best source on the entry + */ +static void +fib_entry_src_default_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ +} + +/* + * Source Deactivate. 
+ * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_default_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ +} + +static void +fib_entry_src_default_add (fib_entry_src_t *src, + fib_entry_flag_t flags, + fib_protocol_t proto) +{ +} + +static void +fib_entry_src_default_remove (fib_entry_src_t *src) +{ +} + +const static fib_entry_src_vft_t default_src_vft = { + .fesv_init = fib_entry_src_default_init, + .fesv_deinit = fib_entry_src_default_deinit, + .fesv_add = fib_entry_src_default_add, + .fesv_remove = fib_entry_src_default_remove, + .fesv_path_add = fib_entry_src_default_path_add, + .fesv_path_remove = fib_entry_src_default_path_remove, + .fesv_activate = fib_entry_src_default_activate, + .fesv_deactivate = fib_entry_src_default_deactivate, +}; + +void +fib_entry_src_default_register (void) +{ + fib_source_t source; + + FOR_EACH_FIB_SOURCE(source) { + fib_entry_src_register(source, &default_src_vft); + } +} diff --git a/src/vnet/fib/fib_entry_src_default_route.c b/src/vnet/fib/fib_entry_src_default_route.c new file mode 100644 index 00000000000..9f4e7c36952 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_default_route.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_default_route_init (fib_entry_src_t *src) +{ + src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE; +} + +static void +fib_entry_src_default_route_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_default_route_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + dpo); +} + +const static fib_entry_src_vft_t interface_src_vft = { + .fesv_init = fib_entry_src_default_route_init, + .fesv_add = fib_entry_src_default_route_add, + .fesv_remove = fib_entry_src_default_route_remove, +}; + +void +fib_entry_src_default_route_register (void) +{ + fib_entry_src_register(FIB_SOURCE_DEFAULT_ROUTE, &interface_src_vft); +} + + diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c new file mode 100644 index 00000000000..ca04716ed8f --- /dev/null +++ b/src/vnet/fib/fib_entry_src_interface.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" +#include "fib_internal.h" +#include "fib_table.h" +#include "fib_entry_cover.h" +#include "fib_attached_export.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_interface_init (fib_entry_src_t *src) +{ + src->interface.fesi_cover = FIB_NODE_INDEX_INVALID; + src->interface.fesi_sibling = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_interface_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + ip_adjacency_t *adj; + + src->fes_pl = fib_path_list_create(pl_flags, paths); + + /* + * this is a hack to get the entry's prefix into the glean adjacnecy + * so that it is available for fast retreival in the switch path. + */ + if (!(FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)) + { + adj = adj_get(fib_path_list_get_adj( + src->fes_pl, + fib_entry_get_default_chain_type(entry))); + + if (IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index) + { + /* + * the connected prefix will link to a glean on a non-p2p + * interface. + */ + adj->sub_type.glean.receive_addr = entry->fe_prefix.fp_addr; + } + } +} + +/* + * Source activate. + * Called when the source is teh new longer best source on the entry + */ +static int +fib_entry_src_interface_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags) + { + /* + * Track the covering attached/connected cover. 
This is so that + * during an attached export of the cover, this local prefix is + * also exported + */ + src->interface.fesi_cover = + fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix); + + ASSERT(FIB_NODE_INDEX_INVALID != src->interface.fesi_cover); + + cover = fib_entry_get(src->interface.fesi_cover); + + src->interface.fesi_sibling = + fib_entry_cover_track(cover, fib_entry_get_index(fib_entry)); + } + + return (!0); +} + + +/* + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_interface_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * remove the depednecy on the covering entry + */ + if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover) + { + cover = fib_entry_get(src->interface.fesi_cover); + + fib_entry_cover_untrack(cover, src->interface.fesi_sibling); + + src->interface.fesi_cover = FIB_NODE_INDEX_INVALID; + } +} + +static fib_entry_src_cover_res_t +fib_entry_src_interface_cover_change (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + + if (FIB_NODE_INDEX_INVALID == src->interface.fesi_cover) + { + /* + * not tracking the cover. surprised we got poked? + */ + return (res); + } + + /* + * this function is called when this entry's cover has a more specific + * entry inserted benaeth it. That does not necessarily mean that this + * entry is covered by the new prefix. check that + */ + if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix)) + { + fib_entry_src_interface_deactivate(src, fib_entry); + fib_entry_src_interface_activate(src, fib_entry); + } + return (res); +} + +static void +fib_entry_src_interface_installed (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + /* + * The interface source now rules! 
poke our cover to get exported + */ + fib_entry_t *cover; + + if (FIB_NODE_INDEX_INVALID != src->interface.fesi_cover) + { + cover = fib_entry_get(src->interface.fesi_cover); + + fib_attached_export_covered_added(cover, + fib_entry_get_index(fib_entry)); + } +} + +static u8* +fib_entry_src_interface_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->interface.fesi_cover)); +} + +const static fib_entry_src_vft_t interface_src_vft = { + .fesv_init = fib_entry_src_interface_init, + .fesv_path_swap = fib_entry_src_interface_path_swap, + .fesv_activate = fib_entry_src_interface_activate, + .fesv_deactivate = fib_entry_src_interface_deactivate, + .fesv_format = fib_entry_src_interface_format, + .fesv_installed = fib_entry_src_interface_installed, + .fesv_cover_change = fib_entry_src_interface_cover_change, + /* + * not concerned about updates to the cover. the cover will + * decide to export or not + */ +}; + +void +fib_entry_src_interface_register (void) +{ + fib_entry_src_register(FIB_SOURCE_INTERFACE, &interface_src_vft); +} diff --git a/src/vnet/fib/fib_entry_src_lisp.c b/src/vnet/fib/fib_entry_src_lisp.c new file mode 100644 index 00000000000..7f8b91bbab6 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_lisp.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" +#include "fib_path_list.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_lisp_init (fib_entry_src_t *src) +{ +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_lisp_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_lisp_path_swap (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + src->fes_pl = fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); +} + +static void +fib_entry_src_lisp_path_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID == src->fes_pl) + { + src->fes_pl = + fib_path_list_create((FIB_PATH_LIST_FLAG_SHARED | pl_flags), paths); + } + else + { + src->fes_pl = + fib_path_list_copy_and_path_add(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_lisp_path_remove (fib_entry_src_t *src, + fib_path_list_flags_t pl_flags, + const fib_route_path_t *paths) +{ + if (FIB_NODE_INDEX_INVALID != src->fes_pl) + { + src->fes_pl = + fib_path_list_copy_and_path_remove(src->fes_pl, + (FIB_PATH_LIST_FLAG_SHARED | pl_flags), + paths); + } +} + +static void +fib_entry_src_lisp_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + if (FIB_ENTRY_FLAG_NONE != flags) + { + src->fes_pl = fib_path_list_create_special( + proto, + fib_entry_src_flags_2_path_list_flags(flags), + dpo); + } +} + +static void +fib_entry_src_lisp_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_lisp_set_data (fib_entry_src_t *src, + const fib_entry_t *entry, + const void *data) +{ + src->lisp.fesl_fib_index = *(u32*)data; +} + +static const void* +fib_entry_src_lisp_get_data (fib_entry_src_t *src, 
+ const fib_entry_t *entry) +{ + return (&(src->lisp.fesl_fib_index)); +} + +const static fib_entry_src_vft_t api_src_vft = { + .fesv_init = fib_entry_src_lisp_init, + .fesv_deinit = fib_entry_src_lisp_deinit, + .fesv_add = fib_entry_src_lisp_add, + .fesv_remove = fib_entry_src_lisp_remove, + .fesv_path_add = fib_entry_src_lisp_path_add, + .fesv_path_swap = fib_entry_src_lisp_path_swap, + .fesv_path_remove = fib_entry_src_lisp_path_remove, + .fesv_set_data = fib_entry_src_lisp_set_data, + .fesv_get_data = fib_entry_src_lisp_get_data, +}; + +void +fib_entry_src_lisp_register (void) +{ + fib_entry_src_register(FIB_SOURCE_LISP, &api_src_vft); +} diff --git a/src/vnet/fib/fib_entry_src_mpls.c b/src/vnet/fib/fib_entry_src_mpls.c new file mode 100644 index 00000000000..14c7310fbf3 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_mpls.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/mpls/mpls_types.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/mpls_fib.h> + +/** + * Source initialisation Function + */ +static void +fib_entry_src_mpls_init (fib_entry_src_t *src) +{ + mpls_eos_bit_t eos; + + src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE; + src->mpls.fesm_label = MPLS_LABEL_INVALID; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + src->mpls.fesm_lfes[eos] = FIB_NODE_INDEX_INVALID; + } +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_mpls_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_mpls_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; + src->mpls.fesm_label = MPLS_LABEL_INVALID; +} + +static void +fib_entry_src_mpls_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = + fib_path_list_create_special(proto, + FIB_PATH_LIST_FLAG_DROP, + drop_dpo_get(fib_proto_to_dpo(proto))); +} + +static void +fib_entry_src_mpls_set_data (fib_entry_src_t *src, + const fib_entry_t *entry, + const void *data) +{ + fib_protocol_t payload_proto; + fib_node_index_t fei; + mpls_label_t label; + mpls_eos_bit_t eos; + + /* + * post MPLS table alloc and the possible rea-alloc of fib entrys + * the entry pointer will no longer be valid. 
so save its index + */ + payload_proto = entry->fe_prefix.fp_proto; + fei = fib_entry_get_index(entry); + label = *(mpls_label_t*)data; + + if (MPLS_LABEL_INVALID == label) + { + /* + * removing the local label + */ + FOR_EACH_MPLS_EOS_BIT(eos) + { + fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], + FIB_SOURCE_SPECIAL); + } + fib_table_unlock(MPLS_FIB_DEFAULT_TABLE_ID, FIB_PROTOCOL_MPLS); + src->mpls.fesm_label = label; + } + else + { + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = label, + }; + fib_node_index_t fib_index; + dpo_id_t dpo = DPO_INVALID; + + /* + * adding a new local label. make sure the MPLS fib exists. + */ + if (MPLS_LABEL_INVALID == src->mpls.fesm_label) + { + fib_index = + fib_table_find_or_create_and_lock(FIB_PROTOCOL_MPLS, + MPLS_FIB_DEFAULT_TABLE_ID); + } + else + { + fib_index = mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID); + + /* + * if this is a change in label, reomve the old one first + */ + if (src->mpls.fesm_label != label) + { + FOR_EACH_MPLS_EOS_BIT(eos) + { + ASSERT(FIB_NODE_INDEX_INVALID != src->mpls.fesm_lfes[eos]); + fib_table_entry_delete_index(src->mpls.fesm_lfes[eos], + FIB_SOURCE_SPECIAL); + } + } + } + + src->mpls.fesm_label = label; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_eos = eos; + prefix.fp_payload_proto = fib_proto_to_dpo(payload_proto); + + fib_entry_contribute_forwarding(fei, + (eos ? 
+ FIB_FORW_CHAIN_TYPE_MPLS_EOS : + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS), + &dpo); + src->mpls.fesm_lfes[eos] = + fib_table_entry_special_dpo_add(fib_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + dpo_reset(&dpo); + } + } +} + +static const void * +fib_entry_src_mpls_get_data (fib_entry_src_t *src, + const fib_entry_t *entry) +{ + return (&(src->mpls.fesm_label)); +} + +static u8* +fib_entry_src_mpls_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "MPLS local-label:%d", src->mpls.fesm_label)); +} + +const static fib_entry_src_vft_t mpls_src_vft = { + .fesv_init = fib_entry_src_mpls_init, + .fesv_deinit = fib_entry_src_mpls_deinit, + .fesv_add = fib_entry_src_mpls_add, + .fesv_remove = fib_entry_src_mpls_remove, + .fesv_format = fib_entry_src_mpls_format, + .fesv_set_data = fib_entry_src_mpls_set_data, + .fesv_get_data = fib_entry_src_mpls_get_data, + /* + * .fesv_fwd_update = fib_entry_src_mpls_fwd_update, + * When the forwarding for the IP entry is updated, any MPLS chains + * it has created are also updated. Since the MPLS entry will have already + * installed that chain/load-balance there is no need to update the netry + * FIXME: later: propagate any walk to the children of the MPLS entry. for SR + */ +}; + +void +fib_entry_src_mpls_register (void) +{ + fib_entry_src_register(FIB_SOURCE_MPLS, &mpls_src_vft); +} + + diff --git a/src/vnet/fib/fib_entry_src_rr.c b/src/vnet/fib/fib_entry_src_rr.c new file mode 100644 index 00000000000..ff15c54e281 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_rr.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/ip/lookup.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/drop_dpo.h> + +#include "fib_entry_src.h" +#include "fib_entry_cover.h" +#include "fib_entry.h" +#include "fib_table.h" + +/* + * fib_entry_src_rr_resolve_via_connected + * + * Resolve via a connected cover. + */ +static void +fib_entry_src_rr_resolve_via_connected (fib_entry_src_t *src, + const fib_entry_t *fib_entry, + const fib_entry_t *cover) +{ + const fib_route_path_t path = { + .frp_proto = fib_entry->fe_prefix.fp_proto, + .frp_addr = fib_entry->fe_prefix.fp_addr, + .frp_sw_if_index = fib_entry_get_resolving_interface( + fib_entry_get_index(cover)), + .frp_fib_index = ~0, + .frp_weight = 1, + }; + fib_route_path_t *paths = NULL; + vec_add1(paths, path); + + /* + * since the cover is connected, the address this entry corresponds + * to is a peer (ARP-able for) on the interface to which the cover is + * connected. The fact we resolve via the cover, just means this RR + * source is the first SRC to use said peer. The ARP source will be along + * shortly to over-rule this RR source. + */ + src->fes_pl = fib_path_list_create(FIB_PATH_LIST_FLAG_NONE, paths); + src->fes_entry_flags = fib_entry_get_flags(fib_entry_get_index(cover)); + + vec_free(paths); +} + + +/** + * Source initialisation Function + */ +static void +fib_entry_src_rr_init (fib_entry_src_t *src) +{ + src->rr.fesr_cover = FIB_NODE_INDEX_INVALID; + src->rr.fesr_sibling = FIB_NODE_INDEX_INVALID; +} + +/* + * Source activation. 
Called when the source is the new best source on the entry + */ +static int +fib_entry_src_rr_activate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * find the covering prefix. become a dependent thereof. + * for IP there should always be a cover, though it may be the default route. + * For MPLS there is never a cover. + */ + if (FIB_PROTOCOL_MPLS == fib_entry->fe_prefix.fp_proto) + { + src->fes_pl = fib_path_list_create_special(FIB_PROTOCOL_MPLS, + FIB_PATH_LIST_FLAG_DROP, + NULL); + fib_path_list_lock(src->fes_pl); + return (!0); + } + + src->rr.fesr_cover = fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix); + + ASSERT(FIB_NODE_INDEX_INVALID != src->rr.fesr_cover); + + cover = fib_entry_get(src->rr.fesr_cover); + + src->rr.fesr_sibling = + fib_entry_cover_track(cover, fib_entry_get_index(fib_entry)); + + /* + * if the ocver is attached then install an attached-host path + * (like an adj-fib). Otherwise inherit the forwarding from the cover + */ + if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)) + { + fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover); + } + else + { + /* + * use the path-list of the cover, unless it would form a loop. + * that is unless the cover is via this entry. + * If a loop were to form it would be a 1 level loop (i.e. X via X), + * and there would be 2 locks on the path-list; one since its used + * by the cover, and 1 from here. The first lock will go when the + * cover is removed, the second, and last, when the covered walk + * occurs during the cover's removel - this is not a place where + * we can handle last lock gone. + * In short, don't let the loop form. The usual rules of 'we must + * let it form so we know when it breaks' don't apply here, since + * the loop will break when the cover changes, and this function + * will be called again when that happens. 
+ */ + fib_node_index_t *entries = NULL; + fib_protocol_t proto; + + proto = fib_entry->fe_prefix.fp_proto; + vec_add1(entries, fib_entry_get_index(fib_entry)); + + if (fib_path_list_recursive_loop_detect(cover->fe_parent, + &entries)) + { + src->fes_pl = fib_path_list_create_special( + proto, + FIB_PATH_LIST_FLAG_DROP, + drop_dpo_get(fib_proto_to_dpo(proto))); + } + else + { + src->fes_pl = cover->fe_parent; + } + vec_free(entries); + + } + fib_path_list_lock(src->fes_pl); + + /* + * return go for install + */ + return (!0); +} + +/** + * Source Deactivate. + * Called when the source is no longer best source on the entry + */ +static void +fib_entry_src_rr_deactivate (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_t *cover; + + /* + * remove the depednecy on the covering entry + */ + if (FIB_NODE_INDEX_INVALID != src->rr.fesr_cover) + { + cover = fib_entry_get(src->rr.fesr_cover); + fib_entry_cover_untrack(cover, src->rr.fesr_sibling); + src->rr.fesr_cover = FIB_NODE_INDEX_INVALID; + } + + fib_path_list_unlock(src->fes_pl); + src->fes_pl = FIB_NODE_INDEX_INVALID; + src->fes_entry_flags = FIB_ENTRY_FLAG_NONE; +} + +static fib_entry_src_cover_res_t +fib_entry_src_rr_cover_change (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + + if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover) + { + /* + * the source may be added, but it is not active + * if it is not tracking the cover. + */ + return (res); + } + + /* + * this function is called when this entry's cover has a more specific + * entry inserted benaeth it. That does not necessarily mean that this + * entry is covered by the new prefix. 
check that + */ + if (src->rr.fesr_cover != fib_table_get_less_specific(fib_entry->fe_fib_index, + &fib_entry->fe_prefix)) + { + fib_entry_src_rr_deactivate(src, fib_entry); + fib_entry_src_rr_activate(src, fib_entry); + + /* + * dependent children need to re-resolve to the new forwarding info + */ + res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + } + return (res); +} + +/* + * fib_entry_src_rr_cover_update + * + * This entry's cover has updated its forwarding info. This entry + * will need to re-inheret. + */ +static fib_entry_src_cover_res_t +fib_entry_src_rr_cover_update (fib_entry_src_t *src, + const fib_entry_t *fib_entry) +{ + fib_entry_src_cover_res_t res = { + .install = !0, + .bw_reason = FIB_NODE_BW_REASON_FLAG_NONE, + }; + fib_node_index_t old_path_list; + fib_entry_t *cover; + + if (FIB_NODE_INDEX_INVALID == src->rr.fesr_cover) + { + /* + * the source may be added, but it is not active + * if it is not tracking the cover. + */ + return (res); + } + + cover = fib_entry_get(src->rr.fesr_cover); + old_path_list = src->fes_pl; + + /* + * if the ocver is attached then install an attached-host path + * (like an adj-fib). 
Otherwise inherit the forwarding from the cover + */ + if (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags_i(cover)) + { + fib_entry_src_rr_resolve_via_connected(src, fib_entry, cover); + } + else + { + src->fes_pl = cover->fe_parent; + } + fib_path_list_lock(src->fes_pl); + fib_path_list_unlock(old_path_list); + + /* + * dependent children need to re-resolve to the new forwarding info + */ + res.bw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE; + + return (res); +} + +static u8* +fib_entry_src_rr_format (fib_entry_src_t *src, + u8* s) +{ + return (format(s, "cover:%d", src->rr.fesr_cover)); +} + +const static fib_entry_src_vft_t rr_src_vft = { + .fesv_init = fib_entry_src_rr_init, + .fesv_activate = fib_entry_src_rr_activate, + .fesv_deactivate = fib_entry_src_rr_deactivate, + .fesv_cover_change = fib_entry_src_rr_cover_change, + .fesv_cover_update = fib_entry_src_rr_cover_update, + .fesv_format = fib_entry_src_rr_format, +}; + +void +fib_entry_src_rr_register (void) +{ + fib_entry_src_register(FIB_SOURCE_RR, &rr_src_vft); + fib_entry_src_register(FIB_SOURCE_URPF_EXEMPT, &rr_src_vft); +} diff --git a/src/vnet/fib/fib_entry_src_special.c b/src/vnet/fib/fib_entry_src_special.c new file mode 100644 index 00000000000..52a6134e337 --- /dev/null +++ b/src/vnet/fib/fib_entry_src_special.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "fib_entry.h" +#include "fib_entry_src.h" + +/** + * Source initialisation Function + */ +static void +fib_entry_src_special_init (fib_entry_src_t *src) +{ + src->fes_flags = FIB_ENTRY_SRC_FLAG_NONE; +} + +/** + * Source deinitialisation Function + */ +static void +fib_entry_src_special_deinit (fib_entry_src_t *src) +{ +} + +static void +fib_entry_src_special_remove (fib_entry_src_t *src) +{ + src->fes_pl = FIB_NODE_INDEX_INVALID; +} + +static void +fib_entry_src_special_add (fib_entry_src_t *src, + const fib_entry_t *entry, + fib_entry_flag_t flags, + fib_protocol_t proto, + const dpo_id_t *dpo) +{ + src->fes_pl = + fib_path_list_create_special(proto, + fib_entry_src_flags_2_path_list_flags(flags), + dpo); +} + +const static fib_entry_src_vft_t special_src_vft = { + .fesv_init = fib_entry_src_special_init, + .fesv_deinit = fib_entry_src_special_deinit, + .fesv_add = fib_entry_src_special_add, + .fesv_remove = fib_entry_src_special_remove, +}; + +void +fib_entry_src_special_register (void) +{ + fib_entry_src_register(FIB_SOURCE_SPECIAL, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_MAP, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_SIXRD, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_CLASSIFY, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_SR, &special_src_vft); + fib_entry_src_register(FIB_SOURCE_AE, &special_src_vft); +} diff --git a/src/vnet/fib/fib_internal.h b/src/vnet/fib/fib_internal.h new file mode 100644 index 00000000000..2d980bcce0a --- /dev/null +++ b/src/vnet/fib/fib_internal.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __FIB_INTERNAL_H__
#define __FIB_INTERNAL_H__

#include <vnet/ip/ip.h>
#include <vnet/dpo/dpo.h>

/**
 * Big train switch; FIB debugs on or off
 */
#undef FIB_DEBUG

/**
 * @brief
 *  Fill in a prefix from an MPLS label.
 *  NOTE(review): only the declaration is visible here; see the definition
 *  for how the label is encoded in the prefix.
 */
extern void fib_prefix_from_mpls_label(mpls_label_t label,
                                       fib_prefix_t *prf);

/**
 * @brief
 *  Compare two route paths.
 *  NOTE(review): presumably returns 0 when the paths are equal, as with
 *  other *_cmp functions - confirm against the definition.
 */
extern int fib_route_path_cmp(const fib_route_path_t *rpath1,
                              const fib_route_path_t *rpath2);

/**
 * @brief
 *  Add or update an entry in the FIB's forwarding table.
 *  This is called from the fib_entry code. It is not meant to be used
 *  by the client/source.
 *
 * @param fib_index
 *  The index of the FIB
 *
 * @param prefix
 *  The prefix for the entry to add/update
 *
 * @param dpo
 *  The data-path object to use for forwarding
 */
extern void fib_table_fwding_dpo_update(u32 fib_index,
                                        const fib_prefix_t *prefix,
                                        const dpo_id_t *dpo);
/**
 * @brief
 *  Remove an entry from the FIB's forwarding table
 *
 * @param fib_index
 *  The index of the FIB
 *
 * @param prefix
 *  The prefix for the entry to remove
 *
 * @param dpo
 *  The data-path object associated with the entry being removed.
 *  NOTE(review): presumably the DPO the entry was forwarding with - confirm
 *  against the definition.
 */
extern void fib_table_fwding_dpo_remove(u32 fib_index,
                                        const fib_prefix_t *prefix,
                                        const dpo_id_t *dpo);


#endif
diff --git a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c new file mode 100644 index 00000000000..db3e22bb3b8 --- /dev/null +++ b/src/vnet/fib/fib_node.c @@ -0,0 +1,277 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_node.h> +#include <vnet/fib/fib_node_list.h> + +/* + * The per-type vector of virtual function tables + */ +static fib_node_vft_t *fn_vfts; + +/** + * The last registered new type + */ +static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST; + +/* + * the node type names + */ +static const char *fn_type_names[] = FIB_NODE_TYPES; + +const char* +fib_node_type_get_name (fib_node_type_t type) +{ + if (type < FIB_NODE_TYPE_LAST) + return (fn_type_names[type]); + else + { + if (NULL != fn_vfts[type].fnv_format) + { + return ("fixme"); + } + else + { + return ("unknown"); + } + } +} + +/** + * fib_node_register_type + * + * Register the function table for a given type + */ +void +fib_node_register_type (fib_node_type_t type, + const fib_node_vft_t *vft) +{ + /* + * assert that one only registration is made per-node type + */ + if (vec_len(fn_vfts) > type) + ASSERT(NULL == fn_vfts[type].fnv_get); + + /* + * Assert that we are getting each of the required functions + */ + ASSERT(NULL != vft->fnv_get); + ASSERT(NULL != vft->fnv_last_lock); + + vec_validate(fn_vfts, type); + fn_vfts[type] = *vft; +} + +fib_node_type_t +fib_node_register_new_type (const fib_node_vft_t *vft) +{ + fib_node_type_t new_type; + + new_type = ++last_new_type; + + fib_node_register_type(new_type, vft); + + return (new_type); +} + +static u8* +fib_node_format (fib_node_ptr_t *fnp, u8*s) +{ + return (format(s, 
"{%s:%d}", fn_type_names[fnp->fnp_type], fnp->fnp_index)); +} + +u32 +fib_node_child_add (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_t *parent; + + parent = fn_vfts[parent_type].fnv_get(parent_index); + + /* + * return the index of the sibling in the child list + */ + fib_node_lock(parent); + + if (FIB_NODE_INDEX_INVALID == parent->fn_children) + { + parent->fn_children = fib_node_list_create(); + } + + return (fib_node_list_push_front(parent->fn_children, + 0, type, + index)); +} + +void +fib_node_child_remove (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_index_t sibling_index) +{ + fib_node_t *parent; + + parent = fn_vfts[parent_type].fnv_get(parent_index); + + fib_node_list_remove(parent->fn_children, sibling_index); + + if (0 == fib_node_list_get_size(parent->fn_children)) + { + fib_node_list_destroy(&parent->fn_children); + } + + fib_node_unlock(parent); +} + +u32 +fib_node_get_n_children (fib_node_type_t parent_type, + fib_node_index_t parent_index) +{ + fib_node_t *parent; + + parent = fn_vfts[parent_type].fnv_get(parent_index); + + return (fib_node_list_get_size(parent->fn_children)); +} + + +fib_node_back_walk_rc_t +fib_node_back_walk_one (fib_node_ptr_t *ptr, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_t *node; + + node = fn_vfts[ptr->fnp_type].fnv_get(ptr->fnp_index); + + return (fn_vfts[ptr->fnp_type].fnv_back_walk(node, ctx)); +} + +static int +fib_node_ptr_format_one_child (fib_node_ptr_t *ptr, + void *arg) +{ + u8 **s = (u8**) arg; + + *s = fib_node_format(ptr, *s); + + return (1); +} + +u8* +fib_node_children_format (fib_node_list_t list, + u8 *s) +{ + fib_node_list_walk(list, fib_node_ptr_format_one_child, (void*)&s); + + return (s); +} + +void +fib_node_init (fib_node_t *node, + fib_node_type_t type) +{ +#if CLIB_DEBUG > 0 + /** + * The node's type. 
make sure we are dynamic/down casting correctly + */ + node->fn_type = type; +#endif + node->fn_locks = 0; + node->fn_vft = &fn_vfts[type]; + node->fn_children = FIB_NODE_INDEX_INVALID; +} + +void +fib_node_deinit (fib_node_t *node) +{ + fib_node_list_destroy(&node->fn_children); +} + +void +fib_node_lock (fib_node_t *node) +{ + node->fn_locks++; +} + +void +fib_node_unlock (fib_node_t *node) +{ + node->fn_locks--; + + if (0 == node->fn_locks) + { + node->fn_vft->fnv_last_lock(node); + } +} + +void +fib_show_memory_usage (const char *name, + u32 in_use_elts, + u32 allocd_elts, + size_t size_elt) +{ + vlib_cli_output (vlib_get_main(), "%=30s %=5d %=8d/%=9d %d/%d ", + name, size_elt, + in_use_elts, allocd_elts, + in_use_elts*size_elt, allocd_elts*size_elt); +} + +static clib_error_t * +fib_memory_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_node_vft_t *vft; + + vlib_cli_output (vm, "FIB memory"); + vlib_cli_output (vm, "%=30s %=5s %=8s/%=9s totals", + "Name","Size", "in-use", "allocated"); + + vec_foreach(vft, fn_vfts) + { + if (NULL != vft->fnv_mem_show) + vft->fnv_mem_show(); + } + + fib_node_list_memory_show(); + + return (NULL); +} + +/* *INDENT-OFF* */ +/*? + * The '<em>sh fib memory </em>' command displays the memory usage for each + * FIB object type. 
 *
 * @cliexpar
 * @cliexstart{show fib memory}
 * FIB memory
 *             Name               Size  in-use /allocated   totals
 *             Entry              120     11   /    11      1320/1320
 *         Entry Source            32     11   /    11      352/352
 *     Entry Path-Extensions       44      0   /    0       0/0
 *           Path-list             40     11   /    11      440/440
 *             Path                88     11   /    11      968/968
 *      Node-list elements         20     11   /    11      220/220
 *        Node-list heads          8      13   /    13      104/104
 * @cliexend
?*/
VLIB_CLI_COMMAND (show_fib_memory, static) = {
    .path = "show fib memory",
    .function = fib_memory_show,
    .short_help = "show fib memory",
};
/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h new file mode 100644 index 00000000000..3ad8ee95b64 --- /dev/null +++ b/src/vnet/fib/fib_node.h @@ -0,0 +1,371 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __FIB_NODE_H__
#define __FIB_NODE_H__

#include <vnet/fib/fib_types.h>

/**
 * The types of nodes in a FIB graph
 */
typedef enum fib_node_type_t_ {
    /**
     * Marker. New types after this one.
     */
    FIB_NODE_TYPE_FIRST = 0,
    /**
     * See the respective fib_*.h files for descriptions of these objects.
     */
    FIB_NODE_TYPE_WALK,
    FIB_NODE_TYPE_ENTRY,
    FIB_NODE_TYPE_PATH_LIST,
    FIB_NODE_TYPE_PATH,
    FIB_NODE_TYPE_ADJ,
    FIB_NODE_TYPE_MPLS_ENTRY,
    FIB_NODE_TYPE_MPLS_TUNNEL,
    FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY,
    FIB_NODE_TYPE_LISP_ADJ,
    FIB_NODE_TYPE_GRE_TUNNEL,
    FIB_NODE_TYPE_VXLAN_TUNNEL,
    /**
     * Marker. New types before this one. Leave the test type last.
     */
    FIB_NODE_TYPE_TEST,
    FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST,
} fib_node_type_t;

#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1)

/*
 * The names of the node types, indexed by type. There is no entry for
 * FIB_NODE_TYPE_FIRST nor for FIB_NODE_TYPE_TEST.
 */
#define FIB_NODE_TYPES {                                        \
    [FIB_NODE_TYPE_ENTRY]     = "entry",                        \
    [FIB_NODE_TYPE_WALK]      = "walk",                         \
    [FIB_NODE_TYPE_PATH_LIST] = "path-list",                    \
    [FIB_NODE_TYPE_PATH]      = "path",                         \
    [FIB_NODE_TYPE_MPLS_ENTRY] = "mpls-entry",                  \
    [FIB_NODE_TYPE_MPLS_TUNNEL] = "mpls-tunnel",                \
    [FIB_NODE_TYPE_ADJ] = "adj",                                \
    [FIB_NODE_TYPE_LISP_GPE_FWD_ENTRY] = "lisp-gpe-fwd-entry",  \
    [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj",                      \
    [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel",                  \
    [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel",              \
}

/**
 * Reasons for backwalking the FIB object graph
 */
typedef enum fib_node_back_walk_reason_t_ {
    /**
     * Marker. Add new ones after.
     */
    FIB_NODE_BW_REASON_FIRST = 0,
    /**
     * Walk to re-resolve the child.
     * Used when the parent is no longer a valid resolution target
     */
    FIB_NODE_BW_REASON_RESOLVE = FIB_NODE_BW_REASON_FIRST,
    /**
     * Walk to re-evaluate the forwarding contributed by the parent.
     * Used when a parent's forwarding changes and the child needs to
     * incorporate this change in its forwarding.
     */
    FIB_NODE_BW_REASON_EVALUATE,
    /**
     * A resolving interface has come up
     */
    FIB_NODE_BW_REASON_INTERFACE_UP,
    /**
     * A resolving interface has gone down
     */
    FIB_NODE_BW_REASON_INTERFACE_DOWN,
    /**
     * A resolving interface has been deleted.
     */
    FIB_NODE_BW_REASON_INTERFACE_DELETE,
    /**
     * Walk to re-collapse the multipath adjs when the rewrite of
     * a unipath adjacency changes
     */
    FIB_NODE_BW_REASON_ADJ_UPDATE,
    /**
     * Walk to update children to inform them the adjacency is now down.
     */
    FIB_NODE_BW_REASON_ADJ_DOWN,
    /**
     * Marker. Add new reasons before this one, and update it.
     */
    FIB_NODE_BW_REASON_LAST = FIB_NODE_BW_REASON_ADJ_DOWN,
} fib_node_back_walk_reason_t;

/*
 * The names of the back-walk reasons, indexed by reason.
 */
#define FIB_NODE_BW_REASONS {                               \
    [FIB_NODE_BW_REASON_RESOLVE] = "resolve",               \
    [FIB_NODE_BW_REASON_EVALUATE] = "evaluate",             \
    [FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up",            \
    [FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down",        \
    [FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete",    \
    [FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update",         \
    [FIB_NODE_BW_REASON_ADJ_DOWN] = "adj-down",             \
}

/*
 * Iterate over every back-walk reason, first to last inclusive.
 */
#define FOR_EACH_FIB_NODE_BW_REASON(_item)      \
    for (_item = FIB_NODE_BW_REASON_FIRST;      \
         _item <= FIB_NODE_BW_REASON_LAST;      \
         _item++)

/**
 * Flags enum constructed from the reasons: one bit per reason.
 */
typedef enum fib_node_bw_reason_flag_t_ {
    FIB_NODE_BW_REASON_FLAG_NONE = 0,
    FIB_NODE_BW_REASON_FLAG_RESOLVE = (1 << FIB_NODE_BW_REASON_RESOLVE),
    FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE),
    FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP),
    FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN),
    FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE),
    FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE),
    FIB_NODE_BW_REASON_FLAG_ADJ_DOWN = (1 << FIB_NODE_BW_REASON_ADJ_DOWN),
} __attribute__ ((packed)) fib_node_bw_reason_flag_t;

/*
 * The packed flags must fit in a single byte; adding an eighth reason
 * would break this.
 */
STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 2,
              "BW Reason enum < 2 byte. Consequences for cover_upd_res_t");

/**
 * Flags on the walk
 */
typedef enum fib_node_bw_flags_t_
{
    /**
     * Force the walk to be synchronous
     */
    FIB_NODE_BW_FLAG_FORCE_SYNC = (1 << 0),
} fib_node_bw_flags_t;

/**
 * Forward declarations
 */
struct fib_node_t_;

/**
 * A representation of one pointer to another node.
 * To fully qualify a node, one must know its type and its index so it
 * can be retrieved from the appropriate pool.
Direct pointers to nodes
 * are forbidden, since all nodes are allocated from pools, which are vectors,
 * and thus subject to realloc at any time.
 */
typedef struct fib_node_ptr_t_ {
    /**
     * node type
     */
    fib_node_type_t fnp_type;
    /**
     * node's index
     */
    fib_node_index_t fnp_index;
} fib_node_ptr_t;

/**
 * @brief A list of FIB nodes.
 */
typedef u32 fib_node_list_t;

/**
 * Context passed between objects during a back walk.
 */
typedef struct fib_node_back_walk_ctx_t_ {
    /**
     * The reason/trigger for the backwalk
     */
    fib_node_bw_reason_flag_t fnbw_reason;

    /**
     * additional flags for the walk
     */
    fib_node_bw_flags_t fnbw_flags;

    /**
     * the number of levels the walk has already traversed.
     * this value is maintained by the walk infra, to limit the depth of
     * a walk so it does not run indefinitely in the presence of a
     * loop/cycle in the graph.
     */
    u32 fnbw_depth;
} fib_node_back_walk_ctx_t;

/**
 * We consider a depth of 32 to be sufficient to cover all sane
 * network topologies. Anything more is then an indication that
 * there is a loop/cycle in the FIB graph.
 * Note that every object type contributes 1 to the depth.
 */
#define FIB_NODE_GRAPH_MAX_DEPTH ((u32)32)

/**
 * A callback function for walking a node dependency list
 */
typedef int (*fib_node_ptr_walk_t)(fib_node_ptr_t *depend,
                                   void *ctx);

/**
 * A list of dependent nodes.
 * This is currently implemented as a hash_table of fib_node_ptr_t
 * NOTE(review): the typedef below is a plain alias of fib_node_ptr_t; no
 * hash table is visible in this header - confirm whether this comment is
 * stale.
 */
typedef fib_node_ptr_t fib_node_ptr_list_t;

/**
 * Return code from a back walk function
 */
typedef enum fib_node_back_walk_rc_t_ {
    FIB_NODE_BACK_WALK_MERGE,
    FIB_NODE_BACK_WALK_CONTINUE,
} fib_node_back_walk_rc_t;

/**
 * Function definition to backwalk a FIB node
 */
typedef fib_node_back_walk_rc_t (*fib_node_back_walk_t)(
    struct fib_node_t_ *node,
    fib_node_back_walk_ctx_t *ctx);

/**
 * Function definition to get a FIB node from its index
 */
typedef struct fib_node_t_* (*fib_node_get_t)(fib_node_index_t index);

/**
 * Function definition to inform the FIB node that its last lock has gone.
 */
typedef void (*fib_node_last_lock_gone_t)(struct fib_node_t_ *node);

/**
 * Function definition to display the amount of memory used by a type.
 * Implementations should call fib_show_memory_usage()
 */
typedef void (*fib_node_memory_show_t)(void);

/**
 * A FIB graph node's virtual function table
 */
typedef struct fib_node_vft_t_ {
    fib_node_get_t fnv_get;
    fib_node_last_lock_gone_t fnv_last_lock;
    fib_node_back_walk_t fnv_back_walk;
    format_function_t *fnv_format;
    fib_node_memory_show_t fnv_mem_show;
} fib_node_vft_t;

/**
 * A node in the FIB graph
 *
 * Objects in the FIB form a graph.
 */
typedef struct fib_node_t_ {
#if CLIB_DEBUG > 0
    /**
     * The node's type. make sure we are dynamic/down casting correctly
     */
    fib_node_type_t fn_type;
#endif
    /**
     * The node's VFT.
     * we could store the type here instead, and lookup the VFT using that. But
     * I like this better,
     */
    const fib_node_vft_t *fn_vft;

    /**
     * Vector of nodes that depend upon/use/share this node
     */
    fib_node_list_t fn_children;

    /**
     * Number of dependents on this node. This number includes the number
     * of children
     */
    u32 fn_locks;
} fib_node_t;

/**
 * @brief
 *  Register the function table for a given type
 *
 * @param ft
 *  FIB node type
 *
 * @param vft
 * virtual function table
 */
extern void fib_node_register_type (fib_node_type_t ft,
                                    const fib_node_vft_t *vft);

/**
 * @brief
 *  Create a new FIB node type and Register the function table for it.
 *
 * @param vft
 * virtual function table
 *
 * @return new FIB node type
 */
extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft);

/**
 * @brief Show the memory usage for a type
 *
 * This should be invoked by the type in response to the infra calling
 * its registered memory show function
 *
 * @param name the name of the type
 * @param in_use_elts The number of elements in use
 * @param allocd_elts The number of allocated pool elements
 * @param size_elt The size of one element
 */
extern void fib_show_memory_usage(const char *name,
                                  u32 in_use_elts,
                                  u32 allocd_elts,
                                  size_t size_elt);

extern void fib_node_init(fib_node_t *node,
                          fib_node_type_t ft);
extern void fib_node_deinit(fib_node_t *node);

extern void fib_node_lock(fib_node_t *node);
extern void fib_node_unlock(fib_node_t *node);

extern u32 fib_node_get_n_children(fib_node_type_t parent_type,
                                   fib_node_index_t parent_index);
extern u32 fib_node_child_add(fib_node_type_t parent_type,
                              fib_node_index_t parent_index,
                              fib_node_type_t child_type,
                              fib_node_index_t child_index);
extern void fib_node_child_remove(fib_node_type_t parent_type,
                                  fib_node_index_t parent_index,
                                  fib_node_index_t sibling_index);

extern fib_node_back_walk_rc_t fib_node_back_walk_one(fib_node_ptr_t *ptr,
                                                      fib_node_back_walk_ctx_t *ctx);

extern u8* fib_node_children_format(fib_node_list_t list,
                                    u8 *s);

extern const char* fib_node_type_get_name(fib_node_type_t type);

/**
 * @return non-zero if the index is anything other than
 *         FIB_NODE_INDEX_INVALID
 */
static inline int
fib_node_index_is_valid (fib_node_index_t ni)
{
    return (FIB_NODE_INDEX_INVALID != ni);
}

#endif

diff --git a/src/vnet/fib/fib_node_list.c b/src/vnet/fib/fib_node_list.c new file mode 100644 index 00000000000..ceb951b466b --- /dev/null +++ b/src/vnet/fib/fib_node_list.c @@ -0,0 +1,390 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @brief a list of FIB nodes that is heterogeneous w.r.t. FIB node type.
 * Since we cannot use C pointers, due to memory reallocs, the next/prev
 * are described as key:{type,index}.
+ */ + +#include <vnet/fib/fib_node_list.h> + +/** + * @brief An element in the list + */ +typedef struct fib_node_list_elt_t_ +{ + /** + * The index of the list this element is in + */ + fib_node_list_t fnle_list; + + /** + * The owner of this element + */ + fib_node_ptr_t fnle_owner; + + /** + * The next element in the list + */ + u32 fnle_next; + + /** + * The previous element in the list + */ + u32 fnle_prev; +} fib_node_list_elt_t; + +/** + * @brief A list of FIB nodes + */ +typedef struct fib_node_list_head_t_ +{ + /** + * The head element + */ + u32 fnlh_head; + + /** + * Number of elements in the list + */ + u32 fnlh_n_elts; +} fib_node_list_head_t; + +/** + * Pools of list elements and heads + */ +static fib_node_list_elt_t *fib_node_list_elt_pool; +static fib_node_list_head_t *fib_node_list_head_pool; + +static index_t +fib_node_list_elt_get_index (fib_node_list_elt_t *elt) +{ + return (elt - fib_node_list_elt_pool); +} + +static fib_node_list_elt_t * +fib_node_list_elt_get (index_t fi) +{ + return (pool_elt_at_index(fib_node_list_elt_pool, fi)); +} + +static index_t +fib_node_list_head_get_index (fib_node_list_head_t *head) +{ + return (head - fib_node_list_head_pool); +} +static fib_node_list_head_t * +fib_node_list_head_get (fib_node_list_t fi) +{ + return (pool_elt_at_index(fib_node_list_head_pool, fi)); +} + +static fib_node_list_elt_t * +fib_node_list_elt_create (fib_node_list_head_t *head, + int id, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_list_elt_t *elt; + + pool_get(fib_node_list_elt_pool, elt); + + elt->fnle_list = fib_node_list_head_get_index(head); + elt->fnle_owner.fnp_type = type; + elt->fnle_owner.fnp_index = index; + + elt->fnle_next = FIB_NODE_INDEX_INVALID; + elt->fnle_prev = FIB_NODE_INDEX_INVALID; + + return (elt); +} + +static void +fib_node_list_head_init (fib_node_list_head_t *head) +{ + head->fnlh_n_elts = 0; + head->fnlh_head = FIB_NODE_INDEX_INVALID; +} + +/** + * @brief Create a new node list. 
+ */ +fib_node_list_t +fib_node_list_create (void) +{ + fib_node_list_head_t *head; + + pool_get(fib_node_list_head_pool, head); + + fib_node_list_head_init(head); + + return (fib_node_list_head_get_index(head)); +} + +void +fib_node_list_destroy (fib_node_list_t *list) +{ + fib_node_list_head_t *head; + + if (FIB_NODE_INDEX_INVALID == *list) + return; + + head = fib_node_list_head_get(*list); + ASSERT(0 == head->fnlh_n_elts); + + pool_put(fib_node_list_head_pool, head); + *list = FIB_NODE_INDEX_INVALID; +} + + +/** + * @brief Insert an element at the from of the list. + */ +u32 +fib_node_list_push_front (fib_node_list_t list, + int owner_id, + fib_node_type_t type, + fib_node_index_t index) +{ + fib_node_list_elt_t *elt, *next; + fib_node_list_head_t *head; + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_create(head, owner_id, type, index); + + elt->fnle_prev = FIB_NODE_INDEX_INVALID; + elt->fnle_next = head->fnlh_head; + + if (FIB_NODE_INDEX_INVALID != head->fnlh_head) + { + next = fib_node_list_elt_get(head->fnlh_head); + next->fnle_prev = fib_node_list_elt_get_index(elt); + } + head->fnlh_head = fib_node_list_elt_get_index(elt); + + head->fnlh_n_elts++; + + return (fib_node_list_elt_get_index(elt)); +} + +u32 +fib_node_list_push_back (fib_node_list_t list, + int owner_id, + fib_node_type_t type, + fib_node_index_t index) +{ + ASSERT(0); + return (FIB_NODE_INDEX_INVALID); +} + +static void +fib_node_list_extract (fib_node_list_head_t *head, + fib_node_list_elt_t *elt) +{ + fib_node_list_elt_t *next, *prev; + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + next = fib_node_list_elt_get(elt->fnle_next); + next->fnle_prev = elt->fnle_prev; + } + + if (FIB_NODE_INDEX_INVALID != elt->fnle_prev) + { + prev = fib_node_list_elt_get(elt->fnle_prev); + prev->fnle_next = elt->fnle_next; + } + else + { + ASSERT (fib_node_list_elt_get_index(elt) == head->fnlh_head); + head->fnlh_head = elt->fnle_next; + } +} + +static void +fib_node_list_insert_after 
(fib_node_list_head_t *head, + fib_node_list_elt_t *prev, + fib_node_list_elt_t *elt) +{ + fib_node_list_elt_t *next; + + elt->fnle_next = prev->fnle_next; + if (FIB_NODE_INDEX_INVALID != prev->fnle_next) + { + next = fib_node_list_elt_get(prev->fnle_next); + next->fnle_prev = fib_node_list_elt_get_index(elt); + } + prev->fnle_next = fib_node_list_elt_get_index(elt); + elt->fnle_prev = fib_node_list_elt_get_index(prev); +} + +void +fib_node_list_remove (fib_node_list_t list, + u32 sibling) +{ + fib_node_list_head_t *head; + fib_node_list_elt_t *elt; + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_get(sibling); + + fib_node_list_extract(head, elt); + + head->fnlh_n_elts--; + pool_put(fib_node_list_elt_pool, elt); +} + +void +fib_node_list_elt_remove (u32 sibling) +{ + fib_node_list_elt_t *elt; + + elt = fib_node_list_elt_get(sibling); + + fib_node_list_remove(elt->fnle_list, sibling); +} + +/** + * @brief Advance the sibling one step (toward the tail) in the list. + * return 0 if at the end of the list, 1 otherwise. 
+ */ +int +fib_node_list_advance (u32 sibling) +{ + fib_node_list_elt_t *elt, *next; + fib_node_list_head_t *head; + + elt = fib_node_list_elt_get(sibling); + head = fib_node_list_head_get(elt->fnle_list); + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + /* + * not at the end of the list + */ + next = fib_node_list_elt_get(elt->fnle_next); + + fib_node_list_extract(head, elt); + fib_node_list_insert_after(head, next, elt); + + return (1); + } + else + { + return (0); + } +} + +int +fib_node_list_elt_get_next (u32 sibling, + fib_node_ptr_t *ptr) +{ + fib_node_list_elt_t *elt, *next; + + elt = fib_node_list_elt_get(sibling); + + if (FIB_NODE_INDEX_INVALID != elt->fnle_next) + { + next = fib_node_list_elt_get(elt->fnle_next); + + *ptr = next->fnle_owner; + return (1); + } + else + { + ptr->fnp_index = FIB_NODE_INDEX_INVALID; + return (0); + } +} + +u32 +fib_node_list_get_size (fib_node_list_t list) +{ + fib_node_list_head_t *head; + + if (FIB_NODE_INDEX_INVALID == list) + { + return (0); + } + + head = fib_node_list_head_get(list); + + return (head->fnlh_n_elts); +} + +int +fib_node_list_get_front (fib_node_list_t list, + fib_node_ptr_t *ptr) +{ + fib_node_list_head_t *head; + fib_node_list_elt_t *elt; + + + if (0 == fib_node_list_get_size(list)) + { + ptr->fnp_index = FIB_NODE_INDEX_INVALID; + return (0); + } + + head = fib_node_list_head_get(list); + elt = fib_node_list_elt_get(head->fnlh_head); + + *ptr = elt->fnle_owner; + + return (1); +} + +/** + * @brief Walk the list of node. This must be safe w.r.t. the removal + * of nodes during the walk. 
+ */ +void +fib_node_list_walk (fib_node_list_t list, + fib_node_list_walk_cb_t fn, + void *args) +{ + fib_node_list_elt_t *elt; + fib_node_list_head_t *head; + u32 sibling; + + if (FIB_NODE_INDEX_INVALID == list) + { + return; + } + + head = fib_node_list_head_get(list); + sibling = head->fnlh_head; + + while (FIB_NODE_INDEX_INVALID != sibling) + { + elt = fib_node_list_elt_get(sibling); + sibling = elt->fnle_next; + + fn(&elt->fnle_owner, args); + } +} + +void +fib_node_list_memory_show (void) +{ + fib_show_memory_usage("Node-list elements", + pool_elts(fib_node_list_elt_pool), + pool_len(fib_node_list_elt_pool), + sizeof(fib_node_list_elt_t)); + fib_show_memory_usage("Node-list heads", + pool_elts(fib_node_list_head_pool), + pool_len(fib_node_list_head_pool), + sizeof(fib_node_list_head_t)); +} diff --git a/src/vnet/fib/fib_node_list.h b/src/vnet/fib/fib_node_list.h new file mode 100644 index 00000000000..9567b9669e8 --- /dev/null +++ b/src/vnet/fib/fib_node_list.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief a hetrogeneous w.r.t. FIB node type, list of FIB nodes. + * Since we cannot use C pointers, due to memeory reallocs, the next/prev + * are described as an index to an element. Each element contains a pointer + * (key:{type, index}) to a FIB node. 
 */

#ifndef __FIB_NODE_LIST_H__
#define __FIB_NODE_LIST_H__

#include <vnet/fib/fib_node.h>

/*
 * Create and destroy a list. Destroy takes the address of the handle.
 */
extern fib_node_list_t fib_node_list_create(void);
extern void fib_node_list_destroy(fib_node_list_t *list);

/*
 * Add the node {type, index} to the list. The value returned is the index
 * of the list element (the 'sibling') that the other functions take.
 */
extern u32 fib_node_list_push_front(fib_node_list_t head,
                                    int owner_id,
                                    fib_node_type_t type,
                                    fib_node_index_t index);
extern u32 fib_node_list_push_back(fib_node_list_t head,
                                   int owner_id,
                                   fib_node_type_t type,
                                   fib_node_index_t index);

/*
 * Remove an element, either naming its list or by element index alone.
 */
extern void fib_node_list_remove(fib_node_list_t head,
                                 u32 sibling);
extern void fib_node_list_elt_remove(u32 sibling);

extern int fib_node_list_advance(u32 sibling);

extern int fib_node_list_get_front(fib_node_list_t head,
                                   fib_node_ptr_t *ptr);

extern int fib_node_list_elt_get_next(u32 elt,
                                      fib_node_ptr_t *ptr);

extern u32 fib_node_list_get_size(fib_node_list_t head);

/**
 * @brief Callback function invoked during a list walk
 */
typedef int (*fib_node_list_walk_cb_t)(fib_node_ptr_t *owner,
                                       void *args);

extern void fib_node_list_walk(fib_node_list_t head,
                               fib_node_list_walk_cb_t fn,
                               void *args);

extern void fib_node_list_memory_show(void);

#endif
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c new file mode 100644 index 00000000000..809e3e166da --- /dev/null +++ b/src/vnet/fib/fib_path.c @@ -0,0 +1,2001 @@
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/lookup_dpo.h>
+
+#include <vnet/adj/adj.h>
+
+#include <vnet/fib/fib_path.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/fib/fib_internal.h>
+#include <vnet/fib/fib_urpf_list.h>
+
+/**
+ * Enumeration of path types.
+ * The type is the selector for the per-type union in fib_path_t.
+ */
+typedef enum fib_path_type_t_ {
+    /**
+     * Marker. Add new types after this one.
+     */
+    FIB_PATH_TYPE_FIRST = 0,
+    /**
+     * Attached-nexthop. An interface and a nexthop are known.
+     */
+    FIB_PATH_TYPE_ATTACHED_NEXT_HOP = FIB_PATH_TYPE_FIRST,
+    /**
+     * attached. Only the interface is known.
+     */
+    FIB_PATH_TYPE_ATTACHED,
+    /**
+     * recursive. Only the next-hop is known.
+     */
+    FIB_PATH_TYPE_RECURSIVE,
+    /**
+     * special. nothing is known. so we drop.
+     */
+    FIB_PATH_TYPE_SPECIAL,
+    /**
+     * exclusive. user provided adj.
+     */
+    FIB_PATH_TYPE_EXCLUSIVE,
+    /**
+     * deag. Link to a lookup adj in the next table
+     */
+    FIB_PATH_TYPE_DEAG,
+    /**
+     * receive. it's for-us.
+     */
+    FIB_PATH_TYPE_RECEIVE,
+    /**
+     * Marker. Add new types before this one, then update it.
+     */
+    FIB_PATH_TYPE_LAST = FIB_PATH_TYPE_RECEIVE,
+} __attribute__ ((packed)) fib_path_type_t;
+
+/**
+ * The maximum number of path_types
+ */
+#define FIB_PATH_TYPE_MAX (FIB_PATH_TYPE_LAST + 1)
+
+/**
+ * Display names for the path types, indexed by fib_path_type_t.
+ * Keep in step with the enumeration above.
+ */
+#define FIB_PATH_TYPES {                                        \
+    [FIB_PATH_TYPE_ATTACHED_NEXT_HOP] = "attached-nexthop",     \
+    [FIB_PATH_TYPE_ATTACHED] = "attached",                      \
+    [FIB_PATH_TYPE_RECURSIVE] = "recursive",                    \
+    [FIB_PATH_TYPE_SPECIAL] = "special",                        \
+    [FIB_PATH_TYPE_EXCLUSIVE] = "exclusive",                    \
+    [FIB_PATH_TYPE_DEAG] = "deag",                              \
+    [FIB_PATH_TYPE_RECEIVE] = "receive",                        \
+}
+
+/**
+ * Iterate _item over every path type, first to last inclusive.
+ */
+#define FOR_EACH_FIB_PATH_TYPE(_item) \
+    for (_item = FIB_PATH_TYPE_FIRST; _item <= FIB_PATH_TYPE_LAST; _item++)
+
+/**
+ * Enumeration of path operational (i.e. derived) attributes.
+ * Each value is a bit position; see fib_path_oper_flags_t for the masks.
+ */
+typedef enum fib_path_oper_attribute_t_ {
+    /**
+     * Marker. Add new types after this one.
+     */
+    FIB_PATH_OPER_ATTRIBUTE_FIRST = 0,
+    /**
+     * The path forms part of a recursive loop.
+     */
+    FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP = FIB_PATH_OPER_ATTRIBUTE_FIRST,
+    /**
+     * The path is resolved
+     */
+    FIB_PATH_OPER_ATTRIBUTE_RESOLVED,
+    /**
+     * The path has become a permanent drop.
+     */
+    FIB_PATH_OPER_ATTRIBUTE_DROP,
+    /**
+     * Marker. Add new types before this one, then update it.
+     */
+    FIB_PATH_OPER_ATTRIBUTE_LAST = FIB_PATH_OPER_ATTRIBUTE_DROP,
+} __attribute__ ((packed)) fib_path_oper_attribute_t;
+
+/**
+ * The maximum number of path operational attributes
+ */
+#define FIB_PATH_OPER_ATTRIBUTE_MAX (FIB_PATH_OPER_ATTRIBUTE_LAST + 1)
+
+/**
+ * Display names for the operational attributes, indexed by
+ * fib_path_oper_attribute_t.
+ */
+#define FIB_PATH_OPER_ATTRIBUTES {                                      \
+    [FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP] = "recursive-loop",        \
+    [FIB_PATH_OPER_ATTRIBUTE_RESOLVED] = "resolved",                    \
+    [FIB_PATH_OPER_ATTRIBUTE_DROP] = "drop",                            \
+}
+
+/**
+ * Iterate _item over every operational attribute, first to last inclusive.
+ */
+#define FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(_item) \
+    for (_item = FIB_PATH_OPER_ATTRIBUTE_FIRST; \
+         _item <= FIB_PATH_OPER_ATTRIBUTE_LAST; \
+         _item++)
+
+/**
+ * Path flags from the attributes: one bit per operational attribute.
+ */
+typedef enum fib_path_oper_flags_t_ {
+    FIB_PATH_OPER_FLAG_NONE = 0,
+    FIB_PATH_OPER_FLAG_RECURSIVE_LOOP = (1 << FIB_PATH_OPER_ATTRIBUTE_RECURSIVE_LOOP),
+    FIB_PATH_OPER_FLAG_DROP = (1 << FIB_PATH_OPER_ATTRIBUTE_DROP),
+    FIB_PATH_OPER_FLAG_RESOLVED = (1 << FIB_PATH_OPER_ATTRIBUTE_RESOLVED),
+} __attribute__ ((packed)) fib_path_oper_flags_t;
+
+/**
+ * A FIB path
+ */
+typedef struct fib_path_t_ {
+    /**
+     * A path is a node in the FIB graph.
+     */
+    fib_node_t fp_node;
+
+    /**
+     * The index of the path-list to which this path belongs
+     */
+    u32 fp_pl_index;
+
+    /**
+     * This marks the start of the memory area used to hash
+     * the path. Everything between this mark and path_hash_end
+     * is fed, byte for byte, to the hash function.
+     */
+    STRUCT_MARK(path_hash_start);
+
+    /**
+     * Configuration Flags
+     */
+    fib_path_cfg_flags_t fp_cfg_flags;
+
+    /**
+     * The type of the path. This is the selector for the union
+     */
+    fib_path_type_t fp_type;
+
+    /**
+     * The protocol of the next-hop, i.e. the address family of the
+     * next-hop's address. We can't derive this from the address itself
+     * since the address can be all zeros
+     */
+    fib_protocol_t fp_nh_proto;
+
+    /**
+     * UCMP [unnormalised] weight
+     */
+    u32 fp_weight;
+
+    /**
+     * per-type union of the data required to resolve the path.
+     * Note the members do not share a common layout: the interface of
+     * an attached-nexthop path follows the next-hop address, whereas it
+     * is the first field of the attached and receive variants. Always
+     * access the variant selected by fp_type.
+     */
+    union {
+        struct {
+            /**
+             * The next-hop
+             */
+            ip46_address_t fp_nh;
+            /**
+             * The interface
+             */
+            u32 fp_interface;
+        } attached_next_hop;
+        struct {
+            /**
+             * The interface
+             */
+            u32 fp_interface;
+        } attached;
+        struct {
+            union
+            {
+                /**
+                 * The next-hop
+                 */
+                ip46_address_t fp_ip;
+                /**
+                 * The local label to resolve through.
+                 */
+                mpls_label_t fp_local_label;
+            } fp_nh;
+            /**
+             * The FIB table index in which to find the next-hop.
+             * This needs to be fixed. We should lookup the adjacencies in
+             * a separate table of adjacencies, rather than from the FIB.
+             * Two reasons I can think of:
+             *   - consider:
+             *       int ip addr Gig0 10.0.0.1/24
+             *       ip route 10.0.0.2/32 via Gig1 192.168.1.2
+             *       ip route 1.1.1.1/32 via Gig0 10.0.0.2
+             *     this is perfectly valid.
+             *     Packets addressed to 10.0.0.2 should be sent via Gig1.
+             *     Packets addressed to 1.1.1.1 should be sent via Gig0.
+             *     when we perform the adj resolution from the FIB for the path
+             *     "via Gig0 10.0.0.2" the lookup will result in the route via Gig1
+             *     and so we will pick up the adj via Gig1 - which was not what the
+             *     operator wanted.
+             *   - we can only return link-type IPv4 and so not the link-type MPLS.
+             *     more on this in a later commit.
+             *
+             * The table ID should only belong to a recursive path and indicate
+             * which FIB should be used to resolve the next-hop.
+             */
+            fib_node_index_t fp_tbl_id;
+        } recursive;
+        struct {
+            /**
+             * The FIB index in which to perform the next lookup
+             */
+            fib_node_index_t fp_tbl_id;
+        } deag;
+        struct {
+        } special;
+        struct {
+            /**
+             * The user provided 'exclusive' DPO
+             */
+            dpo_id_t fp_ex_dpo;
+        } exclusive;
+        struct {
+            /**
+             * The interface on which the local address is configured
+             */
+            u32 fp_interface;
+            /**
+             * The next-hop
+             */
+            ip46_address_t fp_addr;
+        } receive;
+    };
+    STRUCT_MARK(path_hash_end);
+
+    /**
+     * Members in this last section represent information that is
+     * derived during resolution. It should not be copied to new paths
+     * nor compared.
+     */
+
+    /**
+     * Operational Flags
+     */
+    fib_path_oper_flags_t fp_oper_flags;
+
+    /**
+     * the resolving via fib. not part of the union, since it is not part
+     * of the path's hash.
+     */
+    fib_node_index_t fp_via_fib;
+
+    /**
+     * The Data-path objects through which this path resolves for IP.
+     */
+    dpo_id_t fp_dpo;
+
+    /**
+     * the index of this path in the parent's child list.
+     */
+    u32 fp_sibling;
+} fib_path_t;
+
+/*
+ * Array of strings/names for the path types and attributes
+ */
+static const char *fib_path_type_names[] = FIB_PATH_TYPES;
+static const char *fib_path_oper_attribute_names[] = FIB_PATH_OPER_ATTRIBUTES;
+static const char *fib_path_cfg_attribute_names[] = FIB_PATH_CFG_ATTRIBUTES;
+
+/*
+ * The memory pool from which we allocate all the paths
+ */
+static fib_path_t *fib_path_pool;
+
+/*
+ * Debug macro.
+ * When FIB_DEBUG is defined it logs the formatted path followed by the
+ * caller's message; otherwise it expands to nothing.
+ */
+#ifdef FIB_DEBUG
+#define FIB_PATH_DBG(_p, _fmt, _args...)                        \
+{                                                               \
+    u8 *_tmp = NULL;                                            \
+    _tmp = fib_path_format(fib_path_get_index(_p), _tmp);       \
+    clib_warning("path:[%d:%s]:" _fmt,                          \
+                 fib_path_get_index(_p), _tmp,                  \
+                 ##_args);                                      \
+    vec_free(_tmp);                                             \
+}
+#else
+#define FIB_PATH_DBG(_p, _fmt, _args...)
+#endif + +static fib_path_t * +fib_path_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_path_pool, index)); +} + +static fib_node_index_t +fib_path_get_index (fib_path_t *path) +{ + return (path - fib_path_pool); +} + +static fib_node_t * +fib_path_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_path_get(index)); +} + +static fib_path_t* +fib_path_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_PATH == node->fn_type); +#endif + return ((fib_path_t*)node); +} + +u8 * +format_fib_path (u8 * s, va_list * args) +{ + fib_path_t *path = va_arg (*args, fib_path_t *); + vnet_main_t * vnm = vnet_get_main(); + fib_path_oper_attribute_t oattr; + fib_path_cfg_attribute_t cattr; + + s = format (s, " index:%d ", fib_path_get_index(path)); + s = format (s, "pl-index:%d ", path->fp_pl_index); + s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto); + s = format (s, "weight=%d ", path->fp_weight); + s = format (s, "%s: ", fib_path_type_names[path->fp_type]); + if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) { + s = format(s, " oper-flags:"); + FOR_EACH_FIB_PATH_OPER_ATTRIBUTE(oattr) { + if ((1<<oattr) & path->fp_oper_flags) { + s = format (s, "%s,", fib_path_oper_attribute_names[oattr]); + } + } + } + if (FIB_PATH_CFG_FLAG_NONE != path->fp_cfg_flags) { + s = format(s, " cfg-flags:"); + FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(cattr) { + if ((1<<cattr) & path->fp_cfg_flags) { + s = format (s, "%s,", fib_path_cfg_attribute_names[cattr]); + } + } + } + s = format(s, "\n "); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + s = format (s, "%U", format_ip46_address, + &path->attached_next_hop.fp_nh, + IP46_TYPE_ANY); + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP) + { + s = format (s, " if_index:%d", path->attached_next_hop.fp_interface); + } + else + { + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface( + vnm, + path->attached_next_hop.fp_interface)); + if 
(vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + s = format (s, " (p2p)"); + } + } + if (!dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, "\n unresolved"); + } + else + { + s = format(s, "\n %U", + format_dpo_id, + &path->fp_dpo, 13); + } + break; + case FIB_PATH_TYPE_ATTACHED: + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP) + { + s = format (s, " if_index:%d", path->attached_next_hop.fp_interface); + } + else + { + s = format (s, " %U", + format_vnet_sw_interface_name, + vnm, + vnet_get_sw_interface( + vnm, + path->attached.fp_interface)); + } + break; + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + { + s = format (s, "via %U", + format_mpls_unicast_label, + path->recursive.fp_nh.fp_local_label); + } + else + { + s = format (s, "via %U", + format_ip46_address, + &path->recursive.fp_nh.fp_ip, + IP46_TYPE_ANY); + } + s = format (s, " in fib:%d", + path->recursive.fp_tbl_id, + path->fp_via_fib); + s = format (s, " via-fib:%d", path->fp_via_fib); + s = format (s, " via-dpo:[%U:%d]", + format_dpo_type, path->fp_dpo.dpoi_type, + path->fp_dpo.dpoi_index); + + break; + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_EXCLUSIVE: + if (dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, "%U", format_dpo_id, + &path->fp_dpo, 2); + } + break; + } + return (s); +} + +u8 * +fib_path_format (fib_node_index_t pi, u8 *s) +{ + fib_path_t *path; + + path = fib_path_get(pi); + ASSERT(NULL != path); + + return (format (s, "%U", format_fib_path, path)); +} + +u8 * +fib_path_adj_format (fib_node_index_t pi, + u32 indent, + u8 *s) +{ + fib_path_t *path; + + path = fib_path_get(pi); + ASSERT(NULL != path); + + if (!dpo_id_is_valid(&path->fp_dpo)) + { + s = format(s, " unresolved"); + } + else + { + s = format(s, "%U", format_dpo_id, + &path->fp_dpo, 2); + } + + return (s); +} + +/* + * fib_path_last_lock_gone + * + * We don't share paths, we share path 
lists, so the [un]lock functions + * are no-ops + */ +static void +fib_path_last_lock_gone (fib_node_t *node) +{ + ASSERT(0); +} + +static const adj_index_t +fib_path_attached_next_hop_get_adj (fib_path_t *path, + vnet_link_t link) +{ + if (vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + /* + * if the interface is p2p then the adj for the specific + * neighbour on that link will never exist. on p2p links + * the subnet address (the attached route) links to the + * auto-adj (see below), we want that adj here too. + */ + return (adj_nbr_add_or_lock(path->fp_nh_proto, + link, + &zero_addr, + path->attached_next_hop.fp_interface)); + } + else + { + return (adj_nbr_add_or_lock(path->fp_nh_proto, + link, + &path->attached_next_hop.fp_nh, + path->attached_next_hop.fp_interface)); + } +} + +static void +fib_path_attached_next_hop_set (fib_path_t *path) +{ + /* + * resolve directly via the adjacnecy discribed by the + * interface and next-hop + */ + if (!vnet_sw_interface_is_admin_up(vnet_get_main(), + path->attached_next_hop.fp_interface)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + + dpo_set(&path->fp_dpo, + DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + fib_path_attached_next_hop_get_adj( + path, + fib_proto_to_link(path->fp_nh_proto))); + + /* + * become a child of the adjacency so we receive updates + * when its rewrite changes + */ + path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); +} + +/* + * create of update the paths recursive adj + */ +static void +fib_path_recursive_adj_update (fib_path_t *path, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + dpo_id_t via_dpo = DPO_INVALID; + + /* + * get the DPO to resolve through from the via-entry + */ + fib_entry_contribute_forwarding(path->fp_via_fib, + fct, + &via_dpo); + + + /* + * hope for the best - clear if restrictions apply. 
+ */ + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + + /* + * Validate any recursion constraints and over-ride the via + * adj if not met + */ + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_HOST) + { + /* + * the via FIB must be a host route. + * note the via FIB just added will always be a host route + * since it is an RR source added host route. So what we need to + * check is whether the route has other sources. If it does then + * some other source has added it as a host route. If it doesn't + * then it was added only here and inherits forwarding from a cover. + * the cover is not a host route. + * The RR source is the lowest priority source, so we check if it + * is the best. if it is there are no other sources. + */ + if (fib_entry_get_best_source(path->fp_via_fib) >= FIB_SOURCE_RR) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + + /* + * PIC edge trigger. let the load-balance maps know + */ + load_balance_map_path_state_change(fib_path_get_index(path)); + } + } + else if (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED) + { + /* + * RR source entries inherit the flags from the cover, so + * we can check the via directly + */ + if (!(FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(path->fp_via_fib))) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + dpo_copy(&via_dpo, drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + + /* + * PIC edge trigger. 
let the load-balance maps know + */ + load_balance_map_path_state_change(fib_path_get_index(path)); + } + } + + /* + * update the path's contributed DPO + */ + dpo_copy(dpo, &via_dpo); + + FIB_PATH_DBG(path, "recursive update: %U", + fib_get_lookup_main(path->fp_nh_proto), + &path->fp_dpo, 2); + + dpo_reset(&via_dpo); +} + +/* + * fib_path_is_permanent_drop + * + * Return !0 if the path is configured to permanently drop, + * despite other attributes. + */ +static int +fib_path_is_permanent_drop (fib_path_t *path) +{ + return ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP) || + (path->fp_oper_flags & FIB_PATH_OPER_FLAG_DROP)); +} + +/* + * fib_path_unresolve + * + * Remove our dependency on the resolution target + */ +static void +fib_path_unresolve (fib_path_t *path) +{ + /* + * the forced drop path does not need unresolving + */ + if (fib_path_is_permanent_drop(path)) + { + return; + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_NODE_INDEX_INVALID != path->fp_via_fib) + { + fib_prefix_t pfx; + + fib_entry_get_prefix(path->fp_via_fib, &pfx); + fib_entry_child_remove(path->fp_via_fib, + path->fp_sibling); + fib_table_entry_special_remove(path->recursive.fp_tbl_id, + &pfx, + FIB_SOURCE_RR); + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + } + break; + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_ATTACHED: + adj_child_remove(path->fp_dpo.dpoi_index, + path->fp_sibling); + adj_unlock(path->fp_dpo.dpoi_index); + break; + case FIB_PATH_TYPE_EXCLUSIVE: + dpo_reset(&path->exclusive.fp_ex_dpo); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_DEAG: + /* + * these hold only the path's DPO, which is reset below. + */ + break; + } + + /* + * release the adj we were holding and pick up the + * drop just in case. 
+ */ + dpo_reset(&path->fp_dpo); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + + return; +} + +static fib_forward_chain_type_t +fib_path_proto_to_chain_type (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case FIB_PROTOCOL_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case FIB_PROTOCOL_MPLS: + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + } + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +/* + * fib_path_back_walk_notify + * + * A back walk has reach this path. + */ +static fib_node_back_walk_rc_t +fib_path_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_path_t *path; + + path = fib_path_from_fib_node(node); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason) + { + /* + * modify the recursive adjacency to use the new forwarding + * of the via-fib. + * this update is visible to packets in flight in the DP. + */ + fib_path_recursive_adj_update( + path, + fib_path_proto_to_chain_type(path->fp_nh_proto), + &path->fp_dpo); + } + if ((FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) || + (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason)) + { + /* + * ADJ updates (complete<->incomplete) do not need to propagate to + * recursive entries. + * The only reason its needed as far back as here, is that the adj + * and the incomplete adj are a different DPO type, so the LBs need + * to re-stack. + * If this walk was quashed in the fib_entry, then any non-fib_path + * children (like tunnels that collapse out the LB when they stack) + * would not see the update. + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + break; + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + /* +FIXME comment + * ADJ_UPDATE backwalk pass silently through here and up to + * the path-list when the multipath adj collapse occurs. 
+ * The reason we do this is that the assumtption is that VPP + * runs in an environment where the Control-Plane is remote + * and hence reacts slowly to link up down. In order to remove + * this down link from the ECMP set quickly, we back-walk. + * VPP also has dedicated CPUs, so we are not stealing resources + * from the CP to do so. + */ + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason) + { + if (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) + { + /* + * alreday resolved. no need to walk back again + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason) + { + if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)) + { + /* + * alreday unresolved. no need to walk back again + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + /* + * The interface this path resolves through has been deleted. + * This will leave the path in a permanent drop state. The route + * needs to be removed and readded (and hence the path-list deleted) + * before it can forward again. + */ + fib_path_unresolve(path); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; + } + if (FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason) + { + /* + * restack the DPO to pick up the correct DPO sub-type + */ + uword if_is_up; + adj_index_t ai; + + if_is_up = vnet_sw_interface_is_admin_up( + vnet_get_main(), + path->attached_next_hop.fp_interface); + + if (if_is_up) + { + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + + ai = fib_path_attached_next_hop_get_adj( + path, + fib_proto_to_link(path->fp_nh_proto)); + + dpo_set(&path->fp_dpo, DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + ai); + adj_unlock(ai); + + if (!if_is_up) + { + /* + * If the interface is not up there is no reason to walk + * back to children. 
if we did they would only evalute + * that this path is unresolved and hence it would + * not contribute the adjacency - so it would be wasted + * CPU time. + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + } + if (FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason) + { + if (!(path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED)) + { + /* + * alreday unresolved. no need to walk back again + */ + return (FIB_NODE_BACK_WALK_CONTINUE); + } + /* + * the adj has gone down. the path is no longer resolved. + */ + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + break; + case FIB_PATH_TYPE_ATTACHED: + /* + * FIXME; this could schedule a lower priority walk, since attached + * routes are not usually in ECMP configurations so the backwalk to + * the FIB entry does not need to be high priority + */ + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason) + { + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason) + { + fib_path_unresolve(path); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP; + } + break; + case FIB_PATH_TYPE_DEAG: + /* + * FIXME When VRF delete is allowed this will need a poke. + */ + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * these path types have no parents. so to be + * walked from one is unexpected. 
+ */ + ASSERT(0); + break; + } + + /* + * propagate the backwalk further to the path-list + */ + fib_path_list_back_walk(path->fp_pl_index, ctx); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +static void +fib_path_memory_show (void) +{ + fib_show_memory_usage("Path", + pool_elts(fib_path_pool), + pool_len(fib_path_pool), + sizeof(fib_path_t)); +} + +/* + * The FIB path's graph node virtual function table + */ +static const fib_node_vft_t fib_path_vft = { + .fnv_get = fib_path_get_node, + .fnv_last_lock = fib_path_last_lock_gone, + .fnv_back_walk = fib_path_back_walk_notify, + .fnv_mem_show = fib_path_memory_show, +}; + +static fib_path_cfg_flags_t +fib_path_route_flags_to_cfg_flags (const fib_route_path_t *rpath) +{ + fib_path_cfg_flags_t cfg_flags = FIB_PATH_CFG_FLAG_NONE; + + if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_HOST) + cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_HOST; + if (rpath->frp_flags & FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED) + cfg_flags |= FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED; + + return (cfg_flags); +} + +/* + * fib_path_create + * + * Create and initialise a new path object. + * return the index of the path. + */ +fib_node_index_t +fib_path_create (fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const fib_route_path_t *rpath) +{ + fib_path_t *path; + + pool_get(fib_path_pool, path); + memset(path, 0, sizeof(*path)); + + fib_node_init(&path->fp_node, + FIB_NODE_TYPE_PATH); + + dpo_reset(&path->fp_dpo); + path->fp_pl_index = pl_index; + path->fp_nh_proto = nh_proto; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + path->fp_weight = rpath->frp_weight; + if (0 == path->fp_weight) + { + /* + * a weight of 0 is a meaningless value. We could either reject it, and thus force + * clients to always use 1, or we can accept it and fixup approrpiately. 
+ */ + path->fp_weight = 1; + } + path->fp_cfg_flags = flags; + path->fp_cfg_flags |= fib_path_route_flags_to_cfg_flags(rpath); + + /* + * deduce the path's tpye from the parementers and save what is needed. + */ + if (~0 != rpath->frp_sw_if_index) + { + if (flags & FIB_PATH_CFG_FLAG_LOCAL) + { + path->fp_type = FIB_PATH_TYPE_RECEIVE; + path->receive.fp_interface = rpath->frp_sw_if_index; + path->receive.fp_addr = rpath->frp_addr; + } + else + { + if (ip46_address_is_zero(&rpath->frp_addr)) + { + path->fp_type = FIB_PATH_TYPE_ATTACHED; + path->attached.fp_interface = rpath->frp_sw_if_index; + } + else + { + path->fp_type = FIB_PATH_TYPE_ATTACHED_NEXT_HOP; + path->attached_next_hop.fp_interface = rpath->frp_sw_if_index; + path->attached_next_hop.fp_nh = rpath->frp_addr; + } + } + } + else + { + if (ip46_address_is_zero(&rpath->frp_addr)) + { + if (~0 == rpath->frp_fib_index) + { + path->fp_type = FIB_PATH_TYPE_SPECIAL; + } + else + { + path->fp_type = FIB_PATH_TYPE_DEAG; + path->deag.fp_tbl_id = rpath->frp_fib_index; + } + } + else + { + path->fp_type = FIB_PATH_TYPE_RECURSIVE; + if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + { + path->recursive.fp_nh.fp_local_label = rpath->frp_local_label; + } + else + { + path->recursive.fp_nh.fp_ip = rpath->frp_addr; + } + path->recursive.fp_tbl_id = rpath->frp_fib_index; + } + } + + FIB_PATH_DBG(path, "create"); + + return (fib_path_get_index(path)); +} + +/* + * fib_path_create_special + * + * Create and initialise a new path object. + * return the index of the path. 
+ */ +fib_node_index_t +fib_path_create_special (fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const dpo_id_t *dpo) +{ + fib_path_t *path; + + pool_get(fib_path_pool, path); + memset(path, 0, sizeof(*path)); + + fib_node_init(&path->fp_node, + FIB_NODE_TYPE_PATH); + dpo_reset(&path->fp_dpo); + + path->fp_pl_index = pl_index; + path->fp_weight = 1; + path->fp_nh_proto = nh_proto; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + path->fp_cfg_flags = flags; + + if (FIB_PATH_CFG_FLAG_DROP & flags) + { + path->fp_type = FIB_PATH_TYPE_SPECIAL; + } + else if (FIB_PATH_CFG_FLAG_LOCAL & flags) + { + path->fp_type = FIB_PATH_TYPE_RECEIVE; + path->attached.fp_interface = FIB_NODE_INDEX_INVALID; + } + else + { + path->fp_type = FIB_PATH_TYPE_EXCLUSIVE; + ASSERT(NULL != dpo); + dpo_copy(&path->exclusive.fp_ex_dpo, dpo); + } + + return (fib_path_get_index(path)); +} + +/* + * fib_path_copy + * + * Copy a path. return index of new path. + */ +fib_node_index_t +fib_path_copy (fib_node_index_t path_index, + fib_node_index_t path_list_index) +{ + fib_path_t *path, *orig_path; + + pool_get(fib_path_pool, path); + + orig_path = fib_path_get(path_index); + ASSERT(NULL != orig_path); + + memcpy(path, orig_path, sizeof(*path)); + + FIB_PATH_DBG(path, "create-copy:%d", path_index); + + /* + * reset the dynamic section + */ + fib_node_init(&path->fp_node, FIB_NODE_TYPE_PATH); + path->fp_oper_flags = FIB_PATH_OPER_FLAG_NONE; + path->fp_pl_index = path_list_index; + path->fp_via_fib = FIB_NODE_INDEX_INVALID; + memset(&path->fp_dpo, 0, sizeof(path->fp_dpo)); + dpo_reset(&path->fp_dpo); + + return (fib_path_get_index(path)); +} + +/* + * fib_path_destroy + * + * destroy a path that is no longer required + */ +void +fib_path_destroy (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(NULL != path); + FIB_PATH_DBG(path, "destroy"); + + fib_path_unresolve(path); + + fib_node_deinit(&path->fp_node); + 
pool_put(fib_path_pool, path); +} + +/* + * fib_path_destroy + * + * destroy a path that is no longer required + */ +uword +fib_path_hash (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (hash_memory(STRUCT_MARK_PTR(path, path_hash_start), + (STRUCT_OFFSET_OF(fib_path_t, path_hash_end) - + STRUCT_OFFSET_OF(fib_path_t, path_hash_start)), + 0)); +} + +/* + * fib_path_cmp_i + * + * Compare two paths for equivalence. + */ +static int +fib_path_cmp_i (const fib_path_t *path1, + const fib_path_t *path2) +{ + int res; + + res = 1; + + /* + * paths of different types and protocol are not equal. + * different weights only are the same path. + */ + if (path1->fp_type != path2->fp_type) + { + res = (path1->fp_type - path2->fp_type); + } + if (path1->fp_nh_proto != path2->fp_nh_proto) + { + res = (path1->fp_nh_proto - path2->fp_nh_proto); + } + else + { + /* + * both paths are of the same type. + * consider each type and its attributes in turn. + */ + switch (path1->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + res = ip46_address_cmp(&path1->attached_next_hop.fp_nh, + &path2->attached_next_hop.fp_nh); + if (0 == res) { + res = vnet_sw_interface_compare( + vnet_get_main(), + path1->attached_next_hop.fp_interface, + path2->attached_next_hop.fp_interface); + } + break; + case FIB_PATH_TYPE_ATTACHED: + res = vnet_sw_interface_compare( + vnet_get_main(), + path1->attached.fp_interface, + path2->attached.fp_interface); + break; + case FIB_PATH_TYPE_RECURSIVE: + res = ip46_address_cmp(&path1->recursive.fp_nh, + &path2->recursive.fp_nh); + + if (0 == res) + { + res = (path1->recursive.fp_tbl_id - path2->recursive.fp_tbl_id); + } + break; + case FIB_PATH_TYPE_DEAG: + res = (path1->deag.fp_tbl_id - path2->deag.fp_tbl_id); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + res = 0; + break; + } + } + return (res); +} + +/* + * fib_path_cmp_for_sort + * + * Compare two paths 
for equivalence. Used during path sorting. + * As usual 0 means equal. + */ +int +fib_path_cmp_for_sort (void * v1, + void * v2) +{ + fib_node_index_t *pi1 = v1, *pi2 = v2; + fib_path_t *path1, *path2; + + path1 = fib_path_get(*pi1); + path2 = fib_path_get(*pi2); + + return (fib_path_cmp_i(path1, path2)); +} + +/* + * fib_path_cmp + * + * Compare two paths for equivalence. + */ +int +fib_path_cmp (fib_node_index_t pi1, + fib_node_index_t pi2) +{ + fib_path_t *path1, *path2; + + path1 = fib_path_get(pi1); + path2 = fib_path_get(pi2); + + return (fib_path_cmp_i(path1, path2)); +} + +int +fib_path_cmp_w_route_path (fib_node_index_t path_index, + const fib_route_path_t *rpath) +{ + fib_path_t *path; + int res; + + path = fib_path_get(path_index); + + res = 1; + + if (path->fp_weight != rpath->frp_weight) + { + res = (path->fp_weight - rpath->frp_weight); + } + else + { + /* + * both paths are of the same type. + * consider each type and its attributes in turn. + */ + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + res = ip46_address_cmp(&path->attached_next_hop.fp_nh, + &rpath->frp_addr); + if (0 == res) + { + res = vnet_sw_interface_compare( + vnet_get_main(), + path->attached_next_hop.fp_interface, + rpath->frp_sw_if_index); + } + break; + case FIB_PATH_TYPE_ATTACHED: + res = vnet_sw_interface_compare( + vnet_get_main(), + path->attached.fp_interface, + rpath->frp_sw_if_index); + break; + case FIB_PATH_TYPE_RECURSIVE: + if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + { + res = path->recursive.fp_nh.fp_local_label - rpath->frp_local_label; + } + else + { + res = ip46_address_cmp(&path->recursive.fp_nh.fp_ip, + &rpath->frp_addr); + } + + if (0 == res) + { + res = (path->recursive.fp_tbl_id - rpath->frp_fib_index); + } + break; + case FIB_PATH_TYPE_DEAG: + res = (path->deag.fp_tbl_id - rpath->frp_fib_index); + break; + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + res = 0; + break; + } + } + return (res); 
+} + +/* + * fib_path_recursive_loop_detect + * + * A forward walk of the FIB object graph to detect for a cycle/loop. This + * walk is initiated when an entry is linking to a new path list or from an old. + * The entry vector passed contains all the FIB entrys that are children of this + * path (it is all the entries encountered on the walk so far). If this vector + * contains the entry this path resolve via, then a loop is about to form. + * The loop must be allowed to form, since we need the dependencies in place + * so that we can track when the loop breaks. + * However, we MUST not produce a loop in the forwarding graph (else packets + * would loop around the switch path until the loop breaks), so we mark recursive + * paths as looped so that they do not contribute forwarding information. + * By marking the path as looped, an etry such as; + * X/Y + * via a.a.a.a (looped) + * via b.b.b.b (not looped) + * can still forward using the info provided by b.b.b.b only + */ +int +fib_path_recursive_loop_detect (fib_node_index_t path_index, + fib_node_index_t **entry_indicies) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + /* + * the forced drop path is never looped, cos it is never resolved. + */ + if (fib_path_is_permanent_drop(path)) + { + return (0); + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECURSIVE: + { + fib_node_index_t *entry_index, *entries; + int looped = 0; + entries = *entry_indicies; + + vec_foreach(entry_index, entries) { + if (*entry_index == path->fp_via_fib) + { + /* + * the entry that is about to link to this path-list (or + * one of this path-list's children) is the same entry that + * this recursive path resolves through. this is a cycle. + * abort the walk. 
+ */ + looped = 1; + break; + } + } + + if (looped) + { + FIB_PATH_DBG(path, "recursive loop formed"); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + } + else + { + /* + * no loop here yet. keep forward walking the graph through the + * entry this path resolves via, then set or clear this path's + * loop flag according to what that walk reports. + * NOTE(review): unlike the branch above, the path's DPO is not + * pointed at the drop when the loop is found further down the + * graph - confirm that is intended. + */ + if (fib_entry_recursive_loop_detect(path->fp_via_fib, entry_indicies)) + { + FIB_PATH_DBG(path, "recursive loop formed"); + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + } + else + { + FIB_PATH_DBG(path, "recursive loop cleared"); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RECURSIVE_LOOP; + } + } + break; + } + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * these path types cannot be part of a loop, since they are the leaves + * of the graph. their loop flag is left untouched. + */ + break; + } + + return (fib_path_is_looped(path_index)); +} +/* + * fib_path_resolve + * + * Resolve the path according to its type, filling in the path's DPO. + * Returns the value of fib_path_is_resolved() for the path. + */ +int +fib_path_resolve (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + /* + * hope for the best. 
+ */ + path->fp_oper_flags |= FIB_PATH_OPER_FLAG_RESOLVED; + + /* + * the forced drop path resolves via the drop adj, and is explicitly + * marked as not resolved before returning. + */ + if (fib_path_is_permanent_drop(path)) + { + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + return (fib_path_is_resolved(path_index)); + } + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + fib_path_attached_next_hop_set(path); + break; + case FIB_PATH_TYPE_ATTACHED: + /* + * the path is resolved only if its interface + * (path->attached.fp_interface) is admin up; otherwise clear the + * resolved flag. the adjacency below is installed in both cases. + */ + if (!vnet_sw_interface_is_admin_up(vnet_get_main(), + path->attached.fp_interface)) + { + path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED; + } + if (vnet_sw_interface_is_p2p(vnet_get_main(), + path->attached.fp_interface)) + { + /* + * point-2-point interfaces do not require a glean, since + * there is nothing to ARP. Install a rewrite/nbr adj instead, + * using the zero address as the neighbour. + */ + dpo_set(&path->fp_dpo, + DPO_ADJACENCY, + fib_proto_to_dpo(path->fp_nh_proto), + adj_nbr_add_or_lock( + path->fp_nh_proto, + fib_proto_to_link(path->fp_nh_proto), + &zero_addr, + path->attached.fp_interface)); + } + else + { + dpo_set(&path->fp_dpo, + DPO_ADJACENCY_GLEAN, + fib_proto_to_dpo(path->fp_nh_proto), + adj_glean_add_or_lock(path->fp_nh_proto, + path->attached.fp_interface, + NULL)); + } + /* + * become a child of the adjacency so we receive updates + * when the interface state changes + */ + path->fp_sibling = adj_child_add(path->fp_dpo.dpoi_index, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); + + break; + case FIB_PATH_TYPE_RECURSIVE: + { + /* + * Create a RR source entry in the table for the address + * that this path recurses through. + * This resolve action is recursive, hence we may create + * more paths in the process. more creates mean maybe realloc + * of this path. 
+ */ + fib_node_index_t fei; + fib_prefix_t pfx; + + ASSERT(FIB_NODE_INDEX_INVALID == path->fp_via_fib); + + if (FIB_PROTOCOL_MPLS == path->fp_nh_proto) + { + fib_prefix_from_mpls_label(path->recursive.fp_nh.fp_local_label, &pfx); + } + else + { + fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx); + } + + fei = fib_table_entry_special_add(path->recursive.fp_tbl_id, + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + + path = fib_path_get(path_index); + path->fp_via_fib = fei; + + /* + * the path pointer was fetched again above, after the entry add, + * rather than reusing the pointer obtained before it. + * + * become a dependent child of the entry so the path is + * informed when the forwarding for the entry changes. + */ + path->fp_sibling = fib_entry_child_add(path->fp_via_fib, + FIB_NODE_TYPE_PATH, + fib_path_get_index(path)); + + /* + * create and configure the DPO for the path's native chain type, + * i.e. the chain type derived from the next-hop protocol (which, + * per the MPLS branch above, is not necessarily IP). + */ + fib_path_recursive_adj_update( + path, + fib_path_proto_to_chain_type(path->fp_nh_proto), + &path->fp_dpo); + + break; + } + case FIB_PATH_TYPE_SPECIAL: + /* + * Resolve via the drop + */ + dpo_copy(&path->fp_dpo, + drop_dpo_get(fib_proto_to_dpo(path->fp_nh_proto))); + break; + case FIB_PATH_TYPE_DEAG: + /* + * Resolve via a lookup DPO in the table given by the path. + * FIXME. control plane should add routes with a table ID + */ + lookup_dpo_add_or_lock_w_fib_index(path->deag.fp_tbl_id, + fib_proto_to_dpo(path->fp_nh_proto), + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &path->fp_dpo); + break; + case FIB_PATH_TYPE_RECEIVE: + /* + * Resolve via a receive DPO. 
+ */ + receive_dpo_add_or_lock(fib_proto_to_dpo(path->fp_nh_proto), + path->receive.fp_interface, + &path->receive.fp_addr, + &path->fp_dpo); + break; + case FIB_PATH_TYPE_EXCLUSIVE: + /* + * Resolve via the user provided DPO + */ + dpo_copy(&path->fp_dpo, &path->exclusive.fp_ex_dpo); + break; + } + + return (fib_path_is_resolved(path_index)); +} +/* + * fib_path_get_resolving_interface + * + * Return the interface the path resolves through: the path's own + * interface for attached-next-hop, attached and receive paths; for a + * recursive path, whatever the entry it resolves via reports; + * ~0 for all other path types. + */ +u32 +fib_path_get_resolving_interface (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + return (path->attached_next_hop.fp_interface); + case FIB_PATH_TYPE_ATTACHED: + return (path->attached.fp_interface); + case FIB_PATH_TYPE_RECEIVE: + return (path->receive.fp_interface); + case FIB_PATH_TYPE_RECURSIVE: + return (fib_entry_get_resolving_interface(path->fp_via_fib)); + case FIB_PATH_TYPE_SPECIAL: + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_EXCLUSIVE: + break; + } + return (~0); +} +/* + * fib_path_get_adj + * + * Return the index held in the path's DPO when that DPO is an + * adjacency (this is also ASSERTed), else ADJ_INDEX_INVALID. + */ +adj_index_t +fib_path_get_adj (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(dpo_is_adj(&path->fp_dpo)); + if (dpo_is_adj(&path->fp_dpo)) + { + return (path->fp_dpo.dpoi_index); + } + return (ADJ_INDEX_INVALID); +} +/* + * fib_path_get_weight + * + * Return the weight stored in the path. + */ +int +fib_path_get_weight (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + return (path->fp_weight); +} + +/** + * @brief Contribute the path's adjacency to the list passed. + * By calling this function over all paths, recursively, a child + * can construct its full set of forwarding adjacencies, and hence its + * uRPF list. 
+ */ +void +fib_path_contribute_urpf (fib_node_index_t path_index, + index_t urpf) +{ + fib_path_t *path; + + if (!fib_path_is_resolved(path_index)) + return; + + path = fib_path_get(path_index); + + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + fib_urpf_list_append(urpf, path->attached_next_hop.fp_interface); + break; + + case FIB_PATH_TYPE_ATTACHED: + fib_urpf_list_append(urpf, path->attached.fp_interface); + break; + + case FIB_PATH_TYPE_RECURSIVE: + fib_entry_contribute_urpf(path->fp_via_fib, urpf); + break; + + case FIB_PATH_TYPE_EXCLUSIVE: + case FIB_PATH_TYPE_SPECIAL: + /* + * these path types may link to an adj, if that's what + * the client gave. if so, contribute the interface found in + * the adj's rewrite header. + */ + if (dpo_is_adj(&path->fp_dpo)) + { + ip_adjacency_t *adj; + + adj = adj_get(path->fp_dpo.dpoi_index); + + fib_urpf_list_append(urpf, adj->rewrite_header.sw_if_index); + } + break; + + case FIB_PATH_TYPE_DEAG: + case FIB_PATH_TYPE_RECEIVE: + /* + * these path types don't link to an adj, so they contribute nothing + */ + break; + } +} +/* + * fib_path_contribute_forwarding + * + * Fill in dpo with the forwarding this path contributes for the chain + * type fct: a copy of the path's own DPO when fct is the path's native + * chain type, otherwise a path-type specific alternative. + * NOTE(review): fct == MPLS_EOS is ASSERTed against below, yet the + * switches still list that case - confirm which is intended. + */ +void +fib_path_contribute_forwarding (fib_node_index_t path_index, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct); + + FIB_PATH_DBG(path, "contribute"); + + /* + * The DPO stored in the path was created when the path was resolved. + * This then represents the path's 'native' protocol; IP. + * For all other chain types we need to go find something else. + */ + if (fib_path_proto_to_chain_type(path->fp_nh_proto) == fct) + { + dpo_copy(dpo, &path->fp_dpo); + } + else + { + switch (path->fp_type) + { + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + case FIB_FORW_CHAIN_TYPE_ETHERNET: + { + adj_index_t ai; + + /* + * get a appropriate link type adj. 
+ */ + ai = fib_path_attached_next_hop_get_adj( + path, + fib_forw_chain_type_to_link_type(fct)); + dpo_set(dpo, DPO_ADJACENCY, + fib_forw_chain_type_to_dpo_proto(fct), ai); + adj_unlock(ai); + + break; + } + } + break; + case FIB_PATH_TYPE_RECURSIVE: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + fib_path_recursive_adj_update(path, fct, dpo); + break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; + } + break; + case FIB_PATH_TYPE_DEAG: + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + lookup_dpo_add_or_lock_w_table_id(MPLS_FIB_DEFAULT_TABLE_ID, + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + dpo); + break; + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + dpo_copy(dpo, &path->fp_dpo); + break; + case FIB_FORW_CHAIN_TYPE_ETHERNET: + ASSERT(0); + break; + } + break; + case FIB_PATH_TYPE_EXCLUSIVE: + dpo_copy(dpo, &path->exclusive.fp_ex_dpo); + break; + case FIB_PATH_TYPE_ATTACHED: + case FIB_PATH_TYPE_RECEIVE: + case FIB_PATH_TYPE_SPECIAL: + ASSERT(0); + break; + } + + } +} +/* + * fib_path_append_nh_for_multipath_hash + * + * If the path is resolved, append one element to the hash_key vector + * holding the path's weight, its index and the DPO it contributes for + * the chain type fct. An unresolved path appends nothing. + * Returns the hash_key vector; callers should use the returned value + * in place of the one they passed in. + */ +load_balance_path_t * +fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index, + fib_forward_chain_type_t fct, + load_balance_path_t *hash_key) +{ + load_balance_path_t *mnh; + fib_path_t *path; + + path = fib_path_get(path_index); + + ASSERT(path); + + if (fib_path_is_resolved(path_index)) + { + vec_add2(hash_key, mnh, 1); + + mnh->path_weight = path->fp_weight; + mnh->path_index = path_index; + fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo); + } + + return (hash_key); +} +/* + * fib_path_is_recursive + * + * Return non-zero if the path's type is recursive. + */ +int +fib_path_is_recursive (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_RECURSIVE == path->fp_type); +} + +int +fib_path_is_exclusive (fib_node_index_t path_index) +{ + 
fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_EXCLUSIVE == path->fp_type); +} +/* + * fib_path_is_deag + * + * Return non-zero if the path's type is deag. + */ +int +fib_path_is_deag (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (FIB_PATH_TYPE_DEAG == path->fp_type); +} +/* + * fib_path_is_resolved + * + * A path is resolved when all of the following hold: its DPO is valid, + * its RESOLVED oper flag is set, it is not looped and it is not a + * permanent drop. + */ +int +fib_path_is_resolved (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (dpo_id_is_valid(&path->fp_dpo) && + (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RESOLVED) && + !fib_path_is_looped(path_index) && + !fib_path_is_permanent_drop(path)); +} +/* + * fib_path_is_looped + * + * Non-zero if the path's RECURSIVE_LOOP oper flag is set. The value + * returned is the masked flag itself, not a normalised 0/1. + */ +int +fib_path_is_looped (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (path->fp_oper_flags & FIB_PATH_OPER_FLAG_RECURSIVE_LOOP); +} +/* + * fib_path_encode + * + * Append an encoding of the path to the vector of + * fib_route_path_encode_t that ctx points to. Returns 1 once the path + * has been encoded, 0 if the path lookup gave NULL. + * The signature matches the path-list walk callbacks used elsewhere in + * the FIB - presumably this is used as one; path_list_index is unused. + * NOTE(review): the dpo is copied from path->exclusive.fp_ex_dpo for + * every path type, not only exclusive paths, and a recursive path + * copies fp_nh.fp_ip even if its next-hop protocol is MPLS - confirm + * both are intended. + */ +int +fib_path_encode (fib_node_index_t path_list_index, + fib_node_index_t path_index, + void *ctx) +{ + fib_route_path_encode_t **api_rpaths = ctx; + fib_route_path_encode_t *api_rpath; + fib_path_t *path; + + path = fib_path_get(path_index); + if (!path) + return (0); + vec_add2(*api_rpaths, api_rpath, 1); + api_rpath->rpath.frp_weight = path->fp_weight; + api_rpath->rpath.frp_proto = path->fp_nh_proto; + api_rpath->rpath.frp_sw_if_index = ~0; + api_rpath->dpo = path->exclusive.fp_ex_dpo; + switch (path->fp_type) + { + case FIB_PATH_TYPE_RECEIVE: + api_rpath->rpath.frp_addr = path->receive.fp_addr; + api_rpath->rpath.frp_sw_if_index = path->receive.fp_interface; + break; + case FIB_PATH_TYPE_ATTACHED: + api_rpath->rpath.frp_sw_if_index = path->attached.fp_interface; + break; + case FIB_PATH_TYPE_ATTACHED_NEXT_HOP: + api_rpath->rpath.frp_sw_if_index = path->attached_next_hop.fp_interface; + api_rpath->rpath.frp_addr = path->attached_next_hop.fp_nh; + break; + case FIB_PATH_TYPE_SPECIAL: + break; + case FIB_PATH_TYPE_DEAG: + break; + case FIB_PATH_TYPE_RECURSIVE: + api_rpath->rpath.frp_addr = path->recursive.fp_nh.fp_ip; + break; + default: + break; + } + return (1); +} + +fib_protocol_t 
+fib_path_get_proto (fib_node_index_t path_index) +{ + fib_path_t *path; + + path = fib_path_get(path_index); + + return (path->fp_nh_proto); +} + +/* + * fib_path_module_init + * + * Register the path node type and its virtual function table with the + * FIB node infrastructure. + */ +void +fib_path_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_PATH, &fib_path_vft); +} + +/* + * show_fib_path_command + * + * CLI handler for 'show fib paths'. Given a numeric path index it shows + * that path and its children, or reports the index as invalid if it is + * free in the pool. With no argument it lists every path in the pool. + */ +static clib_error_t * +show_fib_path_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_node_index_t pi; + fib_path_t *path; + + if (unformat (input, "%d", &pi)) + { + /* + * show one in detail. + * s is a vppinfra vector built by format(); it carries no + * terminating NUL, so it must be printed with %v, not %s. + */ + if (!pool_is_free_index(fib_path_pool, pi)) + { + path = fib_path_get(pi); + u8 *s = fib_path_format(pi, NULL); + s = format(s, "children:"); + s = fib_node_children_format(path->fp_node.fn_children, s); + vlib_cli_output (vm, "%v", s); + vec_free(s); + } + else + { + vlib_cli_output (vm, "path %d invalid", pi); + } + } + else + { + vlib_cli_output (vm, "FIB Paths"); + pool_foreach(path, fib_path_pool, + ({ + vlib_cli_output (vm, "%U", format_fib_path, path); + })); + } + + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_path, static) = { + .path = "show fib paths", + .function = show_fib_path_command, + .short_help = "show fib paths", +}; diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h new file mode 100644 index 00000000000..91f49d09234 --- /dev/null +++ b/src/vnet/fib/fib_path.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Given a route of the form; + * q.r.s.t/Y + * via <interface> <next-hop> + * + * The prefix is: q.r.s.t/Y + * the path is: 'via <interface> <next-hop>' + * + * The path is the description of where to send the traffic, and + * the prefix is a description of which traffic to send. + * It is the aim of the FIB to resolve the path, i.e. to find the corresponding + * adjacency to match the path's description. + */ + +#ifndef __FIB_PATH_H__ +#define __FIB_PATH_H__ + +#include <vnet/ip/ip.h> +#include <vnet/dpo/load_balance.h> + +#include <vnet/fib/fib_types.h> +#include <vnet/adj/adj_types.h> + +/** + * Enumeration of path configuration attributes. + * Each attribute's value is the bit position of the matching + * fib_path_cfg_flags_t flag. + */ +typedef enum fib_path_cfg_attribute_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_PATH_CFG_ATTRIBUTE_FIRST = 0, + /** + * The path is forced to a drop, whatever the next-hop info says. + * something somewhere knows better... + */ + FIB_PATH_CFG_ATTRIBUTE_DROP = FIB_PATH_CFG_ATTRIBUTE_FIRST, + /** + * The path uses an adj that is exclusive. I.e. it is known only by + * the source of the route. + */ + FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE, + /** + * Recursion constraint via host + */ + FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST, + /** + * Recursion constraint via attached + */ + FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED, + /** + * The path is a for-us path + */ + FIB_PATH_CFG_ATTRIBUTE_LOCAL, + /** + * Marker. Add new types before this one, then update it. 
+ */ + FIB_PATH_CFG_ATTRIBUTE_LAST = FIB_PATH_CFG_ATTRIBUTE_LOCAL, +} __attribute__ ((packed)) fib_path_cfg_attribute_t; + +/** + * The number of path attributes; one more than the value of the last + * attribute. + */ +#define FIB_PATH_CFG_ATTRIBUTE_MAX (FIB_PATH_CFG_ATTRIBUTE_LAST + 1) + +#define FIB_PATH_CFG_ATTRIBUTES { \ + [FIB_PATH_CFG_ATTRIBUTE_DROP] = "drop", \ + [FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST] = "resolve-host", \ + [FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED] = "resolve-attached", \ + [FIB_PATH_CFG_ATTRIBUTE_LOCAL] = "local", \ +} + +#define FOR_EACH_FIB_PATH_CFG_ATTRIBUTE(_item) \ + for (_item = FIB_PATH_CFG_ATTRIBUTE_FIRST; \ + _item <= FIB_PATH_CFG_ATTRIBUTE_LAST; \ + _item++) + +/** + * Path config flags from the attributes; one bit per attribute, at the + * bit position given by the attribute's enum value. + */ +typedef enum fib_path_cfg_flags_t_ { + FIB_PATH_CFG_FLAG_NONE = 0, + FIB_PATH_CFG_FLAG_DROP = (1 << FIB_PATH_CFG_ATTRIBUTE_DROP), + FIB_PATH_CFG_FLAG_EXCLUSIVE = (1 << FIB_PATH_CFG_ATTRIBUTE_EXCLUSIVE), + FIB_PATH_CFG_FLAG_RESOLVE_HOST = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_HOST), + FIB_PATH_CFG_FLAG_RESOLVE_ATTACHED = (1 << FIB_PATH_CFG_ATTRIBUTE_RESOLVE_ATTACHED), + FIB_PATH_CFG_FLAG_LOCAL = (1 << FIB_PATH_CFG_ATTRIBUTE_LOCAL), +} __attribute__ ((packed)) fib_path_cfg_flags_t; + + +extern u8 *fib_path_format(fib_node_index_t pi, u8 *s); +extern u8 *fib_path_adj_format(fib_node_index_t pi, + u32 indent, + u8 *s); + +extern u8 * format_fib_path(u8 * s, va_list * args); + +extern fib_node_index_t fib_path_create(fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const fib_route_path_t *path); +extern fib_node_index_t fib_path_create_special(fib_node_index_t pl_index, + fib_protocol_t nh_proto, + fib_path_cfg_flags_t flags, + const dpo_id_t *dpo); + +extern int fib_path_cmp(fib_node_index_t path_index1, + fib_node_index_t path_index2); +extern int fib_path_cmp_for_sort(void * a1, void * a2); +extern int fib_path_cmp_w_route_path(fib_node_index_t path_index, + const fib_route_path_t 
*rpath); +extern fib_node_index_t fib_path_copy(fib_node_index_t path_index, + fib_node_index_t path_list_index); +extern int fib_path_resolve(fib_node_index_t path_index); +extern int fib_path_is_resolved(fib_node_index_t path_index); +extern int fib_path_is_recursive(fib_node_index_t path_index); +extern int fib_path_is_exclusive(fib_node_index_t path_index); +extern int fib_path_is_deag(fib_node_index_t path_index); +extern int fib_path_is_looped(fib_node_index_t path_index); +extern fib_protocol_t fib_path_get_proto(fib_node_index_t path_index); +extern void fib_path_destroy(fib_node_index_t path_index); +extern uword fib_path_hash(fib_node_index_t path_index); +extern load_balance_path_t * fib_path_append_nh_for_multipath_hash( + fib_node_index_t path_index, + fib_forward_chain_type_t fct, + load_balance_path_t *hash_key); +extern void fib_path_contribute_forwarding(fib_node_index_t path_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo); +extern void fib_path_contribute_urpf(fib_node_index_t path_index, + index_t urpf); +extern adj_index_t fib_path_get_adj(fib_node_index_t path_index); +extern int fib_path_recursive_loop_detect(fib_node_index_t path_index, + fib_node_index_t **entry_indicies); +/* + * the index taken here is that of a path (the definition hands it to + * fib_path_get), not that of a FIB entry. + */ +extern u32 fib_path_get_resolving_interface(fib_node_index_t path_index); +extern int fib_path_get_weight(fib_node_index_t path_index); + +extern void fib_path_module_init(void); +extern int fib_path_encode(fib_node_index_t path_list_index, + fib_node_index_t path_index, + void *ctx); + +#endif diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c new file mode 100644 index 00000000000..f75b5626c04 --- /dev/null +++ b/src/vnet/fib/fib_path_ext.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/mpls/mpls.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_path_ext.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_path.h> +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/fib_internal.h> +/* + * format_fib_path_ext + * + * format() helper taking a fib_path_ext_t pointer; prints the index of + * the extended path followed by each label in the label stack. + */ +u8 * +format_fib_path_ext (u8 * s, va_list * args) +{ + fib_path_ext_t *path_ext; + u32 ii; + + path_ext = va_arg (*args, fib_path_ext_t *); + + s = format(s, "path:%d labels:", + path_ext->fpe_path_index); + for (ii = 0; ii < vec_len(path_ext->fpe_path.frp_label_stack); ii++) + { + s = format(s, "%U ", + format_mpls_unicast_label, + path_ext->fpe_path.frp_label_stack[ii]); + } + return (s); +} +/* + * fib_path_ext_cmp + * + * Compare the route-path stored in the extension with rpath; returns + * the result of fib_route_path_cmp(). + */ +int +fib_path_ext_cmp (fib_path_ext_t *path_ext, + const fib_route_path_t *rpath) +{ + return (fib_route_path_cmp(&path_ext->fpe_path, rpath)); +} +/* + * fib_path_ext_match + * + * Path-list walk callback; ctx is the fib_path_ext_t being resolved. + * When the path compares equal to the extension's route-path, record + * the path's index in the extension and return 0, which presumably + * ends the walk (the 1 returned otherwise is commented 'keep going'). + */ +static int +fib_path_ext_match (fib_node_index_t pl_index, + fib_node_index_t path_index, + void *ctx) +{ + fib_path_ext_t *path_ext = ctx; + + if (!fib_path_cmp_w_route_path(path_index, + &path_ext->fpe_path)) + { + path_ext->fpe_path_index = path_index; + return (0); + } + // keep going + return (1); +} + +void +fib_path_ext_resolve (fib_path_ext_t *path_ext, + fib_node_index_t path_list_index) +{ + /* + * Find the path on the path list that this is an extension for. + * fpe_path_index is left as FIB_NODE_INDEX_INVALID if no path on + * the list matches. + */ + path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID; + fib_path_list_walk(path_list_index, + fib_path_ext_match, + path_ext); +} + +void +fib_path_ext_init (fib_path_ext_t *path_ext, + fib_node_index_t path_list_index, 
+ const fib_route_path_t *rpath) +{ + path_ext->fpe_path = *rpath; + path_ext->fpe_path_index = FIB_NODE_INDEX_INVALID; + /* + * NOTE(review): the struct copy above is shallow, so the extension's + * label stack points at the same vector as rpath's frp_label_stack - + * confirm which side owns that vector. + */ + fib_path_ext_resolve(path_ext, path_list_index); +} + +/** + * @brief Return true if the label stack holds exactly one label and that + * label is the implicit null + */ +static int +fib_path_ext_is_imp_null (fib_path_ext_t *path_ext) +{ + return ((1 == vec_len(path_ext->fpe_label_stack)) && + (MPLS_IETF_IMPLICIT_NULL_LABEL == path_ext->fpe_label_stack[0])); +} +/** + * @brief If the extended path is resolved and the forwarding it + * contributes is not a drop, append a next-hop for it to nhs, with an + * MPLS label DPO stacked on top unless the label stack is implicit null. + * Returns nhs, which callers should use in place of the vector passed in. + */ +load_balance_path_t * +fib_path_ext_stack (fib_path_ext_t *path_ext, + const fib_entry_t *entry, + fib_forward_chain_type_t child_fct, + load_balance_path_t *nhs) +{ + fib_forward_chain_type_t parent_fct; + load_balance_path_t *nh; + + if (!fib_path_is_resolved(path_ext->fpe_path_index)) + return (nhs); + + /* + * Since we are stacking this path-extension, it must have a valid out + * label. From the chain type requested by the child, determine what + * chain type we will request from the parent. + */ + switch (child_fct) + { + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + { + /* + * The EOS chain is tricky since, when the path has an imp NULL one cannot know + * the adjacency to link to without knowing what the packet's payload protocol + * will be once the label is popped. + */ + if (fib_path_ext_is_imp_null(path_ext)) + { + parent_fct = fib_entry_chain_type_fixup(entry, child_fct); + } + else + { + /* + * we have a label to stack. packets will thus be labelled when + * they encounter the child, ergo, non-eos. + */ + parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + } + break; + } + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + if (fib_path_ext_is_imp_null(path_ext)) + { + /* + * implicit-null label for the eos or IP chain, need to pick up + * the IP adj + */ + parent_fct = child_fct; + } + else + { + /* + * we have a label to stack. packets will thus be labelled when + * they encounter the child, ergo, non-eos. 
+ */ + parent_fct = FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS; + } + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + parent_fct = child_fct; + break; + default: + return (nhs); + break; + } + + dpo_id_t via_dpo = DPO_INVALID; + + /* + * The next object in the graph after the imposition of the label + * will be the DPO contributed by the path through which the packets + * are to be sent. We stack the MPLS Label DPO on this path DPO + */ + fib_path_contribute_forwarding(path_ext->fpe_path_index, + parent_fct, + &via_dpo); + + if (dpo_is_drop(&via_dpo) || + load_balance_is_drop(&via_dpo)) + { + /* + * don't stack a path extension on a drop. doing so will create + * a LB bucket entry on drop, and we will lose a percentage of traffic. + * nothing is appended to nhs in this case. + */ + } + else + { + vec_add2(nhs, nh, 1); + nh->path_weight = fib_path_get_weight(path_ext->fpe_path_index); + nh->path_index = path_ext->fpe_path_index; + dpo_copy(&nh->path_dpo, &via_dpo); + + /* + * The label is stackable for this chain type + * construct the mpls header that will be imposed in the data-path. + * for an implicit null there is nothing to impose, so the next-hop + * keeps the plain copy of the path's DPO made above. + */ + if (!fib_path_ext_is_imp_null(path_ext)) + { + /* + * we use the parent protocol for the label so that + * we pickup the correct MPLS imposition nodes to do + * ip[46] processing. + * NOTE(review): chain_proto below is derived from child_fct, + * not parent_fct - confirm which one this comment means. + */ + dpo_proto_t chain_proto; + mpls_eos_bit_t eos; + index_t mldi; + + eos = (child_fct == FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS ? + MPLS_NON_EOS : + MPLS_EOS); + chain_proto = fib_forw_chain_type_to_dpo_proto(child_fct); + + mldi = mpls_label_dpo_create(path_ext->fpe_label_stack, + eos, 255, 0, + chain_proto, + &nh->path_dpo); + + dpo_set(&nh->path_dpo, + DPO_MPLS_LABEL, + chain_proto, + mldi); + } + } + dpo_reset(&via_dpo); + + return (nhs); +} diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h new file mode 100644 index 00000000000..cf8f8df00c6 --- /dev/null +++ b/src/vnet/fib/fib_path_ext.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_PATH_EXT_H__ +#define __FIB_PATH_EXT_H__ + +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_types.h> + +/** + * A path extension is a per-entry addition to the forwarding information + * when packets are sent for that entry over that path. + * + * For example: + * ip route add 1.1.1.1/32 via 10.10.10.10 out-label 100 + * + * The out-going MPLS label value 100 is a path-extension. It is a value specific + * to the entry 1.1.1.1/32 and valid only when packets are sent via 10.10.10.10. + */ +typedef struct fib_path_ext_t_ +{ + /** + * A description of the path that is being extended. + * This description is used to match this extension with the [changing] + * instance of a fib_path_t that is extended. + * The extension's label stack is the frp_label_stack member of this + * description; fpe_label_stack below is shorthand for it. + */ + fib_route_path_t fpe_path; +#define fpe_label_stack fpe_path.frp_label_stack + + /** + * The index of the path. This is the global index, not the path's + * position in the path-list. 
+ */ + fib_node_index_t fpe_path_index; +} fib_path_ext_t; + +struct fib_entry_t_; + +extern u8 * format_fib_path_ext(u8 * s, va_list * args); + +extern void fib_path_ext_init(fib_path_ext_t *path_ext, + fib_node_index_t path_list_index, + const fib_route_path_t *rpath); + +extern int fib_path_ext_cmp(fib_path_ext_t *path_ext, + const fib_route_path_t *rpath); + +extern void fib_path_ext_resolve(fib_path_ext_t *path_ext, + fib_node_index_t path_list_index); + +extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext, + const struct fib_entry_t_ *entry, + fib_forward_chain_type_t fct, + load_balance_path_t *nhs); + +#endif + + diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c new file mode 100644 index 00000000000..5b35e9b87e7 --- /dev/null +++ b/src/vnet/fib/fib_path_list.c @@ -0,0 +1,1223 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/mhash.h> +#include <vnet/ip/ip.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> + +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/fib_node_list.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_urpf_list.h> + +/** + * FIB path-list + * A representation of the list/set of paths through which a prefix is reachable + */ +typedef struct fib_path_list_t_ { + /** + * A path-list is a node in the FIB graph. 
+ */ + fib_node_t fpl_node; + + /** + * Flags on the path-list + */ + fib_path_list_flags_t fpl_flags; + + /** + * The next-hop protocol for the paths in this path list. + * Note that fixing the proto here means we don't support a mix of + * v4 and v6 paths. ho hum. + */ + fib_protocol_t fpl_nh_proto; + + /** + * Vector of path indices for all configured paths. + * For shareable path-lists this list MUST not change. + */ + fib_node_index_t *fpl_paths; + + /** + * the RPF list calculated for this path list + */ + fib_node_index_t fpl_urpf; +} fib_path_list_t; + +/* + * Array of strings/names for the path-list attributes + */ +static const char *fib_path_list_attr_names[] = FIB_PATH_LIST_ATTRIBUTES; + +/* + * The memory pool from which we allocate all the path-lists + */ +static fib_path_list_t * fib_path_list_pool; + +/* + * The data-base of shared path-lists + */ +static uword *fib_path_list_db; + +/* + * Debug macro. Compiled to nothing unless FIB_DEBUG is defined. + * When enabled it logs the path-list's index, its address, the address + * of its path vector and the formatted path-list ahead of the message. + * NOTE(review): _tmp is a vector built by format() yet is printed with + * %s - confirm it is safe to treat it as a NUL terminated string. + */ +#ifdef FIB_DEBUG +#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) \ +{ \ + u8 *_tmp = 0; \ + _tmp = fib_path_list_format( \ + fib_path_list_get_index(_pl), _tmp); \ + clib_warning("pl:[%d:%p:%p:%s]:" _fmt, \ + fib_path_list_get_index(_pl), \ + _pl, _pl->fpl_paths, _tmp, \ + ##_args); \ + vec_free(_tmp); \ +} +#else +#define FIB_PATH_LIST_DBG(_pl, _fmt, _args...) 
+#endif + +static fib_path_list_t * +fib_path_list_get (fib_node_index_t index) +{ + return (pool_elt_at_index(fib_path_list_pool, index)); +} + +static fib_node_t * +fib_path_list_get_node (fib_node_index_t index) +{ + return ((fib_node_t*)fib_path_list_get(index)); +} +/* + * fib_path_list_from_fib_node + * + * Convert a FIB node back to the path-list that contains it. This is a + * plain pointer cast; the node's type is checked only when + * CLIB_DEBUG > 0. + */ +static fib_path_list_t* +fib_path_list_from_fib_node (fib_node_t *node) +{ +#if CLIB_DEBUG > 0 + ASSERT(FIB_NODE_TYPE_PATH_LIST == node->fn_type); +#endif + return ((fib_path_list_t*)node); +} +/* + * fib_path_list_get_index + * + * The path-list's index is its offset from the base of the pool. + */ +static fib_node_index_t +fib_path_list_get_index (fib_path_list_t *path_list) +{ + return (path_list - fib_path_list_pool); +} +/* + * format_fib_path_list + * + * format() helper taking a fib_path_list_t pointer. Prints the index, + * lock count, protocol, the names of any flags that are set and the + * uRPF list, followed by each path on its own line. + */ +static u8 * +format_fib_path_list (u8 * s, va_list * args) +{ + fib_path_list_attribute_t attr; + fib_node_index_t *path_index; + fib_path_list_t *path_list; + + path_list = va_arg (*args, fib_path_list_t *); + + s = format (s, " index:%u", fib_path_list_get_index(path_list)); + s = format (s, " locks:%u", path_list->fpl_node.fn_locks); + s = format (s, " proto:%U", format_fib_protocol, path_list->fpl_nh_proto); + + if (FIB_PATH_LIST_FLAG_NONE != path_list->fpl_flags) + { + s = format (s, " flags:"); + FOR_EACH_PATH_LIST_ATTRIBUTE(attr) + { + if ((1<<attr) & path_list->fpl_flags) + { + s = format (s, "%s,", fib_path_list_attr_names[attr]); + } + } + } + s = format (s, " %U\n", format_fib_urpf_list, path_list->fpl_urpf); + + vec_foreach (path_index, path_list->fpl_paths) + { + s = fib_path_format(*path_index, s); + s = format(s, "\n"); + } + + return (s); +} +/* + * fib_path_list_adjs_format + * + * Append the adjacency formatting of every path in the path-list to s, + * passing indent through to each path. + */ +u8 * +fib_path_list_adjs_format (fib_node_index_t path_list_index, + u32 indent, + u8 * s) +{ + fib_path_list_t *path_list; + u32 i; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach_index (i, path_list->fpl_paths) + { + s = fib_path_adj_format(path_list->fpl_paths[i], + indent, s); + } + + return (s); +} + + +u8 * +fib_path_list_format (fib_node_index_t path_list_index, + u8 * s) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (format(s, "%U", 
format_fib_path_list, path_list)); +} +/* + * fib_path_list_hash + * + * Hash a path-list: seed with the number of paths, then mix in the hash + * of each path in vector order. + */ +static uword +fib_path_list_hash (fib_path_list_t *path_list) +{ + uword old_path_list_hash, new_path_list_hash, path_hash; + fib_node_index_t *path_index; + + ASSERT(path_list); + + new_path_list_hash = old_path_list_hash = vec_len(path_list->fpl_paths); + + vec_foreach (path_index, path_list->fpl_paths) + { + path_hash = fib_path_hash(*path_index); +#if uword_bits == 64 + hash_mix64(path_hash, old_path_list_hash, new_path_list_hash); +#else + hash_mix32(path_hash, old_path_list_hash, new_path_list_hash); +#endif + } + + return (new_path_list_hash); +} +/* + * DB hash keys are either a path-list index or a path-list pointer, + * told apart by the low bit. An index is stored as 2*index+1, so its + * low bit is set; a key with the low bit clear is taken to be a + * fib_path_list_t pointer. + */ +always_inline uword +fib_path_list_db_hash_key_from_index (uword index) +{ + return 1 + 2*index; +} + +always_inline uword +fib_path_list_db_hash_key_is_index (uword key) +{ + return key & 1; +} + +always_inline uword +fib_path_list_db_hash_key_2_index (uword key) +{ + ASSERT (fib_path_list_db_hash_key_is_index (key)); + return key / 2; +} +/* + * fib_path_list_db_get_from_hash_key + * + * Turn a DB hash key of either form back into a path-list pointer. + */ +static fib_path_list_t* +fib_path_list_db_get_from_hash_key (uword key) +{ + fib_path_list_t *path_list; + + if (fib_path_list_db_hash_key_is_index (key)) + { + fib_node_index_t path_list_index; + + path_list_index = fib_path_list_db_hash_key_2_index(key); + path_list = fib_path_list_get(path_list_index); + } + else + { + path_list = uword_to_pointer (key, fib_path_list_t *); + } + + return (path_list); +} + +static uword +fib_path_list_db_hash_key_sum (hash_t * h, + uword key) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_db_get_from_hash_key(key); + + return (fib_path_list_hash(path_list)); +} +/* + * fib_path_list_db_hash_key_equal + * + * NOTE(review): two keys are treated as equal when the hashes of their + * path-lists are equal; the paths themselves are not compared here, so + * a hash collision between different path-lists would look like a + * match - confirm that this is acceptable. + */ +static uword +fib_path_list_db_hash_key_equal (hash_t * h, + uword key1, + uword key2) +{ + fib_path_list_t *path_list1, *path_list2; + + path_list1 = fib_path_list_db_get_from_hash_key(key1); + path_list2 = fib_path_list_db_get_from_hash_key(key2); + + return (fib_path_list_hash(path_list1) == + fib_path_list_hash(path_list2)); +} + +static fib_node_index_t +fib_path_list_db_find (fib_path_list_t *path_list) +{ + uword *p; 
+ + p = hash_get(fib_path_list_db, path_list); + + if (NULL != p) + { + return p[0]; + } + + return (FIB_NODE_INDEX_INVALID); +} + +static void +fib_path_list_db_insert (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(FIB_NODE_INDEX_INVALID == fib_path_list_db_find(path_list)); + + hash_set (fib_path_list_db, + fib_path_list_db_hash_key_from_index(path_list_index), + path_list_index); + + FIB_PATH_LIST_DBG(path_list, "DB-inserted"); +} + +static void +fib_path_list_db_remove (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + ASSERT(FIB_NODE_INDEX_INVALID != fib_path_list_db_find(path_list)); + + hash_unset(fib_path_list_db, + fib_path_list_db_hash_key_from_index(path_list_index)); + + FIB_PATH_LIST_DBG(path_list, "DB-removed"); +} + +static void +fib_path_list_destroy (fib_path_list_t *path_list) +{ + fib_node_index_t *path_index; + + FIB_PATH_LIST_DBG(path_list, "destroy"); + + vec_foreach (path_index, path_list->fpl_paths) + { + fib_path_destroy(*path_index); + } + + vec_free(path_list->fpl_paths); + fib_urpf_list_unlock(path_list->fpl_urpf); + + fib_node_deinit(&path_list->fpl_node); + pool_put(fib_path_list_pool, path_list); +} + +static void +fib_path_list_last_lock_gone (fib_node_t *node) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_from_fib_node(node); + + FIB_PATH_LIST_DBG(path_list, "last-lock"); + + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_path_list_db_remove(fib_path_list_get_index(path_list)); + } + fib_path_list_destroy(path_list); +} + +/* + * fib_path_mk_lb + * + * update the multipath adj this path-list will contribute to its + * children's forwarding. 
+ */ +static void +fib_path_list_mk_lb (fib_path_list_t *path_list, + fib_forward_chain_type_t fct, + dpo_id_t *dpo) +{ + load_balance_path_t *hash_key; + fib_node_index_t *path_index; + + hash_key = NULL; + + if (!dpo_id_is_valid(dpo)) + { + /* + * first time create + */ + dpo_set(dpo, + DPO_LOAD_BALANCE, + fib_forw_chain_type_to_dpo_proto(fct), + load_balance_create(0, + fib_forw_chain_type_to_dpo_proto(fct), + 0 /* FIXME FLOW HASH */)); + } + + /* + * We gather the DPOs from resolved paths. + */ + vec_foreach (path_index, path_list->fpl_paths) + { + hash_key = fib_path_append_nh_for_multipath_hash( + *path_index, + fct, + hash_key); + } + + /* + * Path-list load-balances, which if used, would be shared and hence + * never need a load-balance map. + */ + load_balance_multipath_update(dpo, hash_key, LOAD_BALANCE_FLAG_NONE); + + FIB_PATH_LIST_DBG(path_list, "mk lb: %d", dpo->dpoi_index); + + vec_free(hash_key); +} + +/** + * @brief [re]build the path list's uRPF list + */ +static void +fib_path_list_mk_urpf (fib_path_list_t *path_list) +{ + fib_node_index_t *path_index; + + /* + * ditch the old one. by iterating through all paths we are going + * to re-find all the adjs that were in the old one anyway. If we + * keep the old one, then the |sort|uniq requires more work. + * All users of the RPF list have their own lock, so we can release + * immediately. + */ + fib_urpf_list_unlock(path_list->fpl_urpf); + path_list->fpl_urpf = fib_urpf_list_alloc_and_lock(); + + vec_foreach (path_index, path_list->fpl_paths) + { + fib_path_contribute_urpf(*path_index, path_list->fpl_urpf); + } + + fib_urpf_list_bake(path_list->fpl_urpf); +} + +/** + * @brief Contribute (add) this path list's uRPF list. This allows the child + * to construct an aggregate list. 
+ */ +void +fib_path_list_contribute_urpf (fib_node_index_t path_list_index, + index_t urpf) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + fib_urpf_list_combine(urpf, path_list->fpl_urpf); +} + +/** + * @brief Return the the child the RPF list pre-built for this path list + */ +index_t +fib_path_list_get_urpf (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (path_list->fpl_urpf); +} + +/* + * fib_path_list_back_walk + * + * Called from one of this path-list's paths to progate + * a back walk + */ +void +fib_path_list_back_walk (fib_node_index_t path_list_index, + fib_node_back_walk_ctx_t *ctx) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + fib_path_list_mk_urpf(path_list); + + /* + * propagate the backwalk further + */ + if (32 >= fib_node_list_get_size(path_list->fpl_node.fn_children)) + { + /* + * only a few children. continue the walk synchronously + */ + fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, ctx); + } + else + { + /* + * many children. schedule a async walk + */ + fib_walk_async(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + FIB_WALK_PRIORITY_LOW, + ctx); + } +} + +/* + * fib_path_list_back_walk_notify + * + * A back walk has reach this path-list. + */ +static fib_node_back_walk_rc_t +fib_path_list_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + /* + * the path-list is not a direct child of any other node type + * paths, which do not change thier to-list-mapping, save the + * list they are a member of, and invoke the BW function directly. 
+ */ + ASSERT(0); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/* + * Display the path-list memory usage + */ +static void +fib_path_list_memory_show (void) +{ + fib_show_memory_usage("Path-list", + pool_elts(fib_path_list_pool), + pool_len(fib_path_list_pool), + sizeof(fib_path_list_t)); + fib_urpf_list_show_mem(); +} + +/* + * The FIB path-list's graph node virtual function table + */ +static const fib_node_vft_t fib_path_list_vft = { + .fnv_get = fib_path_list_get_node, + .fnv_last_lock = fib_path_list_last_lock_gone, + .fnv_back_walk = fib_path_list_back_walk_notify, + .fnv_mem_show = fib_path_list_memory_show, +}; + +static fib_path_list_t * +fib_path_list_alloc (fib_node_index_t *path_list_index) +{ + fib_path_list_t *path_list; + + pool_get(fib_path_list_pool, path_list); + memset(path_list, 0, sizeof(*path_list)); + + fib_node_init(&path_list->fpl_node, + FIB_NODE_TYPE_PATH_LIST); + path_list->fpl_urpf = INDEX_INVALID; + + if (NULL != path_list_index) + { + *path_list_index = fib_path_list_get_index(path_list); + } + + FIB_PATH_LIST_DBG(path_list, "alloc"); + + return (path_list); +} + +static fib_path_list_t * +fib_path_list_resolve (fib_path_list_t *path_list) +{ + fib_node_index_t *path_index, *paths, path_list_index; + + ASSERT(!(path_list->fpl_flags & FIB_PATH_LIST_FLAG_RESOLVED)); + + /* + * resolving a path-list is a recursive action. this means more path + * lists can be created during this call, and hence this path-list + * can be realloc'd. so we work with copies. + * this function is called only once per-path list, so its no great overhead. 
+ */ + path_list_index = fib_path_list_get_index(path_list); + paths = vec_dup(path_list->fpl_paths); + + vec_foreach (path_index, paths) + { + fib_path_resolve(*path_index); + } + + vec_free(paths); + path_list = fib_path_list_get(path_list_index); + + FIB_PATH_LIST_DBG(path_list, "resovled"); + fib_path_list_mk_urpf(path_list); + + return (path_list); +} + +u32 +fib_path_list_get_resolving_interface (fib_node_index_t path_list_index) +{ + fib_node_index_t *path_index; + fib_path_list_t *path_list; + u32 sw_if_index; + + path_list = fib_path_list_get(path_list_index); + + sw_if_index = ~0; + vec_foreach (path_index, path_list->fpl_paths) + { + sw_if_index = fib_path_get_resolving_interface(*path_index); + if (~0 != sw_if_index) + { + return (sw_if_index); + } + } + + return (sw_if_index); +} + +fib_protocol_t +fib_path_list_get_proto (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + /* + * we don't support a mix of path protocols, so we can return the proto + * of the first + */ + return (fib_path_get_proto(path_list->fpl_paths[0])); +} + +int +fib_path_list_is_looped (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + return (path_list->fpl_flags & FIB_PATH_LIST_FLAG_LOOPED); +} + +static fib_path_cfg_flags_t +fib_path_list_flags_2_path_flags (fib_path_list_flags_t plf) +{ + fib_path_cfg_flags_t pf = FIB_PATH_CFG_FLAG_NONE; + + if (plf & FIB_PATH_LIST_FLAG_LOCAL) + { + pf |= FIB_PATH_CFG_FLAG_LOCAL; + } + if (plf & FIB_PATH_LIST_FLAG_DROP) + { + pf |= FIB_PATH_CFG_FLAG_DROP; + } + if (plf & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + pf |= FIB_PATH_CFG_FLAG_EXCLUSIVE; + } + + return (pf); +} + +static fib_path_list_flags_t +fib_path_list_flags_fixup (fib_path_list_flags_t flags) +{ + /* + * we do no share drop nor exclusive path-lists + */ + if (flags & FIB_PATH_LIST_FLAG_DROP || + flags & FIB_PATH_LIST_FLAG_EXCLUSIVE) + { + 
flags &= ~FIB_PATH_LIST_FLAG_SHARED; + } + + return (flags); +} + +fib_node_index_t +fib_path_list_create (fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_list_index, old_path_list_index; + fib_path_list_t *path_list; + int i; + + flags = fib_path_list_flags_fixup(flags); + path_list = fib_path_list_alloc(&path_list_index); + path_list->fpl_flags = flags; + /* + * we'll assume for now all paths are the same next-hop protocol + */ + path_list->fpl_nh_proto = rpaths[0].frp_proto; + + vec_foreach_index(i, rpaths) + { + vec_add1(path_list->fpl_paths, + fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + &rpaths[i])); + } + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (flags & FIB_PATH_LIST_FLAG_SHARED) + { + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + old_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != old_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = old_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. 
+ */ + path_list = fib_path_list_resolve(path_list); + } + + return (path_list_index); +} + +fib_node_index_t +fib_path_list_create_special (fib_protocol_t nh_proto, + fib_path_list_flags_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t path_index, path_list_index; + fib_path_list_t *path_list; + + path_list = fib_path_list_alloc(&path_list_index); + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = nh_proto; + + path_index = + fib_path_create_special(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + dpo); + vec_add1(path_list->fpl_paths, path_index); + + /* + * we don't share path-lists. we can do PIC on them so why bother. + */ + path_list = fib_path_list_resolve(path_list); + + return (path_list_index); +} + +/* + * fib_path_list_copy_and_path_add + * + * Create a copy of a path-list and append one more path to it. + * The path-list returned could either have been newly created, or + * can be a shared path-list from the data-base. + */ +fib_node_index_t +fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index, + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_index, new_path_index, *orig_path_index; + fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t path_list_index; + fib_node_index_t pi; + + ASSERT(1 == vec_len(rpaths)); + + /* + * alloc the new list before we retrieve the old one, lest + * the alloc result in a realloc + */ + path_list = fib_path_list_alloc(&path_list_index); + + orig_path_list = fib_path_list_get(orig_path_list_index); + + FIB_PATH_LIST_DBG(orig_path_list, "copy-add"); + + flags = fib_path_list_flags_fixup(flags); + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths)); + pi = 0; + + new_path_index = fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + rpaths); + + 
vec_foreach (orig_path_index, orig_path_list->fpl_paths) + { + /* + * don't add duplicate paths + * In the unlikely event the path is a duplicate, then we'll + * find a matching path-list later and this one will be toast. + */ + if (0 != fib_path_cmp(new_path_index, *orig_path_index)) + { + path_index = fib_path_copy(*orig_path_index, path_list_index); + path_list->fpl_paths[pi++] = path_index; + } + else + { + _vec_len(path_list->fpl_paths) = vec_len(orig_path_list->fpl_paths); + } + } + + path_list->fpl_paths[pi] = new_path_index; + + /* + * we sort the paths since the key for the path-list is + * the description of the paths it contains. The paths need to + * be sorted else this description will differ. + */ + vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort); + + FIB_PATH_LIST_DBG(path_list, "path-added"); + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_node_index_t exist_path_list_index; + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = exist_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. + */ + path_list = fib_path_list_resolve(path_list); + } + + return (path_list_index); +} + +/* + * fib_path_list_copy_and_path_remove + * + * Copy the path-list excluding the path passed. + * If the path is the last one, then the index reurned will be invalid. + * i.e. 
the path-list is toast. + */ +fib_node_index_t +fib_path_list_copy_and_path_remove (fib_node_index_t orig_path_list_index, + fib_path_list_flags_t flags, + const fib_route_path_t *rpaths) +{ + fib_node_index_t path_index, *orig_path_index, path_list_index, tmp_path_index; + fib_path_list_t *path_list, *orig_path_list; + fib_node_index_t pi; + + ASSERT(1 == vec_len(rpaths)); + + path_list = fib_path_list_alloc(&path_list_index); + + flags = fib_path_list_flags_fixup(flags); + orig_path_list = fib_path_list_get(orig_path_list_index); + + FIB_PATH_LIST_DBG(orig_path_list, "copy-remove"); + + path_list->fpl_flags = flags; + path_list->fpl_nh_proto = orig_path_list->fpl_nh_proto; + /* + * allocate as many paths as we might need in one go, rather than + * using vec_add to do a few at a time. + */ + if (vec_len(orig_path_list->fpl_paths) > 1) + { + vec_validate(path_list->fpl_paths, vec_len(orig_path_list->fpl_paths) - 2); + } + pi = 0; + + /* + * create a representation of the path to be removed, so it + * can be used as a comparison object during the copy. + */ + tmp_path_index = fib_path_create(path_list_index, + path_list->fpl_nh_proto, + fib_path_list_flags_2_path_flags(flags), + rpaths); + + vec_foreach (orig_path_index, orig_path_list->fpl_paths) + { + if (0 != fib_path_cmp(tmp_path_index, *orig_path_index)) { + path_index = fib_path_copy(*orig_path_index, path_list_index); + if (pi < vec_len(path_list->fpl_paths)) + { + path_list->fpl_paths[pi++] = path_index; + } + else + { + /* + * this is the unlikely case that the path being + * removed does not match one in the path-list, so + * we end up with as many paths as we started with. + * the paths vector was sized above with the expectation + * that we would have 1 less. 
+ */ + vec_add1(path_list->fpl_paths, path_index); + } + } + } + + /* + * done with the temporary now + */ + fib_path_destroy(tmp_path_index); + + /* + * if there are no paths, then the new path-list is aborted + */ + if (0 == vec_len(path_list->fpl_paths)) { + FIB_PATH_LIST_DBG(path_list, "last-path-removed"); + + fib_path_list_destroy(path_list); + + path_list_index = FIB_NODE_INDEX_INVALID; + } else { + /* + * we sort the paths since the key for the path-list is + * the description of the paths it contains. The paths need to + * be sorted else this description will differ. + */ + vec_sort_with_function(path_list->fpl_paths, fib_path_cmp_for_sort); + + /* + * If a shared path list is requested, consult the DB for a match + */ + if (path_list->fpl_flags & FIB_PATH_LIST_FLAG_SHARED) + { + fib_node_index_t exist_path_list_index; + + /* + * check for a matching path-list in the DB. + * If we find one then we can return the existing one and destroy the + * new one just created. + */ + exist_path_list_index = fib_path_list_db_find(path_list); + if (FIB_NODE_INDEX_INVALID != exist_path_list_index) + { + fib_path_list_destroy(path_list); + + path_list_index = exist_path_list_index; + } + else + { + /* + * if there was not a matching path-list, then this + * new one will need inserting into the DB and resolving. + */ + fib_path_list_db_insert(path_list_index); + + path_list = fib_path_list_resolve(path_list); + } + } + else + { + /* + * no shared path list requested. resolve and use the one + * just created. 
+ */ + path_list = fib_path_list_resolve(path_list); + } + } + + return (path_list_index); +} + +/* + * fib_path_list_contribute_forwarding + * + * Return the index of a load-balance that user of this path-list should + * use for forwarding + */ +void +fib_path_list_contribute_forwarding (fib_node_index_t path_list_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + fib_path_list_mk_lb(path_list, type, dpo); +} + +/* + * fib_path_list_get_adj + * + * Return the index of a adjacency for the first path that user of this + * path-list should use for forwarding + */ +adj_index_t +fib_path_list_get_adj (fib_node_index_t path_list_index, + fib_forward_chain_type_t type) +{ + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + return (fib_path_get_adj(path_list->fpl_paths[0])); +} + +int +fib_path_list_recursive_loop_detect (fib_node_index_t path_list_index, + fib_node_index_t **entry_indicies) +{ + fib_node_index_t *path_index; + int is_looped, list_looped; + fib_path_list_t *path_list; + + list_looped = 0; + path_list = fib_path_list_get(path_list_index); + + vec_foreach (path_index, path_list->fpl_paths) + { + fib_node_index_t *copy, **copy_ptr; + + /* + * we need a copy of the nodes visited so that when we add entries + * we explore on the nth path and a looped is detected, those entries + * are not again searched for n+1 path and so finding a loop that does + * not exist. 
+ */ + copy = vec_dup(*entry_indicies); + copy_ptr = © + + is_looped = fib_path_recursive_loop_detect(*path_index, copy_ptr); + list_looped += is_looped; + } + + FIB_PATH_LIST_DBG(path_list, "loop-detect: eval:%d", eval); + + if (list_looped) + { + path_list->fpl_flags |= FIB_PATH_LIST_FLAG_LOOPED; + } + else + { + path_list->fpl_flags &= ~FIB_PATH_LIST_FLAG_LOOPED; + } + + return (list_looped); +} + +u32 +fib_path_list_child_add (fib_node_index_t path_list_index, + fib_node_type_t child_type, + fib_node_index_t child_index) +{ + return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + child_type, + child_index)); +} + +void +fib_path_list_child_remove (fib_node_index_t path_list_index, + u32 si) +{ + fib_node_child_remove(FIB_NODE_TYPE_PATH_LIST, + path_list_index, + si); +} + +void +fib_path_list_lock(fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + if (FIB_NODE_INDEX_INVALID != path_list_index) + { + path_list = fib_path_list_get(path_list_index); + + fib_node_lock(&path_list->fpl_node); + FIB_PATH_LIST_DBG(path_list, "lock"); + } +} + +void +fib_path_list_unlock (fib_node_index_t path_list_index) +{ + fib_path_list_t *path_list; + + if (FIB_NODE_INDEX_INVALID != path_list_index) + { + path_list = fib_path_list_get(path_list_index); + FIB_PATH_LIST_DBG(path_list, "unlock"); + + fib_node_unlock(&path_list->fpl_node); + } +} + +u32 +fib_path_list_pool_size (void) +{ + return (pool_elts(fib_path_list_pool)); +} + +u32 +fib_path_list_db_size (void) +{ + return (hash_elts(fib_path_list_db)); +} + +void +fib_path_list_walk (fib_node_index_t path_list_index, + fib_path_list_walk_fn_t func, + void *ctx) +{ + fib_node_index_t *path_index; + fib_path_list_t *path_list; + + path_list = fib_path_list_get(path_list_index); + + vec_foreach(path_index, path_list->fpl_paths) + { + if (!func(path_list_index, *path_index, ctx)) + break; + } +} + + +void +fib_path_list_module_init (void) +{ + fib_node_register_type (FIB_NODE_TYPE_PATH_LIST, 
&fib_path_list_vft); + + fib_path_list_db = hash_create2 (/* elts */ 0, + /* user */ 0, + /* value_bytes */ sizeof (fib_node_index_t), + fib_path_list_db_hash_key_sum, + fib_path_list_db_hash_key_equal, + /* format pair/arg */ + 0, 0); +} + +static clib_error_t * +show_fib_path_list_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_path_list_t *path_list; + fib_node_index_t pli; + + if (unformat (input, "%d", &pli)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(fib_path_list_pool, pli)) + { + path_list = fib_path_list_get(pli); + u8 *s = fib_path_list_format(pli, NULL); + s = format(s, "children:"); + s = fib_node_children_format(path_list->fpl_node.fn_children, s); + vlib_cli_output (vm, "%s", s); + vec_free(s); + } + else + { + vlib_cli_output (vm, "path list %d invalid", pli); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB Path Lists"); + pool_foreach(path_list, fib_path_list_pool, + ({ + vlib_cli_output (vm, "%U", format_fib_path_list, path_list); + })); + } + return (NULL); +} + +VLIB_CLI_COMMAND (show_fib_path_list, static) = { + .path = "show fib path-lists", + .function = show_fib_path_list_command, + .short_help = "show fib path-lists", +}; diff --git a/src/vnet/fib/fib_path_list.h b/src/vnet/fib/fib_path_list.h new file mode 100644 index 00000000000..8bc1b20b6bf --- /dev/null +++ b/src/vnet/fib/fib_path_list.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_PATH_LIST_H__ +#define __FIB_PATH_LIST_H__ + +#include <vlib/vlib.h> +#include <vnet/adj/adj.h> + +#include "fib_node.h" +#include "fib_path.h" + +/** + * Enumeration of path-list flags. + */ +typedef enum fib_path_list_attribute_t_ { + /** + * Marker. Add new flags after this one. + */ + FIB_PATH_LIST_ATTRIBUTE_FIRST = 0, + /** + * This path list is shareable. Shareable path-lists + * are inserted into the path-list data-base. + * All path-list are inherently shareable, the reason we share some and + * not others is to limit the size of the path-list database. This DB must + * be searched for each route update. + */ + FIB_PATH_LIST_ATTRIBUTE_SHARED = FIB_PATH_LIST_ATTRIBUTE_FIRST, + /** + * explicit drop path-list. Used when the entry source needs to + * force a drop, despite the fact the path info is present. + */ + FIB_PATH_LIST_ATTRIBUTE_DROP, + /** + * explicit local path-list. + */ + FIB_PATH_LIST_ATTRIBUTE_LOCAL, + /** + * exclusive path-list. Exclusive means the path will resolve via the + * exclusive (user provided) adj. + */ + FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE, + /** + * resolved path-list + */ + FIB_PATH_LIST_ATTRIBUTE_RESOLVED, + /** + * looped path-list. one path looped implies the whole list is + */ + FIB_PATH_LIST_ATTRIBUTE_LOOPED, + /** + * Marher. Add new flags before this one, and then update it. 
+ */ + FIB_PATH_LIST_ATTRIBUTE_LAST = FIB_PATH_LIST_ATTRIBUTE_LOOPED, +} fib_path_list_attribute_t; + +typedef enum fib_path_list_flags_t_ { + FIB_PATH_LIST_FLAG_NONE = 0, + FIB_PATH_LIST_FLAG_SHARED = (1 << FIB_PATH_LIST_ATTRIBUTE_SHARED), + FIB_PATH_LIST_FLAG_DROP = (1 << FIB_PATH_LIST_ATTRIBUTE_DROP), + FIB_PATH_LIST_FLAG_LOCAL = (1 << FIB_PATH_LIST_ATTRIBUTE_LOCAL), + FIB_PATH_LIST_FLAG_EXCLUSIVE = (1 << FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE), + FIB_PATH_LIST_FLAG_RESOLVED = (1 << FIB_PATH_LIST_ATTRIBUTE_RESOLVED), + FIB_PATH_LIST_FLAG_LOOPED = (1 << FIB_PATH_LIST_ATTRIBUTE_LOOPED), +} fib_path_list_flags_t; + +#define FIB_PATH_LIST_ATTRIBUTES { \ + [FIB_PATH_LIST_ATTRIBUTE_SHARED] = "shared", \ + [FIB_PATH_LIST_ATTRIBUTE_RESOLVED] = "resolved", \ + [FIB_PATH_LIST_ATTRIBUTE_DROP] = "drop", \ + [FIB_PATH_LIST_ATTRIBUTE_EXCLUSIVE] = "exclusive", \ + [FIB_PATH_LIST_ATTRIBUTE_LOCAL] = "local", \ + [FIB_PATH_LIST_ATTRIBUTE_LOOPED] = "looped", \ +} + +#define FOR_EACH_PATH_LIST_ATTRIBUTE(_item) \ + for (_item = FIB_PATH_LIST_ATTRIBUTE_FIRST; \ + _item <= FIB_PATH_LIST_ATTRIBUTE_LAST; \ + _item++) + +extern fib_node_index_t fib_path_list_create(fib_path_list_flags_t flags, + const fib_route_path_t *paths); +extern fib_node_index_t fib_path_list_create_special(fib_protocol_t nh_proto, + fib_path_list_flags_t flags, + const dpo_id_t *dpo); + +extern fib_node_index_t fib_path_list_copy_and_path_add( + fib_node_index_t pl_index, + fib_path_list_flags_t flags, + const fib_route_path_t *path); +extern fib_node_index_t fib_path_list_copy_and_path_remove( + fib_node_index_t pl_index, + fib_path_list_flags_t flags, + const fib_route_path_t *path); +extern void fib_path_list_contribute_forwarding(fib_node_index_t path_list_index, + fib_forward_chain_type_t type, + dpo_id_t *dpo); +extern void fib_path_list_contribute_urpf(fib_node_index_t path_index, + index_t urpf); +extern index_t fib_path_list_get_urpf(fib_node_index_t path_list_index); +extern index_t 
fib_path_list_get_adj(fib_node_index_t path_list_index, + fib_forward_chain_type_t type); + +extern u32 fib_path_list_child_add(fib_node_index_t pl_index, + fib_node_type_t type, + fib_node_index_t child_index); +extern void fib_path_list_child_remove(fib_node_index_t pl_index, + fib_node_index_t sibling_index); +extern void fib_path_list_back_walk(fib_node_index_t pl_index, + fib_node_back_walk_ctx_t *ctx); +extern void fib_path_list_lock(fib_node_index_t pl_index); +extern void fib_path_list_unlock(fib_node_index_t pl_index); +extern int fib_path_list_recursive_loop_detect(fib_node_index_t path_list_index, + fib_node_index_t **entry_indicies); +extern u32 fib_path_list_get_resolving_interface(fib_node_index_t path_list_index); +extern int fib_path_list_is_looped(fib_node_index_t path_list_index); +extern fib_protocol_t fib_path_list_get_proto(fib_node_index_t path_list_index); +extern u8 * fib_path_list_format(fib_node_index_t pl_index, + u8 * s); +extern u8 * fib_path_list_adjs_format(fib_node_index_t pl_index, + u32 indent, + u8 * s); +extern index_t fib_path_list_lb_map_add_or_lock(fib_node_index_t pl_index, + const fib_node_index_t *pis); +/** + * A callback function type for walking a path-list's paths + */ +typedef int (*fib_path_list_walk_fn_t)(fib_node_index_t pl_index, + fib_node_index_t path_index, + void *ctx); + +extern void fib_path_list_walk(fib_node_index_t pl_index, + fib_path_list_walk_fn_t func, + void *ctx); + +extern void fib_path_list_module_init(void); + +extern void fib_path_list_module_init(void); + +/* + * functions for testing. + */ +u32 fib_path_list_pool_size(void); +u32 fib_path_list_db_size(void); + +#endif diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c new file mode 100644 index 00000000000..76db42d0ec7 --- /dev/null +++ b/src/vnet/fib/fib_table.c @@ -0,0 +1,1104 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/dpo/drop_dpo.h> + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry_cover.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/mpls_fib.h> + +fib_table_t * +fib_table_get (fib_node_index_t index, + fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (pool_elt_at_index(ip4_main.fibs, index)); + case FIB_PROTOCOL_IP6: + return (pool_elt_at_index(ip6_main.fibs, index)); + case FIB_PROTOCOL_MPLS: + return (pool_elt_at_index(mpls_main.fibs, index)); + } + ASSERT(0); + return (NULL); +} + +static inline fib_node_index_t +fib_table_lookup_i (fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_lookup(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_lookup(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_lookup(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos)); + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +fib_table_lookup (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_lookup_i(fib_table_get(fib_index, prefix->fp_proto), prefix)); +} + +static inline fib_node_index_t +fib_table_lookup_exact_match_i (const 
fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_lookup_exact_match(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_lookup_exact_match(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_lookup(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos)); + } + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +fib_table_lookup_exact_match (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_lookup_exact_match_i(fib_table_get(fib_index, + prefix->fp_proto), + prefix)); +} + +static fib_node_index_t +fib_table_get_less_specific_i (fib_table_t *fib_table, + const fib_prefix_t *prefix) +{ + fib_prefix_t pfx; + + pfx = *prefix; + + if (FIB_PROTOCOL_MPLS == pfx.fp_proto) + { + return (FIB_NODE_INDEX_INVALID); + } + + /* + * in the absence of a tree structure for the table that allows for an O(1) + * parent get, a cheeky way to find the cover is to LPM for the prefix with + * mask-1. + * there should always be a cover, though it may be the default route. the + * default route's cover is the default route. 
+ */ + if (pfx.fp_len != 0) { + pfx.fp_len -= 1; + } + + return (fib_table_lookup_i(fib_table, &pfx)); +} + +fib_node_index_t +fib_table_get_less_specific (u32 fib_index, + const fib_prefix_t *prefix) +{ + return (fib_table_get_less_specific_i(fib_table_get(fib_index, + prefix->fp_proto), + prefix)); +} + +static void +fib_table_entry_remove (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + vlib_smp_unsafe_warning(); + + fib_table->ft_total_route_counts--; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_entry_remove(&fib_table->v4, + &prefix->fp_addr.ip4, + prefix->fp_len); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_entry_remove(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_entry_remove(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos); + break; + } + + fib_entry_unlock(fib_entry_index); +} + +static void +fib_table_post_insert_actions (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + fib_node_index_t fib_entry_cover_index; + + /* + * no cover relationships in the MPLS FIB + */ + if (FIB_PROTOCOL_MPLS == prefix->fp_proto) + return; + + /* + * find and inform the covering entry that a new more specific + * has been inserted beneath it + */ + fib_entry_cover_index = fib_table_get_less_specific_i(fib_table, prefix); + /* + * the indicies are the same when the default route is first added + */ + if (fib_entry_cover_index != fib_entry_index) + { + fib_entry_cover_change_notify(fib_entry_cover_index, + fib_entry_index); + } +} + +static void +fib_table_entry_insert (fib_table_t *fib_table, + const fib_prefix_t *prefix, + fib_node_index_t fib_entry_index) +{ + vlib_smp_unsafe_warning(); + + fib_entry_lock(fib_entry_index); + fib_table->ft_total_route_counts++; + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_entry_insert(&fib_table->v4, + 
&prefix->fp_addr.ip4, + prefix->fp_len, + fib_entry_index); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_entry_insert(fib_table->ft_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + fib_entry_index); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_entry_insert(&fib_table->mpls, + prefix->fp_label, + prefix->fp_eos, + fib_entry_index); + break; + } + + fib_table_post_insert_actions(fib_table, prefix, fib_entry_index); +} + +/** + * Dispatch a forwarding DPO update for the prefix to the protocol specific + * FIB. The function returns nothing, so each case calls and breaks rather + * than returning the callee's (void) expression. + */ +void +fib_table_fwding_dpo_update (u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo) +{ + vlib_smp_unsafe_warning(); + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_fwding_dpo_update(ip4_fib_get(fib_index), + &prefix->fp_addr.ip4, + prefix->fp_len, + dpo); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_fwding_dpo_update(fib_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + dpo); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_forwarding_table_update(mpls_fib_get(fib_index), + prefix->fp_label, + prefix->fp_eos, + dpo); + break; + } +} + +/** + * Dispatch a forwarding DPO removal for the prefix to the protocol specific + * FIB. For MPLS the dpo argument is not passed on; the label's forwarding + * entry is reset instead. 
+ */ +void +fib_table_fwding_dpo_remove (u32 fib_index, + const fib_prefix_t *prefix, + const dpo_id_t *dpo) +{ + vlib_smp_unsafe_warning(); + + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index), + &prefix->fp_addr.ip4, + prefix->fp_len, + dpo); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_fwding_dpo_remove(fib_index, + &prefix->fp_addr.ip6, + prefix->fp_len, + dpo); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_forwarding_table_reset(mpls_fib_get(fib_index), + prefix->fp_label, + prefix->fp_eos); + break; + } +} + + +/** + * Add a special entry for the prefix that links to the given DPO. If the + * prefix is not in the table an entry is created and inserted; otherwise + * the source is added to the existing entry. + */ +fib_node_index_t +fib_table_entry_special_dpo_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if 
(FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create_special(fib_index, prefix, + source, flags, + dpo); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_special_add(fib_entry_index, source, flags, dpo); + + /* + * only count the route for this source if this call changed + * whether the entry is sourced by it + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + + return (fib_entry_index); +} + +/** + * Update a special entry's DPO. If the prefix is not in the table the entry + * is created; if the entry exists but is not yet sourced by 'source' this + * acts as an add, otherwise the existing source is updated. 
+ */ +fib_node_index_t +fib_table_entry_special_dpo_update (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create_special(fib_index, prefix, + source, flags, + dpo); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + if (was_sourced) + fib_entry_special_update(fib_entry_index, source, flags, dpo); + else + fib_entry_special_add(fib_entry_index, source, flags, dpo); + + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + return (fib_entry_index); +} + +/** + * Add a special entry that links to an adjacency. The adjacency is wrapped + * in a temporary DPO; ADJ_INDEX_INVALID selects the drop DPO for the + * prefix's protocol instead. + */ +fib_node_index_t +fib_table_entry_special_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + adj_index_t adj_index) +{ + fib_node_index_t fib_entry_index; + dpo_id_t tmp_dpo = DPO_INVALID; + + if (ADJ_INDEX_INVALID != adj_index) + { + /* + * NOTE(review): FIB_PROTOCOL_MAX is passed as the DPO's protocol, + * whereas the drop case converts with fib_proto_to_dpo() - confirm + * this is intentional. + */ + dpo_set(&tmp_dpo, + DPO_ADJACENCY, + FIB_PROTOCOL_MAX, + adj_index); + } + else + { + dpo_copy(&tmp_dpo, 
drop_dpo_get(fib_proto_to_dpo(prefix->fp_proto))); + } + + fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, source, + flags, &tmp_dpo); + + dpo_unlock(&tmp_dpo); + + return (fib_entry_index); +} + +void +fib_table_entry_special_remove (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still sourced? + * no => cover walk + */ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an entry that does not exist. i'll allow it. + */ + } + else + { + fib_entry_src_flag_t src_flag; + int was_sourced; + + /* + * don't nobody go nowhere: keep the entry locked until the + * unlock at the end of this branch + */ + fib_entry_lock(fib_entry_index); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + src_flag = fib_entry_special_remove(fib_entry_index, source); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); + } +} + +/** + * fib_table_route_path_fixup + * + * Convert attached hosts to attached next-hops. + * + * This special case is required because an attached path will link to a + * glean, and the FIB entry will have the interface or API/CLI source. 
When + * the ARP/ND process completes then that source (which will provide a + * complete adjacency) will be lower priority and so the FIB entry will + * remain linked to a glean and traffic will never reach the hosts. For + * an ATTACHED_HOST path we can link the path directly to the [incomplete] + * adjacency. + */ +static void +fib_table_route_path_fixup (const fib_prefix_t *prefix, + fib_route_path_t *path) +{ + /* + * host prefix, no next-hop address, but an interface was given: + * use the prefix's own address as the path's next-hop + */ + if (fib_prefix_is_host(prefix) && + ip46_address_is_zero(&path->frp_addr) && + path->frp_sw_if_index != ~0) + { + path->frp_addr = prefix->fp_addr; + } +} + +/** + * Add a single path to the prefix's entry. The arguments are packed into a + * one-element path vector and handed to fib_table_entry_path_add2. A NULL + * next_hop is treated as the zero address. + */ +fib_node_index_t +fib_table_entry_path_add (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t *next_hop_labels, + fib_route_path_flags_t path_flags) +{ + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? 
zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + .frp_label_stack = next_hop_labels, + }; + fib_node_index_t fib_entry_index; + fib_route_path_t *paths = NULL; + + vec_add1(paths, path); + + fib_entry_index = fib_table_entry_path_add2(fib_index, prefix, + source, flags, paths); + + vec_free(paths); + return (fib_entry_index); +} + +/** + * Add a vector of paths to the prefix's entry, creating and inserting the + * entry if the prefix is not yet in the table. Each path in rpath may be + * modified by the attached-host fixup. + */ +fib_node_index_t +fib_table_entry_path_add2 (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_route_path_t *rpath) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + u32 ii; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + for (ii = 0; ii < vec_len(rpath); ii++) + { + fib_table_route_path_fixup(prefix, &rpath[ii]); + } + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create(fib_index, prefix, + source, flags, + rpath); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_path_add(fib_entry_index, source, flags, rpath); + + /* + * only count the route for this source if this call changed + * whether the entry is sourced by it + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + return (fib_entry_index); +} + +void +fib_table_entry_path_remove2 (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_route_path_t *rpath) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still 
sourced? 
+ * no => cover walk + */ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + u32 ii; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + for (ii = 0; ii < vec_len(rpath); ii++) + { + fib_table_route_path_fixup(prefix, &rpath[ii]); + } + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an entry that does not exist. i'll allow it. + */ + } + else + { + fib_entry_src_flag_t src_flag; + int was_sourced; + + /* + * don't nobody go nowhere: keep the entry locked until the + * unlock at the end of this branch + */ + fib_entry_lock(fib_entry_index); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + src_flag = fib_entry_path_remove(fib_entry_index, source, rpath); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); + } +} + +void +fib_table_entry_path_remove (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + fib_route_path_flags_t path_flags) +{ + /* + * 1 is it present + * yes => remove source + * 2 - is it still sourced? + * no => cover walk + */ + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? 
zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + }; + fib_route_path_t *paths = NULL; + + /* + * the path is fixed up here and again, per element, inside + * fib_table_entry_path_remove2 + */ + fib_table_route_path_fixup(prefix, &path); + vec_add1(paths, path); + + fib_table_entry_path_remove2(fib_index, prefix, source, paths); + + vec_free(paths); +} + +/** + * Adapter that gives fib_route_path_cmp the (void*, void*) signature + * expected when sorting a path vector with vec_sort_with_function. + */ +static int +fib_route_path_cmp_for_sort (void * v1, + void * v2) +{ + return (fib_route_path_cmp(v1, v2)); +} + +/** + * Replace the source's paths on the prefix's entry with the given vector, + * creating and inserting the entry if the prefix is not yet in the table. + * The paths vector is fixed up and sorted in place. + */ +fib_node_index_t +fib_table_entry_update (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_route_path_t *paths) +{ + fib_node_index_t fib_entry_index; + fib_table_t *fib_table; + u32 ii; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + fib_entry_index = fib_table_lookup_exact_match_i(fib_table, prefix); + + for (ii = 0; ii < vec_len(paths); ii++) + { + fib_table_route_path_fixup(prefix, &paths[ii]); + } + /* + * sort the paths provided by the control plane. this means + * the paths and the extension on the entry will be sorted. 
+ */ + vec_sort_with_function(paths, fib_route_path_cmp_for_sort); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + fib_entry_index = fib_entry_create(fib_index, prefix, + source, flags, + paths); + + fib_table_entry_insert(fib_table, prefix, fib_entry_index); + fib_table->ft_src_route_counts[source]++; + } + else + { + int was_sourced; + + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + fib_entry_update(fib_entry_index, source, flags, paths); + + /* + * only count the route for this source if this call changed + * whether the entry is sourced by it + */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]++; + } + } + + return (fib_entry_index); +} + +/** + * Update the prefix's entry to have exactly one path. The arguments are + * packed into a one-element path vector and handed to + * fib_table_entry_update. A NULL next_hop is treated as the zero address. + */ +fib_node_index_t +fib_table_entry_update_one_path (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t *next_hop_labels, + fib_route_path_flags_t path_flags) +{ + fib_node_index_t fib_entry_index; + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop? 
zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_flags = path_flags, + .frp_label_stack = next_hop_labels, + }; + fib_route_path_t *paths = NULL; + + fib_table_route_path_fixup(prefix, &path); + vec_add1(paths, path); + + fib_entry_index = + fib_table_entry_update(fib_index, prefix, source, flags, paths); + + vec_free(paths); + + return (fib_entry_index); +} + +/** + * Remove the source from the entry. If no source remains the entry is + * removed from the table and the entries it covered are notified. The + * per-source route count is decremented when the entry stops being sourced + * by 'source'. + */ +static void +fib_table_entry_delete_i (u32 fib_index, + fib_node_index_t fib_entry_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + fib_entry_src_flag_t src_flag; + fib_table_t *fib_table; + int was_sourced; + + fib_table = fib_table_get(fib_index, prefix->fp_proto); + was_sourced = fib_entry_is_sourced(fib_entry_index, source); + + /* + * don't nobody go nowhere: keep the entry locked until the + * unlock at the end of this function + */ + fib_entry_lock(fib_entry_index); + + src_flag = fib_entry_delete(fib_entry_index, source); + + if (!(FIB_ENTRY_SRC_FLAG_ADDED & src_flag)) + { + /* + * last source gone. remove from the table + */ + fib_table_entry_remove(fib_table, prefix, fib_entry_index); + + /* + * now the entry is no longer in the table, we can + * inform the entries that it covers to re-calculate their cover + */ + fib_entry_cover_change_notify(fib_entry_index, + FIB_NODE_INDEX_INVALID); + } + /* + * else + * still has sources, leave it be. 
+ */ + if (was_sourced != fib_entry_is_sourced(fib_entry_index, source)) + { + fib_table->ft_src_route_counts[source]--; + } + + fib_entry_unlock(fib_entry_index); +} + +void +fib_table_entry_delete (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source) +{ + fib_node_index_t fib_entry_index; + + fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + { + /* + * removing an entry that does not exist. + * i'll allow it, but i won't like it. 
+ */ + clib_warning("%U not in FIB", format_fib_prefix, prefix); + } + else + { + fib_table_entry_delete_i(fib_index, fib_entry_index, prefix, source); + } +} + +/** + * Delete the source from the entry identified by index; the prefix and FIB + * index are read back from the entry itself. + */ +void +fib_table_entry_delete_index (fib_node_index_t fib_entry_index, + fib_source_t source) +{ + fib_prefix_t prefix; + + fib_entry_get_prefix(fib_entry_index, &prefix); + + fib_table_entry_delete_i(fib_entry_get_fib_index(fib_entry_index), + fib_entry_index, &prefix, source); +} + +/** + * Attach an MPLS local label to the prefix's entry. The entry is sourced + * by FIB_SOURCE_MPLS only if it is not already, then the label is stored + * as that source's data. + */ +fib_node_index_t +fib_table_entry_local_label_add (u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label) +{ + fib_node_index_t fib_entry_index; + + fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index || + !fib_entry_is_sourced(fib_entry_index, FIB_SOURCE_MPLS)) + { + /* + * only source the prefix once. this allows the label change + * operation to work + * NOTE(review): a NULL DPO is passed to the special add - confirm + * the MPLS source accepts that. + */ + fib_entry_index = fib_table_entry_special_dpo_add(fib_index, prefix, + FIB_SOURCE_MPLS, + FIB_ENTRY_FLAG_NONE, + NULL); + } + + fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &label); + + return (fib_entry_index); +} + +/** + * Remove the MPLS local label from the prefix's entry. Nothing is done if + * the prefix is not in the table, has no FIB_SOURCE_MPLS data, or the + * stored label differs from the one given. 
+ */ +void +fib_table_entry_local_label_remove (u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label) +{ + fib_node_index_t fib_entry_index; + const void *data; + mpls_label_t pl; + + fib_entry_index = fib_table_lookup_exact_match(fib_index, prefix); + + if (FIB_NODE_INDEX_INVALID == fib_entry_index) + return; + + data = fib_entry_get_source_data(fib_entry_index, FIB_SOURCE_MPLS); + + if (NULL == data) + return; + + /* read through a const pointer; the source data is not ours to modify */ + pl = *(const mpls_label_t*)data; + + if (pl != label) + return; + + pl = MPLS_LABEL_INVALID; + + fib_entry_set_source_data(fib_entry_index, FIB_SOURCE_MPLS, &pl); + fib_table_entry_special_remove(fib_index, + prefix, + FIB_SOURCE_MPLS); +} + +u32 +fib_table_get_index_for_sw_if_index (fib_protocol_t proto, + u32 sw_if_index) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_get_index_for_sw_if_index(sw_if_index)); + case 
FIB_PROTOCOL_IP6: + return (ip6_fib_table_get_index_for_sw_if_index(sw_if_index)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_get_index_for_sw_if_index(sw_if_index)); + } + return (~0); +} + +/** + * Get the flow hash configuration of the table from the protocol specific + * FIB; 0 for an unknown protocol. + */ +flow_hash_config_t +fib_table_get_flow_hash_config (u32 fib_index, + fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_table_get_flow_hash_config(fib_index)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_table_get_flow_hash_config(fib_index)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_table_get_flow_hash_config(fib_index)); + } + return (0); +} + + +/** + * Get the table ID of the FIB bound to the interface, or ~0 if no table is + * returned for it. + */ +u32 +fib_table_get_table_id_for_sw_if_index (fib_protocol_t proto, + u32 sw_if_index) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_table_get_index_for_sw_if_index( + proto, sw_if_index), + proto); + + return ((NULL != fib_table ? fib_table->ft_table_id : ~0)); +} + +/** + * Map a table ID to a FIB index via the protocol specific FIB; ~0 for an + * unknown protocol. + */ +u32 +fib_table_find (fib_protocol_t proto, + u32 table_id) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_fib_index_from_table_id(table_id)); + case FIB_PROTOCOL_IP6: + return (ip6_fib_index_from_table_id(table_id)); + case FIB_PROTOCOL_MPLS: + return (mpls_fib_index_from_table_id(table_id)); + } + return (~0); +} + +/** + * Find the table for the table ID, creating it if needed, and take a lock + * on it. The table is given a "<proto>-VRF:<table_id>" description the + * first time only; a table that already has one keeps it, so repeated calls + * do not leak the previous description vector. 
+ */ +u32 +fib_table_find_or_create_and_lock (fib_protocol_t proto, + u32 table_id) +{ + fib_table_t *fib_table; + fib_node_index_t fi; + + switch (proto) + { + case FIB_PROTOCOL_IP4: + fi = ip4_fib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_IP6: + fi = ip6_fib_table_find_or_create_and_lock(table_id); + break; + case FIB_PROTOCOL_MPLS: + fi = mpls_fib_table_find_or_create_and_lock(table_id); + break; + default: + return (~0); + } + + fib_table = fib_table_get(fi, proto); + + if (NULL == fib_table->ft_desc) + { + fib_table->ft_desc = format(NULL, "%U-VRF:%d", + format_fib_protocol, proto, + table_id); + } + + return (fi); +} + +u32 +fib_table_create_and_lock (fib_protocol_t proto, + const char *const fmt, + ...) 
+{ + fib_table_t *fib_table; + fib_node_index_t fi; + va_list ap; + + va_start(ap, fmt); + + switch (proto) + { + case FIB_PROTOCOL_IP4: + fi = ip4_fib_table_create_and_lock(); + break; + case FIB_PROTOCOL_IP6: + fi = ip6_fib_table_create_and_lock(); + break; + case FIB_PROTOCOL_MPLS: + fi = mpls_fib_table_create_and_lock(); + break; + default: + /* every va_start needs a matching va_end before returning */ + va_end(ap); + return (~0); + } + + fib_table = fib_table_get(fi, proto); + + fib_table->ft_desc = va_format(fib_table->ft_desc, fmt, &ap); + + va_end(ap); + return (fi); +} + +/** + * Free the table's description and destroy the protocol specific FIB. + */ +static void +fib_table_destroy (fib_table_t *fib_table) +{ + vec_free(fib_table->ft_desc); + + switch (fib_table->ft_proto) + { + case FIB_PROTOCOL_IP4: + ip4_fib_table_destroy(&fib_table->v4); + break; + case FIB_PROTOCOL_IP6: + ip6_fib_table_destroy(fib_table->ft_index); + break; + case FIB_PROTOCOL_MPLS: + mpls_fib_table_destroy(&fib_table->mpls); + break; + } +} + +/** + * Drop one lock on the table; the table is destroyed when the lock count + * reaches zero. + */ +void +fib_table_unlock (u32 fib_index, + fib_protocol_t proto) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + fib_table->ft_locks--; + + if (0 == fib_table->ft_locks) + { + fib_table_destroy(fib_table); + } +} +/** + * Take one lock on the table. + */ +void +fib_table_lock (u32 fib_index, + fib_protocol_t proto) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + fib_table->ft_locks++; +} + +/** + * Get the number of routes in the table that are counted against the source. 
+ */ +u32 +fib_table_get_num_entries (u32 fib_index, + fib_protocol_t proto, + fib_source_t source) +{ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + + return (fib_table->ft_src_route_counts[source]); +} + +/** + * Format function for a table's description. Reads a fib_node_index_t and + * then the protocol (as an int) from the va_list. + */ +u8* +format_fib_table_name (u8* s, va_list ap) +{ + fib_node_index_t fib_index = va_arg(ap, fib_node_index_t); + fib_protocol_t proto = va_arg(ap, int); /* int promotion */ + fib_table_t *fib_table; + + fib_table = fib_table_get(fib_index, proto); + + s = format(s, "%v", fib_table->ft_desc); + + return (s); +} + +/** + * Not implemented: asserts unconditionally. + */ +void +fib_table_flush (u32 fib_index, + fib_protocol_t proto, + fib_source_t source) +{ + /* FIXME */ + ASSERT(0); +} diff --git a/src/vnet/fib/fib_table.h 
b/src/vnet/fib/fib_table.h new file mode 100644 index 00000000000..cfec516de1a --- /dev/null +++ b/src/vnet/fib/fib_table.h @@ -0,0 +1,732 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_TABLE_H__ +#define __FIB_TABLE_H__ + +#include <vnet/ip/ip.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/mpls/mpls.h> +#include <vnet/mpls/packet.h> + +/** + * @brief + * A protocol-independent FIB table + */ +typedef struct fib_table_t_ +{ + /** + * A union of the protocol specific FIBs that provide the + * underlying LPM mechanism. + * This element is first in the struct so that it is in the + * first cache line. + */ + union { + ip4_fib_t v4; + ip6_fib_t v6; + mpls_fib_t mpls; + }; + + /** + * Which protocol this table serves. Used to switch on the union above. + */ + fib_protocol_t ft_proto; + + /** + * Number of locks on the table. + */ + u16 ft_locks; + + /** + * Table ID (hash key) for this FIB. + */ + u32 ft_table_id; + + /** + * Index into FIB vector. 
+ */ + fib_node_index_t ft_index; + + /** + * flow hash configuration + */ + u32 ft_flow_hash_config; + + /** + * Per-source route counters + */ + u32 ft_src_route_counts[FIB_SOURCE_MAX]; + + /** + * Total route counters + */ + u32 ft_total_route_counts; + + /** + * Table description + */ + u8* ft_desc; +} fib_table_t; + +/** + * @brief + * Format the description/name of the table + */ +extern u8* format_fib_table_name(u8* s, va_list ap); + +/** + * @brief + * Perform a longest prefix match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the best match, which may be the default route + */ +extern fib_node_index_t fib_table_lookup(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Perform an exact match in the non-forwarding table + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the fib_entry_t for the exact match, or INVALID + * if there is no match. + */ +extern fib_node_index_t fib_table_lookup_exact_match(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Get the less specific (covering) prefix + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to lookup + * + * @return + * The index of the less specific fib_entry_t. + */ +extern fib_node_index_t fib_table_get_less_specific(u32 fib_index, + const fib_prefix_t *prefix); + +/** + * @brief + * Add a 'special' entry to the FIB that links to the adj passed + * A special entry is an entry that the FIB is not expected to resolve + * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup). + * Instead the client/source provides the adj to link to. + * This add is reference counted per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. 
+ * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param adj_index + * The adjacency to link to. + * + * @return + * the index of the fib_entry_t that is created (or exists already). + */ +extern fib_node_index_t fib_table_entry_special_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + adj_index_t adj_index); + +/** + * @brief + * Add a 'special' entry to the FIB that links to the DPO passed + * A special entry is an entry that the FIB is not expected to resolve + * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup). + * Instead the client/source provides the DPO to link to. + * This add is reference counted per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param dpo + * The DPO to link to. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + const dpo_id_t *dpo); + +/** + * @brief + * Update a 'special' entry to the FIB that links to the DPO passed + * A special entry is an entry that the FIB is not expected to resolve + * via the usual mechanisms (i.e. recursive or neighbour adj DB lookup). + * Instead the client/source provides the DPO to link to. + * Special entries are add/remove reference counted per-source. So n + * 'removes' are required for n 'adds', if the entry is no longer required. 
+ * An 'update' is an 'add' if no 'add' has already been called, otherwise an 'add' + * is therefore assumed to act on the reference instance of that add. + * + * @param fib_entry_index + * The index of the FIB entry to update + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param dpo + * The DPO to link to. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_special_dpo_update (u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t stype, + const dpo_id_t *dpo); + +/** + * @brief + * Remove a 'special' entry from the FIB. + * This add is reference counting per-source. So n 'removes' are required + * for n 'adds', if the entry is no longer required. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix to remove + * + * @param source + * The ID of the client/source adding the entry. + * + */ +extern void fib_table_entry_special_remove(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source); + +/** + * @brief + * Add one path to an entry (aka route) in the FIB. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descirptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @paran next_hop_proto + * The protocol of the next hop. This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param sw_if_index + * The index of the interface. 
+ * + * @param next_hop_fib_index + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param next_hop_label_stack + * The path's out-going label stack. NULL if there is none. + * + * @param pf + * Flags for the path + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_path_add(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t *next_hop_label_stack, + fib_route_path_flags_t pf); +/** + * @brief + * Add n paths to an entry (aka route) in the FIB. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descriptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param rpath + * A vector of paths. Not const since they may be modified. + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_path_add2(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_route_path_t *rpath); + +/** + * @brief + * Remove one path from an entry (aka route) in the FIB. If this is the entry's + * last path, then the entry will be removed, unless it has other sources. + * See the documentation for fib_route_path_t for more descriptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix of the entry to remove the path from + * + * @param source + * The ID of the client/source removing the path. 
+ * + * @param next_hop_proto + * The protocol of the next hop. This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param next_hop_sw_if_index + * The index of the interface. + * + * @param next_hop_fib_index + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param pf + * Flags for the path + */ +extern void fib_table_entry_path_remove(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + fib_route_path_flags_t pf); + +/** + * @brief + * Remove n paths from an entry (aka route) in the FIB. If this is the entry's + * last path, then the entry will be removed, unless it has other sources. + * See the documentation for fib_route_path_t for more descriptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix of the entry to remove the paths from + * + * @param source + * The ID of the client/source removing the paths. + * + * @param paths + * A vector of paths. + */ +extern void fib_table_entry_path_remove2(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_route_path_t *paths); + +/** + * @brief + * Update an entry to have a new set of paths. If the entry does not + * exist, it will be created. + * The difference between a 'path-add' and an update, is that path-add is + * an incremental addition of paths, whereas an update is a wholesale swap. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param paths + * A vector of paths. Not const since they may be modified. 
+ * + * @return + * the index of the fib_entry_t that is created (or existed already). 
+ */ +extern fib_node_index_t fib_table_entry_update(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_route_path_t *paths); + +/** + * @brief + * Update the entry to have just one path. If the entry does not + * exist, it will be created. + * See the documentation for fib_route_path_t for more descriptions of + * the path parameters. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to add + * + * @param source + * The ID of the client/source adding the entry. + * + * @param flags + * Flags for the entry. + * + * @param next_hop_proto + * The protocol of the next hop. This cannot be derived in the event that + * the next hop is all zeros. + * + * @param next_hop + * The address of the next-hop. + * + * @param next_hop_sw_if_index + * The index of the interface. + * + * @param next_hop_fib_index + * The fib index of the next-hop for recursive resolution + * + * @param next_hop_weight + * [un]equal cost path weight + * + * @param next_hop_label_stack + * The path's out-going label stack. NULL if there is none. + * + * @param pf + * Flags for the path + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_update_one_path(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source, + fib_entry_flag_t flags, + fib_protocol_t next_hop_proto, + const ip46_address_t *next_hop, + u32 next_hop_sw_if_index, + u32 next_hop_fib_index, + u32 next_hop_weight, + mpls_label_t *next_hop_label_stack, + fib_route_path_flags_t pf); + +/** + * @brief + * Add an MPLS local label for the prefix/route. If the entry does not + * exist, it will be created. In theory more than one local label can be + * added, but this is not yet supported. 
+ * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to which to add the label + * + * @param label + * The MPLS label to add + * + * @return + * the index of the fib_entry_t that is created (or existed already). + */ +extern fib_node_index_t fib_table_entry_local_label_add(u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label); +/** + * @brief + * Remove an MPLS local label for the prefix/route. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry from which to remove the label + * + * @param label + * The MPLS label to remove + */ +extern void fib_table_entry_local_label_remove(u32 fib_index, + const fib_prefix_t *prefix, + mpls_label_t label); + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param fib_index + * The index of the FIB + * + * @param prefix + * The prefix for the entry to remove + * + * @param source + * The ID of the client/source adding the entry. + */ +extern void fib_table_entry_delete(u32 fib_index, + const fib_prefix_t *prefix, + fib_source_t source); + +/** + * @brief + * Delete a FIB entry. If the entry has no more sources, then it is + * removed from the table. + * + * @param entry_index + * The index of the FIB entry + * + * @param source + * The ID of the client/source adding the entry.
+ */ +extern void fib_table_entry_delete_index(fib_node_index_t entry_index, + fib_source_t source); + +/** + * @brief + * Flush all entries from a table for the source + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the entries in the table + * + * @param source + * the source to flush + */ +extern void fib_table_flush(u32 fib_index, + fib_protocol_t proto, + fib_source_t source); + +/** + * @brief + * Get the index of the FIB bound to the interface + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param sw_if_index + * The interface index + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, + u32 sw_if_index); + +/** + * @brief + * Get the Table-ID of the FIB bound to the interface + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param sw_if_index + * The interface index + * + * @return table_id + * The Table-ID of the FIB + */ +extern u32 fib_table_get_table_id_for_sw_if_index(fib_protocol_t proto, + u32 sw_if_index); + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES NOT create the + * FIB if it does not exist. + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table_id + * The Table-ID + * + * @return fib_index + * The index of the FIB, which may be INVALID. + */ +extern u32 fib_table_find(fib_protocol_t proto, u32 table_id); + + +/** + * @brief + * Get the index of the FIB for a Table-ID. This DOES create the + * FIB if it does not exist. + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param table_id + * The Table-ID + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_find_or_create_and_lock(fib_protocol_t proto, + u32 table_id); + +/** + * @brief + * Create a new table with no table ID.
This means it does not get + * added to the hash-table and so can only be found by using the index returned. + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @param fmt + * A string to describe the table + * + * @return fib_index + * The index of the FIB + */ +extern u32 fib_table_create_and_lock(fib_protocol_t proto, + const char *const fmt, + ...); + +/** + * @brief + * Get the flow hash config used by the table + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @return The flow hash config + */ +extern flow_hash_config_t fib_table_get_flow_hash_config(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Release a reference counting lock on the table. When the last lock + * has gone, the FIB is deleted. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void fib_table_unlock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Take a reference counting lock on the table + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + */ +extern void fib_table_lock(u32 fib_index, + fib_protocol_t proto); + +/** + * @brief + * Return the number of entries in the FIB added by a given source. + * + * @param fib_index + * The index of the FIB + * + * @param proto + * The protocol of the FIB (and thus the entries therein) + * + * @return number of sourced entries.
+ */ +extern u32 fib_table_get_num_entries(u32 fib_index, + fib_protocol_t proto, + fib_source_t source); + +/** + * @brief + * Get a pointer to a FIB table + */ +extern fib_table_t *fib_table_get(fib_node_index_t index, + fib_protocol_t proto); + +#endif diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c new file mode 100644 index 00000000000..5083db26872 --- /dev/null +++ b/src/vnet/fib/fib_test.c @@ -0,0 +1,7112 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/mpls_fib.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/mpls_label_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/ip_null_dpo.h> + +#include <vnet/mpls/mpls.h> + +#include <vnet/fib/fib_path_list.h> +#include <vnet/fib/fib_entry_src.h> +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_node_list.h> +#include <vnet/fib/fib_urpf_list.h> + +#define FIB_TEST_I(_cond, _comment, _args...) \ +({ \ + int _evald = (_cond); \ + if (!(_evald)) { \ + fformat(stderr, "FAIL:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } else { \ + fformat(stderr, "PASS:%d: " _comment "\n", \ + __LINE__, ##_args); \ + } \ + _evald; \ +}) +#define FIB_TEST(_cond, _comment, _args...) 
\ +{ \ + if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ + return 1; \ + ASSERT(!("FAIL: " _comment)); \ + } \ +} + +/** + * A 'i'm not fussed is this is not efficient' store of test data + */ +typedef struct test_main_t_ { + /** + * HW if indicies + */ + u32 hw_if_indicies[4]; + /** + * HW interfaces + */ + vnet_hw_interface_t * hw[4]; + +} test_main_t; +static test_main_t test_main; + +/* fake ethernet device class, distinct from "fake-ethX" */ +static u8 * format_test_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "test-eth%d", dev_instance); +} + +static uword dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +static clib_error_t * +test_interface_admin_up_down (vnet_main_t * vnm, + u32 hw_if_index, + u32 flags) +{ + u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ? + VNET_HW_INTERFACE_FLAG_LINK_UP : 0; + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + return 0; +} + +VNET_DEVICE_CLASS (test_interface_device_class,static) = { + .name = "Test interface", + .format_device_name = format_test_interface_name, + .tx_function = dummy_interface_tx, + .admin_up_down_function = test_interface_admin_up_down, +}; + +static u8 *hw_address; + +static int +fib_test_mk_intf (u32 ninterfaces) +{ + clib_error_t * error = NULL; + test_main_t *tm = &test_main; + u8 byte; + u32 i; + + ASSERT(ninterfaces <= ARRAY_LEN(tm->hw_if_indicies)); + + for (i=0; i<6; i++) + { + byte = 0xd0+i; + vec_add1(hw_address, byte); + } + + for (i = 0; i < ninterfaces; i++) + { + hw_address[5] = i; + + error = ethernet_register_interface(vnet_get_main(), + test_interface_device_class.index, + i /* instance */, + hw_address, + &tm->hw_if_indicies[i], + /* flag change */ 0); + + FIB_TEST((NULL == error), "ADD interface %d", i); + + error = vnet_hw_interface_set_flags(vnet_get_main(), + 
tm->hw_if_indicies[i], + VNET_HW_INTERFACE_FLAG_LINK_UP); + tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + vec_validate (ip4_main.fib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + vec_validate (ip6_main.fib_index_by_sw_if_index, + tm->hw[i]->sw_if_index); + ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0; + + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[i]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "UP interface %d", i); + } + /* + * re-eval after the inevitable realloc + */ + for (i = 0; i < ninterfaces; i++) + { + tm->hw[i] = vnet_get_hw_interface(vnet_get_main(), + tm->hw_if_indicies[i]); + } + + return (0); +} + +#define FIB_TEST_REC_FORW(_rec_prefix, _via_prefix, _bucket) \ +{ \ + const dpo_id_t *_rec_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup_exact_match(fib_index, (_rec_prefix))); \ + const dpo_id_t *_via_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup(fib_index, (_via_prefix))); \ + FIB_TEST(!dpo_cmp(_via_dpo, \ + load_balance_get_bucket(_rec_dpo->dpoi_index, \ + _bucket)), \ + "%U is recursive via %U", \ + format_fib_prefix, (_rec_prefix), \ + format_fib_prefix, _via_prefix); \ +} + +#define FIB_TEST_LB_BUCKET_VIA_ADJ(_prefix, _bucket, _ai) \ +{ \ + const dpo_id_t *_dpo = fib_entry_contribute_ip_forwarding( \ + fib_table_lookup_exact_match(fib_index, (_prefix))); \ + const dpo_id_t *_dpo1 = \ + load_balance_get_bucket(_dpo->dpoi_index, _bucket); \ + FIB_TEST(DPO_ADJACENCY == _dpo1->dpoi_type, "type is %U", \ + format_dpo_type, _dpo1->dpoi_type); \ + FIB_TEST((_ai == _dpo1->dpoi_index), \ + "%U bucket %d resolves via %U", \ + format_fib_prefix, (_prefix), \ + _bucket, \ + format_dpo_id, _dpo1, 0); \ +} + +#define FIB_TEST_RPF(_cond, _comment, _args...) 
\ +{ \ + if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ + return (0); \ + } \ +} + +static int +fib_test_urpf_is_equal (fib_node_index_t fei, + fib_forward_chain_type_t fct, + u32 num, ...) +{ + dpo_id_t dpo = DPO_INVALID; + fib_urpf_list_t *urpf; + index_t ui; + va_list ap; + int ii; + + va_start(ap, num); + + fib_entry_contribute_forwarding(fei, fct, &dpo); + ui = load_balance_get_urpf(dpo.dpoi_index); + + urpf = fib_urpf_list_get(ui); + + FIB_TEST_RPF(num == vec_len(urpf->furpf_itfs), + "RPF:%U len %d == %d", + format_fib_urpf_list, ui, + num, vec_len(urpf->furpf_itfs)); + FIB_TEST_RPF(num == fib_urpf_check_size(ui), + "RPF:%U check-size %d == %d", + format_fib_urpf_list, ui, + num, vec_len(urpf->furpf_itfs)); + + for (ii = 0; ii < num; ii++) + { + adj_index_t ai = va_arg(ap, adj_index_t); + + FIB_TEST_RPF(ai == urpf->furpf_itfs[ii], + "RPF:%d item:%d - %d == %d", + ui, ii, ai, urpf->furpf_itfs[ii]); + FIB_TEST_RPF(fib_urpf_check(ui, ai), + "RPF:%d %d found", + ui, ai); + } + + dpo_reset(&dpo); + + va_end(ap); + + return (1); +} + +static u8* +fib_test_build_rewrite (u8 *eth_addr) +{ + u8* rewrite = NULL; + + vec_validate(rewrite, 13); + + memcpy(rewrite, eth_addr, 6); + memcpy(rewrite+6, eth_addr, 6); + + return (rewrite); +} + +typedef enum fib_test_lb_bucket_type_t_ { + FT_LB_LABEL_O_ADJ, + FT_LB_LABEL_STACK_O_ADJ, + FT_LB_LABEL_O_LB, + FT_LB_O_LB, + FT_LB_SPECIAL, + FT_LB_ADJ, +} fib_test_lb_bucket_type_t; + +typedef struct fib_test_lb_bucket_t_ { + fib_test_lb_bucket_type_t type; + + union + { + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + adj_index_t adj; + } label_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label_stack[8]; + u8 label_stack_size; + u8 ttl; + adj_index_t adj; + } label_stack_o_adj; + struct + { + mpls_eos_bit_t eos; + mpls_label_t label; + u8 ttl; + index_t lb; + } label_o_lb; + struct + { + index_t adj; + } adj; + struct + { + index_t lb; + } lb; + struct + { + index_t adj; + } special; + }; +} 
fib_test_lb_bucket_t; + +#define FIB_TEST_LB(_cond, _comment, _args...) \ +{ \ + if (!FIB_TEST_I(_cond, _comment, ##_args)) { \ + return (0); \ + } \ +} + +static int +fib_test_validate_lb_v (const load_balance_t *lb, + u16 n_buckets, + va_list ap) +{ + const dpo_id_t *dpo; + int bucket; + + FIB_TEST_LB((n_buckets == lb->lb_n_buckets), "n_buckets = %d", lb->lb_n_buckets); + + for (bucket = 0; bucket < n_buckets; bucket++) + { + const fib_test_lb_bucket_t *exp; + + exp = va_arg(ap, fib_test_lb_bucket_t*); + dpo = load_balance_get_bucket_i(lb, bucket); + + switch (exp->type) + { + case FT_LB_LABEL_STACK_O_ADJ: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + u32 ii; + + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + + FIB_TEST_LB(exp->label_stack_o_adj.label_stack_size == mld->mld_n_labels, + "label stack size", + mld->mld_n_labels); + + for (ii = 0; ii < mld->mld_n_labels; ii++) + { + hdr = clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl); + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_stack_o_adj.label_stack[ii]), + "bucket %d stacks on label %d", + bucket, + exp->label_stack_o_adj.label_stack[ii]); + + if (ii == mld->mld_n_labels-1) + { + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_adj.eos), + "bucket %d stacks on label %d %U!=%U", + bucket, + exp->label_stack_o_adj.label_stack[ii], + format_mpls_eos_bit, exp->label_o_adj.eos, + format_mpls_eos_bit, vnet_mpls_uc_get_s(hdr)); + } + else + { + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == MPLS_NON_EOS), + "bucket %d stacks on label %d %U", + bucket, + exp->label_stack_o_adj.label_stack[ii], + format_mpls_eos_bit, vnet_mpls_uc_get_s(hdr)); + } + } + + FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_stack_o_adj.adj == 
mld->mld_dpo.dpoi_index), + "bucket %d label stacks on adj %d", + bucket, + exp->label_stack_o_adj.adj); + } + break; + case FT_LB_LABEL_O_ADJ: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl); + + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_adj.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_adj.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_adj.eos), + "bucket %d stacks on label %d %U", + bucket, + exp->label_o_adj.label, + format_mpls_eos_bit, exp->label_o_adj.eos); + + FIB_TEST_LB((DPO_ADJACENCY_INCOMPLETE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_adj.adj == mld->mld_dpo.dpoi_index), + "bucket %d label stacks on adj %d", + bucket, + exp->label_o_adj.adj); + } + break; + case FT_LB_LABEL_O_LB: + { + const mpls_label_dpo_t *mld; + mpls_label_t hdr; + + FIB_TEST_LB((DPO_MPLS_LABEL == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + + mld = mpls_label_dpo_get(dpo->dpoi_index); + hdr = clib_net_to_host_u32(mld->mld_hdr[0].label_exp_s_ttl); + + FIB_TEST_LB(1 == mld->mld_n_labels, "label stack size", + mld->mld_n_labels); + FIB_TEST_LB((vnet_mpls_uc_get_label(hdr) == + exp->label_o_lb.label), + "bucket %d stacks on label %d", + bucket, + exp->label_o_lb.label); + + FIB_TEST_LB((vnet_mpls_uc_get_s(hdr) == + exp->label_o_lb.eos), + "bucket %d stacks on label %d %U", + bucket, + exp->label_o_lb.label, + format_mpls_eos_bit, exp->label_o_lb.eos); + + FIB_TEST_LB((DPO_LOAD_BALANCE == mld->mld_dpo.dpoi_type), + "bucket %d label stacks on %U", + bucket, + format_dpo_type, mld->mld_dpo.dpoi_type); + + FIB_TEST_LB((exp->label_o_lb.lb == 
mld->mld_dpo.dpoi_index), + "bucket %d label stacks on LB %d", + bucket, + exp->label_o_lb.lb); + } + break; + case FT_LB_ADJ: + FIB_TEST_I(((DPO_ADJACENCY == dpo->dpoi_type) || + (DPO_ADJACENCY_INCOMPLETE == dpo->dpoi_type)), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->adj.adj == dpo->dpoi_index), + "bucket %d stacks on adj %d", + bucket, + exp->adj.adj); + break; + case FT_LB_O_LB: + FIB_TEST_I((DPO_LOAD_BALANCE == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->lb.lb == dpo->dpoi_index), + "bucket %d stacks on lb %d", + bucket, + exp->lb.lb); + break; + case FT_LB_SPECIAL: + FIB_TEST_I((DPO_DROP == dpo->dpoi_type), + "bucket %d stacks on %U", + bucket, + format_dpo_type, dpo->dpoi_type); + FIB_TEST_LB((exp->special.adj == dpo->dpoi_index), + "bucket %d stacks on drop %d", + bucket, + exp->special.adj); + break; + } + } + return (!0); +} + +static int +fib_test_validate_entry (fib_node_index_t fei, + fib_forward_chain_type_t fct, + u16 n_buckets, + ...) 
+{ + dpo_id_t dpo = DPO_INVALID; + const load_balance_t *lb; + fib_prefix_t pfx; + index_t fw_lbi; + u32 fib_index; + va_list ap; + int res; + + va_start(ap, n_buckets); + + fib_entry_get_prefix(fei, &pfx); + fib_index = fib_entry_get_fib_index(fei); + fib_entry_contribute_forwarding(fei, fct, &dpo); + + FIB_TEST_LB((DPO_LOAD_BALANCE == dpo.dpoi_type), + "Entry links to %U", + format_dpo_type, dpo.dpoi_type); + lb = load_balance_get(dpo.dpoi_index); + + res = fib_test_validate_lb_v(lb, n_buckets, ap); + + /* + * ensure that the LB contributed by the entry is the + * same as the LB in the forwarding tables + */ + if (fct == fib_entry_get_default_chain_type(fib_entry_get(fei))) + { + switch (pfx.fp_proto) + { + case FIB_PROTOCOL_IP4: + fw_lbi = ip4_fib_forwarding_lookup(fib_index, &pfx.fp_addr.ip4); + break; + case FIB_PROTOCOL_IP6: + fw_lbi = ip6_fib_table_fwding_lookup(&ip6_main, fib_index, &pfx.fp_addr.ip6); + break; + case FIB_PROTOCOL_MPLS: + { + mpls_unicast_header_t hdr = { + .label_exp_s_ttl = 0, + }; + + vnet_mpls_uc_set_label(&hdr.label_exp_s_ttl, pfx.fp_label); + vnet_mpls_uc_set_s(&hdr.label_exp_s_ttl, pfx.fp_eos); + hdr.label_exp_s_ttl = clib_host_to_net_u32(hdr.label_exp_s_ttl); + + fw_lbi = mpls_fib_table_forwarding_lookup(fib_index, &hdr); + break; + } + default: + fw_lbi = 0; + } + FIB_TEST_LB((fw_lbi == dpo.dpoi_index), + "Contributed LB = FW LB: %U\n %U", + format_load_balance, fw_lbi, 0, + format_load_balance, dpo.dpoi_index, 0); + } + + dpo_reset(&dpo); + + va_end(ap); + + return (res); +} + +static int +fib_test_v4 (void) +{ + /* + * In the default table check for the presence and correct forwarding + * of the special entries + */ + fib_node_index_t dfrt, fei, ai, ai2, locked_ai, ai_01, ai_02, ai_03; + const dpo_id_t *dpo, *dpo1, *dpo2, *dpo_drop; + const ip_adjacency_t *adj; + const load_balance_t *lb; + test_main_t *tm; + u32 fib_index; + int ii; + + /* via 10.10.10.1 */ + ip46_address_t nh_10_10_10_1 = { + .ip4.as_u32 = 
clib_host_to_net_u32(0x0a0a0a01), + }; + /* via 10.10.10.2 */ + ip46_address_t nh_10_10_10_2 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }; + + tm = &test_main; + + /* Find or create FIB table 11 */ + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + + for (ii = 0; ii < 4; ii++) + { + ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index; + } + + fib_prefix_t pfx_0_0_0_0_s_0 = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + {0} + }, + }, + }; + + fib_prefix_t pfx = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + {0} + }, + }, + }; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP4); + + dfrt = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0); + FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + pfx.fp_len = 32; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all zeros route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all 0s route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xffffffff); + pfx.fp_len = 32; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all ones route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all 1s route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xe0000000); + pfx.fp_len = 8; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "all-mcast route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "all-mcast route is DROP"); + + pfx.fp_addr.ip4.as_u32 = clib_host_to_net_u32(0xf0000000); + pfx.fp_len = 8; + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "class-e route present"); + 
FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "class-e route is DROP"); + + /* + * at this stage there are 5 entries in the test FIB (plus 5 in the default), + * all of which are special sourced and so none of which share path-lists. + * There are also 6 entries, and 6 non-shared path-lists, in the v6 default + * table + */ +#define NBR (5+5+6) + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add interface routes. + * validate presence of /24 attached and /32 recieve. + * test for the presence of the receive address in the glean and local adj + */ + fib_prefix_t local_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on attached interface"); + + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &adj->sub_type.glean.receive_addr)), + "attached interface adj is receive ok"); + + local_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + 
(FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, // weight + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &local_pfx); + FIB_TEST(((FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on local interface"); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0), + "RPF list for local length 0"); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + receive_dpo_t *rd = receive_dpo_get(dpo->dpoi_index); + + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + FIB_TEST((2 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_INTERFACE)), + "2 Interface Source'd prefixes"); + + /* + * +2 interface routes +2 non-shared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Modify the default route to be via an adj not yet known. 
+ * this sources the defalut route with the API source, which is + * a higher preference to the DEFAULT_ROUTE source + */ + pfx.fp_addr.ip4.as_u32 = 0; + pfx.fp_len = 0; + fib_table_entry_path_add(fib_index, &pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx); + FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)), + "Flags set on API route"); + + FIB_TEST((fei == dfrt), "default route same index"); + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&nh_10_10_10_1, &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((1 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_API)), + "1 API Source'd prefixes"); + + /* + * find the adj in the shared db + */ + locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + FIB_TEST((locked_ai == ai), "ADJ NBR DB find"); + adj_unlock(locked_ai); + + /* + * +1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+3 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove the API source from the default route. 
We expected + * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP + */ + pfx.fp_addr.ip4.as_u32 = 0; + pfx.fp_len = 0; + fib_table_entry_path_remove(fib_index, &pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // non-recursive path, so no FIB index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx); + + FIB_TEST((fei == dfrt), "default route same index"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "Default route is DROP"); + + /* + * -1 shared-path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. + */ + fib_prefix_t pfx_10_10_10_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.1 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + fib_prefix_t pfx_10_10_10_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.2 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }, + }; + fib_prefix_t pfx_11_11_11_11_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 11.11.11.11 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0b0b0b0b), + }, + }; + u8 eth_addr[] = { + 0xde, 0xde, 0xde, 0xba, 0xba, 0xba, + }; + + ip46_address_t nh_12_12_12_12 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0c0c0c0c), + }; + adj_index_t ai_12_12_12_12; + + /* + * Add a route via an incomplete ADJ. then complete the ADJ + * Expect the route LB is updated to use complete adj type. 
+ */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_11_11_11_11_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY_INCOMPLETE == dpo1->dpoi_type, + "11.11.11.11/32 via incomplete adj"); + + ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created"); + adj = adj_get(ai_01); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_1_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, + "11.11.11.11/32 via complete adj"); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, + tm->hw[0]->sw_if_index), + "RPF list for adj-fib contains adj"); + + ai_12_12_12_12 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_12_12_12_12, + tm->hw[1]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_12_12_12_12), "adj created"); + adj = adj_get(ai_12_12_12_12); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&nh_12_12_12_12, + 
&adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + adj_nbr_update_rewrite(ai_12_12_12_12, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + + /* + * add the adj fib + */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)), + "Flags set on adj-fib"); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + fib_table_entry_path_remove(fib_index, + &pfx_11_11_11_11_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + eth_addr[5] = 0xb2; + + ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created"); + adj = adj_get(ai_02); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_10_10_10_2_s_32.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((ai_01 != ai_02), "ADJs are different"); + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + 
FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_10_10_10_2_s_32); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + + /* + * +2 adj-fibs, and their non-shared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add 2 routes via the first ADJ. ensure path-list sharing + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.1/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.1 resolves via 10.10.10.1"); + + /* + * +1 entry and a shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* 1.1.2.0/24 */ + fib_prefix_t pfx_1_1_2_0_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010200), + } + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), 
"1.1.2.0/24 resolves via 10.10.10.1"); + + /* + * +1 entry only + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * modify 1.1.2.0/24 to use multipath. + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, tm->hw[0]->sw_if_index), + "RPF list for 1.1.2.0/24 contains both adjs"); + + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type); + FIB_TEST((ai_01 == dpo1->dpoi_index), + "1.1.2.0/24 bucket 0 resolves via 10.10.10.1 (%d=%d)", + ai_01, dpo1->dpoi_index); + + dpo1 = load_balance_get_bucket(dpo->dpoi_index, 1); + FIB_TEST(DPO_ADJACENCY == dpo1->dpoi_type, "type is %d", dpo1->dpoi_type); + FIB_TEST((ai_02 == dpo1->dpoi_index), + "1.1.2.0/24 bucket 1 resolves via 10.10.10.2"); + + /* + * +1 shared-pathlist + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * revert the modify + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_2_0_s_24); + dpo = fib_entry_contribute_ip_forwarding(fei); + 
FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, tm->hw[0]->sw_if_index), + "RPF list for 1.1.2.0/24 contains one adj"); + + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.2.0/24 resolves via 10.10.10.1"); + + /* + * +1 shared-pathlist + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB is %d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add 2 recursive routes: + * 100.100.100.100/32 via 1.1.1.1/32 => the via entry is installed. + * 100.100.100.101/32 via 1.1.1.1/32 => the via entry is installed. + */ + fib_prefix_t bgp_100_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.100/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646464), + }, + }; + /* via 1.1.1.1 */ + ip46_address_t nh_1_1_1_1 = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }; + + fei = fib_table_entry_path_add(fib_index, + &bgp_100_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, // no index provided. 
+ fib_index, // nexthop in same fib as route + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_100_pfx, &pfx_1_1_1_1_s_32, 0); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, + tm->hw[0]->sw_if_index), + "RPF list for adj-fib contains adj"); + + /* + * +1 entry and +1 shared-path-list + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fib_prefix_t bgp_101_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.101/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646465), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_101_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_101_pfx, &pfx_1_1_1_1_s_32, 0); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, + tm->hw[0]->sw_if_index), + "RPF list for adj-fib contains adj"); + + /* + * +1 entry, but the recursive path-list is shared. 
+ */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * An EXCLUSIVE route; one where the user (me) provides the exclusive + * adjacency through which the route will resovle + */ + fib_prefix_t ex_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.4.4.4/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04040404), + }, + }; + + fib_table_entry_special_add(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + locked_ai); + fei = fib_table_lookup_exact_match(fib_index, &ex_pfx); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Exclusive route links to user adj"); + + fib_table_entry_special_remove(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &ex_pfx), + "Exclusive reoute removed"); + + /* + * An EXCLUSIVE route; one where the user (me) provides the exclusive + * adjacency through which the route will resovle + */ + dpo_id_t ex_dpo = DPO_INVALID; + + lookup_dpo_add_or_lock_w_fib_index(fib_index, + DPO_PROTO_IP4, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_CONFIG, + &ex_dpo); + + fib_table_entry_special_dpo_add(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &ex_dpo); + fei = fib_table_lookup_exact_match(fib_index, &ex_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)), + "exclusive remote uses lookup DPO"); + + /* + * update the exclusive to use a different DPO + */ + ip_null_dpo_add_and_lock(DPO_PROTO_IP4, + IP_NULL_ACTION_SEND_ICMP_UNREACH, + &ex_dpo); + fib_table_entry_special_dpo_update(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &ex_dpo); + dpo = 
fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(&ex_dpo, load_balance_get_bucket(dpo->dpoi_index, 0)), + "exclusive remote uses now uses NULL DPO"); + + fib_table_entry_special_remove(fib_index, + &ex_pfx, + FIB_SOURCE_SPECIAL); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &ex_pfx), + "Exclusive reoute removed"); + dpo_reset(&ex_dpo); + + /* + * Add a recursive route: + * 200.200.200.200/32 via 1.1.1.2/32 => the via entry is NOT installed. + */ + fib_prefix_t bgp_200_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 200.200.200.200/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c8), + }, + }; + /* via 1.1.1.2 */ + fib_prefix_t pfx_1_1_1_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010102), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + + /* + * the adj should be recursive via drop, since the route resolves via + * the default route, which is itself a DROP + */ + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(load_balance_is_drop(dpo1), "1.1.1.2/32 is drop"); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0), + "RPF list for 1.1.1.2/32 contains 0 adjs"); + + /* + * +2 entry and +1 shared-path-list + */ + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Unequal Cost load-balance. 3:1 ratio. 
fits in a 4 bucket LB + * The paths are sort by NH first. in this case the the path with greater + * weight is first in the set. This ordering is to test the RPF sort|uniq logic + */ + fib_prefix_t pfx_1_2_3_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01020304), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_12_12_12_12, + tm->hw[1]->sw_if_index, + ~0, + 3, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.4/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 4), + "1.2.3.4/32 LB has %d bucket", + lb->lb_n_buckets); + + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 0, ai_12_12_12_12); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 1, ai_12_12_12_12); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 2, ai_12_12_12_12); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_4_s_32, 3, ai_01); + + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2, + tm->hw[0]->sw_if_index, + tm->hw[1]->sw_if_index), + "RPF list for 1.2.3.4/32 contains both adjs"); + + + /* + * Unequal Cost load-balance. 4:1 ratio. 
+ * fits in a 16 bucket LB with ratio 13:3 + */ + fib_prefix_t pfx_1_2_3_5_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01020305), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_12_12_12_12, + tm->hw[1]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 4, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.2.3.5/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 16), + "1.2.3.5/32 LB has %d bucket", + lb->lb_n_buckets); + + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 0, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 1, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 2, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 3, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 4, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 5, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 6, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 7, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 8, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 9, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 10, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 11, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 12, ai_01); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 13, ai_12_12_12_12); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 14, ai_12_12_12_12); + FIB_TEST_LB_BUCKET_VIA_ADJ(&pfx_1_2_3_5_s_32, 15, ai_12_12_12_12); + + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2, + tm->hw[0]->sw_if_index, + tm->hw[1]->sw_if_index), + 
"RPF list for 1.2.3.4/32 contains both adjs"); + + /* + * Test UCMP with a large weight skew - this produces load-balance objects with large + * numbers of buckets to accommodate the skew. By updating said load-balances we are + * laso testing the LB in placce modify code when number of buckets is large. + */ + fib_prefix_t pfx_6_6_6_6_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.1/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x06060606), + }, + }; + fib_test_lb_bucket_t ip_6_6_6_6_o_10_10_10_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_01, + }, + }; + fib_test_lb_bucket_t ip_6_6_6_6_o_10_10_10_2 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_02, + }, + }; + fib_test_lb_bucket_t ip_6_6_6_6_o_12_12_12_12 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_12_12_12_12, + }, + }; + fib_table_entry_update_one_path(fib_index, + &pfx_6_6_6_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 0, // zero weigth + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_6_6_6_6_o_10_10_10_1), + "6.6.6.6/32 via 10.10.10.1"); + + fib_table_entry_path_add(fib_index, + &pfx_6_6_6_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 100, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 64, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + 
&ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_1), + "6.6.6.6/32 via 10.10.10.1 and 10.10.10.2 in 63:1 ratio"); + + fib_table_entry_path_add(fib_index, + &pfx_6_6_6_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_12_12_12_12, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 100, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 128, + &ip_6_6_6_6_o_10_10_10_1, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + 
&ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + 
&ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12, + &ip_6_6_6_6_o_12_12_12_12), + "6.6.6.6/32 via 10.10.10.1 and 10.10.10.2 in 63:1 ratio"); + + fib_table_entry_path_remove(fib_index, + &pfx_6_6_6_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_12_12_12_12, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 100, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 64, + 
&ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_2, + &ip_6_6_6_6_o_10_10_10_1), + "6.6.6.6/32 via 10.10.10.1 and 10.10.10.2 in 63:1 ratio"); + + fib_table_entry_path_remove(fib_index, + &pfx_6_6_6_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + 
~0, // invalid fib index + 100, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_6_6_6_6_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_6_6_6_6_o_10_10_10_1), + "6.6.6.6/32 via 10.10.10.1"); + + fib_table_entry_delete(fib_index, &pfx_6_6_6_6_s_32, FIB_SOURCE_API); + + /* + * A recursive via the two unequal cost entries + */ + fib_prefix_t bgp_44_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 200.200.200.201/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x44444444), + }, + }; + fei = fib_table_entry_path_add(fib_index, + &bgp_44_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_2_3_4_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &bgp_44_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_2_3_5_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_44_s_32, &pfx_1_2_3_4_s_32, 0); + FIB_TEST_REC_FORW(&bgp_44_s_32, &pfx_1_2_3_5_s_32, 1); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 2, + tm->hw[0]->sw_if_index, + tm->hw[1]->sw_if_index), + "RPF list for 1.2.3.4/32 contains both adjs"); + + /* + * test the uRPF check functions + */ + dpo_id_t dpo_44 = DPO_INVALID; + index_t urpfi; + + fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo_44); + urpfi = load_balance_get_urpf(dpo_44.dpoi_index); + + FIB_TEST(fib_urpf_check(urpfi, tm->hw[0]->sw_if_index), + "uRPF check for 68.68.68.68/32 on %d OK", + tm->hw[0]->sw_if_index); + FIB_TEST(fib_urpf_check(urpfi, tm->hw[1]->sw_if_index), + "uRPF check for 68.68.68.68/32 on %d OK", + tm->hw[1]->sw_if_index); + FIB_TEST(!fib_urpf_check(urpfi, 99), + "uRPF check for 68.68.68.68/32 on 99 not-OK", + 99); + dpo_reset(&dpo_44); + + fib_table_entry_delete(fib_index, + &bgp_44_s_32, + FIB_SOURCE_API); + 
fib_table_entry_delete(fib_index, + &pfx_1_2_3_5_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_1_2_3_4_s_32, + FIB_SOURCE_API); + + /* + * Add a recursive route: + * 200.200.200.201/32 via 1.1.1.200/32 => the via entry is NOT installed. + */ + fib_prefix_t bgp_201_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 200.200.200.201/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0xc8c8c8c9), + }, + }; + /* via 1.1.1.200 */ + fib_prefix_t pfx_1_1_1_200_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x010101c8), + }, + }; + + fib_table_entry_path_add(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_200_s_32.fp_addr, + ~0, // no index provided. + fib_index, // nexthop in same fib as route + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32); + FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)), + "Flags set on RR via non-attached"); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0), + "RPF list for BGP route empty"); + + /* + * +2 entry (BGP & RR) and +1 shared-path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * insert a route that covers the missing 1.1.1.2/32. we epxect + * 200.200.200.200/32 and 200.200.200.201/32 to resolve through it. 
+ */ + fib_prefix_t pfx_1_1_1_0_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.0/24 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010100), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.0/24 resolves via 10.10.10.1"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.2/32 resolves via 10.10.10.1"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "1.1.1.200/24 resolves via 10.10.10.1"); + + /* + * +1 entry. 1.1.1.1/32 already uses 10.10.10.1 so no new pah-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * the recursive adj for 200.200.200.200 should be updated. + */ + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + fei = fib_table_lookup(fib_index, &bgp_200_pfx); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, + tm->hw[0]->sw_if_index), + "RPF list for BGP route has itf index 0"); + + /* + * insert a more specific route than 1.1.1.0/24 that also covers the + * missing 1.1.1.2/32, but not 1.1.1.200/32. we epxect + * 200.200.200.200 to resolve through it. 
+ */ + fib_prefix_t pfx_1_1_1_0_s_28 = { + .fp_len = 28, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 1.1.1.0/24 */ + .ip4.as_u32 = clib_host_to_net_u32(0x01010100), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "1.1.1.0/24 resolves via 10.10.10.2"); + + /* + * +1 entry. +1 shared path-list + */ + FIB_TEST((5 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+9 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+14 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * the recursive adj for 200.200.200.200 should be updated. + * 200.200.200.201 remains unchanged. + */ + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + + /* + * remove this /28. 200.200.200.200/32 should revert back to via 1.1.1.0/24 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28) == + FIB_NODE_INDEX_INVALID), + "1.1.1.0/28 removed"); + FIB_TEST((fib_table_lookup(fib_index, &pfx_1_1_1_0_s_28) == + fib_table_lookup(fib_index, &pfx_1_1_1_0_s_24)), + "1.1.1.0/28 lookup via /24"); + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + + /* + * -1 entry. 
-1 shared path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+13 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 1.1.1.0/24. 200.200.200.200/32 should revert back to via 0.0.0.0/0 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_24) == + FIB_NODE_INDEX_INVALID), + "1.1.1.0/24 removed"); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1.1.1.2/32 route is DROP"); + fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1.1.1.200/32 route is DROP"); + + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); + FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + + /* + * -1 entry + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * insert the missing 1.1.1.2/32 + */ + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai = ai_01), "1.1.1.2/32 resolves via 10.10.10.1"); + + FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0); 
+ FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0); + + /* + * no change. 1.1.1.2/32 was already there RR sourced. + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+12 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 200.200.200.201/32 which does not have a valid via FIB + */ + fib_table_entry_path_remove(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_200_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * -2 entries (BGP and RR). -1 shared path-list; + */ + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.201/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32) == + FIB_NODE_INDEX_INVALID), + "1.1.1.200/32 removed"); + + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+7 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove 200.200.200.200/32 which does have a valid via FIB + */ + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.200/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32) != + FIB_NODE_INDEX_INVALID), + "1.1.1.2/32 still present"); + + /* + * -1 entry (BGP, the RR source is also API sourced). 
-1 shared path-list; + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+9 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A recursive prefix that has a 2 path load-balance. + * It also shares a next-hop with other BGP prefixes and hence + * test the ref counting of RR sourced prefixes and 2 level LB. + */ + const fib_prefix_t bgp_102 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 100.100.100.101/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x64646466), + }, + }; + fib_table_entry_path_add(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &bgp_102); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "100.100.100.102/32 presnet"); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 2), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "First via 10.10.10.1"); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 1)), + "Second via 10.10.10.1"); + + fib_table_entry_path_remove(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_102, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &bgp_102); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "100.100.100.102/32 removed"); + + /* + * remove the remaining recursives + */ + fib_table_entry_path_remove(fib_index, + &bgp_100_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_101_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_100_pfx) == + FIB_NODE_INDEX_INVALID), + "100.100.100.100/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_101_pfx) == + FIB_NODE_INDEX_INVALID), + "100.100.100.101/32 removed"); + + /* + * -2 entry (2*BGP, the RR source is also API sourced). -1 shared path-list; + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add a recursive route via a connected cover, using an adj-fib that does exist + */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + ~0, // no index provided. + fib_index, // Same as route's FIB + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * +1 entry. 
+1 shared path-list (recursive via 10.10.10.1) + */ + FIB_TEST((2 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+6 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "200.200.200.200/32 is recursive via adj for 10.10.10.1"); + + FIB_TEST((FIB_ENTRY_FLAG_ATTACHED == fib_entry_get_flags(fei)), + "Flags set on RR via existing attached"); + + /* + * Add a recursive route via a connected cover, using and adj-fib that does + * not exist + */ + ip46_address_t nh_10_10_10_3 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03), + }; + fib_prefix_t pfx_10_10_10_3 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = nh_10_10_10_3, + }; + + fib_table_entry_path_add(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + ~0, // no index provided. + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * +2 entries (BGP and RR). 
+1 shared path-list (recursive via 10.10.10.3) and + * one unshared non-recursive via 10.10.10.3 + */ + FIB_TEST((3 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + ai_03 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_10_3, + tm->hw[0]->sw_if_index); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + ai = fib_entry_get_adj(fei); + FIB_TEST((ai == ai_03), "adj for 10.10.10.3/32 is via adj for 10.10.10.3"); + FIB_TEST(((FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED) == + fib_entry_get_flags(fei)), + "Flags set on RR via non-existing attached"); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 10.10.10.3"); + + adj_unlock(ai_03); + + /* + * remove the recursives + */ + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &bgp_201_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_201_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.201/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &bgp_200_pfx) == + FIB_NODE_INDEX_INVALID), + "200.200.200.200/32 removed"); + FIB_TEST((fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3) == + FIB_NODE_INDEX_INVALID), + "10.10.10.3/32 removed"); + + /* + * -3 entries (2*BGP and RR). -2 shared path-list (recursive via 10.10.10.3 & + * 10.10.10.1) and one unshared non-recursive via 10.10.10.3 + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + + /* + * RECURSION LOOPS + * Add 5.5.5.5/32 -> 5.5.5.6/32 -> 5.5.5.7/32 -> 5.5.5.5/32 + */ + fib_prefix_t pfx_5_5_5_5_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050505), + }, + }; + fib_prefix_t pfx_5_5_5_6_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050506), + }, + }; + fib_prefix_t pfx_5_5_5_7_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x05050507), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_7_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_7_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_5_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + /* + * +3 entries, +3 shared path-list + */ + FIB_TEST((4 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+8 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+10 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * All the entries have only looped paths, so they are all drop + */ + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * provide 5.5.5.6/32 with alternate path. + * this will allow only 5.5.5.6/32 to forward with this path, the others + * are still drop since the loop is still present. 
+ */ + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.6 LB has %d bucket", lb->lb_n_buckets); + + dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type); + FIB_TEST((ai_01 == dpo2->dpoi_index), + "5.5.5.6 bucket 0 resolves via 10.10.10.2"); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + + /* + * remove the alternate path for 5.5.5.6/32 + * back to all drop + */ + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * break the loop by giving 5.5.5.5/32 a new set of paths + * expect all to forward via this new path. 
+ */ + fib_table_entry_update_one_path(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "5.5.5.5 LB has %d bucket", lb->lb_n_buckets); + + dpo2 = load_balance_get_bucket(dpo1->dpoi_index, 0); + FIB_TEST(DPO_ADJACENCY == dpo2->dpoi_type, "type is %d", dpo2->dpoi_type); + FIB_TEST((ai_01 == dpo2->dpoi_index), + "5.5.5.5 bucket 0 resolves via 10.10.10.2"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_7_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo2->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo2->dpoi_index, 0)), + "5.5.5.5.7 via 5.5.5.5"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo1->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 1), "Recursive LB has %d bucket", lb->lb_n_buckets); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "5.5.5.5.6 via 5.5.5.7"); + + /* + * revert back to the loop. so we can remove the prefixes with + * the loop intact + */ + fib_table_entry_update_one_path(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. 
+ fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_5_5_5_7_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.7/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_5_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.5/32 is via adj for DROP"); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "LB for 5.5.5.6/32 is via adj for DROP"); + + /* + * remove all the 5.5.5.x/32 prefixes + */ + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_5_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_7_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_7_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_5_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + ~0, // no index provided. 
+ fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + /* + * -3 entries, -3 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Single level loop 5.5.5.5/32 via 5.5.5.5/32 + */ + fib_table_entry_path_add(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_5_5_5_6_s_32); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "1-level 5.5.5.6/32 loop is via adj for DROP"); + + fib_table_entry_path_remove(fib_index, + &pfx_5_5_5_6_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_5_5_5_6_s_32.fp_addr, + ~0, // no index provided. + fib_index, // same as route's FIB + 1, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_5_5_5_6_s_32), + "1-level 5.5.5.6/32 loop is removed"); + + /* + * A recursive route whose next-hop is covered by the prefix. + * This would mean the via-fib, which inherits forwarding from its + * cover, thus picks up forwarding from the prfix, which is via the + * via-fib, and we have a loop. 
+ */ + fib_prefix_t pfx_23_23_23_0_s_24 = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x17171700), + }, + }; + fib_prefix_t pfx_23_23_23_23_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x17171717), + }, + }; + fei = fib_table_entry_path_add(fib_index, + &pfx_23_23_23_0_s_24, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_23_23_23_23_s_32.fp_addr, + ~0, // recursive + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(load_balance_is_drop(dpo), + "23.23.23.0/24 via covered is DROP"); + fib_table_entry_delete_index(fei, FIB_SOURCE_API); + + /* + * add-remove test. no change. + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A recursive route with recursion constraints. + * 200.200.200.200/32 via 1.1.1.1 is recurse via host constrained + */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + + /* + * save the load-balance. 
we expect it to be inplace modified + */ + lb = load_balance_get(dpo1->dpoi_index); + + /* + * add a covering prefix for the via fib that would otherwise serve + * as the resolving route when the host is removed + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai == ai_01), + "adj for 1.1.1.0/28 is via adj for 1.1.1.1"); + + /* + * remove the host via FIB - expect the BGP prefix to be drop + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for DROP"); + + /* + * add the via-entry host reoute back. expect to resolve again + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo1->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + + /* + * add another path for the recursive. it will then have 2. 
+ */ + fib_prefix_t pfx_1_1_1_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010103), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_3_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_3_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32); + dpo2 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket(dpo->dpoi_index, 0)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32); + dpo1 = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket(dpo->dpoi_index, 1)), + "adj for 200.200.200.200/32 is recursive via adj for 1.1.1.3"); + + /* + * expect the lb-map used by the recursive's load-balance is using both buckets + */ + load_balance_map_t *lbm; + index_t lbmi; + + lb = load_balance_get(dpo->dpoi_index); + lbmi = lb->lb_map; + load_balance_map_lock(lbmi); + lbm = load_balance_map_get(lbmi); + + FIB_TEST(lbm->lbm_buckets[0] == 0, + "LB maps's bucket 0 is %d", + lbm->lbm_buckets[0]); + FIB_TEST(lbm->lbm_buckets[1] == 1, + "LB maps's bucket 1 is %d", + lbm->lbm_buckets[1]); + + /* + * withdraw one of the /32 via-entrys. + * that ECMP path will be unresolved and forwarding should continue on the + * other available path. this is an iBGP PIC edge failover. + * Test the forwarding changes without re-fetching the adj from the + * recursive entry. 
this ensures its the same one that is updated; i.e. an + * inplace-modify. + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + FIB_TEST(!dpo_cmp(dpo, fib_entry_contribute_ip_forwarding(fei)), + "post PIC 200.200.200.200/32 was inplace modified"); + + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 0)), + "post PIC adj for 200.200.200.200/32 is recursive" + " via adj for 1.1.1.3"); + + /* + * the LB maps that was locked above should have been modified to remove + * the path that was down, and thus its bucket points to a path that is + * still up. + */ + FIB_TEST(lbm->lbm_buckets[0] == 1, + "LB maps's bucket 0 is %d", + lbm->lbm_buckets[0]); + FIB_TEST(lbm->lbm_buckets[1] == 1, + "LB maps's bucket 1 is %d", + lbm->lbm_buckets[1]); + + load_balance_map_unlock(lb->lb_map); + + /* + * add it back. again + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(!dpo_cmp(dpo2, load_balance_get_bucket_i(lb, 0)), + "post PIC recovery adj for 200.200.200.200/32 is recursive " + "via adj for 1.1.1.1"); + FIB_TEST(!dpo_cmp(dpo1, load_balance_get_bucket_i(lb, 1)), + "post PIC recovery adj for 200.200.200.200/32 is recursive " + "via adj for 1.1.1.3"); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "post PIC 200.200.200.200/32 was inplace modified"); + + /* + * add a 3rd path. this makes the LB 16 buckets. 
+ */ + fib_table_entry_path_add(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "200.200.200.200/32 was inplace modified for 3rd path"); + FIB_TEST(16 == lb->lb_n_buckets, + "200.200.200.200/32 was inplace modified for 3rd path to 16 buckets"); + + lbmi = lb->lb_map; + load_balance_map_lock(lbmi); + lbm = load_balance_map_get(lbmi); + + for (ii = 0; ii < 16; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + + /* + * trigger PIC by removing the first via-entry + * the first 6 buckets of the map should map to the next 6 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(lb == load_balance_get(dpo->dpoi_index), + "200.200.200.200/32 was inplace modified for 3rd path"); + FIB_TEST(2 == lb->lb_n_buckets, + "200.200.200.200/32 was inplace modified for 3rd path remove to 2 buckets"); + + for (ii = 0; ii < 6; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii+6, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + for (ii = 6; ii < 16; ii++) + { + FIB_TEST(lbm->lbm_buckets[ii] == ii, + "LB Map for 200.200.200.200/32 at %d is %d", + ii, lbm->lbm_buckets[ii]); + } + + + /* + * tidy up + */ + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + 
fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + MPLS_LABEL_INVALID); + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_1_1_1_1, + ~0, + fib_index, + 1, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + fib_table_entry_path_remove(fib_index, + &bgp_200_pfx, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_3_s_32.fp_addr, + ~0, + fib_index, + 1, + FIB_ROUTE_PATH_RESOLVE_VIA_HOST); + fib_table_entry_delete(fib_index, + &pfx_1_1_1_3_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_1_1_1_0_s_28, + FIB_SOURCE_API); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_0_s_28)), + "1.1.1.1/28 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_3_s_32)), + "1.1.1.3/32 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &bgp_200_pfx)), + "200.200.200.200/32 removed"); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route whose paths are built up iteratively and then removed + * all at once + */ + fib_prefix_t pfx_4_4_4_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.4.4.4/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04040404), + }, + }; + + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_3, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(FIB_NODE_INDEX_INVALID != + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 present"); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route with multiple paths at once + */ + fib_route_path_t *r_paths = NULL; + + for (ii = 0; ii < 4; ii++) + { + fib_route_path_t r_path = { + .frp_proto = FIB_PROTOCOL_IP4, + .frp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii), + }, + .frp_sw_if_index = tm->hw[0]->sw_if_index, + .frp_weight = 1, + .frp_fib_index = ~0, + }; + vec_add1(r_paths, r_path); + } + + fib_table_entry_update(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + r_paths); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + vec_free(r_paths); + + /* + * add-remove test. no change. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * A route deag route + */ + fib_table_entry_path_add(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + lookup_dpo_t *lkd = lookup_dpo_get(dpo->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "4.4.4.4/32 is deag in %d %U", + lkd->lkd_fib_index, + format_dpo_id, dpo, 0); + + fib_table_entry_delete(fib_index, + &pfx_4_4_4_4_s_32, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), + "4.4.4.4/32 removed"); + vec_free(r_paths); + + /* + * add-remove test. no change. + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+7 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Duplicate paths: + * add a recursive with duplicate paths. Expect the duplicate to be ignored. 
+ */ + fib_prefix_t pfx_34_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x22010101), + }, + }; + fib_prefix_t pfx_34_34_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x22220101), + }, + }; + fei = fib_table_entry_path_add(fib_index, + &pfx_34_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_34_34_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_entry_path_add(fib_index, + &pfx_34_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_34_34_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST_REC_FORW(&pfx_34_1_1_1_s_32, &pfx_34_34_1_1_s_32, 0); + fib_table_entry_delete_index(fei, FIB_SOURCE_API); + + /* + * CLEANUP + * remove: 1.1.1.2/32, 1.1.2.0/24 and 1.1.1.1/32 + * all of which are via 10.10.10.1, Itf1 + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fib_table_entry_path_remove(fib_index, + &pfx_1_1_2_0_s_24, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_1_s_32), + "1.1.1.1/32 removed"); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_2_s_32), + "1.1.1.2/32 removed"); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_1_1_2_0_s_24), + "1.1.2.0/24 removed"); + + /* + * -3 entries and -1 shared path-list + */ + FIB_TEST((0 == 
fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * An attached-host route. Expect to link to the incomplete adj + */ + fib_prefix_t pfx_4_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 4.1.1.1/32 */ + .ip4.as_u32 = clib_host_to_net_u32(0x04010101), + }, + }; + fib_table_entry_path_add(fib_index, + &pfx_4_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + tm->hw[0]->sw_if_index, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_4_1_1_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.1.1.1/32 present"); + ai = fib_entry_get_adj(fei); + + ai2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &pfx_4_1_1_1_s_32.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((ai == ai2), "Attached-host link to incomplete ADJ"); + adj_unlock(ai2); + + /* + * +1 entry and +1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + fib_table_entry_delete(fib_index, + &pfx_4_1_1_1_s_32, + FIB_SOURCE_API); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+4 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add a v6 prefix via v4 next-hops + */ + fib_prefix_t pfx_2001_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, 
+ .fp_addr = { + .ip6.as_u64[0] = clib_host_to_net_u64(0x2001000000000000), + }, + }; + fei = fib_table_entry_path_add(0, //default v6 table + &pfx_2001_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(0, &pfx_2001_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "2001::/64 present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP), + "2001::/64 via ARP-adj"); + FIB_TEST((adj->ia_link == VNET_LINK_IP6), + "2001::/64 is link type v6"); + FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP4), + "2001::/64 ADJ-adj is NH proto v4"); + fib_table_entry_delete(0, &pfx_2001_s_64, FIB_SOURCE_API); + + /* + * add a uRPF exempt prefix: + * test: + * - it's forwarding is drop + * - it's uRPF list is not empty + * - the uRPF list for the default route (it's cover) is empty + */ + fei = fib_table_entry_special_add(fib_index, + &pfx_4_1_1_1_s_32, + FIB_SOURCE_URPF_EXEMPT, + FIB_ENTRY_FLAG_DROP, + ADJ_INDEX_INVALID); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(load_balance_is_drop(dpo), + "uRPF exempt 4.1.1.1/32 DROP"); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 1, 0), + "uRPF list for exempt prefix has itf index 0"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_0_0_0_0_s_0); + FIB_TEST(fib_test_urpf_is_equal(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, 0), + "uRPF list for 0.0.0.0/0 empty"); + + fib_table_entry_delete(fib_index, &pfx_4_1_1_1_s_32, FIB_SOURCE_URPF_EXEMPT); + + /* + * CLEANUP + * remove adj-fibs: + */ + fib_table_entry_delete(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32), + "10.10.10.1/32 adj-fib removed"); + 
FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32), + "10.10.10.2/32 adj-fib removed"); + + /* + * -2 entries and -2 non-shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR+2 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * unlock the adjacencies for which this test provided a rewrite. + * These are the last locks on these adjs. they should thus go away. + */ + adj_unlock(ai_02); + adj_unlock(ai_01); + adj_unlock(ai_12_12_12_12); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * CLEANUP + * remove the interface prefixes + */ + local_pfx.fp_len = 32; + fib_table_entry_special_remove(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + fei = fib_table_lookup(fib_index, &local_pfx); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx), + "10.10.10.10/32 adj-fib removed"); + + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx), + "10.10.10.10/24 adj-fib removed"); + + /* + * -2 entries and -2 non-shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Last but not least, remove the VRF + */ + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_API)), + "NO API Source'd prefixes"); + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_RR)), 
+ "NO RR Source'd prefixes"); + FIB_TEST((0 == fib_table_get_num_entries(fib_index, + FIB_PROTOCOL_IP4, + FIB_SOURCE_INTERFACE)), + "NO INterface Source'd prefixes"); + + fib_table_unlock(fib_index, FIB_PROTOCOL_IP4); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NBR-5 == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NBR-5 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + FIB_TEST((NBR-5 == pool_elts(fib_urpf_list_pool)), "uRPF pool size is %d", + pool_elts(fib_urpf_list_pool)); + + return 0; +} + +static int +fib_test_v6 (void) +{ + /* + * In the default table check for the presence and correct forwarding + * of the special entries + */ + fib_node_index_t dfrt, fei, ai, locked_ai, ai_01, ai_02; + const dpo_id_t *dpo, *dpo_drop; + const ip_adjacency_t *adj; + const receive_dpo_t *rd; + test_main_t *tm; + u32 fib_index; + int ii; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* via 2001:0:0:1::2 */ + ip46_address_t nh_2001_2 = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000002), + }, + }, + }; + + tm = &test_main; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP6); + + /* Find or create FIB table 11 */ + fib_index = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP6, 11); + + for (ii = 0; ii < 4; ii++) + { + ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index; + } + + fib_prefix_t pfx_0_0 = { + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + {0, 0}, + }, + }, + }; + + dfrt = fib_table_lookup(fib_index, &pfx_0_0); + FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + dpo = fib_entry_contribute_ip_forwarding(dfrt); + FIB_TEST((dpo->dpoi_index == 
ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &pfx_0_0.fp_addr.ip6)), + "default-route; fwd and non-fwd tables match"); + + // FIXME - check specials. + + /* + * At this stage there is one v4 FIB with 5 routes and two v6 FIBs + * each with 6 entries. All entries are special so no path-list sharing. + */ +#define NPS (5+6+6) + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is %d", + fib_path_list_pool_size()); + FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add interface routes. + * validate presence of /64 attached and /128 recieve. + * test for the presence of the receive address in the glean and local adj + * + * receive on 2001:0:0:1::1/128 + */ + fib_prefix_t local_pfx = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000001), + }, + }, + } + }; + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "attached interface route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &adj->sub_type.glean.receive_addr)), + "attached interface adj is receive ok"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &local_pfx.fp_addr.ip6)), + "attached-route; fwd and non-fwd tables 
match"); + + local_pfx.fp_len = 128; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local interface route present"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + rd = receive_dpo_get(dpo->dpoi_index); + + FIB_TEST((0 == ip46_address_cmp(&local_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST((dpo->dpoi_index == ip6_fib_table_fwding_lookup( + &ip6_main, + 1, + &local_pfx.fp_addr.ip6)), + "local-route; fwd and non-fwd tables match"); + + /* + * +2 entries. +2 unshared path-lists + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB is empty"); + FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Modify the default route to be via an adj not yet known. 
+ * this sources the defalut route with the API source, which is + * a higher preference to the DEFAULT_ROUTE source + */ + fib_table_entry_path_add(fib_index, &pfx_0_0, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_0_0); + + FIB_TEST((fei == dfrt), "default route same index"); + ai = fib_entry_get_adj(fei); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai), "default route adj present"); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&nh_2001_2, &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + /* + * find the adj in the shared db + */ + locked_ai = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + VNET_LINK_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index); + FIB_TEST((locked_ai == ai), "ADJ NBR DB find"); + adj_unlock(locked_ai); + + /* + * no more entires. +1 shared path-list + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+3 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * remove the API source from the default route. We expected + * the route to remain, sourced by DEFAULT_ROUTE, and hence a DROP + */ + fib_table_entry_path_remove(fib_index, &pfx_0_0, + FIB_SOURCE_API, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_0_0); + + FIB_TEST((fei == dfrt), "default route same index"); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(dfrt)), + "Default route is DROP"); + + /* + * no more entires. 
-1 shared path-list + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+2 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+2 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. + */ + fib_prefix_t pfx_2001_1_2_s_128 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000002), + }, + }, + } + }; + fib_prefix_t pfx_2001_1_3_s_128 = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000001), + [1] = clib_host_to_net_u64(0x0000000000000003), + }, + }, + } + }; + u8 eth_addr[] = { + 0xde, 0xde, 0xde, 0xba, 0xba, 0xba, + }; + + ai_01 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + VNET_LINK_IP6, + &pfx_2001_1_2_s_128.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_01), "adj created"); + adj = adj_get(ai_01); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_01, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_2_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + fib_table_entry_update_one_path(fib_index, + &pfx_2001_1_2_s_128, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP6, + &pfx_2001_1_2_s_128.fp_addr, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2001_1_2_s_128); + ai = 
fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + + eth_addr[5] = 0xb2; + + ai_02 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, + VNET_LINK_IP6, + &pfx_2001_1_3_s_128.fp_addr, + tm->hw[0]->sw_if_index); + FIB_TEST((FIB_NODE_INDEX_INVALID != ai_02), "adj created"); + adj = adj_get(ai_02); + FIB_TEST((IP_LOOKUP_NEXT_ARP == adj->lookup_next_index), + "adj is incomplete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + + adj_nbr_update_rewrite(ai_02, ADJ_NBR_REWRITE_FLAG_COMPLETE, + fib_test_build_rewrite(eth_addr)); + FIB_TEST((IP_LOOKUP_NEXT_REWRITE == adj->lookup_next_index), + "adj is complete"); + FIB_TEST((0 == ip46_address_cmp(&pfx_2001_1_3_s_128.fp_addr, + &adj->sub_type.nbr.next_hop)), + "adj nbr next-hop ok"); + FIB_TEST((ai_01 != ai_02), "ADJs are different"); + + fib_table_entry_update_one_path(fib_index, + &pfx_2001_1_3_s_128, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP6, + &pfx_2001_1_3_s_128.fp_addr, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2001_1_3_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + + /* + * +2 entries, +2 unshread path-lists. + */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+4 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+4 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add a 2 routes via the first ADJ. 
ensure path-list sharing + */ + fib_prefix_t pfx_2001_a_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000a), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + fib_prefix_t pfx_2001_b_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000b), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + + fib_table_entry_path_add(fib_index, + &pfx_2001_a_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_2001_a_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1"); + fib_table_entry_path_add(fib_index, + &pfx_2001_b_s_64, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &pfx_2001_b_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1"); + + /* + * +2 entries, +1 shared path-list. 
+ */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * add a v4 prefix via a v6 next-hop + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = 0x01010101, + }, + }; + fei = fib_table_entry_path_add(0, // default table + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + &nh_2001_2, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + FIB_TEST(fei == fib_table_lookup_exact_match(0, &pfx_1_1_1_1_s_32), + "1.1.1.1/32 o v6 route present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_ARP), + "1.1.1.1/32 via ARP-adj"); + FIB_TEST((adj->ia_link == VNET_LINK_IP4), + "1.1.1.1/32 ADJ-adj is link type v4"); + FIB_TEST((adj->ia_nh_proto == FIB_PROTOCOL_IP6), + "1.1.1.1/32 ADJ-adj is NH proto v6"); + fib_table_entry_delete(0, &pfx_1_1_1_1_s_32, FIB_SOURCE_API); + + /* + * An attached route + */ + fib_prefix_t pfx_2001_c_s_64 = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + .as_u64 = { + [0] = clib_host_to_net_u64(0x200100000000000c), + [1] = clib_host_to_net_u64(0x0000000000000000), + }, + }, + } + }; + fib_table_entry_path_add(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached route present"); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((adj->lookup_next_index == IP_LOOKUP_NEXT_GLEAN), + "2001:0:0:c/64 attached resolves via glean"); + + 
fib_table_entry_path_remove(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_CLI, + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_c_s_64); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached route removed"); + + /* + * Shutdown the interface on which we have a connected and through + * which the routes are reachable. + * This will result in the connected, adj-fibs, and routes linking to drop + * The local/for-us prefix continues to receive. + */ + clib_error_t * error; + + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + ~VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface shutdown OK"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::b/64 resolves via drop"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::a/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::1/128 not drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + dpo = 
fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1/64 resolves via drop"); + + /* + * no change + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+5 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * shutdown one of the other interfaces, then add a connected. + * and swap one of the routes to it. + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[1]->sw_if_index, + ~VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface 1 shutdown OK"); + + fib_prefix_t connected_pfx = { + .fp_len = 64, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = { + /* 2001:0:0:2::1/64 */ + .as_u64 = { + [0] = clib_host_to_net_u64(0x2001000000000002), + [1] = clib_host_to_net_u64(0x0000000000000001), + }, + }, + } + }; + fib_table_entry_update_one_path(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[1]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &connected_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached interface route present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST(!dpo_cmp(dpo, dpo_drop), + "2001:0:0:2/64 not resolves via drop"); + + connected_pfx.fp_len = 128; + fib_table_entry_update_one_path(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP6, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup(fib_index, &connected_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != 
fei), "local interface route present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + dpo = load_balance_get_bucket(dpo->dpoi_index, 0); + FIB_TEST((DPO_RECEIVE == dpo->dpoi_type), + "local interface adj is local"); + rd = receive_dpo_get(dpo->dpoi_index); + FIB_TEST((0 == ip46_address_cmp(&connected_pfx.fp_addr, + &rd->rd_addr)), + "local interface adj is receive ok"); + + /* + * +2 entries, +2 unshared path-lists + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + + /* + * bring the interface back up. we expected the routes to return + * to normal forwarding. + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + FIB_TEST((NULL == error), "Interface bring-up OK"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + + /* + * Same test as above, but this time the HW interface goes down + */ + error = 
vnet_hw_interface_set_flags(vnet_get_main(), + tm->hw_if_indicies[0], + ~VNET_HW_INTERFACE_FLAG_LINK_UP); + FIB_TEST((NULL == error), "Interface shutdown OK"); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001::a/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::3/128 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::2/128 resolves via drop"); + local_pfx.fp_len = 128; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1::1/128 not drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(!dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "2001:0:0:1/64 resolves via drop"); + + error = vnet_hw_interface_set_flags(vnet_get_main(), + tm->hw_if_indicies[0], + VNET_HW_INTERFACE_FLAG_LINK_UP); + FIB_TEST((NULL == error), "Interface bring-up OK"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::a/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + ai = 
fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "2001::b/64 resolves via 2001:0:0:1::1"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_02 == ai), "ADJ-FIB resolves via adj"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + ai = fib_entry_get_adj(fei); + FIB_TEST((ai_01 == ai), "ADJ-FIB resolves via adj"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + ai = fib_entry_get_adj(fei); + adj = adj_get(ai); + FIB_TEST((IP_LOOKUP_NEXT_GLEAN == adj->lookup_next_index), + "attached interface adj is glean"); + + /* + * Delete the interface that the routes reolve through. + * Again no routes are removed. They all point to drop. + * + * This is considered an error case. The control plane should + * not remove interfaces through which routes resolve, but + * such things can happen. ALL affected routes will drop. + */ + vnet_delete_hw_interface(vnet_get_main(), tm->hw_if_indicies[0]); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::1/128 is drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + 
FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1/64 resolves via drop"); + + /* + * no change + */ + FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS+7 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS+8 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * Add the interface back. routes stay unresolved. + */ + error = ethernet_register_interface(vnet_get_main(), + test_interface_device_class.index, + 0 /* instance */, + hw_address, + &tm->hw_if_indicies[0], + /* flag change */ 0); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001::b/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::3/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::2/64 resolves via drop"); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1::1/128 is drop"); + local_pfx.fp_len = 64; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)), + "2001:0:0:1/64 resolves via drop"); + + /* + * CLEANUP ALL the routes + */ + fib_table_entry_delete(fib_index, + &pfx_2001_c_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2001_a_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + 
&pfx_2001_b_s_64, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2001_1_3_s_128, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_2001_1_2_s_128, + FIB_SOURCE_ADJ); + local_pfx.fp_len = 64; + fib_table_entry_delete(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE); + connected_pfx.fp_len = 64; + fib_table_entry_delete(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE); + connected_pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, &connected_pfx, + FIB_SOURCE_INTERFACE); + + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_a_s_64)), + "2001::a/64 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64)), + "2001::b/64 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_1_3_s_128)), + "2001:0:0:1::3/128 removed"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &pfx_2001_1_2_s_128)), + "2001:0:0:1::3/128 removed"); + local_pfx.fp_len = 64; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx)), + "2001:0:0:1/64 removed"); + local_pfx.fp_len = 128; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &local_pfx)), + "2001:0:0:1::1/128 removed"); + connected_pfx.fp_len = 64; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &connected_pfx)), + "2001:0:0:2/64 removed"); + connected_pfx.fp_len = 128; + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, &connected_pfx)), + "2001:0:0:2::1/128 removed"); + + /* + * -8 entries. -7 path-lists (1 was shared). 
+ */ + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + /* + * now remove the VRF + */ + fib_table_unlock(fib_index, FIB_PROTOCOL_IP6); + + FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d", + fib_path_list_db_size()); + FIB_TEST((NPS-6 == fib_path_list_pool_size()), "path list pool size is%d", + fib_path_list_pool_size()); + FIB_TEST((NPS-6 == fib_entry_pool_size()), "entry pool size is %d", + fib_entry_pool_size()); + + adj_unlock(ai_02); + adj_unlock(ai_01); + + /* + * return the interfaces to up state + */ + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + error = vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[1]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + return (0); +} + +/* + * Test Attached Exports + */ +static int +fib_test_ae (void) +{ + const dpo_id_t *dpo, *dpo_drop; + const u32 fib_index = 0; + fib_node_index_t fei; + test_main_t *tm; + ip4_main_t *im; + + tm = &test_main; + im = &ip4_main; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * add interface routes. We'll assume this works. It's more rigorously + * tested elsewhere. 
+ */ + fib_prefix_t local_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.10.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index; + + dpo_drop = drop_dpo_get(DPO_PROTO_IP4); + + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + /* + * Add an 2 ARP entry => a complete ADJ plus adj-fib. 
+ */ + fib_prefix_t pfx_10_10_10_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.1 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + fib_node_index_t ai; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 created"); + ai = fib_entry_get_adj(fei); + + /* + * create another FIB table into which routes will be imported + */ + u32 import_fib_index1; + + import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 11); + + /* + * Add an attached route in the import FIB + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created"); + + /* + * check for the presence of the adj-fibs in the import table + */ + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "adj-fib1 Import uses same adj as export"); + + /* + * check for the presence of the local in the import table + */ + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * Add another adj-fin in the export table. 
Expect this + * to get magically exported; + */ + fib_prefix_t pfx_10_10_10_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.2 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 present"); + ai = fib_entry_get_adj(fei); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + FIB_TEST((FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags(fei)), + "ADJ-fib2 imported flags %d", + fib_entry_get_flags(fei)); + + /* + * create a 2nd FIB table into which routes will be imported + */ + u32 import_fib_index2; + + import_fib_index2 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 12); + + /* + * Add an attached route in the import FIB + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index2, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached export created"); + + /* + * check for the presence of all the adj-fibs and local in the import table + */ + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != 
fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * add a 3rd adj-fib. expect it to be exported to both tables. + */ + fib_prefix_t pfx_10_10_10_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.10.10.3 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a03), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_10_10_10_3_s_32, + FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_3_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 present"); + ai = fib_entry_get_adj(fei); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB1"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib3 imported to FIB2"); + FIB_TEST((ai == fib_entry_get_adj(fei)), + "Import uses same adj as export"); + + /* + * remove the 3rd adj fib. 
we expect it to be removed from both FIBs + */ + fib_table_entry_delete(fib_index, + &pfx_10_10_10_3_s_32, + FIB_SOURCE_ADJ); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 remved"); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB1"); + + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_3_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib3 removed from FIB2"); + + /* + * remove the attached route from the 2nd FIB. expect the imported + * entires to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_delete(import_fib_index2, + &local_pfx, + FIB_SOURCE_API); + fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "attached export removed"); + + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB2"); + fei = fib_table_lookup_exact_match(import_fib_index2, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB2"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index2, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB2"); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 still in FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 still in FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local still in FIB1"); + + /* + * modify the route in FIB1 so it is no longer attached. 
expect the imported + * entires to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_2_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * modify it back to attached. expect the adj-fibs back + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(import_fib_index1, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported in FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported in FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported in FIB1"); + + /* + * add a covering attached next-hop for the interface address, so we have + * a valid adj to find when we check the forwarding tables + */ + fib_prefix_t pfx_10_0_0_0_s_8 = { + .fp_len = 8, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + /* 10.0.0.0 */ + .ip4.as_u32 = clib_host_to_net_u32(0x0a000000), + }, + }; + + fei = fib_table_entry_update_one_path(fib_index, + &pfx_10_0_0_0_s_8, + 
FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_3_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + dpo = fib_entry_contribute_ip_forwarding(fei); + + /* + * remove the route in the export fib. expect the adj-fibs to be removed + */ + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_INTERFACE); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "Delete export: ADJ-fib1 removed from FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * the adj-fibs in the export VRF are present in the FIB table, + * but not installed in forwarding, since they have no attached cover. + * Consequently a lookup in the MTRIE gives the adj for the covering + * route 10.0.0.0/8. + */ + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export"); + + index_t lbi; + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.1 forwards on \n%U not \n%U", + format_load_balance, lbi, 0, + format_dpo_id, dpo, 0); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_3_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.3 forwards on %U", format_dpo_id, dpo, 0); + + /* + * add the export prefix back, but not as attached. 
+ * No adj-fibs in export nor import tables + */ + local_pfx.fp_len = 24; + fei = fib_table_entry_update_one_path(fib_index, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_10_10_10_1_s_32.fp_addr, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + dpo = fib_entry_contribute_ip_forwarding(fei); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "non-attached in export: ADJ-fib1 in export"); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_1_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.1 forwards on %U", format_dpo_id, dpo, 0); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 in export"); + lbi = ip4_fib_forwarding_lookup(fib_index, &pfx_10_10_10_2_s_32.fp_addr.ip4); + FIB_TEST(lbi == dpo->dpoi_index, + "10.10.10.2 forwards on %U", format_dpo_id, dpo, 0); + + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib1 removed from FIB1"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "ADJ-fib2 removed from FIB1"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID == fei), "local removed from FIB1"); + + /* + * modify the export prefix so it is attached. 
expect all covereds to return + */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(fib_index, + &local_pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 imported"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * modify the export prefix so connected. no change. 
+ */ + local_pfx.fp_len = 24; + fib_table_entry_update_one_path(fib_index, &local_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib1 reinstalled in export"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(fib_index, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 reinstalled in export"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(fib_index, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local reinstalled in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "attached in export: ADJ-fib1 imported"); + dpo = fib_entry_contribute_ip_forwarding(fei); + FIB_TEST(dpo_cmp(dpo_drop, load_balance_get_bucket(dpo->dpoi_index, 0)), + "Adj-fib1 is not drop in export"); + fei = fib_table_lookup_exact_match(import_fib_index1, &pfx_10_10_10_2_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "ADJ-fib2 imported"); + local_pfx.fp_len = 32; + fei = fib_table_lookup_exact_match(import_fib_index1, &local_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "local imported"); + + /* + * CLEANUP + */ + fib_table_entry_delete(fib_index, + &pfx_10_0_0_0_s_8, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_1_s_32, + FIB_SOURCE_ADJ); + fib_table_entry_delete(fib_index, + &pfx_10_10_10_2_s_32, + FIB_SOURCE_ADJ); + local_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_API); + 
fib_table_entry_delete(fib_index, + &local_pfx, + FIB_SOURCE_INTERFACE); + local_pfx.fp_len = 24; + fib_table_entry_delete(import_fib_index1, + &local_pfx, + FIB_SOURCE_API); + + fib_table_unlock(import_fib_index1, FIB_PROTOCOL_IP4); + fib_table_unlock(import_fib_index2, FIB_PROTOCOL_IP4); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + return (0); +} + + +/* + * Test the recursive route route handling for GRE tunnels + */ +static int +fib_test_label (void) +{ + fib_node_index_t fei, ai_mpls_10_10_10_1, ai_v4_10_10_11_1, ai_v4_10_10_11_2, ai_mpls_10_10_11_2, ai_mpls_10_10_11_1; + const u32 fib_index = 0; + test_main_t *tm; + ip4_main_t *im; + int lb_count, ii; + + lb_count = pool_elts(load_balance_pool); + tm = &test_main; + im = &ip4_main; + + /* + * add interface routes. We'll assume this works. It's more rigorously + * tested elsewhere. + */ + fib_prefix_t local0_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.10.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0a0a), + }, + }, + }; + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index; + + fib_table_entry_update_one_path(fib_index, &local0_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local0_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local0_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local0_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = 
fib_table_lookup_exact_match(fib_index, &local0_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + fib_prefix_t local1_pfx = { + .fp_len = 24, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4 = { + /* 10.10.11.10 */ + .as_u32 = clib_host_to_net_u32(0x0a0a0b0a), + }, + }, + }; + + vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index); + im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index; + + fib_table_entry_update_one_path(fib_index, &local1_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[1]->sw_if_index, + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local1_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "attached interface route present"); + + local1_pfx.fp_len = 32; + fib_table_entry_update_one_path(fib_index, &local1_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + FIB_PROTOCOL_IP4, + NULL, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + fei = fib_table_lookup_exact_match(fib_index, &local1_pfx); + + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), + "local interface route present"); + + ip46_address_t nh_10_10_10_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }, + }; + ip46_address_t nh_10_10_11_1 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0b01), + }, + }; + ip46_address_t nh_10_10_11_2 = { + .ip4 = { + .as_u32 = clib_host_to_net_u32(0x0a0a0b02), + }, + }; + + ai_v4_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index); + ai_v4_10_10_11_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_IP4, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index); + ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + ai_mpls_10_10_11_2 = 
adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index); + ai_mpls_10_10_11_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index); + + /* + * Add an etry with one path with a real out-going label + */ + fib_prefix_t pfx_1_1_1_1_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010101), + }, + }; + fib_test_lb_bucket_t l99_eos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 99, + .eos = MPLS_EOS, + }, + }; + fib_test_lb_bucket_t l99_neos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 99, + .eos = MPLS_NON_EOS, + }, + }; + mpls_label_t *l99 = NULL; + vec_add1(l99, 99); + + fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l99, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "1.1.1.1/32 created"); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l99_eos_o_10_10_10_1), + "1.1.1.1/32 LB 1 bucket via label 99 over 10.10.10.1"); + + /* + * add a path with an implicit NULL label + */ + fib_test_lb_bucket_t a_o_10_10_11_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_v4_10_10_11_1, + }, + }; + fib_test_lb_bucket_t a_mpls_o_10_10_11_1 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_mpls_10_10_11_1, + }, + }; + mpls_label_t *l_imp_null = NULL; + vec_add1(l_imp_null, MPLS_IETF_IMPLICIT_NULL_LABEL); + + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + l_imp_null, + FIB_ROUTE_PATH_FLAG_NONE); 
+ + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1), + "1.1.1.1/32 LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1"); + + /* + * assign the route a local label + */ + fib_table_entry_local_label_add(fib_index, + &pfx_1_1_1_1_s_32, + 24001); + + fib_prefix_t pfx_24001_eos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 24001, + .fp_eos = MPLS_EOS, + }; + fib_prefix_t pfx_24001_neos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 24001, + .fp_eos = MPLS_NON_EOS, + }; + + /* + * The EOS entry should link to both the paths, + * and use an ip adj for the imp-null + * The NON-EOS entry should link to both the paths, + * and use an mpls adj for the imp-null + */ + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1), + "24001/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1"); + + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 bucket via: " + "label 99 over 10.10.10.1 ", + "mpls-adj via 10.10.11.1"); + + /* + * add an unlabelled path, this is excluded from the neos chains, + */ + fib_test_lb_bucket_t adj_o_10_10_11_2 = { + .type = FT_LB_ADJ, + .adj = { + .adj = ai_v4_10_10_11_2, + }, + }; + + fei = fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_2, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + 
&l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj over 10.10.11.2"); + + /* + * get and lock a reference to the non-eos of the via entry 1.1.1.1/32 + */ + dpo_id_t non_eos_1_1_1_1 = DPO_INVALID; + fib_entry_contribute_forwarding(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_1); + + /* + * n-eos has only the 2 labelled paths + */ + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj-mpls over 10.10.11.2"); + + /* + * A labelled recursive + */ + fib_prefix_t pfx_2_2_2_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020202), + }, + }; + fib_test_lb_bucket_t l1600_eos_o_1_1_1_1 = { + .type = FT_LB_LABEL_O_LB, + .label_o_lb = { + .lb = non_eos_1_1_1_1.dpoi_index, + .label = 1600, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *l1600 = NULL; + vec_add1(l1600, 1600); + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + l1600, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + "2.2.2.2.2/32 LB 1 buckets via: " + "label 1600 over 1.1.1.1"); + + dpo_id_t dpo_44 = DPO_INVALID; + index_t urpfi; + + fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_UNICAST_IP4, &dpo_44); + urpfi = 
load_balance_get_urpf(dpo_44.dpoi_index); + + FIB_TEST(fib_urpf_check(urpfi, tm->hw[0]->sw_if_index), + "uRPF check for 2.2.2.2/32 on %d OK", + tm->hw[0]->sw_if_index); + FIB_TEST(fib_urpf_check(urpfi, tm->hw[1]->sw_if_index), + "uRPF check for 2.2.2.2/32 on %d OK", + tm->hw[1]->sw_if_index); + FIB_TEST(!fib_urpf_check(urpfi, 99), + "uRPF check for 2.2.2.2/32 on 99 not-OK", + 99); + + fib_entry_contribute_forwarding(fei, FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, &dpo_44); + FIB_TEST(urpfi == load_balance_get_urpf(dpo_44.dpoi_index), + "Shared uRPF on IP and non-EOS chain"); + + dpo_reset(&dpo_44); + + /* + * we are holding a lock on the non-eos LB of the via-entry. + * do a PIC-core failover by shutting the link of the via-entry. + * + * shut down the link with the valid label + */ + vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + 0); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "adj over 10.10.11.1, ", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "adj over 10.10.11.1, ", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 buckets via: " + "adj-mpls over 10.10.11.2"); + + /* + * test that the pre-failover load-balance has been in-place + * modified + */ + dpo_id_t current = DPO_INVALID; + fib_entry_contribute_forwarding(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + ¤t); + + FIB_TEST(!dpo_cmp(&non_eos_1_1_1_1, + ¤t), + "PIC-core LB inplace modified %U %U", + format_dpo_id, &non_eos_1_1_1_1, 0, + 
format_dpo_id, ¤t, 0); + + dpo_reset(&non_eos_1_1_1_1); + dpo_reset(¤t); + + /* + * no-shut the link with the valid label + */ + vnet_sw_interface_set_flags(vnet_get_main(), + tm->hw[0]->sw_if_index, + VNET_SW_INTERFACE_FLAG_ADMIN_UP); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 16, // 3 choices spread over 16 buckets + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &l99_eos_o_10_10_10_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &a_o_10_10_11_1, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2, + &adj_o_10_10_11_2), + "24001/eos LB 16 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 2, + &l99_neos_o_10_10_10_1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj-mpls over 10.10.11.2"); + + /* + * remove the first path with the valid label + */ + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + 
FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &a_o_10_10_11_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "adj over 10.10.11.1", + "adj-v4 over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &a_mpls_o_10_10_11_1), + "24001/neos LB 1 buckets via: " + "adj-mpls over 10.10.11.2"); + + /* + * remove the other path with a valid label + */ + fib_test_lb_bucket_t bucket_drop = { + .type = FT_LB_SPECIAL, + .special = { + .adj = DPO_PROTO_IP4, + }, + }; + + fib_table_entry_path_remove(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 1 buckets via: " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 1, + &adj_o_10_10_11_2), + "24001/eos LB 1 buckets via: " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &bucket_drop), + "24001/eos LB 1 buckets via: DROP"); + + /* + * add back the path with the valid 
label + */ + l99 = NULL; + vec_add1(l99, 99); + + fib_table_entry_path_add(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l99, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "1.1.1.1/32 LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_24001_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &l99_neos_o_10_10_10_1), + "24001/neos LB 1 buckets via: " + "label 99 over 10.10.10.1"); + + /* + * change the local label + */ + fib_table_entry_local_label_add(fib_index, + &pfx_1_1_1_1_s_32, + 25005); + + fib_prefix_t pfx_25005_eos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 25005, + .fp_eos = MPLS_EOS, + }; + fib_prefix_t pfx_25005_neos = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 25005, + .fp_eos = MPLS_NON_EOS, + }; + + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup(fib_index, &pfx_24001_eos)), + "24001/eos removed after label change"); + FIB_TEST((FIB_NODE_INDEX_INVALID == + fib_table_lookup(fib_index, &pfx_24001_neos)), + "24001/eos removed after label change"); + + fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_25005_eos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "25005/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + fei = 
fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID, + &pfx_25005_neos); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &l99_neos_o_10_10_10_1), + "25005/neos LB 1 buckets via: " + "label 99 over 10.10.10.1"); + + /* + * remove the local label. + * the check that the MPLS entries are gone is done by the fact the + * MPLS table is no longer present. + */ + fib_table_entry_local_label_remove(fib_index, + &pfx_1_1_1_1_s_32, + 25005); + + fei = fib_table_lookup(fib_index, &pfx_1_1_1_1_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l99_eos_o_10_10_10_1, + &adj_o_10_10_11_2), + "24001/eos LB 2 buckets via: " + "label 99 over 10.10.10.1, " + "adj over 10.10.11.2"); + + FIB_TEST((FIB_NODE_INDEX_INVALID == + mpls_fib_index_from_table_id(MPLS_FIB_DEFAULT_TABLE_ID)), + "No more MPLS FIB entries => table removed"); + + /* + * add another via-entry for the recursive + */ + fib_prefix_t pfx_1_1_1_2_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x01010102), + }, + }; + fib_test_lb_bucket_t l101_eos_o_10_10_10_1 = { + .type = FT_LB_LABEL_O_ADJ, + .label_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label = 101, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *l101 = NULL; + vec_add1(l101, 101); + + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l101, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l101_eos_o_10_10_10_1), + "1.1.1.2/32 LB 1 buckets via: " + "label 101 over 10.10.10.1"); + + dpo_id_t non_eos_1_1_1_2 = DPO_INVALID; + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + &pfx_1_1_1_1_s_32), + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_1); + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + 
&pfx_1_1_1_2_s_32), + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &non_eos_1_1_1_2); + + fib_test_lb_bucket_t l1601_eos_o_1_1_1_2 = { + .type = FT_LB_LABEL_O_LB, + .label_o_lb = { + .lb = non_eos_1_1_1_2.dpoi_index, + .label = 1601, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *l1601 = NULL; + vec_add1(l1601, 1601); + + l1600_eos_o_1_1_1_1.label_o_lb.lb = non_eos_1_1_1_1.dpoi_index; + + fei = fib_table_entry_path_add(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_2_s_32.fp_addr, + ~0, + fib_index, + 1, + l1601, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l1600_eos_o_1_1_1_1, + &l1601_eos_o_1_1_1_2), + "2.2.2.2/32 LB 2 buckets via: " + "label 1600 via 1.1,1.1, " + "label 16001 via 1.1.1.2"); + + /* + * update the via-entry so it no longer has an imp-null path. + * the LB for the recursive can use an imp-null + */ + l_imp_null = NULL; + vec_add1(l_imp_null, MPLS_IETF_IMPLICIT_NULL_LABEL); + + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + l_imp_null, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &a_o_10_10_11_1), + "1.1.1.2/32 LB 1 buckets via: " + "adj 10.10.11.1"); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 2, + &l1600_eos_o_1_1_1_1, + &l1601_eos_o_1_1_1_2), + "2.2.2.2/32 LB 2 buckets via: " + "label 1600 via 1.1,1.1, " + "label 16001 via 1.1.1.2"); + + /* + * update the via-entry so it no longer has labelled paths. 
+ * the LB for the recursive should exclue this via form its LB + */ + fei = fib_table_entry_update_one_path(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &a_o_10_10_11_1), + "1.1.1.2/32 LB 1 buckets via: " + "adj 10.10.11.1"); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + "2.2.2.2/32 LB 1 buckets via: " + "label 1600 via 1.1,1.1"); + + dpo_reset(&non_eos_1_1_1_1); + dpo_reset(&non_eos_1_1_1_2); + + /* + * Add a recursive with no out-labels. We expect to use the IP of the via + */ + fib_prefix_t pfx_2_2_2_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020203), + }, + }; + dpo_id_t ip_1_1_1_1 = DPO_INVALID; + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_3_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, + &pfx_1_1_1_1_s_32), + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + &ip_1_1_1_1); + + fib_test_lb_bucket_t ip_o_1_1_1_1 = { + .type = FT_LB_O_LB, + .lb = { + .lb = ip_1_1_1_1.dpoi_index, + }, + }; + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1_1_1_1), + "2.2.2.2.3/32 LB 1 buckets via: " + "ip 1.1.1.1"); + + /* + * Add a recursive with an imp-null out-label. 
+ * We expect to use the IP of the via + */ + fib_prefix_t pfx_2_2_2_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020204), + }, + }; + + fib_table_entry_update_one_path(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &pfx_1_1_1_1_s_32.fp_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1_1_1_1), + "2.2.2.2.4/32 LB 1 buckets via: " + "ip 1.1.1.1"); + + dpo_reset(&ip_1_1_1_1); + + /* + * Create an entry with a deep label stack + */ + fib_prefix_t pfx_2_2_5_5_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020505), + }, + }; + fib_test_lb_bucket_t ls_eos_o_10_10_10_1 = { + .type = FT_LB_LABEL_STACK_O_ADJ, + .label_stack_o_adj = { + .adj = ai_mpls_10_10_11_1, + .label_stack_size = 8, + .label_stack = { + 200, 201, 202, 203, 204, 205, 206, 207 + }, + .eos = MPLS_EOS, + }, + }; + mpls_label_t *label_stack = NULL; + vec_validate(label_stack, 7); + for (ii = 0; ii < 8; ii++) + { + label_stack[ii] = ii + 200; + } + + fei = fib_table_entry_update_one_path(fib_index, + &pfx_2_2_5_5_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_11_1, + tm->hw[1]->sw_if_index, + ~0, // invalid fib index + 1, + label_stack, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ls_eos_o_10_10_10_1), + "2.2.5.5/32 LB 1 buckets via: " + "adj 10.10.11.1"); + fib_table_entry_delete_index(fei, FIB_SOURCE_API); + + /* + * cleanup + */ + fib_table_entry_delete(fib_index, + &pfx_1_1_1_2_s_32, + FIB_SOURCE_API); + + fei = fib_table_lookup(fib_index, &pfx_2_2_2_2_s_32); + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &l1600_eos_o_1_1_1_1), + 
"2.2.2.2/32 LB 1 buckets via: " + "label 1600 via 1.1,1.1"); + + fib_table_entry_delete(fib_index, + &pfx_1_1_1_1_s_32, + FIB_SOURCE_API); + + FIB_TEST(fib_test_validate_entry(fei, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &bucket_drop), + "2.2.2.2/32 LB 1 buckets via: DROP"); + + fib_table_entry_delete(fib_index, + &pfx_2_2_2_2_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2_2_2_3_s_32, + FIB_SOURCE_API); + fib_table_entry_delete(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API); + + adj_unlock(ai_mpls_10_10_10_1); + adj_unlock(ai_mpls_10_10_11_2); + adj_unlock(ai_v4_10_10_11_1); + adj_unlock(ai_v4_10_10_11_2); + adj_unlock(ai_mpls_10_10_11_1); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + local0_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local0_pfx, + FIB_SOURCE_INTERFACE); + local0_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local0_pfx, + FIB_SOURCE_INTERFACE); + local1_pfx.fp_len = 32; + fib_table_entry_delete(fib_index, + &local1_pfx, + FIB_SOURCE_INTERFACE); + local1_pfx.fp_len = 24; + fib_table_entry_delete(fib_index, + &local1_pfx, + FIB_SOURCE_INTERFACE); + + /* + * +1 for the drop LB in the MPLS tables. 
+ */ + FIB_TEST(lb_count+1 == pool_elts(load_balance_pool), + "Load-balance resources freed %d of %d", + lb_count+1, pool_elts(load_balance_pool)); + + return (0); +} + +#define N_TEST_CHILDREN 4 +#define PARENT_INDEX 0 +/* + * A node used by the walk test. The fib_node_t is the first member and + * the callbacks below cast a fib_node_t pointer back to this type. + * sibling - value returned by fib_node_child_add(), handed back to + * fib_node_child_remove() during cleanup. + * index - the slot of the node in fib_test_nodes. + * ctxs - vector holding a copy of the context of every back-walk visit. + * destroyed - set to 1 by the last-lock-gone callback. + */ +typedef struct fib_node_test_t_ +{ + fib_node_t node; + u32 sibling; + u32 index; + fib_node_back_walk_ctx_t *ctxs; + u32 destroyed; +} fib_node_test_t; +/* + * The test nodes: slot PARENT_INDEX is the parent, slots 1 to + * N_TEST_CHILDREN are the children. + */ +static fib_node_test_t fib_test_nodes[N_TEST_CHILDREN+1]; + +#define PARENT() (&fib_test_nodes[PARENT_INDEX].node) +/* + * Iterate _tc over the child slots 1 to N_TEST_CHILDREN. Uses a variable + * named ii that must be declared in the calling scope; ii holds the slot + * number of the current child. + */ +#define FOR_EACH_TEST_CHILD(_tc) \ + for (ii = 1, (_tc) = &fib_test_nodes[1]; \ + ii < N_TEST_CHILDREN+1; \ + ii++, (_tc) = &fib_test_nodes[ii]) +/* + * fnv_get callback: return the fib_node_t stored in slot index of + * fib_test_nodes. + */ +static fib_node_t * +fib_test_child_get_node (fib_node_index_t index) +{ + return (&fib_test_nodes[index].node); +} +/* + * Selects what a visited node does in its back-walk callback: + * 1 - start a sync walk from itself, + * 2 - start a high priority async walk from itself, + * any other value - nothing extra. + */ +static int fib_test_walk_spawns_walks; +/* + * fnv_back_walk callback: append a copy of the context to the ctxs vector + * of the visited node, optionally spawn a further walk from that node + * (see fib_test_walk_spawns_walks) and always ask for the walk to continue. + */ +static fib_node_back_walk_rc_t +fib_test_child_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_test_t *tc = (fib_node_test_t*) node; + + vec_add1(tc->ctxs, *ctx); + + if (1 == fib_test_walk_spawns_walks) + fib_walk_sync(FIB_NODE_TYPE_TEST, tc->index, ctx); + if (2 == fib_test_walk_spawns_walks) + fib_walk_async(FIB_NODE_TYPE_TEST, tc->index, + FIB_WALK_PRIORITY_HIGH, ctx); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} +/* + * fnv_last_lock callback: flag the node as destroyed so that the test can + * check for it afterwards. + */ +static void +fib_test_child_last_lock_gone (fib_node_t *node) +{ + fib_node_test_t *tc = (fib_node_test_t *)node; + + tc->destroyed = 1; +} + +/** + * The virtual function table that fib_test_walk registers for nodes of + * type FIB_NODE_TYPE_TEST. + */ +static const fib_node_vft_t fib_test_child_vft = { + .fnv_get = fib_test_child_get_node, + .fnv_last_lock = fib_test_child_last_lock_gone, + .fnv_back_walk = fib_test_child_back_walk_notify, +}; + +/* + * the function (that should have been static but isn't so I can do this) + * that processes the walk from the async queue, followed by the function + * the test uses to read the number of walks queued at a given priority + * (judging by how it is used below). Both are only declared here. + */ +f64 fib_walk_process_queues(vlib_main_t * vm, + const f64 quota); +u32 fib_walk_queue_get_size(fib_walk_priority_t prio); +/* + * Test of the FIB walk infra using a parent test node with N_TEST_CHILDREN + * children. In order it covers: a single async walk; a high versus a low + * priority walk; two walks that merge; two walks that do not merge; a walk + * advanced one step at a time with a zero quota; an async walk that catches + * a parked async walk; a sync walk that catches a parked async walk; and + * walks over a graph loop (the parent added as a child of child 1) in which + * each visited node spawns further sync or async walks. The loop is removed + * again before the final cleanup. Ends with return (0). + */ +static int +fib_test_walk (void) +{ + fib_node_back_walk_ctx_t high_ctx
= {}, low_ctx = {}; + fib_node_test_t *tc; + vlib_main_t *vm; + u32 ii; + + vm = vlib_get_main(); + fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft); + + /* + * init a fake node on which we will add children, then init and lock + * each child and add it to the parent. the value returned by + * fib_node_child_add() is kept so the child can be removed at cleanup. + */ + fib_node_init(&fib_test_nodes[PARENT_INDEX].node, + FIB_NODE_TYPE_TEST); + + FOR_EACH_TEST_CHILD(tc) + { + fib_node_init(&tc->node, FIB_NODE_TYPE_TEST); + fib_node_lock(&tc->node); + tc->ctxs = NULL; + tc->index = ii; + tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST, + PARENT_INDEX, + FIB_NODE_TYPE_TEST, ii); + } + + /* + * enqueue a walk across the children of the parent. + * the pre-walk check below expects N_TEST_CHILDREN+1 elements in the + * child list of the parent - presumably the extra one is the queued + * walk itself; TODO confirm. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children pre-walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * give the walk a large amount of time so it gets to the end + */ + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * walk again. should be no increase in the number of visits, since + * the walk will have terminated. + */ + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times", + ii, vec_len(tc->ctxs)); + } + + /* + * schedule a low and high priority walk. expect the high to be performed + * before the low. + * schedule the high prio walk first so that it is further from the head + * of the dependency list. that way it won't merge with the low one.
+ */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_LOW, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason, + "%d child visitsed by high prio walk", ii); + FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason, + "%d child visitsed by low prio walk", ii); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post prio walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post prio walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule 2 walks of the same priority that can be merged. + * here both contexts carry the same reason. + * expect that each child is thus visited only once. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times during merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule 2 walks of the same priority that cannot be merged. + * here the two contexts carry different reasons. + * expect that each child is thus visited twice and in the order + * in which the walks were scheduled.
+ */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &low_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + FIB_TEST(high_ctx.fnbw_reason == tc->ctxs[0].fnbw_reason, + "%d child visitsed by high prio walk", ii); + FIB_TEST(low_ctx.fnbw_reason == tc->ctxs[1].fnbw_reason, + "%d child visitsed by low prio walk", ii); + vec_free(tc->ctxs); + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is empty post no-merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post no-merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule a walk that makes progress on just one child. + * we do this by giving the queue draining process zero + * time quanta. it's a do..while loop, so it does something. + * the checks below expect the child in the highest slot + * (N_TEST_CHILDREN) to be the one that is visited first.
+ */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_process_queues(vm, 0); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii == N_TEST_CHILDREN) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in zero quanta walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times in 0 quanta walk", + ii, vec_len(tc->ctxs)); + } + } + FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post zero quanta walk"); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post zero qunta walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * take one more step. the children in the two highest slots should now + * have been visited once each and the walk should still be queued. + */ + fib_walk_process_queues(vm, 0); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd zero quanta walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(0 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd 0 quanta walk", + ii, vec_len(tc->ctxs)); + } + } + FIB_TEST(1 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post zero quanta walk"); + FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post zero qunta walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * schedule another walk that will catch-up and merge. + * the children already visited by the parked walk are expected to end + * up with 2 visits, the others with 1. + * NOTE(review): the queue check after the loop asserts that the queue + * is empty although its message text says not empty - the message + * looks copy-pasted; confirm and fix separately.
+ */ + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(2 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd zero quanta merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + else + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in 2nd 0 quanta merge walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post 2nd zero quanta merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post 2nd zero qunta merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * park an async walk in the middle of the list, then have a sync walk catch + * it. same expectations as async catches async. + * NOTE(review): the two checks after the loop reuse the message texts of + * the previous section (2nd zero quanta merge walk); confirm and fix + * separately. + */ + high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE; + + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 0); + fib_walk_process_queues(vm, 0); + + fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx); + + FOR_EACH_TEST_CHILD(tc) + { + if (ii >= N_TEST_CHILDREN-1) + { + FIB_TEST(2 == vec_len(tc->ctxs), + "%d child visitsed %d times in sync catches async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + else + { + FIB_TEST(1 == vec_len(tc->ctxs), + "%d child visitsed %d times in sync catches async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + } + FIB_TEST(0 == fib_walk_queue_get_size(FIB_WALK_PRIORITY_HIGH), + "Queue is not empty post 2nd zero quanta merge walk"); + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post 2nd zero qunta merge walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * make the parent a child of one of its children, thus inducing a
routing loop. + */ + fib_test_nodes[PARENT_INDEX].sibling = + fib_node_child_add(FIB_NODE_TYPE_TEST, + 1, // the first child + FIB_NODE_TYPE_TEST, + PARENT_INDEX); + + /* + * execute a sync walk from the parent. each child visited spawns more sync + * walks. we expect the walk to terminate. + */ + fib_test_walk_spawns_walks = 1; + + fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * child 1 - which is last in the list - has the loop. + * the other children are thus visited first. then we meet + * child 1. we go round the loop again, visiting the other children. + * then we meet the walk in the dep list and bail. child 1 is not visited + * again. + */ + if (1 == ii) + { + FIB_TEST(1 == vec_len(tc->ctxs), + "child %d visitsed %d times during looped sync walk", + ii, vec_len(tc->ctxs)); + } + else + { + FIB_TEST(2 == vec_len(tc->ctxs), + "child %d visitsed %d times during looped sync walk", + ii, vec_len(tc->ctxs)); + } + vec_free(tc->ctxs); + } + FIB_TEST(N_TEST_CHILDREN == fib_node_list_get_size(PARENT()->fn_children), + "Parent has %d children post sync loop walk", + fib_node_list_get_size(PARENT()->fn_children)); + + /* + * the walk doesn't reach the max depth because the infra knows that sync + * meets sync implies a loop and bails early. + */ + FIB_TEST(high_ctx.fnbw_depth == 9, + "Walk context depth %d post sync loop walk", + high_ctx.fnbw_depth); + + /* + * execute an async walk of the graph loop, where each child spawns sync walks + */ + high_ctx.fnbw_depth = 0; + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * we don't really care how many times the children are visited, as long as + * it is at least once.
+ */ + FIB_TEST(1 <= vec_len(tc->ctxs), + "child %d visitsed %d times during looped aync spawns sync walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + + /* + * execute an async walk of the graph loop, where each child spawns async walks + */ + fib_test_walk_spawns_walks = 2; + high_ctx.fnbw_depth = 0; + fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX, + FIB_WALK_PRIORITY_HIGH, &high_ctx); + + fib_walk_process_queues(vm, 1); + + FOR_EACH_TEST_CHILD(tc) + { + /* + * we don't really care how many times the children are visited, as long as + * it is at least once. + */ + FIB_TEST(1 <= vec_len(tc->ctxs), + "child %d visitsed %d times during looped async spawns async walk", + ii, vec_len(tc->ctxs)); + vec_free(tc->ctxs); + } + + + fib_node_child_remove(FIB_NODE_TYPE_TEST, + 1, // the first child + fib_test_nodes[PARENT_INDEX].sibling); + + /* + * cleanup: remove each child from the parent, then deinit it and drop + * the lock that was taken on it during setup. + */ + FOR_EACH_TEST_CHILD(tc) + { + fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX, + tc->sibling); + fib_node_deinit(&tc->node); + fib_node_unlock(&tc->node); + } + fib_node_deinit(PARENT()); + + /* + * The parent will be destroyed when the last lock on it goes. + * this test ensures all the walk objects are unlocking it.
+ */ + FIB_TEST((1 == fib_test_nodes[PARENT_INDEX].destroyed), + "Parent was destroyed"); + + return (0); +} + +static int +lfib_test (void) +{ + const mpls_label_t deag_label = 50; + const u32 lfib_index = 0; + const u32 fib_index = 0; + dpo_id_t dpo = DPO_INVALID; + const dpo_id_t *dpo1; + fib_node_index_t lfe; + lookup_dpo_t *lkd; + test_main_t *tm; + int lb_count; + adj_index_t ai_mpls_10_10_10_1; + + tm = &test_main; + lb_count = pool_elts(load_balance_pool); + + FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d", + adj_nbr_db_size()); + + /* + * MPLS enable an interface so we get the MPLS table created + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 1); + + ip46_address_t nh_10_10_10_1 = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01), + }; + ai_mpls_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, + VNET_LINK_MPLS, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index); + + /* + * Test the specials stack properly. + */ + fib_prefix_t exp_null_v6_pfx = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = MPLS_EOS, + .fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + .fp_payload_proto = DPO_PROTO_IP6, + }; + lfe = fib_table_lookup(lfib_index, &exp_null_v6_pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != lfe), + "%U/%U present", + format_mpls_unicast_label, MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + format_mpls_eos_bit, MPLS_EOS); + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((LOOKUP_TABLE_FROM_INPUT_INTERFACE == lkd->lkd_table), + "%U/%U is lookup in interface's
table", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((DPO_PROTO_IP6 == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + format_dpo_proto, lkd->lkd_proto); + + + /* + * A route deag route for EOS + */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_eos = MPLS_EOS, + .fp_label = deag_label, + .fp_payload_proto = DPO_PROTO_IP4, + }; + lfe = fib_table_entry_path_add(lfib_index, + &pfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + fib_index, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)), + "%U/%U present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + FIB_TEST((DPO_PROTO_IP4 == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS, + format_dpo_proto, lkd->lkd_proto); + + fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI); + + FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index, + &pfx)), + "%U/%U not present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + /* + * A route deag route for non-EOS + */ + pfx.fp_eos = MPLS_NON_EOS; + lfe = fib_table_entry_path_add(lfib_index, + &pfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &zero_addr, + ~0, + lfib_index, + 1, + NULL, + 
FIB_ROUTE_PATH_FLAG_NONE); + + FIB_TEST((lfe == fib_table_lookup(lfib_index, &pfx)), + "%U/%U present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS); + + fib_entry_contribute_forwarding(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &dpo); + dpo1 = load_balance_get_bucket(dpo.dpoi_index, 0); + lkd = lookup_dpo_get(dpo1->dpoi_index); + + FIB_TEST((fib_index == lkd->lkd_fib_index), + "%U/%U is deag in %d %U", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS, + lkd->lkd_fib_index, + format_dpo_id, &dpo, 0); + FIB_TEST((LOOKUP_INPUT_DST_ADDR == lkd->lkd_input), + "%U/%U is dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS); + + FIB_TEST((DPO_PROTO_MPLS == lkd->lkd_proto), + "%U/%U is %U dst deag", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_NON_EOS, + format_dpo_proto, lkd->lkd_proto); + + fib_table_entry_delete_index(lfe, FIB_SOURCE_CLI); + + FIB_TEST((FIB_NODE_INDEX_INVALID == fib_table_lookup(lfib_index, + &pfx)), + "%U/%U not present", + format_mpls_unicast_label, deag_label, + format_mpls_eos_bit, MPLS_EOS); + + dpo_reset(&dpo); + + /* + * An MPLS x-connect + */ + fib_prefix_t pfx_1200 = { + .fp_len = 21, + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_label = 1200, + .fp_eos = MPLS_NON_EOS, + }; + fib_test_lb_bucket_t neos_o_10_10_10_1 = { + .type = FT_LB_LABEL_STACK_O_ADJ, + .label_stack_o_adj = { + .adj = ai_mpls_10_10_10_1, + .label_stack_size = 4, + .label_stack = { + 200, 300, 400, 500, + }, + .eos = MPLS_NON_EOS, + }, + }; + dpo_id_t neos_1200 = DPO_INVALID; + dpo_id_t ip_1200 = DPO_INVALID; + mpls_label_t *l200 = NULL; + vec_add1(l200, 200); + vec_add1(l200, 300); + vec_add1(l200, 400); + vec_add1(l200, 500); + + lfe = fib_table_entry_update_one_path(fib_index, + &pfx_1200, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP4, + &nh_10_10_10_1, + tm->hw[0]->sw_if_index, + ~0, // invalid fib index + 1, + l200, + FIB_ROUTE_PATH_FLAG_NONE); + 
+ FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + 1, + &neos_o_10_10_10_1), + "1200/0 LB 1 buckets via: " + "adj 10.10.11.1"); + + /* + * A recursive route via the MPLS x-connect + */ + fib_prefix_t pfx_2_2_2_3_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020203), + }, + }; + fib_route_path_t *rpaths = NULL, rpath = { + .frp_proto = FIB_PROTOCOL_MPLS, + .frp_local_label = 1200, + .frp_sw_if_index = ~0, // recurive + .frp_fib_index = 0, // Default MPLS fib + .frp_weight = 1, + .frp_flags = FIB_ROUTE_PATH_FLAG_NONE, + .frp_label_stack = NULL, + }; + vec_add1(rpaths, rpath); + + fib_table_entry_path_add2(fib_index, + &pfx_2_2_2_3_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + + /* + * A labelled recursive route via the MPLS x-connect + */ + fib_prefix_t pfx_2_2_2_4_s_32 = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x02020204), + }, + }; + mpls_label_t *l999 = NULL; + vec_add1(l999, 999); + rpaths[0].frp_label_stack = l999, + + fib_table_entry_path_add2(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200), + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + &ip_1200); + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200), + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + &neos_1200); + + fib_test_lb_bucket_t ip_o_1200 = { + .type = FT_LB_O_LB, + .lb = { + .lb = ip_1200.dpoi_index, + }, + }; + fib_test_lb_bucket_t mpls_o_1200 = { + .type = FT_LB_LABEL_O_LB, + .label_o_lb = { + .lb = neos_1200.dpoi_index, + .label = 999, + .eos = MPLS_EOS, + }, + }; + + lfe = fib_table_lookup(fib_index, &pfx_2_2_2_3_s_32); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1200), + "2.2.2.2.3/32 LB 1 buckets via: label 1200 EOS"); + lfe = fib_table_lookup(fib_index, &pfx_2_2_2_4_s_32); + 
FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &mpls_o_1200), + "2.2.2.2.4/32 LB 1 buckets via: label 1200 non-EOS"); + + fib_table_entry_delete(fib_index, &pfx_1200, FIB_SOURCE_API); + fib_table_entry_delete(fib_index, &pfx_2_2_2_3_s_32, FIB_SOURCE_API); + fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API); + + dpo_reset(&neos_1200); + dpo_reset(&ip_1200); + + /* + * A recursive via a label that does not exist + */ + fib_test_lb_bucket_t bucket_drop = { + .type = FT_LB_SPECIAL, + .special = { + .adj = DPO_PROTO_MPLS, + }, + }; + + rpaths[0].frp_label_stack = NULL; + lfe = fib_table_entry_path_add2(fib_index, + &pfx_2_2_2_4_s_32, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + rpaths); + + fib_entry_contribute_forwarding(fib_table_lookup(fib_index, &pfx_1200), + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + &ip_1200); + ip_o_1200.lb.lb = ip_1200.dpoi_index; + + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &ip_o_1200), + "2.2.2.2.4/32 LB 1 buckets via: label 1200 EOS"); + lfe = fib_table_lookup(fib_index, &pfx_1200); + FIB_TEST(fib_test_validate_entry(lfe, + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + 1, + &bucket_drop), + "2.2.2.4/32 LB 1 buckets via: ip4-DROP"); + + fib_table_entry_delete(fib_index, &pfx_2_2_2_4_s_32, FIB_SOURCE_API); + + dpo_reset(&ip_1200); + + /* + * cleanup + */ + mpls_sw_interface_enable_disable(&mpls_main, + tm->hw[0]->sw_if_index, + 0); + + FIB_TEST(lb_count == pool_elts(load_balance_pool), + "Load-balance resources freed %d of %d", + lb_count, pool_elts(load_balance_pool)); + + return (0); +} + +static clib_error_t * +fib_test (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + int res; + + res = 0; + fib_test_mk_intf(4); + + if (unformat (input, "ip")) + { + res += fib_test_v4(); + res += fib_test_v6(); + } + else if (unformat (input, "label")) + { + res += fib_test_label(); + } + else if (unformat (input, "ae")) + { + res += fib_test_ae(); + } + 
else if (unformat (input, "lfib")) + { + res += lfib_test(); + } + else if (unformat (input, "walk")) + { + res += fib_test_walk(); + } + else + { + /* + * These walk UT aren't run as part of the full suite, since the + * fib-walk process must be disabled in order for the tests to work + * + * fib_test_walk(); + */ + res += fib_test_v4(); + res += fib_test_v6(); + res += fib_test_ae(); + res += fib_test_label(); + res += lfib_test(); + } + + if (res) + { + return clib_error_return(0, "FIB Unit Test Failed"); + } + else + { + return (NULL); + } +} + +VLIB_CLI_COMMAND (test_fib_command, static) = { + .path = "test fib", + .short_help = "fib unit tests - DO NOT RUN ON A LIVE SYSTEM", + .function = fib_test, +}; + +clib_error_t * +fib_test_init (vlib_main_t *vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (fib_test_init); diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c new file mode 100644 index 00000000000..b66e71940a5 --- /dev/null +++ b/src/vnet/fib/fib_types.c @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/ip/ip.h> + +#include <vnet/fib/fib_types.h> +#include <vnet/fib/fib_internal.h> +#include <vnet/mpls/mpls.h> + +/* + * arrays of protocol and link names + */ +static const char* fib_protocol_names[] = FIB_PROTOCOLS; +static const char* vnet_link_names[] = VNET_LINKS; +static const char* fib_forw_chain_names[] = FIB_FORW_CHAINS; + +u8 * +format_fib_protocol (u8 * s, va_list ap) +{ + fib_protocol_t proto = va_arg(ap, int); // fib_protocol_t promotion + + return (format (s, "%s", fib_protocol_names[proto])); +} + +u8 * +format_vnet_link (u8 * s, va_list ap) +{ + vnet_link_t link = va_arg(ap, int); // vnet_link_t promotion + + return (format (s, "%s", vnet_link_names[link])); +} + +u8 * +format_fib_forw_chain_type (u8 * s, va_list * args) +{ + fib_forward_chain_type_t fct = va_arg(*args, int); + + return (format (s, "%s", fib_forw_chain_names[fct])); +} + +void +fib_prefix_from_ip46_addr (const ip46_address_t *addr, + fib_prefix_t *pfx) +{ + ASSERT(!ip46_address_is_zero(addr)); + + pfx->fp_proto = ((ip46_address_is_ip4(addr) ? + FIB_PROTOCOL_IP4 : + FIB_PROTOCOL_IP6)); + pfx->fp_len = ((ip46_address_is_ip4(addr) ? 
+ 32 : 128)); + pfx->fp_addr = *addr; +} + +void +fib_prefix_from_mpls_label (mpls_label_t label, + fib_prefix_t *pfx) +{ + pfx->fp_proto = FIB_PROTOCOL_MPLS; + pfx->fp_len = 21; + pfx->fp_label = label; + pfx->fp_eos = MPLS_NON_EOS; +} + +int +fib_prefix_cmp (const fib_prefix_t *p1, + const fib_prefix_t *p2) +{ + int res; + + res = (p1->fp_proto - p2->fp_proto); + + if (0 == res) + { + switch (p1->fp_proto) + { + case FIB_PROTOCOL_IP4: + case FIB_PROTOCOL_IP6: + res = (p1->fp_len - p2->fp_len); + + if (0 == res) + { + res = ip46_address_cmp(&p1->fp_addr, &p2->fp_addr); + } + break; + case FIB_PROTOCOL_MPLS: + res = (p1->fp_label - p2->fp_label); + + if (0 == res) + { + res = (p1->fp_eos - p2->fp_eos); + } + break; + } + } + + return (res); +} + +int +fib_prefix_is_cover (const fib_prefix_t *p1, + const fib_prefix_t *p2) +{ + switch (p1->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (ip4_destination_matches_route(&ip4_main, + &p1->fp_addr.ip4, + &p2->fp_addr.ip4, + p1->fp_len)); + case FIB_PROTOCOL_IP6: + return (ip6_destination_matches_route(&ip6_main, + &p1->fp_addr.ip6, + &p2->fp_addr.ip6, + p1->fp_len)); + case FIB_PROTOCOL_MPLS: + break; + } + return (0); +} + +int +fib_prefix_is_host (const fib_prefix_t *prefix) +{ + switch (prefix->fp_proto) + { + case FIB_PROTOCOL_IP4: + return (prefix->fp_len == 32); + case FIB_PROTOCOL_IP6: + return (prefix->fp_len == 128); + case FIB_PROTOCOL_MPLS: + return (!0); + } + return (0); +} + +u8 * +format_fib_prefix (u8 * s, va_list * args) +{ + fib_prefix_t *fp = va_arg (*args, fib_prefix_t *); + + /* + * protocol specific so it prints ::/0 correctly. 
+ */ + switch (fp->fp_proto) + { + case FIB_PROTOCOL_IP6: + { + ip6_address_t p6 = fp->fp_addr.ip6; + + ip6_address_mask(&p6, &(ip6_main.fib_masks[fp->fp_len])); + s = format (s, "%U", format_ip6_address, &p6); + break; + } + case FIB_PROTOCOL_IP4: + { + ip4_address_t p4 = fp->fp_addr.ip4; + p4.as_u32 &= ip4_main.fib_masks[fp->fp_len]; + + s = format (s, "%U", format_ip4_address, &p4); + break; + } + case FIB_PROTOCOL_MPLS: + s = format (s, "%U:%U", + format_mpls_unicast_label, fp->fp_label, + format_mpls_eos_bit, fp->fp_eos); + break; + } + s = format (s, "/%d", fp->fp_len); + + return (s); +} + +int +fib_route_path_cmp (const fib_route_path_t *rpath1, + const fib_route_path_t *rpath2) +{ + int res; + + res = ip46_address_cmp(&rpath1->frp_addr, + &rpath2->frp_addr); + + if (0 != res) return (res); + + if (~0 != rpath1->frp_sw_if_index && + ~0 != rpath2->frp_sw_if_index) + { + res = vnet_sw_interface_compare(vnet_get_main(), + rpath1->frp_sw_if_index, + rpath2->frp_sw_if_index); + } + else + { + res = rpath1->frp_sw_if_index - rpath2->frp_sw_if_index; + } + + if (0 != res) return (res); + + if (ip46_address_is_zero(&rpath1->frp_addr)) + { + res = rpath1->frp_fib_index - rpath2->frp_fib_index; + } + + return (res); +} + +dpo_proto_t +fib_proto_to_dpo (fib_protocol_t fib_proto) +{ + switch (fib_proto) + { + case FIB_PROTOCOL_IP6: + return (DPO_PROTO_IP6); + case FIB_PROTOCOL_IP4: + return (DPO_PROTO_IP4); + case FIB_PROTOCOL_MPLS: + return (DPO_PROTO_MPLS); + } + ASSERT(0); + return (0); +} + +fib_protocol_t +dpo_proto_to_fib (dpo_proto_t dpo_proto) +{ + switch (dpo_proto) + { + case DPO_PROTO_IP6: + return (FIB_PROTOCOL_IP6); + case DPO_PROTO_IP4: + return (FIB_PROTOCOL_IP4); + case DPO_PROTO_MPLS: + return (FIB_PROTOCOL_MPLS); + default: + break; + } + ASSERT(0); + return (0); +} + +vnet_link_t +fib_proto_to_link (fib_protocol_t proto) +{ + switch (proto) + { + case FIB_PROTOCOL_IP4: + return (VNET_LINK_IP4); + case FIB_PROTOCOL_IP6: + return (VNET_LINK_IP6); + 
case FIB_PROTOCOL_MPLS: + return (VNET_LINK_MPLS); + } + ASSERT(0); + return (0); +} + +fib_forward_chain_type_t +fib_forw_chain_type_from_dpo_proto (dpo_proto_t proto) +{ + switch (proto) + { + case DPO_PROTO_IP4: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); + case DPO_PROTO_IP6: + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP6); + case DPO_PROTO_MPLS: + return (FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS); + case DPO_PROTO_ETHERNET: + return (FIB_FORW_CHAIN_TYPE_ETHERNET); + } + ASSERT(0); + return (FIB_FORW_CHAIN_TYPE_UNICAST_IP4); +} + +vnet_link_t +fib_forw_chain_type_to_link_type (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + return (VNET_LINK_IP4); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + return (VNET_LINK_IP6); + case FIB_FORW_CHAIN_TYPE_ETHERNET: + return (VNET_LINK_ETHERNET); + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + /* + * insufficient information to to convert + */ + ASSERT(0); + break; + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + return (VNET_LINK_MPLS); + } + return (VNET_LINK_IP4); +} + +dpo_proto_t +fib_forw_chain_type_to_dpo_proto (fib_forward_chain_type_t fct) +{ + switch (fct) + { + case FIB_FORW_CHAIN_TYPE_UNICAST_IP4: + return (DPO_PROTO_IP4); + case FIB_FORW_CHAIN_TYPE_UNICAST_IP6: + return (DPO_PROTO_IP6); + case FIB_FORW_CHAIN_TYPE_ETHERNET: + return (DPO_PROTO_ETHERNET); + case FIB_FORW_CHAIN_TYPE_MPLS_EOS: + case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS: + return (DPO_PROTO_MPLS); + } + return (DPO_PROTO_IP4); +} diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h new file mode 100644 index 00000000000..0a15fef1b28 --- /dev/null +++ b/src/vnet/fib/fib_types.h @@ -0,0 +1,340 @@ + /* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_TYPES_H__ +#define __FIB_TYPES_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/mpls/packet.h> +#include <vnet/dpo/dpo.h> + +/** + * A typedef of a node index. + * we make this typedef so the code becomes easier for a human to parse. + */ +typedef u32 fib_node_index_t; +#define FIB_NODE_INDEX_INVALID ((fib_node_index_t)(~0)) + +/** + * Protocol Type. packed so it consumes a u8 only + */ +typedef enum fib_protocol_t_ { + FIB_PROTOCOL_IP4 = 0, + FIB_PROTOCOL_IP6, + FIB_PROTOCOL_MPLS, +} __attribute__ ((packed)) fib_protocol_t; + +#define FIB_PROTOCOLS { \ + [FIB_PROTOCOL_IP4] = "ipv4", \ + [FIB_PROTOCOL_IP6] = "ipv6", \ + [FIB_PROTOCOL_MPLS] = "MPLS", \ +} + +/** + * Definition outside of enum so it does not need to be included in non-defaulted + * switch statements + */ +#define FIB_PROTOCOL_MAX (FIB_PROTOCOL_MPLS + 1) + +/** + * Not part of the enum so it does not have to be handled in switch statements + */ +#define FIB_PROTOCOL_NONE (FIB_PROTOCOL_MAX+1) + +#define FOR_EACH_FIB_PROTOCOL(_item) \ + for (_item = FIB_PROTOCOL_IP4; \ + _item <= FIB_PROTOCOL_MPLS; \ + _item++) + +#define FOR_EACH_FIB_IP_PROTOCOL(_item) \ + for (_item = FIB_PROTOCOL_IP4; \ + _item <= FIB_PROTOCOL_IP6; \ + _item++) + +/** + * @brief Convert from a protocol to a link type + */ +vnet_link_t fib_proto_to_link (fib_protocol_t proto); + +/** + * FIB output chain type. When a child object requests a forwarding contribution + * from a parent, it does so for a particular scenario. 
This enumererates those + * sceanrios + */ +typedef enum fib_forward_chain_type_t_ { + /** + * Contribute an object that is to be used to forward IP4 packets + */ + FIB_FORW_CHAIN_TYPE_UNICAST_IP4, + /** + * Contribute an object that is to be used to forward IP6 packets + */ + FIB_FORW_CHAIN_TYPE_UNICAST_IP6, + /** + * Contribute an object that is to be used to forward non-end-of-stack + * MPLS packets + */ + FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS, + /** + * Contribute an object that is to be used to forward end-of-stack + * MPLS packets. This is a convenient ID for clients. A real EOS chain + * must be pay-load protocol specific. This + * option is converted into one of the other three internally. + */ + FIB_FORW_CHAIN_TYPE_MPLS_EOS, + /** + * Contribute an object that is to be used to forward Ethernet packets. + * This is last in the list since it is not valid for many FIB objects, + * and thus their array of per-chain-type DPOs can be sized smaller. + */ + FIB_FORW_CHAIN_TYPE_ETHERNET, +} __attribute__ ((packed)) fib_forward_chain_type_t; + +#define FIB_FORW_CHAINS { \ + [FIB_FORW_CHAIN_TYPE_ETHERNET] = "ethernet", \ + [FIB_FORW_CHAIN_TYPE_UNICAST_IP4] = "unicast-ip4", \ + [FIB_FORW_CHAIN_TYPE_UNICAST_IP6] = "unicast-ip6", \ + [FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS] = "mpls-neos", \ + [FIB_FORW_CHAIN_TYPE_MPLS_EOS] = "mpls-eos", \ +} + +#define FIB_FORW_CHAIN_NUM (FIB_FORW_CHAIN_TYPE_MPLS_ETHERNET+1) +#define FIB_FORW_CHAIN_MPLS_NUM (FIB_FORW_CHAIN_TYPE_MPLS_EOS+1) + +#define FOR_EACH_FIB_FORW_CHAIN(_item) \ + for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \ + _item <= FIB_FORW_CHAIN_TYPE_ETHERNET; \ + _item++) + +#define FOR_EACH_FIB_FORW_MPLS_CHAIN(_item) \ + for (_item = FIB_FORW_CHAIN_TYPE_UNICAST_IP4; \ + _item <= FIB_FORW_CHAIN_TYPE_MPLS_EOS; \ + _item++) + +/** + * @brief Convert from a chain type to the adjacencies link type + */ +extern vnet_link_t fib_forw_chain_type_to_link_type(fib_forward_chain_type_t fct); + +/** + * @brief Convert from a payload-protocol to a 
chain type. + */ +extern fib_forward_chain_type_t fib_forw_chain_type_from_dpo_proto(dpo_proto_t proto); + +/** + * @brief Convert from a chain type to the DPO proto it will install + */ +extern dpo_proto_t fib_forw_chain_type_to_dpo_proto(fib_forward_chain_type_t fct); + +/** + * Aggregrate type for a prefix + */ +typedef struct fib_prefix_t_ { + /** + * The mask length + */ + u16 fp_len; + + /** + * protocol type + */ + fib_protocol_t fp_proto; + + /** + * Pad to keep the address 4 byte aligned + */ + u8 ___fp___pad; + + union { + /** + * The address type is not deriveable from the fp_addr member. + * If it's v4, then the first 3 u32s of the address will be 0. + * v6 addresses (even v4 mapped ones) have at least 2 u32s assigned + * to non-zero values. true. but when it's all zero, one cannot decide. + */ + ip46_address_t fp_addr; + + struct { + mpls_label_t fp_label; + mpls_eos_bit_t fp_eos; + /** + * This protocol determines the payload protocol of packets + * that will be forwarded by this entry once the label is popped. + * For a non-eos entry it will be MPLS. 
+ */ + dpo_proto_t fp_payload_proto; + }; + }; +} fib_prefix_t; + +STATIC_ASSERT(STRUCT_OFFSET_OF(fib_prefix_t, fp_addr) == 4, + "FIB Prefix's address is 4 byte aligned."); + +/** + * \brief Compare two prefixes for equality + */ +extern int fib_prefix_cmp(const fib_prefix_t *p1, + const fib_prefix_t *p2); + +/** + * \brief Compare two prefixes for covering relationship + * + * \return non-zero if the first prefix is a cover for the second + */ +extern int fib_prefix_is_cover(const fib_prefix_t *p1, + const fib_prefix_t *p2); + +/** + * \brief Return true is the prefix is a host prefix + */ +extern int fib_prefix_is_host(const fib_prefix_t *p); + + +/** + * \brief Host prefix from ip + */ +extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr, + fib_prefix_t *pfx); + +extern u8 * format_fib_prefix(u8 * s, va_list * args); +extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args); + +extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto); +extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto); + +/** + * Enurmeration of special path/entry types + */ +typedef enum fib_special_type_t_ { + /** + * Marker. Add new types after this one. + */ + FIB_SPECIAL_TYPE_FIRST = 0, + /** + * Local/for-us paths + */ + FIB_SPECIAL_TYPE_LOCAL = FIB_SPECIAL_TYPE_FIRST, + /** + * drop paths + */ + FIB_SPECIAL_TYPE_DROP, + /** + * Marker. Add new types before this one, then update it. 
+ */ + FIB_SPECIAL_TYPE_LAST = FIB_SPECIAL_TYPE_DROP, +} __attribute__ ((packed)) fib_special_type_t; + +/** + * The maximum number of types + */ +#define FIB_SPEICAL_TYPE_MAX (FIB_SPEICAL_TYPE_LAST + 1) + +#define FOR_EACH_FIB_SPEICAL_TYPE(_item) \ + for (_item = FIB_TYPE_SPEICAL_FIRST; \ + _item <= FIB_SPEICAL_TYPE_LAST; _item++) + +extern u8 * format_fib_protocol(u8 * s, va_list ap); +extern u8 * format_vnet_link(u8 *s, va_list ap); + +/** + * Path flags from the control plane + */ +typedef enum fib_route_path_flags_t_ +{ + FIB_ROUTE_PATH_FLAG_NONE = 0, + /** + * Recursion constraint of via a host prefix + */ + FIB_ROUTE_PATH_RESOLVE_VIA_HOST = (1 << 0), + /** + * Recursion constraint of via an attahced prefix + */ + FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED = (1 << 1), +} fib_route_path_flags_t; + +/** + * @brief + * A representation of a path as described by a route producer. + * These paramenters will determine the path 'type', of which there are: + * 1) Attached-next-hop: + * a single peer on a link. + * It is 'attached' because it is in the same sub-net as the router, on a link + * directly connected to the route. + * It is 'next=hop' since the next-hop address of the peer is known. + * 2) Attached: + * the next-hop is not known. but we can ARP for it. + * 3) Recursive. + * The next-hop is known but the interface is not. So to find the adj to use + * we must recursively resolve the next-hop. + * 3) deaggregate (deag) + * A further lookup is required. + */ +typedef struct fib_route_path_t_ { + /** + * The protocol of the address below. We need this since the all + * zeros address is ambiguous. + */ + fib_protocol_t frp_proto; + + union { + /** + * The next-hop address. + * Will be NULL for attached paths. + * Will be all zeros for attached-next-hop paths on a p2p interface + * Will be all zeros for a deag path. + */ + ip46_address_t frp_addr; + + /** + * The MPLS local Label to reursively resolve through. + * This is valid when the path type is MPLS. 
+ */ + mpls_label_t frp_local_label; + }; + /** + * The interface. + * Will be invalid for recursive paths. + */ + u32 frp_sw_if_index; + /** + * The FIB index to lookup the nexthop + * Only valid for recursive paths. + */ + u32 frp_fib_index; + /** + * [un]equal cost path weight + */ + u32 frp_weight; + /** + * flags on the path + */ + fib_route_path_flags_t frp_flags; + /** + * The outgoing MPLS label Stack. NULL implies no label. + */ + mpls_label_t *frp_label_stack; +} fib_route_path_t; + +/** + * @brief + * A representation of a fib path for fib_path_encode to convey the information to the caller + */ +typedef struct fib_route_path_encode_t_ { + fib_route_path_t rpath; + dpo_id_t dpo; +} fib_route_path_encode_t; + +#endif diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c new file mode 100644 index 00000000000..263812ade40 --- /dev/null +++ b/src/vnet/fib/fib_urpf_list.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_urpf_list.h> +#include <vnet/adj/adj.h> + +/** + * @brief pool of all fib_urpf_list + */ +fib_urpf_list_t *fib_urpf_list_pool; + +u8 * +format_fib_urpf_list (u8 *s, va_list args) +{ + fib_urpf_list_t *urpf; + index_t ui; + u32 *swi; + + ui = va_arg(args, index_t); + urpf = fib_urpf_list_get(ui); + + s = format(s, "uPRF-list:%d len:%d itfs:[", + ui, vec_len(urpf->furpf_itfs)); + + vec_foreach(swi, urpf->furpf_itfs) + { + s = format(s, "%d, ", *swi); + } + s = format(s, "]"); + + return (s); +} + +index_t +fib_urpf_list_alloc_and_lock (void) +{ + fib_urpf_list_t *urpf; + + pool_get(fib_urpf_list_pool, urpf); + memset(urpf, 0, sizeof(*urpf)); + + urpf->furpf_locks++; + + return (urpf - fib_urpf_list_pool); +} + +void +fib_urpf_list_unlock (index_t ui) +{ + fib_urpf_list_t *urpf; + + if (INDEX_INVALID == ui) + return; + + urpf = fib_urpf_list_get(ui); + + urpf->furpf_locks--; + + if (0 == urpf->furpf_locks) + { + vec_free(urpf->furpf_itfs); + pool_put(fib_urpf_list_pool, urpf); + } +} + +void +fib_urpf_list_lock (index_t ui) +{ + fib_urpf_list_t *urpf; + + urpf = fib_urpf_list_get(ui); + + urpf->furpf_locks++; +} + +/** + * @brief Append another interface to the list. + */ +void +fib_urpf_list_append (index_t ui, + u32 sw_if_index) +{ + fib_urpf_list_t *urpf; + + urpf = fib_urpf_list_get(ui); + + vec_add1(urpf->furpf_itfs, sw_if_index); +} + +/** + * @brief Combine to interface lists + */ +void +fib_urpf_list_combine (index_t ui1, + index_t ui2) +{ + fib_urpf_list_t *urpf1, *urpf2; + + urpf1 = fib_urpf_list_get(ui1); + urpf2 = fib_urpf_list_get(ui2); + + vec_append(urpf1->furpf_itfs, urpf2->furpf_itfs); +} + +/** + * @brief Sort the interface indicies. + * The sort is the first step in obtaining a unique list, so the order, + * w.r.t. next-hop, interface,etc is not important. So a sort based on the + * index is all we need. 
+ */ +static int +fib_urpf_itf_cmp_for_sort (void * v1, + void * v2) +{ + fib_node_index_t *i1 = v1, *i2 = v2; + + return (*i2 < *i1); +} + +/** + * @brief Convert the uRPF list from the itf set obtained during the walk + * to a unique list. + */ +void +fib_urpf_list_bake (index_t ui) +{ + fib_urpf_list_t *urpf; + + urpf = fib_urpf_list_get(ui); + + ASSERT(!(urpf->furpf_flags & FIB_URPF_LIST_BAKED)); + + if (vec_len(urpf->furpf_itfs) > 1) + { + u32 i,j; + + /* + * cat list | sort | uniq > rpf_list + */ + vec_sort_with_function(urpf->furpf_itfs, fib_urpf_itf_cmp_for_sort); + + i = 0, j = 1; + while (j < vec_len(urpf->furpf_itfs)) + { + if (urpf->furpf_itfs[i] == urpf->furpf_itfs[j]) + { + /* + * the itfacenct entries are the same. + * search forward for a unique one + */ + while (urpf->furpf_itfs[i] == urpf->furpf_itfs[j] && + j < vec_len(urpf->furpf_itfs)) + { + j++; + } + if (j == vec_len(urpf->furpf_itfs)) + { + /* + * ran off the end without finding a unique index. + * we are done. + */ + break; + } + else + { + urpf->furpf_itfs[i+1] = urpf->furpf_itfs[j]; + } + } + i++, j++; + } + + /* + * set the length of the vector to the number of unique itfs + */ + _vec_len(urpf->furpf_itfs) = i+1; + } + + urpf->furpf_flags |= FIB_URPF_LIST_BAKED; +} + +void +fib_urpf_list_show_mem (void) +{ + fib_show_memory_usage("uRPF-list", + pool_elts(fib_urpf_list_pool), + pool_len(fib_urpf_list_pool), + sizeof(fib_urpf_list_t)); +} + +static clib_error_t * +show_fib_urpf_list_command (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + index_t ui; + + if (unformat (input, "%d", &ui)) + { + /* + * show one in detail + */ + if (!pool_is_free_index(fib_urpf_list_pool, ui)) + { + vlib_cli_output (vm, "%d@%U", + ui, + format_fib_urpf_list, ui); + } + else + { + vlib_cli_output (vm, "uRPF %d invalid", ui); + } + } + else + { + /* + * show all + */ + vlib_cli_output (vm, "FIB uRPF Entries:"); + pool_foreach_index(ui, fib_urpf_list_pool, + ({ + vlib_cli_output 
(vm, "%d@%U", + ui, + format_fib_urpf_list, ui); + })); + } + + return (NULL); +} + +/* *INDENT-OFF* */ +/*? + * The '<em>sh fib uRPF [index] </em>' command displays the uRPF lists + * + * @cliexpar + * @cliexstart{show fib uRPF} + * FIB uRPF Entries: + * 0@uPRF-list:0 len:0 itfs:[] + * 1@uPRF-list:1 len:2 itfs:[1, 2, ] + * 2@uPRF-list:2 len:1 itfs:[3, ] + * 3@uPRF-list:3 len:1 itfs:[9, ] + * @cliexend +?*/ +VLIB_CLI_COMMAND (show_fib_urpf_list, static) = { + .path = "show fib uRPF", + .function = show_fib_urpf_list_command, + .short_help = "show fib uRPF", +}; +/* *INDENT-OFF* */ diff --git a/src/vnet/fib/fib_urpf_list.h b/src/vnet/fib/fib_urpf_list.h new file mode 100644 index 00000000000..09f475747cf --- /dev/null +++ b/src/vnet/fib/fib_urpf_list.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief A unicast RPF list. + * The uRPF list is the set of interfaces that a prefix can be reached through. + * There are 3 levels of RPF check: + * - do we have any route to the source (i.e. it's not drop) + * - did the packet arrive on an interface that the source is reachable through + * - did the packet arrive from a peer that the source is reachable through + * we don't support the last. But it could be done by storing adjs in the uPRF + * list rather than interface indices. 
+ * + * these conditions are checked against the list by: + * - the list is not empty + * - there is an interface in the list that is on the input interface. + * - there is an adj in the list whose MAC address matches the packet's + * source MAC and input interface. + * + * To speed the last two checks the interface list only needs to have the unique + * interfaces present. If the uRPF check was instead implemented by forward + * walking the DPO chain, then that walk would encounter a great deal of + * non-adjacency objects (i.e. load-balances, mpls-labels, etc) and potentially + * the same adjacency many times (esp. when UCMP is used). + * To that end the uRPF list is a collapsed, unique interface only list. + */ + +#ifndef __FIB_URPF_LIST_H__ +#define __FIB_URPF_LIST_H__ + +#include <vnet/fib/fib_types.h> +#include <vnet/adj/adj.h> + +/** + * @brief flags + */ +typedef enum fib_urpf_list_flag_t_ +{ + /** + * @brief Set to indicated that the uRPF list has already been baked. + * This is protection against it being baked more than once. These + * are not chunky fries - once is enough. + */ + FIB_URPF_LIST_BAKED = (1 << 0), +} fib_urpf_list_flag_t; + +typedef struct fib_urpf_list_t_ +{ + /** + * The list of interfaces that comprise the allowed accepting interfaces + */ + adj_index_t *furpf_itfs; + + /** + * flags + */ + fib_urpf_list_flag_t furpf_flags; + + /** + * uRPF lists are shared amongst many entries so we require a locking + * mechanism. 
+ */ + u32 furpf_locks; +} fib_urpf_list_t; + +extern index_t fib_urpf_list_alloc_and_lock(void); +extern void fib_urpf_list_unlock(index_t urpf); +extern void fib_urpf_list_lock(index_t urpf); + +extern void fib_urpf_list_append(index_t urpf, adj_index_t adj); +extern void fib_urpf_list_combine(index_t urpf1, index_t urpf2); + +extern void fib_urpf_list_bake(index_t urpf); + +extern u8 *format_fib_urpf_list(u8 *s, va_list ap); + +extern void fib_urpf_list_show_mem(void); + +/** + * @brief pool of all fib_urpf_list + */ +extern fib_urpf_list_t *fib_urpf_list_pool; + +static inline fib_urpf_list_t * +fib_urpf_list_get (index_t index) +{ + return (pool_elt_at_index(fib_urpf_list_pool, index)); +} + +/** + * @brief Data-Plane function to check an input interface against an uRPF list + * + * @param ui The uRPF list index to check against. Get this from the load-balance + * object that is the result of the FIB lookup + * @param sw_if_index The SW interface index to validate + * + * @return 1 if the interface is found, 0 otherwise + */ +always_inline int +fib_urpf_check (index_t ui, u32 sw_if_index) +{ + fib_urpf_list_t *urpf; + u32 *swi; + + urpf = fib_urpf_list_get(ui); + + vec_foreach(swi, urpf->furpf_itfs) + { + if (*swi == sw_if_index) + return (1); + } + + return (0); +} + +/** + * @brief Data-Plane function to check the size of an uRPF list, (i.e. the number + * of interfaces in the list). + * + * @param ui The uRPF list index to check against. 
Get this from the load-balance + * object that is the result of the FIB lookup + * + * @return the number of interfaces in the list + */ +always_inline int +fib_urpf_check_size (index_t ui) +{ + fib_urpf_list_t *urpf; + + urpf = fib_urpf_list_get(ui); + + return (vec_len(urpf->furpf_itfs)); +} + +#endif diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c new file mode 100644 index 00000000000..938f7b8c1c6 --- /dev/null +++ b/src/vnet/fib/fib_walk.c @@ -0,0 +1,1108 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/fib_walk.h> +#include <vnet/fib/fib_node_list.h> + +/** + * The flags on a walk + */ +typedef enum fib_walk_flags_t_ +{ + /** + * A synchronous walk. + * This walk will run to completion, i.e. visit ALL the children. + * It is a depth first traversal of the graph. + */ + FIB_WALK_FLAG_SYNC = (1 << 0), + /** + * An asynchronous walk. + * This walk will be scheduled to run in the background. It will thus visits + * the children at a later point in time. + * It is a depth first traversal of the graph. + */ + FIB_WALK_FLAG_ASYNC = (1 << 1), + /** + * An indication that the walk is currently executing. + */ + FIB_WALK_FLAG_EXECUTING = (1 << 2), +} fib_walk_flags_t; + +/** + * A representation of a graph walk from a parent object to its children + */ +typedef struct fib_walk_t_ +{ + /** + * FIB node linkage. 
This object is not in the FIB object graph, + * but it is present in other node's dependency lists, so it needs to + * be pointerable to. + */ + fib_node_t fw_node; + + /** + * the walk's flags + */ + fib_walk_flags_t fw_flags; + + /** + * Sibling index in the dependency list + */ + u32 fw_dep_sibling; + + /** + * Sibling index in the list of all walks + */ + u32 fw_prio_sibling; + + /** + * Pointer to the node whose dependants this walk is walking + */ + fib_node_ptr_t fw_parent; + + /** + * Number of nodes visited by this walk. saved for debugging purposes. + */ + u32 fw_n_visits; + + /** + * Time the walk started + */ + f64 fw_start_time; + + /** + * The reasons this walk is occuring. + * This is a vector ordered in time. The reasons and the front were started + * first, and so should be acted first when a node is visisted. + */ + fib_node_back_walk_ctx_t *fw_ctx; +} fib_walk_t; + +/** + * @brief The pool of all walk objects + */ +static fib_walk_t *fib_walk_pool; + +/** + * @brief There's only one event type sent to the walk process + */ +#define FIB_WALK_EVENT 0 + +/** + * Statistics maintained per-walk queue + */ +typedef enum fib_walk_queue_stats_t_ +{ + FIB_WALK_SCHEDULED, + FIB_WALK_COMPLETED, +} fib_walk_queue_stats_t; +#define FIB_WALK_QUEUE_STATS_NUM ((fib_walk_queue_stats_t)(FIB_WALK_COMPLETED+1)) + +#define FIB_WALK_QUEUE_STATS { \ + [FIB_WALK_SCHEDULED] = "scheduled", \ + [FIB_WALK_COMPLETED] = "completed", \ +} + +#define FOR_EACH_FIB_WALK_QUEUE_STATS(_wqs) \ + for ((_wqs) = FIB_WALK_SCHEDULED; \ + (_wqs) < FIB_WALK_QUEUE_STATS_NUM; \ + (_wqs)++) + +/** + * The names of the walk stats + */ +static const char * const fib_walk_queue_stats_names[] = FIB_WALK_QUEUE_STATS; +/** + * The names of the walk reasons + */ +static const char * const fib_node_bw_reason_names[] = FIB_NODE_BW_REASONS; + +/** + * A represenation of one queue of walk + */ +typedef struct fib_walk_queue_t_ +{ + /** + * Qeuee stats + */ + u64 fwq_stats[FIB_WALK_QUEUE_STATS_NUM]; + + 
/** + * The node list which acts as the queue + */ + fib_node_list_t fwq_queue; +} fib_walk_queue_t; + +/** + * A set of priority queues for outstanding walks + */ +typedef struct fib_walk_queues_t_ +{ + fib_walk_queue_t fwqs_queues[FIB_WALK_PRIORITY_NUM]; +} fib_walk_queues_t; + +/** + * The global queues of outstanding walks + */ +static fib_walk_queues_t fib_walk_queues; + +/** + * The names of the walk priorities + */ +static const char * const fib_walk_priority_names[] = FIB_WALK_PRIORITIES; + +/** + * @brief Histogram stats on the lenths of each walk in elemenets visisted. + * Store upto 1<<23 elements in increments of 1<<10 + */ +#define HISTOGRAM_VISITS_PER_WALK_MAX (1<<23) +#define HISTOGRAM_VISITS_PER_WALK_INCR (1<<10) +#define HISTOGRAM_VISITS_PER_WALK_N_BUCKETS \ + (HISTOGRAM_VISITS_PER_WALK_MAX/HISTOGRAM_VISITS_PER_WALK_INCR) +static u64 fib_walk_hist_vists_per_walk[HISTOGRAM_VISITS_PER_WALK_N_BUCKETS]; + +/** + * @brief History of state for the last 128 walks + */ +#define HISTORY_N_WALKS 128 +#define MAX_HISTORY_REASONS 16 +static u32 history_last_walk_pos; +typedef struct fib_walk_history_t_ { + u32 fwh_n_visits; + f64 fwh_duration; + f64 fwh_completed; + fib_node_ptr_t fwh_parent; + fib_walk_flags_t fwh_flags; + fib_node_bw_reason_flag_t fwh_reason[MAX_HISTORY_REASONS]; +} fib_walk_history_t; +static fib_walk_history_t fib_walk_history[HISTORY_N_WALKS]; + +u8* +format_fib_walk_priority (u8 *s, va_list ap) +{ + fib_walk_priority_t prio = va_arg(ap, fib_walk_priority_t); + + ASSERT(prio < FIB_WALK_PRIORITY_NUM); + + return (format(s, "%s", fib_walk_priority_names[prio])); +} +static u8* +format_fib_walk_queue_stats (u8 *s, va_list ap) +{ + fib_walk_queue_stats_t wqs = va_arg(ap, fib_walk_queue_stats_t); + + ASSERT(wqs < FIB_WALK_QUEUE_STATS_NUM); + + return (format(s, "%s", fib_walk_queue_stats_names[wqs])); +} + +static index_t +fib_walk_get_index (fib_walk_t *fwalk) +{ + return (fwalk - fib_walk_pool); +} + +static fib_walk_t * +fib_walk_get 
(index_t fwi) +{ + return (pool_elt_at_index(fib_walk_pool, fwi)); +} + +/* + * not static so it can be used in the unit tests + */ +u32 +fib_walk_queue_get_size (fib_walk_priority_t prio) +{ + return (fib_node_list_get_size(fib_walk_queues.fwqs_queues[prio].fwq_queue)); +} + +static fib_node_index_t +fib_walk_queue_get_front (fib_walk_priority_t prio) +{ + fib_node_ptr_t wp; + + fib_node_list_get_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, &wp); + + return (wp.fnp_index); +} + +static void +fib_walk_destroy (fib_walk_t *fwalk) +{ + u32 bucket, ii; + + if (FIB_NODE_INDEX_INVALID != fwalk->fw_prio_sibling) + { + fib_node_list_elt_remove(fwalk->fw_prio_sibling); + } + fib_node_child_remove(fwalk->fw_parent.fnp_type, + fwalk->fw_parent.fnp_index, + fwalk->fw_dep_sibling); + + /* + * add the stats to the continuous histogram collection. + */ + bucket = (fwalk->fw_n_visits / HISTOGRAM_VISITS_PER_WALK_INCR); + bucket = (bucket >= HISTOGRAM_VISITS_PER_WALK_N_BUCKETS ? + HISTOGRAM_VISITS_PER_WALK_N_BUCKETS - 1 : + bucket); + fib_walk_hist_vists_per_walk[bucket]++; + + /* + * save stats to the recent history + */ + + fib_walk_history[history_last_walk_pos].fwh_n_visits = + fwalk->fw_n_visits; + fib_walk_history[history_last_walk_pos].fwh_completed = + vlib_time_now(vlib_get_main()); + fib_walk_history[history_last_walk_pos].fwh_duration = + fib_walk_history[history_last_walk_pos].fwh_completed - + fwalk->fw_start_time; + fib_walk_history[history_last_walk_pos].fwh_parent = + fwalk->fw_parent; + fib_walk_history[history_last_walk_pos].fwh_flags = + fwalk->fw_flags; + + vec_foreach_index(ii, fwalk->fw_ctx) + { + if (ii < MAX_HISTORY_REASONS) + { + fib_walk_history[history_last_walk_pos].fwh_reason[ii] = + fwalk->fw_ctx[ii].fnbw_reason; + } + } + + history_last_walk_pos = (history_last_walk_pos + 1) % HISTORY_N_WALKS; + + fib_node_deinit(&fwalk->fw_node); + vec_free(fwalk->fw_ctx); + pool_put(fib_walk_pool, fwalk); +} + +/** + * return code when advancing a walk + */ 
+typedef enum fib_walk_advance_rc_t_ +{ + /** + * The walk is complete + */ + FIB_WALK_ADVANCE_DONE, + /** + * the walk has more work + */ + FIB_WALK_ADVANCE_MORE, + /** + * The walk merged with the one in front + */ + FIB_WALK_ADVANCE_MERGE, +} fib_walk_advance_rc_t; + +/** + * @brief Advance the walk one element in its work list + */ +static fib_walk_advance_rc_t +fib_walk_advance (fib_node_index_t fwi) +{ + fib_node_back_walk_ctx_t *ctx, *old; + fib_node_back_walk_rc_t wrc; + fib_node_ptr_t sibling; + fib_walk_t *fwalk; + int more_elts; + + /* + * this walk function is re-entrant - walks acan spawn walks. + * fib_walk_t objects come from a pool, so they can realloc. we need + * to retch from said pool at the appropriate times. + */ + fwalk = fib_walk_get(fwi); + + more_elts = fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &sibling); + + if (more_elts) + { + old = fwalk->fw_ctx; + + vec_foreach(ctx, fwalk->fw_ctx) + { + wrc = fib_node_back_walk_one(&sibling, ctx); + + fwalk = fib_walk_get(fwi); + fwalk->fw_n_visits++; + + if (FIB_NODE_BACK_WALK_MERGE == wrc) + { + /* + * this walk has merged with the one further along the node's + * dependecy list. 
+ */ + return (FIB_WALK_ADVANCE_MERGE); + } + if (old != fwalk->fw_ctx) + { + /* + * nasty re-entrant addition of a walk has realloc'd the vector + * break out + */ + return (FIB_WALK_ADVANCE_MERGE); + } + } + /* + * move foward to the next node to visit + */ + more_elts = fib_node_list_advance(fwalk->fw_dep_sibling); + } + + if (more_elts) + { + return (FIB_WALK_ADVANCE_MORE); + } + + return (FIB_WALK_ADVANCE_DONE); +} + +/** + * @breif Enurmerate the times of sleep between walks + */ +typedef enum fib_walk_sleep_type_t_ +{ + FIB_WALK_SHORT_SLEEP, + FIB_WALK_LONG_SLEEP, +} fib_walk_sleep_type_t; + +#define FIB_WALK_N_SLEEP (FIB_WALK_LONG_SLEEP+1) + +/** + * @brief Durations for the sleep types + */ +static f64 fib_walk_sleep_duration[] = { + [FIB_WALK_LONG_SLEEP] = 1e-3, + [FIB_WALK_SHORT_SLEEP] = 1e-8, +}; + +/** + * @brief The time quota for a walk. When more than this amount of time is + * spent, the walk process will yield. + */ +static f64 quota = 1e-4; + +/** + * Histogram on the amount of work done (in msecs) in each walk + */ +#define N_TIME_BUCKETS 128 +#define TIME_INCREMENTS (N_TIME_BUCKETS/2) +static u64 fib_walk_work_time_taken[N_TIME_BUCKETS]; + +/** + * Histogram on the number of nodes visted in each quota + */ +#define N_ELTS_BUCKETS 128 +static u32 fib_walk_work_nodes_visisted_incr = 2; +static u64 fib_walk_work_nodes_visited[N_ELTS_BUCKETS]; + +/** + * Histogram of the sleep lengths + */ +static u64 fib_walk_sleep_lengths[2]; + +/** + * @brief Service the queues + * This is not declared static so that it can be unit tested - i know i know... 
+ */ +f64 +fib_walk_process_queues (vlib_main_t * vm, + const f64 quota) +{ + f64 start_time, consumed_time; + fib_walk_sleep_type_t sleep; + fib_walk_priority_t prio; + fib_walk_advance_rc_t rc; + fib_node_index_t fwi; + fib_walk_t *fwalk; + u32 n_elts; + i32 bucket; + + consumed_time = 0; + start_time = vlib_time_now(vm); + n_elts = 0; + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + while (0 != fib_walk_queue_get_size(prio)) + { + fwi = fib_walk_queue_get_front(prio); + + /* + * set this walk as executing + */ + fwalk = fib_walk_get(fwi); + fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING; + + do + { + rc = fib_walk_advance(fwi); + n_elts++; + consumed_time = (vlib_time_now(vm) - start_time); + } while ((consumed_time < quota) && + (FIB_WALK_ADVANCE_MORE == rc)); + + /* + * if this walk has no more work then pop it from the queue + * and move on to the next. + */ + if (FIB_WALK_ADVANCE_MORE != rc) + { + fwalk = fib_walk_get(fwi); + fib_walk_destroy(fwalk); + fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_COMPLETED]++; + } + else + { + /* + * passed our work quota. sleep time. + */ + fwalk = fib_walk_get(fwi); + fwalk->fw_flags &= ~FIB_WALK_FLAG_EXECUTING; + sleep = FIB_WALK_SHORT_SLEEP; + goto that_will_do_for_now; + } + } + } + /* + * got to the end of all the work + */ + sleep = FIB_WALK_LONG_SLEEP; + +that_will_do_for_now: + + /* + * collect the stats: + * - for the number of nodes visisted we store 128 increments + * - for the time consumed we store quota/TIME_INCREMENTS increments. + */ + bucket = ((n_elts/fib_walk_work_nodes_visisted_incr) > N_ELTS_BUCKETS ? + N_ELTS_BUCKETS-1 : + n_elts/fib_walk_work_nodes_visisted_incr); + ++fib_walk_work_nodes_visited[bucket]; + + bucket = (consumed_time - quota) / (quota / TIME_INCREMENTS); + bucket += N_TIME_BUCKETS/2; + bucket = (bucket < 0 ? 0 : bucket); + bucket = (bucket > N_TIME_BUCKETS-1 ? 
N_TIME_BUCKETS-1 : bucket); + ++fib_walk_work_time_taken[bucket]; + + ++fib_walk_sleep_lengths[sleep]; + + return (fib_walk_sleep_duration[sleep]); +} + +/** + * @brief The 'fib-walk' process's main loop. + */ +static uword +fib_walk_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + f64 sleep_time; + + sleep_time = fib_walk_sleep_duration[FIB_WALK_SHORT_SLEEP]; + + while (1) + { + vlib_process_wait_for_event_or_clock(vm, sleep_time); + + /* + * there may be lots of event queued between the processes, + * but the walks we want to schedule are in the priority queues, + * so we ignore the process events. + */ + vlib_process_get_events(vm, NULL); + + sleep_time = fib_walk_process_queues(vm, quota); + } + + /* + * Unreached + */ + ASSERT(!"WTF"); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (fib_walk_process_node,static) = { + .function = fib_walk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "fib-walk", +}; +/* *INDENT-ON* */ + +/** + * @brief Allocate a new walk object + */ +static fib_walk_t * +fib_walk_alloc (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_flags_t flags, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_t *fwalk; + + pool_get(fib_walk_pool, fwalk); + + fib_node_init(&fwalk->fw_node, FIB_NODE_TYPE_WALK); + + fwalk->fw_flags = flags; + fwalk->fw_dep_sibling = FIB_NODE_INDEX_INVALID; + fwalk->fw_prio_sibling = FIB_NODE_INDEX_INVALID; + fwalk->fw_parent.fnp_index = parent_index; + fwalk->fw_parent.fnp_type = parent_type; + fwalk->fw_ctx = NULL; + fwalk->fw_start_time = vlib_time_now(vlib_get_main()); + fwalk->fw_n_visits = 0; + + /* + * make a copy of the backwalk context so the depth count remains + * the same for each sibling visitsed. This is important in the case + * where a parent has a loop via one child, but all the others are not. 
+ * if the looped child were visited first, the depth count would exceed, the + * max and the walk would terminate before it reached the other siblings. + */ + vec_add1(fwalk->fw_ctx, *ctx); + + return (fwalk); +} + +/** + * @brief Enqueue a walk onto the appropriate priority queue. Then signal + * the background process there is work to do. + */ +static index_t +fib_walk_prio_queue_enquue (fib_walk_priority_t prio, + fib_walk_t *fwalk) +{ + index_t sibling; + + sibling = fib_node_list_push_front(fib_walk_queues.fwqs_queues[prio].fwq_queue, + 0, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + fib_walk_queues.fwqs_queues[prio].fwq_stats[FIB_WALK_SCHEDULED]++; + + /* + * poke the fib-walk process to perform the async walk. + * we are not passing it specific data, hence the last two args, + * the process will drain the queues + */ + vlib_process_signal_event(vlib_get_main(), + fib_walk_process_node.index, + FIB_WALK_EVENT, + FIB_WALK_EVENT); + + return (sibling); +} + +void +fib_walk_async (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_priority_t prio, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_t *fwalk; + + if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth) + { + /* + * The walk has reached the maximum depth. there is a loop in the graph. + * bail. + */ + return; + } + if (0 == fib_node_get_n_children(parent_type, + parent_index)) + { + /* + * no children to walk - quit now + */ + return; + } + if (ctx->fnbw_flags & FIB_NODE_BW_FLAG_FORCE_SYNC) + { + /* + * the originator of the walk wanted it to be synchronous, but the + * parent object chose async - denied. 
+ */ + return (fib_walk_sync(parent_type, parent_index, ctx)); + } + + + fwalk = fib_walk_alloc(parent_type, + parent_index, + FIB_WALK_FLAG_ASYNC, + ctx); + + fwalk->fw_dep_sibling = fib_node_child_add(parent_type, + parent_index, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + + fwalk->fw_prio_sibling = fib_walk_prio_queue_enquue(prio, fwalk); +} + +/** + * @brief Back walk all the children of a FIB node. + * + * note this is a synchronous depth first walk. Children visited may propagate + * the walk to thier children. Other children node types may not propagate, + * synchronously but instead queue the walk for later async completion. + */ +void +fib_walk_sync (fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_back_walk_ctx_t *ctx) +{ + fib_walk_advance_rc_t rc; + fib_node_index_t fwi; + fib_walk_t *fwalk; + + if (FIB_NODE_GRAPH_MAX_DEPTH < ++ctx->fnbw_depth) + { + /* + * The walk has reached the maximum depth. there is a loop in the graph. + * bail. + */ + return; + } + if (0 == fib_node_get_n_children(parent_type, + parent_index)) + { + /* + * no children to walk - quit now + */ + return; + } + + fwalk = fib_walk_alloc(parent_type, + parent_index, + FIB_WALK_FLAG_SYNC, + ctx); + + fwalk->fw_dep_sibling = fib_node_child_add(parent_type, + parent_index, + FIB_NODE_TYPE_WALK, + fib_walk_get_index(fwalk)); + fwi = fib_walk_get_index(fwalk); + + while (1) + { + /* + * set this walk as executing + */ + fwalk->fw_flags |= FIB_WALK_FLAG_EXECUTING; + + do + { + rc = fib_walk_advance(fwi); + } while (FIB_WALK_ADVANCE_MORE == rc); + + + /* + * this walk function is re-entrant - walks can spawn walks. + * fib_walk_t objects come from a pool, so they can realloc. we need + * to re-fetch from said pool at the appropriate times. + */ + fwalk = fib_walk_get(fwi); + + if (FIB_WALK_ADVANCE_MERGE == rc) + { + /* + * this sync walk merged with an walk in front. 
+ * by reqeusting a sync walk the client wanted all children walked, + * so we ditch the walk object in hand and continue with the one + * we merged into + */ + fib_node_ptr_t merged_walk; + + fib_node_list_elt_get_next(fwalk->fw_dep_sibling, &merged_walk); + + ASSERT(FIB_NODE_INDEX_INVALID != merged_walk.fnp_index); + ASSERT(FIB_NODE_TYPE_WALK == merged_walk.fnp_type); + + fib_walk_destroy(fwalk); + + fwi = merged_walk.fnp_index; + fwalk = fib_walk_get(fwi); + + if (FIB_WALK_FLAG_EXECUTING & fwalk->fw_flags) + { + /* + * we are executing a sync walk, and we have met with another + * walk that is also executing. since only one walk executs at once + * (there is no multi-threading) this implies we have met ourselves + * and hence the is a loop in the graph. + * This function is re-entrant, so the walk object we met is being + * acted on in a stack frame below this one. We must therefore not + * continue with it now, but let the stack unwind and along the + * appropriate frame to read the depth count and bail. + */ + fwalk = NULL; + break; + } + } + else + { + /* + * the walk reached the end of the depdency list. + */ + break; + } + } + + if (NULL != fwalk) + { + fib_walk_destroy(fwalk); + } +} + +static fib_node_t * +fib_walk_get_node (fib_node_index_t index) +{ + fib_walk_t *fwalk; + + fwalk = fib_walk_get(index); + + return (&(fwalk->fw_node)); +} + +/** + * Walk objects are not parents, nor are they locked. + * are no-ops + */ +static void +fib_walk_last_lock_gone (fib_node_t *node) +{ + ASSERT(0); +} + +static fib_walk_t* +fib_walk_get_from_node (fib_node_t *node) +{ + return ((fib_walk_t*)(((char*)node) - + STRUCT_OFFSET_OF(fib_walk_t, fw_node))); +} + +/** + * @brief Another back walk has reach this walk. + * Megre them so there is only one left. It is this node being + * visited that will remain, so copy or merge the context onto it. 
+ */ +static fib_node_back_walk_rc_t +fib_walk_back_walk_notify (fib_node_t *node, + fib_node_back_walk_ctx_t *ctx) +{ + fib_node_back_walk_ctx_t *last; + fib_walk_t *fwalk; + + fwalk = fib_walk_get_from_node(node); + + /* + * check whether the walk context can be merged with the most recent. + * the most recent was the one last added and is thus at the back of the vector. + * we can merge walks if the reason for the walk is the same. + */ + last = vec_end(fwalk->fw_ctx) - 1; + + if (last->fnbw_reason == ctx->fnbw_reason) + { + /* + * copy the largest of the depth values. in the presence of a loop, + * the same walk will merge with itself. if we take the smaller depth + * then it will never end. + */ + last->fnbw_depth = ((last->fnbw_depth >= ctx->fnbw_depth) ? + last->fnbw_depth : + ctx->fnbw_depth); + } + else + { + /* + * walks could not be merged, this means that the walk infront needs to + * perform different action to this one that has caught up. the one in + * front was scheduled first so append the new walk context to the back + * of the list. 
+ */ + vec_add1(fwalk->fw_ctx, *ctx); + } + + return (FIB_NODE_BACK_WALK_MERGE); +} + +/** + * The FIB walk's graph node virtual function table + */ +static const fib_node_vft_t fib_walk_vft = { + .fnv_get = fib_walk_get_node, + .fnv_last_lock = fib_walk_last_lock_gone, + .fnv_back_walk = fib_walk_back_walk_notify, +}; + +void +fib_walk_module_init (void) +{ + fib_walk_priority_t prio; + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + fib_walk_queues.fwqs_queues[prio].fwq_queue = fib_node_list_create(); + } + + fib_node_register_type(FIB_NODE_TYPE_WALK, &fib_walk_vft); +} + +static u8* +format_fib_walk (u8* s, va_list ap) +{ + fib_node_index_t fwi = va_arg(ap, fib_node_index_t); + fib_walk_t *fwalk; + + fwalk = fib_walk_get(fwi); + + return (format(s, " parent:{%s:%d} visits:%d flags:%d", + fib_node_type_get_name(fwalk->fw_parent.fnp_type), + fwalk->fw_parent.fnp_index, + fwalk->fw_n_visits, + fwalk->fw_flags)); +} + +static clib_error_t * +fib_walk_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_walk_queue_stats_t wqs; + fib_walk_priority_t prio; + fib_node_ptr_t sibling; + fib_node_index_t fwi; + fib_walk_t *fwalk; + int more_elts, ii; + u8 *s = NULL; + +#define USEC 1000000 + vlib_cli_output(vm, "FIB Walk Quota = %.2fusec:", quota * USEC); + vlib_cli_output(vm, "FIB Walk queues:"); + + FOR_EACH_FIB_WALK_PRIORITY(prio) + { + vlib_cli_output(vm, " %U priority queue:", + format_fib_walk_priority, prio); + vlib_cli_output(vm, " Stats: "); + + FOR_EACH_FIB_WALK_QUEUE_STATS(wqs) + { + vlib_cli_output(vm, " %U:%d", + format_fib_walk_queue_stats, wqs, + fib_walk_queues.fwqs_queues[prio].fwq_stats[wqs]); + } + vlib_cli_output(vm, " Occupancy:%d", + fib_node_list_get_size( + fib_walk_queues.fwqs_queues[prio].fwq_queue)); + + more_elts = fib_node_list_get_front( + fib_walk_queues.fwqs_queues[prio].fwq_queue, + &sibling); + + while (more_elts) + { + ASSERT(FIB_NODE_INDEX_INVALID != sibling.fnp_index); + ASSERT(FIB_NODE_TYPE_WALK == 
sibling.fnp_type); + + fwi = sibling.fnp_index; + fwalk = fib_walk_get(fwi); + + vlib_cli_output(vm, " %U", format_fib_walk, fwi); + + more_elts = fib_node_list_elt_get_next(fwalk->fw_prio_sibling, + &sibling); + } + } + + vlib_cli_output(vm, "Histogram Statistics:"); + vlib_cli_output(vm, " Number of Elements visit per-quota:"); + for (ii = 0; ii < N_ELTS_BUCKETS; ii++) + { + if (0 != fib_walk_work_nodes_visited[ii]) + s = format(s, "%d:%d ", + (ii * fib_walk_work_nodes_visisted_incr), + fib_walk_work_nodes_visited[ii]); + } + vlib_cli_output(vm, " %v", s); + vec_free(s); + + vlib_cli_output(vm, " Time consumed per-quota (Quota=%f usec):", quota*USEC); + s = format(s, "0:%d ", fib_walk_work_time_taken[0]); + for (ii = 1; ii < N_TIME_BUCKETS; ii++) + { + if (0 != fib_walk_work_time_taken[ii]) + s = format(s, "%d:%d ", (u32)((((ii - N_TIME_BUCKETS/2) * + (quota / TIME_INCREMENTS)) + quota) * + USEC), + fib_walk_work_time_taken[ii]); + } + vlib_cli_output(vm, " %v", s); + vec_free(s); + + vlib_cli_output(vm, " Sleep Types:"); + vlib_cli_output(vm, " Short Long:"); + vlib_cli_output(vm, " %d %d:", + fib_walk_sleep_lengths[FIB_WALK_SHORT_SLEEP], + fib_walk_sleep_lengths[FIB_WALK_LONG_SLEEP]); + + vlib_cli_output(vm, " Number of Elements visited per-walk:"); + for (ii = 0; ii < HISTOGRAM_VISITS_PER_WALK_N_BUCKETS; ii++) + { + if (0 != fib_walk_hist_vists_per_walk[ii]) + s = format(s, "%d:%d ", + ii*HISTOGRAM_VISITS_PER_WALK_INCR, + fib_walk_hist_vists_per_walk[ii]); + } + vlib_cli_output(vm, " %v", s); + vec_free(s); + + + vlib_cli_output(vm, "Brief History (last %d walks):", HISTORY_N_WALKS); + ii = history_last_walk_pos - 1; + if (ii < 0) + ii = HISTORY_N_WALKS - 1; + + while (ii != history_last_walk_pos) + { + if (0 != fib_walk_history[ii].fwh_reason[0]) + { + fib_node_back_walk_reason_t reason; + u8 *s = NULL; + u32 jj; + + s = format(s, "[@%d]: %s:%d visits:%d duration:%.2f completed:%.2f ", + ii, fib_node_type_get_name(fib_walk_history[ii].fwh_parent.fnp_type), + 
fib_walk_history[ii].fwh_parent.fnp_index, + fib_walk_history[ii].fwh_n_visits, + fib_walk_history[ii].fwh_duration, + fib_walk_history[ii].fwh_completed); + if (FIB_WALK_FLAG_SYNC & fib_walk_history[ii].fwh_flags) + s = format(s, "sync, "); + if (FIB_WALK_FLAG_ASYNC & fib_walk_history[ii].fwh_flags) + s = format(s, "async, "); + + s = format(s, "reason:"); + jj = 0; + while (0 != fib_walk_history[ii].fwh_reason[jj]) + { + FOR_EACH_FIB_NODE_BW_REASON(reason) { + if ((1<<reason) & fib_walk_history[ii].fwh_reason[jj]) { + s = format (s, "%s,", fib_node_bw_reason_names[reason]); + } + } + jj++; + } + vlib_cli_output(vm, "%v", s); + } + + ii--; + if (ii < 0) + ii = HISTORY_N_WALKS - 1; + } + + return (NULL); +} + +VLIB_CLI_COMMAND (fib_walk_show_command, static) = { + .path = "show fib walk", + .short_help = "show fib walk", + .function = fib_walk_show, +}; + +static clib_error_t * +fib_walk_set_quota (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = NULL; + f64 new_quota; + + if (unformat (input, "%f", &new_quota)) + { + quota = new_quota; + } + else + { + error = clib_error_return(0 , "Pass a float value"); + } + + return (error); +} + +VLIB_CLI_COMMAND (fib_walk_set_quota_command, static) = { + .path = "set fib walk quota", + .short_help = "set fib walk quota", + .function = fib_walk_set_quota, +}; + +static clib_error_t * +fib_walk_set_histogram_elements_size (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = NULL; + u32 new; + + if (unformat (input, "%d", &new)) + { + fib_walk_work_nodes_visisted_incr = new; + } + else + { + error = clib_error_return(0 , "Pass an int value"); + } + + return (error); +} + +VLIB_CLI_COMMAND (fib_walk_set_histogram_elements_size_command, static) = { + .path = "set fib walk histogram elements size", + .short_help = "set fib walk histogram elements size", + .function = fib_walk_set_histogram_elements_size, +}; + +static clib_error_t 
* +fib_walk_clear (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + memset(fib_walk_hist_vists_per_walk, 0, sizeof(fib_walk_hist_vists_per_walk)); + memset(fib_walk_history, 0, sizeof(fib_walk_history)); + memset(fib_walk_work_time_taken, 0, sizeof(fib_walk_work_time_taken)); + memset(fib_walk_work_nodes_visited, 0, sizeof(fib_walk_work_nodes_visited)); + memset(fib_walk_sleep_lengths, 0, sizeof(fib_walk_sleep_lengths)); + + return (NULL); +} + +VLIB_CLI_COMMAND (fib_walk_clear_command, static) = { + .path = "clear fib walk", + .short_help = "clear fib walk", + .function = fib_walk_clear, +}; diff --git a/src/vnet/fib/fib_walk.h b/src/vnet/fib/fib_walk.h new file mode 100644 index 00000000000..7413d8a2c78 --- /dev/null +++ b/src/vnet/fib/fib_walk.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FIB_WALK_H__ +#define __FIB_WALK_H__ + +#include <vnet/fib/fib_node.h> + +/** + * @brief Walk priorities. + * Strict priorities. All walks a priority n are completed before n+1 is started. + * Increasing numerical value implies decreasing priority. 
+ */ +typedef enum fib_walk_priority_t_ +{ + FIB_WALK_PRIORITY_HIGH = 0, + FIB_WALK_PRIORITY_LOW = 1, +} fib_walk_priority_t; + +#define FIB_WALK_PRIORITY_NUM ((fib_walk_priority_t)(FIB_WALK_PRIORITY_LOW+1)) + +#define FIB_WALK_PRIORITIES { \ + [FIB_WALK_PRIORITY_HIGH] = "high", \ + [FIB_WALK_PRIORITY_LOW] = "low", \ +} + +#define FOR_EACH_FIB_WALK_PRIORITY(_prio) \ + for ((_prio) = FIB_WALK_PRIORITY_HIGH; \ + (_prio) < FIB_WALK_PRIORITY_NUM; \ + (_prio)++) + +extern void fib_walk_module_init(void); + +extern void fib_walk_async(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_walk_priority_t prio, + fib_node_back_walk_ctx_t *ctx); + +extern void fib_walk_sync(fib_node_type_t parent_type, + fib_node_index_t parent_index, + fib_node_back_walk_ctx_t *ctx); + +extern u8* format_fib_walk_priority(u8 *s, va_list ap); + +#endif + diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c new file mode 100644 index 00000000000..f6ebce00837 --- /dev/null +++ b/src/vnet/fib/ip4_fib.c @@ -0,0 +1,664 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/ip4_fib.h> + +/* + * A table of pefixes to be added to tables and the sources for them + */ +typedef struct ip4_fib_table_special_prefix_t_ { + fib_prefix_t ift_prefix; + fib_source_t ift_source; + fib_entry_flag_t ift_flag; +} ip4_fib_table_special_prefix_t; + +static const ip4_fib_table_special_prefix_t ip4_specials[] = { + { + /* 0.0.0.0/0*/ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0, + }, + .fp_len = 0, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* 0.0.0.0/32*/ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0, + }, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* + * 240.0.0.0/4 + * drop class E + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xf0000000, + }, + .fp_len = 4, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_SPECIAL, + .ift_flag = FIB_ENTRY_FLAG_DROP, + + }, + { + /* + * 224.0.0.0/4 + * drop all mcast + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xe0000000, + }, + .fp_len = 4, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_SPECIAL, + .ift_flag = FIB_ENTRY_FLAG_DROP, + }, + { + /* + * 255.255.255.255/32 + * drop, but we'll allow it to be usurped by the likes of DHCP + */ + .ift_prefix = { + .fp_addr = { + .ip4.data_u32 = 0xffffffff, + }, + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + }, + .ift_source = FIB_SOURCE_DEFAULT_ROUTE, + .ift_flag = FIB_ENTRY_FLAG_DROP, + } +}; + + +static u32 +ip4_create_fib_with_table_id (u32 table_id) +{ + fib_table_t *fib_table; + + pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_IP4; + fib_table->ft_index = + fib_table->v4.index = + (fib_table - ip4_main.fibs); + + hash_set 
(ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + fib_table->v4.table_id = + table_id; + fib_table->ft_flow_hash_config = + fib_table->v4.flow_hash_config = + IP_FLOW_HASH_DEFAULT; + fib_table->v4.fwd_classify_table_index = ~0; + fib_table->v4.rev_classify_table_index = ~0; + + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4); + + ip4_mtrie_init(&fib_table->v4.mtrie); + + /* + * add the special entries into the new FIB + */ + int ii; + + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + fib_prefix_t prefix = ip4_specials[ii].ift_prefix; + + prefix.fp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32); + + fib_table_entry_special_add(fib_table->ft_index, + &prefix, + ip4_specials[ii].ift_source, + ip4_specials[ii].ift_flag, + ADJ_INDEX_INVALID); + } + + return (fib_table->ft_index); +} + +void +ip4_fib_table_destroy (ip4_fib_t *fib) +{ + fib_table_t *fib_table = (fib_table_t*)fib; + int ii; + + /* + * remove all the specials we added when the table was created. + */ + for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++) + { + fib_prefix_t prefix = ip4_specials[ii].ift_prefix; + + prefix.fp_addr.ip4.data_u32 = + clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32); + + fib_table_entry_special_remove(fib_table->ft_index, + &prefix, + ip4_specials[ii].ift_source); + } + + /* + * validate no more routes. 
+ */ + ASSERT(0 == fib_table->ft_total_route_counts); + FOR_EACH_FIB_SOURCE(ii) + { + ASSERT(0 == fib_table->ft_src_route_counts[ii]); + } + + if (~0 != fib_table->ft_table_id) + { + hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id); + } + pool_put(ip4_main.fibs, fib_table); +} + + +u32 +ip4_fib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = ip4_fib_index_from_table_id(table_id); + if (~0 == index) + return ip4_create_fib_with_table_id(table_id); + + fib_table_lock(index, FIB_PROTOCOL_IP4); + + return (index); +} + +u32 +ip4_fib_table_create_and_lock (void) +{ + return (ip4_create_fib_with_table_id(~0)); +} + +u32 +ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip4_main.fib_index_by_sw_if_index)) + { + /* + * This is the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip4_main.fib_index_by_sw_if_index[sw_if_index]); +} + +flow_hash_config_t +ip4_fib_table_get_flow_hash_config (u32 fib_index) +{ + return (ip4_fib_get(fib_index)->flow_hash_config); +} + +/* + * ip4_fib_table_lookup_exact_match + * + * Exact match prefix lookup + */ +fib_node_index_t +ip4_fib_table_lookup_exact_match (const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + u32 key; + + hash = fib->fib_entry_by_dst_address[len]; + key = (addr->data_u32 & ip4_main.fib_masks[len]); + + result = hash_get(hash, key); + + if (NULL != result) { + return (result[0]); + } + return (FIB_NODE_INDEX_INVALID); +} + +/* + * ip4_fib_table_lookup_adj + * + * Longest prefix match + */ +index_t +ip4_fib_table_lookup_lb (ip4_fib_t *fib, + const ip4_address_t *addr) +{ + fib_node_index_t fei; + + fei = ip4_fib_table_lookup(fib, addr, 32); + + if (FIB_NODE_INDEX_INVALID != fei) + { + const dpo_id_t *dpo; + + dpo = fib_entry_contribute_ip_forwarding(fei); + + return (dpo->dpoi_index); + } + return (INDEX_INVALID); +} + +/* + * ip4_fib_table_lookup + * + 
* Longest prefix match + */ +fib_node_index_t +ip4_fib_table_lookup (const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + i32 mask_len; + u32 key; + + for (mask_len = len; mask_len >= 0; mask_len--) + { + hash = fib->fib_entry_by_dst_address[mask_len]; + key = (addr->data_u32 & ip4_main.fib_masks[mask_len]); + + result = hash_get (hash, key); + + if (NULL != result) { + return (result[0]); + } + } + return (FIB_NODE_INDEX_INVALID); +} + +void +ip4_fib_table_entry_insert (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index) +{ + uword * hash, * result; + u32 key; + + key = (addr->data_u32 & ip4_main.fib_masks[len]); + hash = fib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) { + /* + * adding a new entry + */ + if (NULL == hash) { + hash = hash_create (32 /* elts */, sizeof (uword)); + hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK); + } + hash = hash_set(hash, key, fib_entry_index); + fib->fib_entry_by_dst_address[len] = hash; + } + else + { + ASSERT(0); + } +} + +void +ip4_fib_table_entry_remove (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len) +{ + uword * hash, * result; + u32 key; + + key = (addr->data_u32 & ip4_main.fib_masks[len]); + hash = fib->fib_entry_by_dst_address[len]; + result = hash_get (hash, key); + + if (NULL == result) + { + /* + * removing a non-existant entry. i'll allow it. 
+ */ + } + else + { + hash_unset(hash, key); + } + + fib->fib_entry_by_dst_address[len] = hash; +} + +void +ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD +} + +void +ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE +} + +static void +ip4_fib_table_show_all (ip4_fib_t *fib, + vlib_main_t * vm) +{ + fib_node_index_t *fib_entry_indicies; + fib_node_index_t *fib_entry_index; + int i; + + fib_entry_indicies = NULL; + + for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) + { + uword * hash = fib->fib_entry_by_dst_address[i]; + + if (NULL != hash) + { + hash_pair_t * p; + + hash_foreach_pair (p, hash, + ({ + vec_add1(fib_entry_indicies, p->value[0]); + })); + } + } + + vec_sort_with_function(fib_entry_indicies, fib_entry_cmp_for_sort); + + vec_foreach(fib_entry_index, fib_entry_indicies) + { + vlib_cli_output(vm, "%U", + format_fib_entry, + *fib_entry_index, + FIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(fib_entry_indicies); +} + +static void +ip4_fib_table_show_one (ip4_fib_t *fib, + vlib_main_t * vm, + ip4_address_t *address, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_fib_entry, + ip4_fib_table_lookup(fib, address, mask_len), + FIB_ENTRY_FORMAT_DETAIL); +} + +static clib_error_t * +ip4_show_fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip4_main_t * im4 = &ip4_main; + fib_table_t * fib_table; + int verbose, matching, mtrie; + ip4_address_t matching_address; + u32 matching_mask = 32; + int i, table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + mtrie = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || unformat (input, "summary") + || unformat (input, "sum")) + 
verbose = 0; + + else if (unformat (input, "mtrie")) + mtrie = 1; + + else if (unformat (input, "%U/%d", + unformat_ip4_address, &matching_address, &matching_mask)) + matching = 1; + + else if (unformat (input, "%U", unformat_ip4_address, &matching_address)) + matching = 1; + + else if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (fib_table, im4->fibs, + ({ + ip4_fib_t *fib = &fib_table->v4; + + if (table_id >= 0 && table_id != (int)fib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)fib->index) + continue; + + vlib_cli_output (vm, "%U, fib_index %d, flow hash: %U", + format_fib_table_name, fib->index, FIB_PROTOCOL_IP4, + fib->index, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! verbose) + { + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++) + { + uword * hash = fib->fib_entry_by_dst_address[i]; + uword n_elts = hash_elts (hash); + if (n_elts > 0) + vlib_cli_output (vm, "%20d%16d", i, n_elts); + } + continue; + } + + if (!matching) + { + ip4_fib_table_show_all(fib, vm); + } + else + { + ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask); + } + + if (mtrie) + vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie); + })); + + return 0; +} + +/*? + * This command displays the IPv4 FIB Tables (VRF Tables) and the route + * entries for each table. + * + * @note This command will run for a long time when the FIB tables are + * comprised of millions of entries. For those senarios, consider displaying + * a single table or summary mode. 
+ * + * @cliexpar + * Example of how to display all the IPv4 FIB tables: + * @cliexstart{show ip fib} + * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * 0.0.0.0/0 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 0.0.0.0/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 6.0.1.2/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]] + * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 + * 7.0.0.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]] + * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 + * 224.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 240.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 255.255.255.255/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto + * 0.0.0.0/0 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 0.0.0.0/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 172.16.1.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet0 + * 172.16.1.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]] + * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0 + * 172.16.1.2/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] 
[@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36 + * 172.16.2.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet1 + * 172.16.2.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]] + * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1 + * 224.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 240.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 255.255.255.255/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @cliexend + * Example of how to display a single IPv4 FIB table: + * @cliexstart{show ip fib table 7} + * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto + * 0.0.0.0/0 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 0.0.0.0/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 172.16.1.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet0 + * 172.16.1.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]] + * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0 + * 172.16.1.2/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36 + * 172.16.2.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet1 + * 172.16.2.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]] + * [0] [@2]: dpo-receive: 172.16.2.1 
on af_packet1 + * 224.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 240.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 255.255.255.255/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @cliexend + * Example of how to display a summary of all IPv4 FIB tables: + * @cliexstart{show ip fib summary} + * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 32 4 + * ipv4-VRF:7, fib_index 1, flow hash: src dst sport dport proto + * Prefix length Count + * 0 1 + * 8 2 + * 24 2 + * 32 4 + * @cliexend + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_show_fib_command, static) = { + .path = "show ip fib", + .short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie]", + .function = ip4_show_fib, +}; +/* *INDENT-ON* */ diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h new file mode 100644 index 00000000000..cf312cdc629 --- /dev/null +++ b/src/vnet/fib/ip4_fib.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @brief The IPv4 FIB + * + * FIBs are composed of two prefix data-bases (aka tables). 
The non-forwarding + * table contains all the routes that the control plane has programmed, the + * forwarding table contains the sub-set of those routes that can be used to + * forward packets. + * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed + * by mask length, the forwarding table is an mtrie. + * + * This IPv4 FIB is used by the protocol independent FIB. So directly using + * these APIs in client code is not encouraged. However, this IPv4 FIB can be + * used if all the client wants is an IPv4 prefix data-base. + */ + +#ifndef __IP4_FIB_H__ +#define __IP4_FIB_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> + +extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); +extern fib_node_index_t ip4_fib_table_lookup_exact_match(const ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); + +extern void ip4_fib_table_entry_remove(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len); + +extern void ip4_fib_table_entry_insert(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip4_fib_table_destroy(ip4_fib_t *fib); + +extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib, + const ip4_address_t *addr, + u32 len, + const dpo_id_t *dpo); +extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib, + const ip4_address_t * dst); + +/** + * @brief Get the FIB at the given index + */ +static inline ip4_fib_t * +ip4_fib_get (u32 index) +{ + return (&(pool_elt_at_index(ip4_main.fibs, index)->v4)); +} + +always_inline u32 +ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst) +{ + return (ip4_fib_table_lookup_lb( + ip4_fib_get(vec_elt (im->fib_index_by_sw_if_index, sw_if_index)), + dst)); +} + +/** + * @brief Get or create an IPv4 fib. 
 + * + * Get or create an IPv4 fib with the provided table ID. + * + * @param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * @returns The index of the retrieved or created fib. + * + */ +extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip4_fib_table_create_and_lock(void); + + +static inline +u32 ip4_fib_index_from_table_id (u32 table_id) +{ + ip4_main_t * im = &ip4_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index); + +extern flow_hash_config_t ip4_fib_table_get_flow_hash_config(u32 fib_index); + + +always_inline index_t +ip4_fib_forwarding_lookup (u32 fib_index, + const ip4_address_t * addr) +{ + ip4_fib_mtrie_leaf_t leaf; + ip4_fib_mtrie_t * mtrie; + + mtrie = &ip4_fib_get(fib_index)->mtrie; + + leaf = IP4_FIB_MTRIE_LEAF_ROOT; + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 0); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2); + leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3); + + /* Handle default route. */ + leaf = (leaf == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie->default_leaf : leaf); + + return (ip4_fib_mtrie_leaf_get_adj_index(leaf)); +} + + +#endif + diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c new file mode 100644 index 00000000000..d5b9bdcbd52 --- /dev/null +++ b/src/vnet/fib/ip6_fib.c @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/fib_table.h> + +static void +vnet_ip6_fib_init (u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 0, + .fp_addr = { + .ip6 = { + { 0, 0, }, + }, + } + }; + + /* + * Add the default route. + */ + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_DEFAULT_ROUTE, + FIB_ENTRY_FLAG_DROP, + ADJ_INDEX_INVALID); + + /* + * Add ff02::1:ff00:0/104 via local route for all tables. + * This is required for neighbor discovery to work. + */ + ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0); + pfx.fp_len = 104; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-routers multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-nodes multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * Add all-mldv2 multicast address via local route for all tables + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + 
IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + pfx.fp_len = 128; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); + + /* + * all link local for us + */ + pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + pfx.fp_addr.ip6.as_u64[1] = 0; + pfx.fp_len = 10; + fib_table_entry_special_add(fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_LOCAL, + ADJ_INDEX_INVALID); +} + +static u32 +create_fib_with_table_id (u32 table_id) +{ + fib_table_t *fib_table; + + pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_IP6; + fib_table->ft_index = + fib_table->v6.index = + (fib_table - ip6_main.fibs); + + hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + fib_table->v6.table_id = + table_id; + fib_table->ft_flow_hash_config = + fib_table->v6.flow_hash_config = + IP_FLOW_HASH_DEFAULT; + + vnet_ip6_fib_init(fib_table->ft_index); + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP6); + + return (fib_table->ft_index); +} + +u32 +ip6_fib_table_find_or_create_and_lock (u32 table_id) +{ + uword * p; + + p = hash_get (ip6_main.fib_index_by_table_id, table_id); + if (NULL == p) + return create_fib_with_table_id(table_id); + + fib_table_lock(p[0], FIB_PROTOCOL_IP6); + + return (p[0]); +} + +u32 +ip6_fib_table_create_and_lock (void) +{ + return (create_fib_with_table_id(~0)); +} + +void +ip6_fib_table_destroy (u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 0, + .fp_addr = { + .ip6 = { + { 0, 0, }, + }, + } + }; + + /* + * the default route. 
+ */ + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_DEFAULT_ROUTE); + + + /* + * ff02::1:ff00:0/104 + */ + ip6_set_solicited_node_multicast_address(&pfx.fp_addr.ip6, 0); + pfx.fp_len = 104; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-routers multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-nodes multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all-mldv2 multicast address + */ + ip6_set_reserved_multicast_address (&pfx.fp_addr.ip6, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + pfx.fp_len = 128; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + /* + * all link local + */ + pfx.fp_addr.ip6.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + pfx.fp_addr.ip6.as_u64[1] = 0; + pfx.fp_len = 10; + fib_table_entry_special_remove(fib_index, + &pfx, + FIB_SOURCE_SPECIAL); + + fib_table_t *fib_table = fib_table_get(fib_index, FIB_PROTOCOL_IP6); + fib_source_t source; + + /* + * validate no more routes. 
+ */ + ASSERT(0 == fib_table->ft_total_route_counts); + FOR_EACH_FIB_SOURCE(source) + { + ASSERT(0 == fib_table->ft_src_route_counts[source]); + } + + if (~0 != fib_table->ft_table_id) + { + hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id); + } + pool_put(ip6_main.fibs, fib_table); +} + +fib_node_index_t +ip6_fib_table_lookup (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + const ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv, value; + int i, n_p, rv; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + n_p = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = addr->as_u64[0]; + kv.key[1] = addr->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + /* + * start search from a mask length same length or shorter. + * we don't want matches longer than the mask passed + */ + i = 0; + while (i < n_p && table->prefix_lengths_in_search_order[i] > len) + { + i++; + } + + for (; i < n_p; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. 
+ kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + return (FIB_NODE_INDEX_INVALID); +} + +fib_node_index_t +ip6_fib_table_lookup_exact_match (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + const ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv, value; + ip6_address_t *mask; + u64 fib; + int rv; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + + return (FIB_NODE_INDEX_INVALID); +} + +static void +compute_prefix_lengths_in_search_order (ip6_fib_table_instance_t *table) +{ + int i; + vec_reset_length (table->prefix_lengths_in_search_order); + /* Note: bitmap reversed so this is in fact a longest prefix match */ + clib_bitmap_foreach (i, table->non_empty_dst_address_length_bitmap, + ({ + int dst_address_length = 128 - i; + vec_add1(table->prefix_lengths_in_search_order, dst_address_length); + })); +} + +void +ip6_fib_table_entry_remove (u32 fib_index, + const ip6_address_t *addr, + u32 len) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0); + + /* refcount accounting */ + ASSERT (table->dst_address_length_refcounts[len] > 0); + if (--table->dst_address_length_refcounts[len] == 0) + { + 
table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 0); + compute_prefix_lengths_in_search_order (table); + } +} + +void +ip6_fib_table_entry_insert (u32 fib_index, + const ip6_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_NON_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = fib_entry_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1); + + table->dst_address_length_refcounts[len]++; + + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 1); + compute_prefix_lengths_in_search_order (table); +} + +u32 +ip6_fib_table_fwding_lookup (ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst) +{ + const ip6_fib_table_instance_t *table; + int i, len; + int rv; + BVT(clib_bihash_kv) kv, value; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + len = vec_len (table->prefix_lengths_in_search_order); + + kv.key[0] = dst->as_u64[0]; + kv.key[1] = dst->as_u64[1]; + fib = ((u64)((fib_index))<<32); + + for (i = 0; i < len; i++) + { + int dst_address_length = table->prefix_lengths_in_search_order[i]; + ip6_address_t * mask = &ip6_main.fib_masks[dst_address_length]; + + ASSERT(dst_address_length >= 0 && dst_address_length <= 128); + //As lengths are decreasing, masks are increasingly specific. 
+ kv.key[0] &= mask->as_u64[0]; + kv.key[1] &= mask->as_u64[1]; + kv.key[2] = fib | dst_address_length; + + rv = BV(clib_bihash_search_inline_2)(&table->ip6_hash, &kv, &value); + if (rv == 0) + return value.value; + } + + /* default route is always present */ + ASSERT(0); + return 0; +} + +u32 ip6_fib_table_fwding_lookup_with_if_index (ip6_main_t * im, + u32 sw_if_index, + const ip6_address_t * dst) +{ + u32 fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + return ip6_fib_table_fwding_lookup(im, fib_index, dst); +} + +flow_hash_config_t +ip6_fib_table_get_flow_hash_config (u32 fib_index) +{ + return (ip6_fib_get(fib_index)->flow_hash_config); +} + +u32 +ip6_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + if (sw_if_index >= vec_len(ip6_main.fib_index_by_sw_if_index)) + { + /* + * This is the case for interfaces that are not yet mapped to + * a IP table + */ + return (~0); + } + return (ip6_main.fib_index_by_sw_if_index[sw_if_index]); +} + +void +ip6_fib_table_fwding_dpo_update (u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = &ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = dpo->dpoi_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 1); + + table->dst_address_length_refcounts[len]++; + + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 1); + compute_prefix_lengths_in_search_order (table); +} + +void +ip6_fib_table_fwding_dpo_remove (u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo) +{ + ip6_fib_table_instance_t *table; + BVT(clib_bihash_kv) kv; + ip6_address_t *mask; + u64 fib; + + table = 
&ip6_main.ip6_table[IP6_FIB_TABLE_FWDING]; + mask = &ip6_main.fib_masks[len]; + fib = ((u64)((fib_index))<<32); + + kv.key[0] = addr->as_u64[0] & mask->as_u64[0]; + kv.key[1] = addr->as_u64[1] & mask->as_u64[1]; + kv.key[2] = fib | len; + kv.value = dpo->dpoi_index; + + BV(clib_bihash_add_del)(&table->ip6_hash, &kv, 0); + + /* refcount accounting */ + ASSERT (table->dst_address_length_refcounts[len] > 0); + if (--table->dst_address_length_refcounts[len] == 0) + { + table->non_empty_dst_address_length_bitmap = + clib_bitmap_set (table->non_empty_dst_address_length_bitmap, + 128 - len, 0); + compute_prefix_lengths_in_search_order (table); + } +} + +typedef struct ip6_fib_show_ctx_t_ { + u32 fib_index; + fib_node_index_t *entries; +} ip6_fib_show_ctx_t; + +static void +ip6_fib_table_collect_entries (clib_bihash_kv_24_8_t * kvp, + void *arg) +{ + ip6_fib_show_ctx_t *ctx = arg; + + if ((kvp->key[2] >> 32) == ctx->fib_index) + { + vec_add1(ctx->entries, kvp->value); + } +} + +static void +ip6_fib_table_show_all (ip6_fib_t *fib, + vlib_main_t * vm) +{ + fib_node_index_t *fib_entry_index; + ip6_fib_show_ctx_t ctx = { + .fib_index = fib->index, + .entries = NULL, + }; + ip6_main_t *im = &ip6_main; + + BV(clib_bihash_foreach_key_value_pair)(&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + ip6_fib_table_collect_entries, + &ctx); + + vec_sort_with_function(ctx.entries, fib_entry_cmp_for_sort); + + vec_foreach(fib_entry_index, ctx.entries) + { + vlib_cli_output(vm, "%U", + format_fib_entry, + *fib_entry_index, + FIB_ENTRY_FORMAT_BRIEF); + } + + vec_free(ctx.entries); +} + +static void +ip6_fib_table_show_one (ip6_fib_t *fib, + vlib_main_t * vm, + ip6_address_t *address, + u32 mask_len) +{ + vlib_cli_output(vm, "%U", + format_fib_entry, + ip6_fib_table_lookup(fib->index, address, mask_len), + FIB_ENTRY_FORMAT_DETAIL); +} + +typedef struct { + u32 fib_index; + u64 count_by_prefix_length[129]; +} count_routes_in_fib_at_prefix_length_arg_t; + +static void 
count_routes_in_fib_at_prefix_length +(BVT(clib_bihash_kv) * kvp, void *arg) +{ + count_routes_in_fib_at_prefix_length_arg_t * ap = arg; + int mask_width; + + if ((kvp->key[2]>>32) != ap->fib_index) + return; + + mask_width = kvp->key[2] & 0xFF; + + ap->count_by_prefix_length[mask_width]++; +} + +static clib_error_t * +ip6_show_fib (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + count_routes_in_fib_at_prefix_length_arg_t _ca, *ca = &_ca; + ip6_main_t * im6 = &ip6_main; + fib_table_t *fib_table; + ip6_fib_t * fib; + int verbose, matching; + ip6_address_t matching_address; + u32 mask_len = 128; + int table_id = -1, fib_index = ~0; + + verbose = 1; + matching = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "brief") || + unformat (input, "summary") || + unformat (input, "sum")) + verbose = 0; + + else if (unformat (input, "%U/%d", + unformat_ip6_address, &matching_address, &mask_len)) + matching = 1; + + else if (unformat (input, "%U", unformat_ip6_address, &matching_address)) + matching = 1; + + else if (unformat (input, "table %d", &table_id)) + ; + else if (unformat (input, "index %d", &fib_index)) + ; + else + break; + } + + pool_foreach (fib_table, im6->fibs, + ({ + fib = &(fib_table->v6); + if (table_id >= 0 && table_id != (int)fib->table_id) + continue; + if (fib_index != ~0 && fib_index != (int)fib->index) + continue; + + vlib_cli_output (vm, "%s, fib_index %d, flow hash: %U", + fib_table->ft_desc, fib->index, + format_ip_flow_hash_config, fib->flow_hash_config); + + /* Show summary? */ + if (! 
verbose) + { + BVT(clib_bihash) * h = &im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash; + int len; + + vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count"); + + memset (ca, 0, sizeof(*ca)); + ca->fib_index = fib->index; + + BV(clib_bihash_foreach_key_value_pair) + (h, count_routes_in_fib_at_prefix_length, ca); + + for (len = 128; len >= 0; len--) + { + if (ca->count_by_prefix_length[len]) + vlib_cli_output (vm, "%=20d%=16lld", + len, ca->count_by_prefix_length[len]); + } + continue; + } + + if (!matching) + { + ip6_fib_table_show_all(fib, vm); + } + else + { + ip6_fib_table_show_one(fib, vm, &matching_address, mask_len); + } + })); + + return 0; +} + +/*? + * This command displays the IPv6 FIB Tables (VRF Tables) and the route + * entries for each table. + * + * @note This command will run for a long time when the FIB tables are + * comprised of millions of entries. For those scenarios, consider displaying + * in summary mode. + * + * @cliexpar + * @parblock + * Example of how to display all the IPv6 FIB tables: + * @cliexstart{show ip6 fib} + * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * @::/0 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]] + * [0] [@2]: dpo-receive + * ipv6-VRF:8, fib_index 1, flow hash: src dst sport dport proto + * @::/0 + *
unicast-ip6-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @::a:1:1:0:4/126 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]] + * [0] [@4]: ipv6-glean: af_packet0 + * @::a:1:1:0:7/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]] + * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]] + * [0] [@2]: dpo-receive + * fe80::fe:3eff:fe3e:9222/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]] + * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0 + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]] + * [0] [@2]: dpo-receive + * @cliexend + * + * Example of how to display a summary of all IPv6 FIB tables: + * @cliexstart{show ip6 fib summary} + * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * Prefix length Count + * 128 3 + * 104 1 + * 10 1 + * 0 1 + * ipv6-VRF:8, fib_index 1, flow hash: src dst sport dport proto + * Prefix length Count + * 128 5 + * 126 1 + * 104 1 + * 10 1 + * 0 1 + * @cliexend + * @endparblock + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_show_fib_command, static) = { + .path = "show ip6 fib", + .short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]]", + .function = ip6_show_fib, +}; +/* *INDENT-ON* */ diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h new 
file mode 100644 index 00000000000..f6af993a3c2 --- /dev/null +++ b/src/vnet/fib/ip6_fib.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __IP6_FIB_H__ +#define __IP6_FIB_H__ + +#include <vlib/vlib.h> +#include <vnet/ip/format.h> +#include <vnet/fib/fib_entry.h> +#include <vnet/fib/fib_table.h> +#include <vnet/ip/lookup.h> +#include <vnet/dpo/load_balance.h> + +extern fib_node_index_t ip6_fib_table_lookup(u32 fib_index, + const ip6_address_t *addr, + u32 len); +extern fib_node_index_t ip6_fib_table_lookup_exact_match(u32 fib_index, + const ip6_address_t *addr, + u32 len); + +extern void ip6_fib_table_entry_remove(u32 fib_index, + const ip6_address_t *addr, + u32 len); + +extern void ip6_fib_table_entry_insert(u32 fib_index, + const ip6_address_t *addr, + u32 len, + fib_node_index_t fib_entry_index); +extern void ip6_fib_table_destroy(u32 fib_index); + +extern void ip6_fib_table_fwding_dpo_update(u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +extern void ip6_fib_table_fwding_dpo_remove(u32 fib_index, + const ip6_address_t *addr, + u32 len, + const dpo_id_t *dpo); + +u32 ip6_fib_table_fwding_lookup_with_if_index(ip6_main_t * im, + u32 sw_if_index, + const ip6_address_t * dst); +u32 ip6_fib_table_fwding_lookup(ip6_main_t * im, + u32 fib_index, + const ip6_address_t * dst); + +/** + * @brief return the DPO that the LB stacks on.
+ */ +always_inline u32 +ip6_src_lookup_for_packet (ip6_main_t * im, + vlib_buffer_t * b, + ip6_header_t * i) +{ + if (vnet_buffer (b)->ip.adj_index[VLIB_RX] == ~0) + { + const dpo_id_t *dpo; + index_t lbi; + + lbi = ip6_fib_table_fwding_lookup_with_if_index( + im, + vnet_buffer (b)->sw_if_index[VLIB_RX], + &i->src_address); + + dpo = load_balance_get_bucket_i(load_balance_get(lbi), 0); + + if (dpo_is_adj(dpo)) + { + vnet_buffer (b)->ip.adj_index[VLIB_RX] = dpo->dpoi_index; + } + } + return vnet_buffer (b)->ip.adj_index[VLIB_RX]; +} + +/** + * \brief Get or create an IPv6 fib. + * + * Get or create an IPv6 fib with the provided table ID. + * + * \param im + * ip6_main pointer. + * \param table_id + * When set to \c ~0, an arbitrary and unused fib ID is picked + * and can be retrieved with \c ret->table_id. + * Otherwise, the fib ID to be used to retrieve or create the desired fib. + * \returns A pointer to the retrieved or created fib. + * + */ +extern u32 ip6_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 ip6_fib_table_create_and_lock(void); + +static inline ip6_fib_t * +ip6_fib_get (fib_node_index_t index) +{ + ASSERT(!pool_is_free_index(ip6_main.fibs, index)); + return (&pool_elt_at_index (ip6_main.fibs, index)->v6); +} + +static inline +u32 ip6_fib_index_from_table_id (u32 table_id) +{ + ip6_main_t * im = &ip6_main; + uword * p; + + p = hash_get (im->fib_index_by_table_id, table_id); + if (!p) + return ~0; + + return p[0]; +} + +extern u32 ip6_fib_table_get_index_for_sw_if_index(u32 sw_if_index); + +extern flow_hash_config_t ip6_fib_table_get_flow_hash_config(u32 fib_index); + +#endif + diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c new file mode 100644 index 00000000000..6a9b1ac2989 --- /dev/null +++ b/src/vnet/fib/mpls_fib.c @@ -0,0 +1,439 @@ +/* + * mpls_fib.c: The Label/MPLS FIB + * + * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * An MPLS_FIB table; + * + * The entries in the table are programmed with one or more MOIs. These MOIs + * may result in different forwarding actions for end-of-stack (EOS) and non-EOS + * packets. Whether the two actions are the same more often than they are + * different, or vice versa, is a function of the deployment in which the router + * is used and thus not predictable. + * The design choice to make with an MPLS_FIB table is: + * 1 - 20 bit key: label only. + * When the EOS and non-EOS actions differ the result is an 'EOS-choice' object. + * 2 - 21 bit key: label and EOS-bit. + * The result is then the specific action based on EOS-bit. + * + * 20 bit key: + * Advantages: + * - lower memory overhead, since there are fewer DB entries. + * Disadvantages: + * - slower DP performance in the case the chains differ, as more objects are + * encountered in the switch path + * + * 21 bit key: + * Advantages: + * - faster DP performance + * Disadvantages: + * - increased memory footprint. + * + * Switching between schemes based on observed/measured action similarity is not + * considered on the grounds of complexity and flip-flopping. + * + * VPP mantra - favour performance over memory. We choose a 21 bit key.
+ */ + +#include <vnet/fib/fib_table.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/mpls/mpls.h> + +/** + * All lookups in an MPLS_FIB table must result in a DPO of type load-balance. + * This is the default result which links to drop + */ +static index_t mpls_fib_drop_dpo_index = INDEX_INVALID; + +/** + * FIXME + */ +#define MPLS_FLOW_HASH_DEFAULT 0 + +static inline u32 +mpls_fib_entry_mk_key (mpls_label_t label, + mpls_eos_bit_t eos) +{ + ASSERT(eos <= 1); + return (label << 1 | eos); +} + +u32 +mpls_fib_index_from_table_id (u32 table_id) +{ + mpls_main_t *mm = &mpls_main; + uword * p; + + p = hash_get (mm->fib_index_by_table_id, table_id); + if (!p) + return FIB_NODE_INDEX_INVALID; + + return p[0]; +} + +static u32 +mpls_fib_create_with_table_id (u32 table_id) +{ + dpo_id_t dpo = DPO_INVALID; + fib_table_t *fib_table; + mpls_eos_bit_t eos; + mpls_fib_t *mf; + int i; + + pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES); + memset(fib_table, 0, sizeof(*fib_table)); + + fib_table->ft_proto = FIB_PROTOCOL_MPLS; + fib_table->ft_index = + (fib_table - mpls_main.fibs); + + hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index); + + fib_table->ft_table_id = + table_id; + fib_table->ft_flow_hash_config = + MPLS_FLOW_HASH_DEFAULT; + fib_table->v4.fwd_classify_table_index = ~0; + fib_table->v4.rev_classify_table_index = ~0; + + fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS); + + if (INDEX_INVALID == mpls_fib_drop_dpo_index) + { + mpls_fib_drop_dpo_index = load_balance_create(1, DPO_PROTO_MPLS, 0); + load_balance_set_bucket(mpls_fib_drop_dpo_index, + 0, + drop_dpo_get(DPO_PROTO_MPLS)); + } + + mf = &fib_table->mpls; + mf->mf_entries = hash_create(0, sizeof(fib_node_index_t)); + for (i = 0; i < MPLS_FIB_DB_SIZE; i++) + { + /* + * initialise each DPO in the data-path lookup table + * to be the special MPLS drop + */ + 
mf->mf_lbs[i] = mpls_fib_drop_dpo_index; + } + + /* + * non-default forwarding for the special labels. + */ + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + .fp_payload_proto = DPO_PROTO_MPLS, + }; + + /* + * PUNT the router alert, both EOS and non-eos + */ + prefix.fp_label = MPLS_IETF_ROUTER_ALERT_LABEL; + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_eos = eos; + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + punt_dpo_get(DPO_PROTO_MPLS)); + } + + /* + * IPv4 explicit NULL EOS lookup in the interface's IPv4 table + */ + prefix.fp_label = MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL; + prefix.fp_payload_proto = DPO_PROTO_IP4; + prefix.fp_eos = MPLS_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, // unused + DPO_PROTO_IP4, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + prefix.fp_payload_proto = DPO_PROTO_MPLS; + prefix.fp_eos = MPLS_NON_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, //unsued + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + /* + * IPv6 explicit NULL EOS lookup in the interface's IPv6 table + */ + prefix.fp_label = MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL; + prefix.fp_payload_proto = DPO_PROTO_IP6; + prefix.fp_eos = MPLS_EOS; + + lookup_dpo_add_or_lock_w_fib_index(0, //unused + DPO_PROTO_IP6, + LOOKUP_INPUT_DST_ADDR, + LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + prefix.fp_payload_proto = DPO_PROTO_MPLS; + prefix.fp_eos = MPLS_NON_EOS; + lookup_dpo_add_or_lock_w_fib_index(0, // unsued + DPO_PROTO_MPLS, + LOOKUP_INPUT_DST_ADDR, + 
LOOKUP_TABLE_FROM_INPUT_INTERFACE, + &dpo); + fib_table_entry_special_dpo_add(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpo); + + return (fib_table->ft_index); +} + +u32 +mpls_fib_table_find_or_create_and_lock (u32 table_id) +{ + u32 index; + + index = mpls_fib_index_from_table_id(table_id); + if (~0 == index) + return mpls_fib_create_with_table_id(table_id); + + fib_table_lock(index, FIB_PROTOCOL_MPLS); + + return (index); +} +u32 +mpls_fib_table_create_and_lock (void) +{ + return (mpls_fib_create_with_table_id(~0)); +} + +void +mpls_fib_table_destroy (mpls_fib_t *mf) +{ + fib_table_t *fib_table = (fib_table_t*)mf; + fib_prefix_t prefix = { + .fp_proto = FIB_PROTOCOL_MPLS, + }; + mpls_label_t special_labels[] = { + MPLS_IETF_ROUTER_ALERT_LABEL, + MPLS_IETF_IPV6_EXPLICIT_NULL_LABEL, + MPLS_IETF_IPV4_EXPLICIT_NULL_LABEL, + }; + mpls_eos_bit_t eos; + u32 ii; + + for (ii = 0; ii < ARRAY_LEN(special_labels); ii++) + { + FOR_EACH_MPLS_EOS_BIT(eos) + { + prefix.fp_label = special_labels[ii]; + prefix.fp_eos = eos; + + fib_table_entry_delete(fib_table->ft_index, + &prefix, + FIB_SOURCE_SPECIAL); + } + } + if (~0 != fib_table->ft_table_id) + { + hash_unset(mpls_main.fib_index_by_table_id, + fib_table->ft_table_id); + } + hash_delete(mf->mf_entries); + + pool_put(mpls_main.fibs, fib_table); +} + +fib_node_index_t +mpls_fib_table_lookup (const mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + uword *p; + + p = hash_get(mf->mf_entries, mpls_fib_entry_mk_key(label, eos)); + + if (NULL == p) + return FIB_NODE_INDEX_INVALID; + + return p[0]; +} + +void +mpls_fib_table_entry_insert (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t lfei) +{ + hash_set(mf->mf_entries, mpls_fib_entry_mk_key(label, eos), lfei); +} + +void +mpls_fib_table_entry_remove (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + hash_unset(mf->mf_entries, mpls_fib_entry_mk_key(label, eos)); +} + +void 
+mpls_fib_forwarding_table_update (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + const dpo_id_t *dpo) +{ + mpls_label_t key; + + ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type); + + key = mpls_fib_entry_mk_key(label, eos); + + mf->mf_lbs[key] = dpo->dpoi_index; +} + +void +mpls_fib_forwarding_table_reset (mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos) +{ + mpls_label_t key; + + key = mpls_fib_entry_mk_key(label, eos); + + mf->mf_lbs[key] = mpls_fib_drop_dpo_index; +} + +flow_hash_config_t +mpls_fib_table_get_flow_hash_config (u32 fib_index) +{ + // FIXME. + return (0); +} + +static void +mpls_fib_table_show_all (const mpls_fib_t *mpls_fib, + vlib_main_t * vm) +{ + fib_node_index_t lfei, *lfeip, *lfeis = NULL; + mpls_label_t key; + + hash_foreach(key, lfei, mpls_fib->mf_entries, + ({ + vec_add1(lfeis, lfei); + })); + + vec_sort_with_function(lfeis, fib_entry_cmp_for_sort); + + vec_foreach(lfeip, lfeis) + { + vlib_cli_output (vm, "%U", + format_fib_entry, *lfeip, + FIB_ENTRY_FORMAT_DETAIL); + } + vec_free(lfeis); +} + +static void +mpls_fib_table_show_one (const mpls_fib_t *mpls_fib, + mpls_label_t label, + vlib_main_t * vm) +{ + fib_node_index_t lfei; + mpls_eos_bit_t eos; + + FOR_EACH_MPLS_EOS_BIT(eos) + { + lfei = mpls_fib_table_lookup(mpls_fib, label, eos); + + if (FIB_NODE_INDEX_INVALID != lfei) + { + vlib_cli_output (vm, "%U", + format_fib_entry, lfei, FIB_ENTRY_FORMAT_DETAIL); + } + } +} + +static clib_error_t * +mpls_fib_show (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + fib_table_t * fib_table; + mpls_label_t label; + int table_id; + + table_id = -1; + label = MPLS_LABEL_INVALID; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* if (unformat (input, "brief") || unformat (input, "summary") */ + /* || unformat (input, "sum")) */ + /* verbose = 0; */ + + if (unformat (input, "%d", &label)) + continue; + else if (unformat (input, "table %d", &table_id)) + ; + else + break; + } + + 
pool_foreach (fib_table, mpls_main.fibs, + ({ + if (table_id >= 0 && table_id != fib_table->ft_table_id) + continue; + + vlib_cli_output (vm, "%v, fib_index %d", + fib_table->ft_desc, mpls_main.fibs - fib_table); + + if (MPLS_LABEL_INVALID == label) + { + mpls_fib_table_show_all(&(fib_table->mpls), vm); + } + else + { + mpls_fib_table_show_one(&(fib_table->mpls), label, vm); + } + })); + + return 0; +} + +VLIB_CLI_COMMAND (mpls_fib_show_command, static) = { + .path = "show mpls fib", + .short_help = "show mpls fib [summary] [table <n>]", + .function = mpls_fib_show, +}; diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h new file mode 100644 index 00000000000..93ae4623016 --- /dev/null +++ b/src/vnet/fib/mpls_fib.h @@ -0,0 +1,106 @@ +/* + * mpls_fib.h: The Label/MPLS FIB + * + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MPLS_FIB_TABLE_H__ +#define __MPLS_FIB_TABLE_H__ + +#include <vnet/vnet.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_types.h> +#include <vnet/dpo/dpo.h> +#include <vnet/mpls/mpls.h> +#include <vnet/fib/fib_table.h> + +static inline mpls_fib_t* +mpls_fib_get (fib_node_index_t index) +{ + if (!pool_is_free_index(mpls_main.fibs, index)) + return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls)); + return (NULL); +} + +extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id); +extern u32 mpls_fib_table_create_and_lock(void); +// extern mpls_fib_t * mpls_fib_find(u32 table_id); +extern u32 mpls_fib_index_from_table_id(u32 table_id); + +extern u8 *format_mpls_fib_table_name(u8 * s, va_list * args); + +extern fib_node_index_t mpls_fib_table_entry_add_from_ip_fib_entry ( + u32 table_id, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t fib_entry_index); + + +extern fib_node_index_t mpls_fib_table_lookup(const mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); + +extern void mpls_fib_table_entry_remove(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); +extern void mpls_fib_table_entry_insert(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + fib_node_index_t fei); +extern void mpls_fib_table_destroy(mpls_fib_t *mf); + + + +extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos, + const dpo_id_t *dpo); +extern void mpls_fib_forwarding_table_reset(mpls_fib_t *mf, + mpls_label_t label, + mpls_eos_bit_t eos); + +/** + * @brief + * Lookup a label and EOS bit in the MPLS_FIB table to retrieve the + * load-balance index to be used for packet forwarding. 
+ */ +static inline index_t +mpls_fib_table_forwarding_lookup (u32 mpls_fib_index, + const mpls_unicast_header_t *hdr) +{ + mpls_label_t label; + mpls_fib_t *mf; + u32 key; + + label = clib_net_to_host_u32(hdr->label_exp_s_ttl); + key = (vnet_mpls_uc_get_label(label) << 1) | vnet_mpls_uc_get_s(label); + + mf = mpls_fib_get(mpls_fib_index); + + return (mf->mf_lbs[key]); +} + +static inline u32 +mpls_fib_table_get_index_for_sw_if_index (u32 sw_if_index) +{ + mpls_main_t *mm = &mpls_main; + + ASSERT(vec_len(mm->fib_index_by_sw_if_index) > sw_if_index); + + return (mm->fib_index_by_sw_if_index[sw_if_index]); +} + +extern flow_hash_config_t mpls_fib_table_get_flow_hash_config(u32 fib_index); + +#endif |