diff options
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/ip/ip6_packet.h | 33 | ||||
-rwxr-xr-x[-rw-r--r--] | src/vnet/sr/dir.dox | 4 | ||||
-rw-r--r-- | src/vnet/sr/examples/sr_multicastmap.script | 4 | ||||
-rwxr-xr-x | src/vnet/sr/ietf_draft_05.txt | 1564 | ||||
-rw-r--r-- | src/vnet/sr/rfc_draft_05.txt | 1265 | ||||
-rw-r--r-- | src/vnet/sr/sr.api | 203 | ||||
-rwxr-xr-x[-rw-r--r--] | src/vnet/sr/sr.c | 3397 | ||||
-rwxr-xr-x[-rw-r--r--] | src/vnet/sr/sr.h | 361 | ||||
-rw-r--r-- | src/vnet/sr/sr_api.c | 250 | ||||
-rw-r--r-- | src/vnet/sr/sr_doc.md | 161 | ||||
-rw-r--r-- | src/vnet/sr/sr_error.def | 20 | ||||
-rw-r--r-- | src/vnet/sr/sr_fix_dst_error.def | 17 | ||||
-rwxr-xr-x | src/vnet/sr/sr_localsid.c | 1478 | ||||
-rwxr-xr-x[-rw-r--r--] | src/vnet/sr/sr_packet.h | 248 | ||||
-rwxr-xr-x | src/vnet/sr/sr_policy_rewrite.c | 3253 | ||||
-rwxr-xr-x | src/vnet/sr/sr_steering.c | 568 |
16 files changed, 7599 insertions, 5227 deletions
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h index 4fd14b96..6eabeef1 100644 --- a/src/vnet/ip/ip6_packet.h +++ b/src/vnet/ip/ip6_packet.h @@ -448,20 +448,47 @@ always_inline u8 ip6_ext_hdr(u8 nexthdr) * find out if nexthdr is an extension header or a protocol */ return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) || - (nexthdr == IP_PROTOCOL_IP6_NONXT) || (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) || (nexthdr == IP_PROTOCOL_IPSEC_AH) || (nexthdr == IP_PROTOCOL_IPV6_ROUTE) || (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS); } -#define ip6_ext_header_len(p) (((p)->n_data_u64s+1) << 3) -#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2) +#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3) +#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2) always_inline void * ip6_ext_next_header (ip6_ext_header_t *ext_hdr ) { return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); } +/* + * Macro to find the IPv6 ext header of type t (walks the next_hdr chain) + * I is the IPv6 header + * P is set to the header preceding the match (NULL if the match is first) + * M is set to the matched IPv6 ext header of type t (NULL if not found) + */ +#define ip6_ext_header_find_t(i, p, m, t) \ +if ((i)->protocol == (t)) \ +{ \ + (m) = (void *)((i)+1); \ + (p) = NULL; \ +} \ +else \ +{ \ + (m) = NULL; \ + (p) = (void *)((i)+1); \ + while (ip6_ext_hdr((p)->next_hdr) && \ + ((ip6_ext_header_t *)(p))->next_hdr != (t)) \ + { \ + (p) = ip6_ext_next_header((p)); \ + } \ + if (((ip6_ext_header_t *)(p))->next_hdr == (t)) \ + { \ + (m) = (void *)(ip6_ext_next_header((p))); \ + } \ +} + + typedef CLIB_PACKED (struct { u8 next_hdr; /* Length of this header plus option data in 8 byte units.
*/ diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox index a98b202c..3f539a58 100644..100755 --- a/src/vnet/sr/dir.dox +++ b/src/vnet/sr/dir.dox @@ -18,8 +18,8 @@ @brief Segment Routing code An implementation of Segment Routing as per: - draft-previdi-6man-segment-routing-header-05 + draft-ietf-6man-segment-routing-header-05 - See file: rfc_draft_05.txt + @see ietf_draft_05.txt */
\ No newline at end of file diff --git a/src/vnet/sr/examples/sr_multicastmap.script b/src/vnet/sr/examples/sr_multicastmap.script deleted file mode 100644 index 20bf7dc0..00000000 --- a/src/vnet/sr/examples/sr_multicastmap.script +++ /dev/null @@ -1,4 +0,0 @@ -sr_tunnel_add_del name sr2 src ::a:1:1:0:6 dst ff15::2/128 next ::a:1:1:0:f next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean -sr_tunnel_add_del name sr3 src ::b:1:1:0:6 dst ff16::2/128 next ::a:1:1:0:13 next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean -sr_policy_add_del name pol1 tunnel sr2 tunnel sr3 -sr_multicast_map_add_del address ff15::1 sr-policy pol1 diff --git a/src/vnet/sr/ietf_draft_05.txt b/src/vnet/sr/ietf_draft_05.txt new file mode 100755 index 00000000..e9bff04f --- /dev/null +++ b/src/vnet/sr/ietf_draft_05.txt @@ -0,0 +1,1564 @@ +Network Working Group S. Previdi, Ed. +Internet-Draft C. Filsfils +Intended status: Standards Track Cisco Systems, Inc. +Expires: August 5, 2017 B. Field + Comcast + I. Leung + Rogers Communications + J. Linkova + Google + E. Aries + Facebook + T. Kosugi + NTT + E. Vyncke + Cisco Systems, Inc. + D. Lebrun + Universite Catholique de Louvain + February 1, 2017 + + + IPv6 Segment Routing Header (SRH) + draft-ietf-6man-segment-routing-header-05 + +Abstract + + Segment Routing (SR) allows a node to steer a packet through a + controlled set of instructions, called segments, by prepending an SR + header to the packet. A segment can represent any instruction, + topological or service-based. SR allows to enforce a flow through + any path (topological, or application/service based) while + maintaining per-flow state only at the ingress node to the SR domain. + + Segment Routing can be applied to the IPv6 data plane with the + addition of a new type of Routing Extension Header. This draft + describes the Segment Routing Extension Header Type and how it is + used by SR capable nodes. 
+ +Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + + + +Previdi, et al. Expires August 5, 2017 [Page 1] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + This Internet-Draft will expire on August 5, 2017. + +Copyright Notice + + Copyright (c) 2017 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 + 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4 + 2.2. 
Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4 + 2.2.1. SR Domain in a Service Provider Network . . . . . . . 5 + 2.2.2. SR Domain in a Overlay Network . . . . . . . . . . . 6 + 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7 + 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9 + 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10 + 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11 + 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11 + 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13 + 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14 + 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14 + 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14 + 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15 + 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16 + + + +Previdi, et al. Expires August 5, 2017 [Page 2] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17 + 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17 + 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17 + 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18 + 5.1.4. Topology disclosure . . . . . . . . . . . . . . . . . 18 + 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18 + 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19 + 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20 + 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21 + 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21 + 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22 + 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22 + 5.3.2. 
Nodes outside of the SR domain . . . . . . . . . . . 22 + 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23 + 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24 + 7. Manageability Considerations . . . . . . . . . . . . . . . . 24 + 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 25 + 10.2. Informative References . . . . . . . . . . . . . . . . . 25 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27 + +1. Segment Routing Documents + + Segment Routing terminology is defined in + [I-D.ietf-spring-segment-routing]. + + Segment Routing use cases are described in [RFC7855] and + [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing protocol extensions are defined in + [I-D.ietf-isis-segment-routing-extensions], and + [I-D.ietf-ospf-ospfv3-segment-routing-extensions]. + +2. Introduction + + Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing], + allows a node to steer a packet through a controlled set of + instructions, called segments, by prepending an SR header to the + packet. A segment can represent any instruction, topological or + service-based. SR allows to enforce a flow through any path + (topological or service/application based) while maintaining per-flow + state only at the ingress node to the SR domain. Segments can be + derived from different components: IGP, BGP, Services, Contexts, + Locators, etc. The list of segment forming the path is called the + Segment List and is encoded in the packet header. + + + +Previdi, et al. 
Expires August 5, 2017 [Page 3] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + SR allows the use of strict and loose source based routing paradigms + without requiring any additional signaling protocols in the + infrastructure hence delivering an excellent scalability property. + + The source based routing model described in + [I-D.ietf-spring-segment-routing] is inherited from the ones proposed + by [RFC1940] and [RFC2460]. The source based routing model offers + the support for explicit routing capability. + +2.1. Data Planes supporting Segment Routing + + Segment Routing (SR), can be instantiated over MPLS + ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document + defines its instantiation over the IPv6 data-plane based on the use- + cases defined in [I-D.ietf-spring-ipv6-use-cases]. + + This document defines a new type of Routing Header (originally + defined in [RFC2460]) called the Segment Routing Header (SRH) in + order to convey the Segment List in the packet header as defined in + [I-D.ietf-spring-segment-routing]. Mechanisms through which segment + are known and advertised are outside the scope of this document. + + A segment is materialized by an IPv6 address. A segment identifies a + topological instruction or a service instruction. A segment can be + either: + + o global: a global segment represents an instruction supported by + all nodes in the SR domain and it is instantiated through an IPv6 + address globally known in the SR domain. + + o local: a local segment represents an instruction supported only by + the node who originates it and it is instantiated through an IPv6 + address that is known only by the local node. + +2.2. Segment Routing (SR) Domain + + We define the concept of the Segment Routing Domain (SR Domain) as + the set of nodes participating into the source based routing model. 
+ These nodes may be connected to the same physical infrastructure + (e.g.: a Service Provider's network) as well as nodes remotely + connected to each other (e.g.: an enterprise VPN or an overlay). + + A non-exhaustive list of examples of SR Domains is: + + o The network of an operator, service provider, content provider, + enterprise including nodes, links and Autonomous Systems. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 4] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o A set of nodes connected as an overlay over one or more transit + providers. The overlay nodes exchange SR-enabled traffic with + segments belonging solely to the overlay routers (the SR domain). + None of the segments in the SR-enabled packets exchanged by the + overlay belong to the transit networks + + The source based routing model through its instantiation of the + Segment Routing Header (SRH) defined in this document equally applies + to all the above examples. + + It is assumed in this document that the SRH is added to the packet by + its source, consistently with the source routing model defined in + [RFC2460]. For example: + + o At the node originating the packet (host, server). + + o At the ingress node of an SR domain where the ingress node + receives an IPv6 packet and encapsulates it into an outer IPv6 + header followed by a Segment Routing header. + +2.2.1. SR Domain in a Service Provider Network + + The following figure illustrates an SR domain consisting of an + operator's network infrastructure. 
+ + (-------------------------- Operator 1 -----------------------) + ( ) + ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) ) + ( ( ) ( ) ( ) ) + A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1 + ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) ) + A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2 + ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) ) + ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) ) + A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3 + ( ( ) ( ) ( ) ) + ( (--------------) (------------------) (---------------) ) + ( ) + (-------------------------------------------------------------) + + Figure 1: Service Provider SR Domain + + Figure 1 describes an operator network including several ASes and + delivering connectivity between endpoints. In this scenario, Segment + Routing is used within the operator networks and across the ASes + boundaries (all being under the control of the same operator). In + this case segment routing can be used in order to address use cases + such as end-to-end traffic engineering, fast re-route, egress peer + + + +Previdi, et al. Expires August 5, 2017 [Page 5] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + engineering, data-center traffic engineering as described in + [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and + [I-D.ietf-spring-resiliency-use-cases]. + + Typically, an IPv6 packet received at ingress (i.e.: from outside the + SR domain), is classified according to network operator policies and + such classification results into an outer header with an SRH applied + to the incoming packet. The SRH contains the list of segment + representing the path the packet must take inside the SR domain. + Thus, the SA of the packet is the ingress node, the DA (due to SRH + procedures described in Section 4) is set as the first segment of the + path and the last segment of the path is the egress node of the SR + domain. 
+ + The path may include intra-AS as well as inter-AS segments. It has + to be noted that all nodes within the SR domain are under control of + the same administration. When the packet reaches the egress point of + the SR domain, the outer header and its SRH are removed so that the + destination of the packet is unaware of the SR domain the packet has + traversed. + + The outer header with the SRH is no different from any other + tunneling encapsulation mechanism and allows a network operator to + implement traffic engineering mechanisms so to efficiently steer + traffic across his infrastructure. + +2.2.2. SR Domain in a Overlay Network + + The following figure illustrates an SR domain consisting of an + overlay network over multiple operator's networks. + + (--Operator 1---) (-----Operator 2-----) (--Operator 3---) + ( ) ( ) ( ) + A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1 + ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) + A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2 + ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) + ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) + A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3 + ( ) ( | | | ) ( ) + (---------------) (--|----|---------|--) (---------------) + | | | + B1 B2 B3 + + Figure 2: Overlay SR Domain + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 6] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Figure 2 describes an overlay consisting of nodes connected to three + different network operators and forming a single overlay network + where Segment routing packets are exchanged. + + The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3. + These nodes are connected to their respective network operator and + form an overlay network. + + Each node may originate packets with an SRH which contains, in the + segment list of the SRH or in the DA, segments identifying other + overlay nodes. 
This implies that packets with an SRH may traverse + operator's networks but, obviously, these SRHs cannot contain an + address/segment of the transit operators 1, 2 and 3. The SRH + originated by the overlay can only contain address/segment under the + administration of the overlay (e.g. address/segments supported by A1, + A2, A3, B1, B2, B3, C1,C2 or C3). + + In this model, the operator network nodes are transit nodes and, + according to [RFC2460], MUST NOT inspect the routing extension header + since they are not the DA of the packet. + + It is a common practice in operators networks to filter out, at + ingress, any packet whose DA is the address of an internal node and + it is also possible that an operator would filter out any packet + destined to an internal address and having an extension header in it. + + This common practice does not impact the SR-enabled traffic between + the overlay nodes as the intermediate transit networks never see a + destination address belonging to their infrastructure. These SR- + enabled overlay packets will thus never be filtered by the transit + operators. + + In all cases, transit packets (i.e.: packets whose DA is outside the + domain of the operator's network) will be forwarded accordingly + without introducing any security concern in the operator's network. + This is similar to tunneled packets. + +3. Segment Routing Extension Header (SRH) + + A new type of the Routing Header (originally defined in [RFC2460]) is + defined: the Segment Routing Header (SRH) which has a new Routing + Type, (suggested value 4) to be assigned by IANA. + + The Segment Routing Header (SRH) is defined as follows: + + + + + + + +Previdi, et al. 
Expires August 5, 2017 [Page 7] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next Header | Hdr Ext Len | Routing Type | Segments Left | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | First Segment | Flags | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[0] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + ... + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[n] (128 bits IPv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // // + // Optional Type Length Value objects (variable) // + // // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Next Header: 8-bit selector. Identifies the type of header + immediately following the SRH. + + o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + header in 8-octet units, not including the first 8 octets. + + o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + + o Segments Left. Defined in [RFC2460], it contains the index, in + the Segment List, of the next segment to inspect. Segments Left + is decremented at each segment. + + o First Segment: contains the index, in the Segment List, of the + first segment of the path which is in fact the last element of the + Segment List. + + o Flags: 8 bits of flags. Following flags are defined: + + + + +Previdi, et al. Expires August 5, 2017 [Page 8] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 4 5 6 7 + +-+-+-+-+-+-+-+-+ + |U|P|O|A|H| U | + +-+-+-+-+-+-+-+-+ + + U: Unused and for future use. SHOULD be unset on transmission + and MUST be ignored on receipt. 
+ + P-flag: Protected flag. Set when the packet has been rerouted + through FRR mechanism by an SR endpoint node. + + O-flag: OAM flag. When set, it indicates that this packet is + an operations and management (OAM) packet. + + A-flag: Alert flag. If present, it means important Type Length + Value (TLV) objects are present. See Section 3.1 for details + on TLVs objects. + + H-flag: HMAC flag. If set, the HMAC TLV is present and is + encoded as the last TLV of the SRH. In other words, the last + 36 octets of the SRH represent the HMAC information. See + Section 3.1.5 for details on the HMAC TLV. + + o RESERVED: SHOULD be unset on transmission and MUST be ignored on + receipt. + + o Segment List[n]: 128 bit IPv6 addresses representing the nth + segment in the Segment List. The Segment List is encoded starting + from the last segment of the path. I.e., the first element of the + segment list (Segment List [0]) contains the last segment of the + path while the last segment of the Segment List (Segment List[n]) + contains the first segment of the path. The index contained in + "Segments Left" identifies the current active segment. + + o Type Length Value (TLV) are described in Section 3.1. + +3.1. SRH TLVs + + This section defines TLVs of the Segment Routing Header. + + Type Length Value (TLV) contain optional information that may be used + by the node identified in the DA of the packet. It has to be noted + that the information carried in the TLVs is not intended to be used + by the routing layer. Typically, TLVs carry information that is + consumed by other components (e.g.: OAM) than the routing function. + + Each TLV has its own length, format and semantic. The code-point + allocated (by IANA) to each TLV defines both the format and the + + + +Previdi, et al. Expires August 5, 2017 [Page 9] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + semantic of the information carried in the TLV. Multiple TLVs may be + encoded in the same SRH. 
+ + The "Length" field of the TLV is primarily used to skip the TLV while + inspecting the SRH in case the node doesn't support or recognize the + TLV codepoint. The "Length" defines the TLV length in octets and not + including the "Type" and "Length" fields. + + The primary scope of TLVs is to give the receiver of the packet + information related to the source routed path (e.g.: where the packet + entered in the SR domain and where it is expected to exit). + + Additional TLVs may be defined in the future. + +3.1.1. Ingress Node TLV + + The Ingress Node TLV is optional and identifies the node this packet + traversed when entered the SR domain. The Ingress Node TLV has + following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Ingress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 1). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Ingress Node: 128 bits. Defines the node where the packet is + expected to enter the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the SA + of the encapsulating header. + + + + + +Previdi, et al. Expires August 5, 2017 [Page 10] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +3.1.2. Egress Node TLV + + The Egress Node TLV is optional and identifies the node this packet + is expected to traverse when exiting the SR domain. 
The Egress Node + TLV has following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Egress Node (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 2). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Egress Node: 128 bits. Defines the node where the packet is + expected to exit the SR domain. In the encapsulation case + described in Section 2.2.1, this information corresponds to the + last segment of the SRH in the encapsulating header. + +3.1.3. Opaque Container TLV + + The Opaque Container TLV is optional and has the following format: + + + + + + + + + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 11] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Opaque Container (16 octets) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 3). + + o Length: 18. + + o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be + ignored on receipt. + + o Flags: 8 bits. No flags are defined in this document. + + o Opaque Container: 128 bits of opaque data not relevant for the + routing layer. Typically, this information is consumed by a non- + routing component of the node receiving the packet (i.e.: the node + in the DA). + +3.1.4. 
Padding TLV + + The Padding TLV is optional and with the purpose of aligning the SRH + on a 8 octet boundary. The Padding TLV has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | Padding (variable) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // Padding (variable) // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 4). + + o Length: 1 to 7 + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 12] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o Padding: from 1 to 7 octets of padding. Padding bits have no + semantic. They SHOULD be set to 0 on transmission and MUST be + ignored on receipt. + + The following applies to the Padding TLV: + + o Padding TLV is optional and MAY only appear once in the SRH. If + present, it MUST have a length between 1 and 7 octets. + + o The Padding TLV is used in order to align the SRH total length on + the 8 octet boundary. + + o When present, the Padding TLV MUST appear as the last TLV before + the HMAC TLV (if HMAC TLV is present). + + o When present, the Padding TLV MUST have a length from 1 to 7 in + order to align the SRH total length on a 8-octet boundary. + + o When a router inspecting the SRH encounters the Padding TLV, it + MUST assume that no other TLV (other than the HMAC) follow the + Padding TLV. + +3.1.5. HMAC TLV + + HMAC TLV is optional and contains the HMAC information. 
The HMAC TLV + has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Length | RESERVED | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | HMAC Key ID (4 octets) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | // + | HMAC (32 octets) // + | // + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Type: to be assigned by IANA (suggested value 5). + + o Length: 38. + + o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be + ignored on receipt. + + + + +Previdi, et al. Expires August 5, 2017 [Page 13] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o HMAC Key ID: 4 octets. + + o HMAC: 32 octets. + + o HMAC and HMAC Key ID usage is described in Section 5 + + The Following applies to the HMAC TLV: + + o When present, the HMAC TLV MUST be encoded as the last TLV of the + SRH. + + o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set. + + o When the H-flag is set in the SRH, the router inspecting the SRH + MUST find the HMAC TLV in the last 38 octets of the SRH. + +3.2. SRH and RFC2460 behavior + + The SRH being a new type of the Routing Header, it also has the same + properties: + + SHOULD only appear once in the packet. + + Only the router whose address is in the DA field of the packet + header MUST inspect the SRH. + + Therefore, Segment Routing in IPv6 networks implies that the segment + identifier (i.e.: the IPv6 address of the segment) is moved into the + DA of the packet. + + The DA of the packet changes at each segment termination/completion + and therefore the final DA of the packet MUST be encoded as the last + segment of the path. + +4. SRH Procedures + + In this section we describe the different procedures on the SRH. + +4.1. 
Source SR Node + + A Source SR Node can be any node originating an IPv6 packet with its + IPv6 and Segment Routing Headers. This includes either: + + A host originating an IPv6 packet. + + An SR domain ingress router encapsulating a received IPv6 packet + into an outer IPv6 header followed by an SRH. + + + + +Previdi, et al. Expires August 5, 2017 [Page 14] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The mechanism through which a Segment List is derived is outside of + the scope of this document. As an example, the Segment List may be + obtained through: + + Local path computation. + + Local configuration. + + Interaction with a centralized controller delivering the path. + + Any other mechanism. + + The following are the steps of the creation of the SRH: + + Next Header and Hdr Ext Len fields are set according to [RFC2460]. + + Routing Type field is set as TBD (to be allocated by IANA, + suggested value 4). + + The Segment List is built with the FIRST segment of the path + encoded in the LAST element of the Segment List. Subsequent + segments are encoded on top of the first segment. Finally, the + LAST segment of the path is encoded in the FIRST element of the + Segment List. In other words, the Segment List is encoded in the + reverse order of the path. + + The final DA of the packet is encoded as the last segment of the + path (encoded in the first element of the Segment List). + + The DA of the packet is set with the value of the first segment + (found in the last element of the segment list). + + The Segments Left field is set to n-1 where n is the number of + elements in the Segment List. + + The First Segment field is set to n-1 where n is the number of + elements in the Segment List. + + The packet is sent out towards the first segment (i.e.: + represented in the packet DA). + + HMAC TLV may be set according to Section 5. + +4.2. 
Transit Node + + According to [RFC2460], the only node who is allowed to inspect the + Routing Extension Header (and therefore the SRH), is the node + corresponding to the DA of the packet. Any other transit node MUST + + + +Previdi, et al. Expires August 5, 2017 [Page 15] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + NOT inspect the underneath routing header and MUST forward the packet + towards the DA and according to the IPv6 routing table. + + In the example case described in Section 2.2.2, when SR capable nodes + are connected through an overlay spanning multiple third-party + infrastructure, it is safe to send SRH packets (i.e.: packet having a + Segment Routing Header) between each other overlay/SR-capable nodes + as long as the segment list does not include any of the transit + provider nodes. In addition, as a generic security measure, any + service provider will block any packet destined to one of its + internal routers, especially if these packets have an extended header + in it. + +4.3. SR Segment Endpoint Node + + The SR segment endpoint node is the node whose address is in the DA. + The segment endpoint node inspects the SRH and does: + + 1. IF DA = myself (segment endpoint) + 2. IF Segments Left > 0 THEN + decrement Segments Left + update DA with Segment List[Segments Left] + 3. ELSE continue IPv6 processing of the packet + End of processing. + 4. Forward the packet out + +5. Security Considerations + + This section analyzes the security threat model, the security issues + and proposed solutions related to the new Segment Routing Header. + + The Segment Routing Header (SRH) is simply another type of the + routing header as described in RFC 2460 [RFC2460] and is: + + o Added by an SR edge router when entering the segment routing + domain or by the originating host itself. 
The source host can + even be outside the SR domain; + + o inspected and acted upon when reaching the destination address of + the IP header per RFC 2460 [RFC2460]. + + Per RFC2460 [RFC2460], routers on the path that simply forward an + IPv6 packet (i.e. the IPv6 destination address is none of theirs) + will never inspect and process the content of the SRH. Routers whose + one interface IPv6 address equals the destination address field of + the IPv6 packet MUST parse the SRH and, if supported and if the local + configuration allows it, MUST act accordingly to the SRH content. + + + + +Previdi, et al. Expires August 5, 2017 [Page 16] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + According to RFC2460 [RFC2460], the default behavior of a non SR- + capable router upon receipt of an IPv6 packet with SRH destined to an + address of its, is to: + + o ignore the SRH completely if the Segment Left field is 0 and + proceed to process the next header in the IPv6 packet; + + o discard the IPv6 packet if Segment Left field is greater than 0, + it MAY send a Parameter Problem ICMP message back to the Source + Address. + +5.1. Threat model + +5.1.1. Source routing threats + + Using an SRH is similar to source routing, therefore it has some + well-known security issues as described in RFC4942 [RFC4942] section + 2.1.1 and RFC5095 [RFC5095]: + + o amplification attacks: where a packet could be forged in such a + way to cause looping among a set of SR-enabled routers causing + unnecessary traffic, hence a Denial of Service (DoS) against + bandwidth; + + o reflection attack: where a hacker could force an intermediate node + to appear as the immediate attacker, hence hiding the real + attacker from naive forensic; + + o bypass attack: where an intermediate node could be used as a + stepping stone (for example in a De-Militarized Zone) to attack + another host (for example in the datacenter or any back-end + server). + +5.1.2. 
Applicability of RFC 5095 to SRH + + First of all, the reader must remember this specific part of section + 1 of RFC5095 [RFC5095], "A side effect is that this also eliminates + benign RH0 use-cases; however, such applications may be facilitated + by future Routing Header specifications.". In short, it is not + forbidden to create new secure type of Routing Header; for example, + RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a + specific application confined in a single network. + + In the segment routing architecture described in + [I-D.ietf-spring-segment-routing] there are basically two kinds of + nodes (routers and hosts): + + + + + +Previdi, et al. Expires August 5, 2017 [Page 17] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o nodes within the SR domain, which is within one single + administrative domain, i.e., where all nodes are trusted anyway + else the damage caused by those nodes could be worse than + amplification attacks: traffic interception, man-in-the-middle + attacks, more server DoS by dropping packets, and so on. + + o nodes outside of the SR domain, which is outside of the + administrative segment routing domain hence they cannot be trusted + because there is no physical security for those nodes, i.e., they + can be replaced by hostile nodes or can be coerced in wrong + behaviors. + + The main use case for SR consists of the single administrative domain + where only trusted nodes with SR enabled and configured participate + in SR: this is the same model as in RFC6554 [RFC6554]. All non- + trusted nodes do not participate as either SR processing is not + enabled by default or because they only process SRH from nodes within + their domain. + + Moreover, all SR nodes ignore SRH created by outsiders based on + topology information (received on a peering or internal interface) or + on presence and validity of the HMAC field. 
Therefore, if + intermediate nodes ONLY act on valid and authorized SRH (such as + within a single administrative domain), then there is no security + threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are + not applicable. + +5.1.3. Service stealing threat + + Segment routing is used for added value services, there is also a + need to prevent non-participating nodes from using those services; this + is called 'service stealing prevention'. + +5.1.4. Topology disclosure + + The SRH may also contain IPv6 addresses of some intermediate SR- + nodes in the path towards the destination, this obviously reveals + those addresses to the potentially hostile attackers if those + attackers are able to intercept packets containing SRH. On the other + hand, if the attacker can do a traceroute whose probes will be + forwarded along the SR path, then there is little learned by + intercepting the SRH itself. + +5.1.5. ICMP Generation + + Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e. + where the destination address is one of theirs) receive a Routing + Header with unsupported Routing Type, the required behavior is: + + + +Previdi, et al. Expires August 5, 2017 [Page 18] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o If Segments Left is zero, the node must ignore the Routing header + and proceed to process the next header in the packet. + + o If Segments Left is non-zero, the node must discard the packet and + send an ICMP Parameter Problem, Code 0, message to the packet's + Source Address, pointing to the unrecognized Routing Type. + + This required behavior could be used by an attacker to force the + generation of ICMP messages by any node. The attacker could send + packets with SRH (with Segments Left set to a non-zero value) destined to a node not + supporting SRH.
Per RFC2460 [RFC2460], the destination node could + generate an ICMP message, causing a local CPU utilization and if the + source of the offending packet with SRH was spoofed could lead to a + reflection attack without any amplification. + + It must be noted that this is a required behavior for any unsupported + Routing Type and not limited to SRH packets. So, it is not specific + to SRH and the usual rate limiting for ICMP generation is required + anyway for any IPv6 implementation and has been implemented and + deployed for many years. + +5.2. Security fields in SRH + + This section summarizes the use of specific fields in the SRH. They + are based on a key-hashed message authentication code (HMAC). + + The security-related fields in the SRH are instantiated by the HMAC + TLV, containing: + + o HMAC Key-id, 32 bits wide; + + o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not + 0). + + The HMAC field is the output of the HMAC computation (per RFC 2104 + [RFC2104]) using a pre-shared key identified by HMAC Key-id and of + the text which consists of the concatenation of: + + o the source IPv6 address; + + o First Segment field; + + o an octet of bit flags; + + o HMAC Key-id; + + o all addresses in the Segment List. + + + + +Previdi, et al. Expires August 5, 2017 [Page 19] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + The purpose of the HMAC TLV is to verify the validity, the integrity + and the authorization of the SRH itself. If an outsider of the SR + domain does not have access to a current pre-shared secret, then it + cannot compute the right HMAC field and the first SR router on the + path processing the SRH and configured to check the validity of the + HMAC will simply reject the packet. 
+ + The HMAC TLV is located at the end of the SRH simply because only the + router on the ingress of the SR domain needs to process it, then all + other SR nodes can ignore it (based on local policy) because they + trust the upstream router. This is to speed up forwarding operations + because SR routers which do not validate the SRH do not need to parse + the SRH until the end. + + The HMAC Key-id field allows for the simultaneous existence of + several hash algorithms (SHA-256, SHA3-256 ... or future ones) as + well as pre-shared keys. The HMAC Key-id field is opaque, i.e., it + has neither syntax nor semantic except as an index to the right + combination of pre-shared key and hash algorithm and except that a + value of 0 means that there is no HMAC field. Having an HMAC Key-id + field allows for pre-shared key roll-over when two pre-shared keys + are supported for a while when all SR nodes converged to a fresher + pre-shared key. It could also allow for interoperation among + different SR domains if allowed by local policy and assuming a + collision-free HMAC Key Id allocation. + + When a specific SRH is linked to a time-related service (such as + turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are + identical, then it is important to refresh the shared-secret + frequently as the HMAC validity period expires only when the HMAC + Key-id and its associated shared-secret expires. + +5.2.1. Selecting a hash algorithm + + The HMAC field in the HMAC TLV is 256 bit wide. Therefore, the HMAC + MUST be based on a hash function whose output is at least 256 bits. + If the output of the hash function is 256, then this output is simply + inserted in the HMAC field. If the output of the hash function is + larger than 256 bits, then the output value is truncated to 256 by + taking the least-significant 256 bits and inserting them in the HMAC + field. 
+ + SRH implementations can support multiple hash functions but MUST + implement SHA-2 [FIPS180-4] in its SHA-256 variant. + + NOTE: SHA-1 is currently used by some early implementations used for + quick interoperations testing, the 160-bit hash value must then be + + + + +Previdi, et al. Expires August 5, 2017 [Page 20] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + right-hand padded with 96 bits set to 0. The authors understand that + this is not secure but is ok for limited tests. + +5.2.2. Performance impact of HMAC + + While adding an HMAC to each and every SR packet increases the + security, it has a performance impact. Nevertheless, it must be + noted that: + + o the HMAC field is used only when SRH is added by a device (such as + a home set-up box) which is outside of the segment routing domain. + If the SRH is added by a router in the trusted segment routing + domain, then, there is no need for an HMAC field, hence no + performance impact. + + o when present, the HMAC field MUST only be checked and validated by + the first router of the segment routing domain, this router is + named 'validating SR router'. Downstream routers may not inspect + the HMAC field. + + o this validating router can also have a cache of <IPv6 header + + SRH, HMAC field value> to improve the performance. It is not the + same use case as in IPsec where HMAC value was unique per packet, + in SRH, the HMAC value is unique per flow. + + o Last point, hash functions such as SHA-2 have been optimized for + security and performance and there are multiple implementations + with good performance. + + With the above points in mind, the performance impact of using HMAC + is minimized. + +5.2.3. Pre-shared key management + + The field HMAC Key-id allows for: + + o key roll-over: when there is a need to change the key (the hash + pre-shared secret), then multiple pre-shared keys can be used + simultaneously. 
The validating router can have a table of <HMAC + Key-id, pre-shared secret> for the currently active and future + keys. + + o different algorithms: by extending the previous table to <HMAC + Key-id, hash function, pre-shared secret>, the validating router + can also support simultaneously several hash algorithms (see + Section 5.2.1) + + The pre-shared secret distribution can be done: + + + +Previdi, et al. Expires August 5, 2017 [Page 21] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + o in the configuration of the validating routers, either by static + configuration or any SDN oriented approach; + + o dynamically using a trusted key distribution such as [RFC6407] + + The intent of this document is NOT to define yet-another-key- + distribution-protocol. + +5.3. Deployment Models + +5.3.1. Nodes within the SR domain + + An SR domain is defined as a set of interconnected routers where all + routers at the perimeter are configured to add and act on SRH. Some + routers inside the SR domain can also act on SRH or simply forward + IPv6 packets. + + The routers inside an SR domain can be trusted to generate SRH and to + process SRH received on interfaces that are part of the SR domain. + These nodes MUST drop all SRH packets received on an interface that + is not part of the SR domain and containing an SRH whose HMAC field + cannot be validated by local policies. This includes obviously + packets with an SRH generated by a non-cooperative SR domain. + + If the validation fails, then these packets MUST be dropped, ICMP + error messages (parameter problem) SHOULD be generated (but rate + limited) and SHOULD be logged. + +5.3.2. Nodes outside of the SR domain + + Nodes outside of the SR domain cannot be trusted for physical + security; hence, they need to request by some trusted means (outside + of the scope of this document) a complete SRH for each new connection + (i.e. new destination address).
The received SRH MUST include an + HMAC TLV which is computed correctly (see Section 5.2). + + When an outside node sends a packet with an SRH and towards an SR + domain ingress node, the packet MUST contain the HMAC TLV (with a + Key-id and HMAC fields) and the destination address MUST be an + address of an SR domain ingress node. + + The ingress SR router, i.e., the router with an interface address + equal to the destination address, MUST verify the HMAC TLV. + + If the validation is successful, then the packet is simply forwarded + as usual for an SR packet. As long as the packet travels within the + SR domain, no further HMAC check needs to be done. Subsequent + + + + +Previdi, et al. Expires August 5, 2017 [Page 22] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + routers in the SR domain MAY verify the HMAC TLV when they process + the SRH (i.e. when they are the destination). + + If the validation fails, then this packet MUST be dropped, an ICMP + error message (parameter problem) SHOULD be generated (but rate + limited) and SHOULD be logged. + +5.3.3. SR path exposure + + As the intermediate SR nodes' addresses appear in the SRH, if this + SRH is visible to an outsider then he/she could reuse this knowledge + to launch an attack on the intermediate SR nodes or get some insider + knowledge on the topology. This is especially applicable when the + path between the source node and the first SR domain ingress router + is on the public Internet. + + The first remark is to state that 'security by obscurity' is never + enough; in other words, the security policy of the SR domain MUST + assume that the internal topology and addressing is known by the + attacker. A simple traceroute will also give the same information + (with even more information as all intermediate nodes between SID + will also be exposed).
IPsec Encapsulating Security Payload + [RFC4303] cannot be used to protect the SRH as per RFC4303 the ESP + header must appear after any routing header (including SRH). + + To prevent a user from leveraging the gained knowledge by intercepting + SRH, it is recommended to apply an infrastructure Access Control List + (iACL) at the edge of the SR domain. This iACL will drop all packets + from outside the SR-domain whose destination is any address of any + router inside the domain. This security policy should be tuned for + local operations. + +5.3.4. Impact of BCP-38 + + BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks + whether the source address of packets received on an interface is + valid for this interface. The use of loose source routing such as + SRH forces packets to follow a path which differs from the expected + routing. Therefore, if BCP-38 was implemented in all routers inside + the SR domain, then SR packets could be received by an interface + which is not the expected one and the packets could be dropped. + + As an SR domain is usually a subset of one administrative domain, and + as BCP-38 is only deployed at the ingress routers of this + administrative domain and as packets arriving at those ingress + routers have been normally forwarded using the normal routing + information, then there is no reason why this ingress router should + + + + +Previdi, et al. Expires August 5, 2017 [Page 23] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + drop the SRH packet based on BCP-38. Routers inside the domain + commonly do not apply BCP-38; so, this is not a problem. + +6. 
IANA Considerations + + This document makes the following registrations in the Internet + Protocol Version 6 (IPv6) Parameters "Routing Type" registry + maintained by IANA: + + Suggested Description Reference + Value + ---------------------------------------------------------- + 4 Segment Routing Header (SRH) This document + + In addition, this document request IANA to create and maintain a new + Registry: "Segment Routing Header Type-Value Objects". The following + code-points are requested from the registry: + + Registry: Segment Routing Header Type-Value Objects + + Suggested Description Reference + Value + ----------------------------------------------------- + 1 Ingress Node TLV This document + 2 Egress Node TLV This document + 3 Opaque Container TLV This document + 4 Padding TLV This document + 5 HMAC TLV This document + +7. Manageability Considerations + + TBD + +8. Contributors + + Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra + Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James + Connolly, Aloys Augustin contributed to the content of this document. + +9. Acknowledgements + + The authors would like to thank Ole Troan, Bob Hinden, Fred Baker, + Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their + comments to this document. + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 24] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + +10. References + +10.1. Normative References + + [FIPS180-4] + National Institute of Standards and Technology, "FIPS + 180-4 Secure Hash Standard (SHS)", March 2012, + <http://csrc.nist.gov/publications/fips/fips180-4/ + fips-180-4.pdf>. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, + DOI 10.17487/RFC2119, March 1997, + <http://www.rfc-editor.org/info/rfc2119>. + + [RFC2460] Deering, S. and R. 
Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460, + December 1998, <http://www.rfc-editor.org/info/rfc2460>. + + [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)", + RFC 4303, DOI 10.17487/RFC4303, December 2005, + <http://www.rfc-editor.org/info/rfc4303>. + + [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation + of Type 0 Routing Headers in IPv6", RFC 5095, + DOI 10.17487/RFC5095, December 2007, + <http://www.rfc-editor.org/info/rfc5095>. + + [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain + of Interpretation", RFC 6407, DOI 10.17487/RFC6407, + October 2011, <http://www.rfc-editor.org/info/rfc6407>. + +10.2. Informative References + + [I-D.ietf-isis-segment-routing-extensions] + Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., + Litkowski, S., Decraene, B., and j. jefftant@gmail.com, + "IS-IS Extensions for Segment Routing", draft-ietf-isis- + segment-routing-extensions-09 (work in progress), October + 2016. + + [I-D.ietf-ospf-ospfv3-segment-routing-extensions] + Psenak, P., Previdi, S., Filsfils, C., Gredler, H., + Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 + Extensions for Segment Routing", draft-ietf-ospf-ospfv3- + segment-routing-extensions-07 (work in progress), October + 2016. + + + + +Previdi, et al. Expires August 5, 2017 [Page 25] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [I-D.ietf-spring-ipv6-use-cases] + Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and + R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring- + ipv6-use-cases-08 (work in progress), January 2017. + + [I-D.ietf-spring-resiliency-use-cases] + Filsfils, C., Previdi, S., Decraene, B., and R. Shakir, + "Resiliency use cases in SPRING networks", draft-ietf- + spring-resiliency-use-cases-08 (work in progress), October + 2016. + + [I-D.ietf-spring-segment-routing] + Filsfils, C., Previdi, S., Decraene, B., Litkowski, S., + and R. 
Shakir, "Segment Routing Architecture", draft-ietf- + spring-segment-routing-10 (work in progress), November + 2016. + + [I-D.ietf-spring-segment-routing-mpls] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Shakir, R., + jefftant@gmail.com, j., and E. Crabbe, "Segment Routing + with MPLS data plane", draft-ietf-spring-segment-routing- + mpls-06 (work in progress), January 2017. + + [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. + Zappala, "Source Demand Routing: Packet Format and + Forwarding Specification (Version 1)", RFC 1940, + DOI 10.17487/RFC1940, May 1996, + <http://www.rfc-editor.org/info/rfc1940>. + + [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed- + Hashing for Message Authentication", RFC 2104, + DOI 10.17487/RFC2104, February 1997, + <http://www.rfc-editor.org/info/rfc2104>. + + [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering: + Defeating Denial of Service Attacks which employ IP Source + Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827, + May 2000, <http://www.rfc-editor.org/info/rfc2827>. + + [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/ + Co-existence Security Considerations", RFC 4942, + DOI 10.17487/RFC4942, September 2007, + <http://www.rfc-editor.org/info/rfc4942>. + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 26] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6 + Routing Header for Source Routes with the Routing Protocol + for Low-Power and Lossy Networks (RPL)", RFC 6554, + DOI 10.17487/RFC6554, March 2012, + <http://www.rfc-editor.org/info/rfc6554>. + + [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B., + Litkowski, S., Horneffer, M., and R. 
Shakir, "Source + Packet Routing in Networking (SPRING) Problem Statement + and Requirements", RFC 7855, DOI 10.17487/RFC7855, May + 2016, <http://www.rfc-editor.org/info/rfc7855>. + +Authors' Addresses + + Stefano Previdi (editor) + Cisco Systems, Inc. + Via Del Serafico, 200 + Rome 00142 + Italy + + Email: sprevidi@cisco.com + + + Clarence Filsfils + Cisco Systems, Inc. + Brussels + BE + + Email: cfilsfil@cisco.com + + + Brian Field + Comcast + 4100 East Dry Creek Road + Centennial, CO 80122 + US + + Email: Brian_Field@cable.comcast.com + + + Ida Leung + Rogers Communications + 8200 Dixie Road + Brampton, ON L6T 0C1 + CA + + Email: Ida.Leung@rci.rogers.com + + + + +Previdi, et al. Expires August 5, 2017 [Page 27] + +Internet-Draft IPv6 Segment Routing Header (SRH) February 2017 + + + Jen Linkova + Google + 1600 Amphitheatre Parkway + Mountain View, CA 94043 + US + + Email: furry@google.com + + + Ebben Aries + Facebook + US + + Email: exa@fb.com + + + Tomoya Kosugi + NTT + 3-9-11, Midori-Cho Musashino-Shi, + Tokyo 180-8585 + JP + + Email: kosugi.tomoya@lab.ntt.co.jp + + + Eric Vyncke + Cisco Systems, Inc. + De Kleetlaann 6A + Diegem 1831 + Belgium + + Email: evyncke@cisco.com + + + David Lebrun + Universite Catholique de Louvain + Place Ste Barbe, 2 + Louvain-la-Neuve, 1348 + Belgium + + Email: david.lebrun@uclouvain.be + + + + + + + + + + +Previdi, et al. Expires August 5, 2017 [Page 28]
\ No newline at end of file diff --git a/src/vnet/sr/rfc_draft_05.txt b/src/vnet/sr/rfc_draft_05.txt deleted file mode 100644 index bc41c181..00000000 --- a/src/vnet/sr/rfc_draft_05.txt +++ /dev/null @@ -1,1265 +0,0 @@ -Network Working Group S. Previdi, Ed. -Internet-Draft C. Filsfils -Intended status: Standards Track Cisco Systems, Inc. -Expires: June 12, 2015 B. Field - Comcast - I. Leung - Rogers Communications - December 9, 2014 - - - IPv6 Segment Routing Header (SRH) - draft-previdi-6man-segment-routing-header-05 - -Abstract - - Segment Routing (SR) allows a node to steer a packet through a - controlled set of instructions, called segments, by prepending a SR - header to the packet. A segment can represent any instruction, - topological or service-based. SR allows to enforce a flow through - any path (topological, or application/service based) while - maintaining per-flow state only at the ingress node to the SR domain. - - Segment Routing can be applied to the IPv6 data plane with the - addition of a new type of Routing Extension Header. This draft - describes the Segment Routing Extension Header Type and how it is - used by SR capable nodes. - -Requirements Language - - The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", - "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this - document are to be interpreted as described in RFC 2119 [RFC2119]. - -Status of This Memo - - This Internet-Draft is submitted in full conformance with the - provisions of BCP 78 and BCP 79. - - Internet-Drafts are working documents of the Internet Engineering - Task Force (IETF). Note that other groups may also distribute - working documents as Internet-Drafts. The list of current Internet- - Drafts is at http://datatracker.ietf.org/drafts/current/. - - Internet-Drafts are draft documents valid for a maximum of six months - and may be updated, replaced, or obsoleted by other documents at any - time. 
It is inappropriate to use Internet-Drafts as reference - material or to cite them other than as "work in progress." - - - - -Previdi, et al. Expires June 12, 2015 [Page 1] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - This Internet-Draft will expire on June 12, 2015. - -Copyright Notice - - Copyright (c) 2014 IETF Trust and the persons identified as the - document authors. All rights reserved. - - This document is subject to BCP 78 and the IETF Trust's Legal - Provisions Relating to IETF Documents - (http://trustee.ietf.org/license-info) in effect on the date of - publication of this document. Please review these documents - carefully, as they describe your rights and restrictions with respect - to this document. Code Components extracted from this document must - include Simplified BSD License text as described in Section 4.e of - the Trust Legal Provisions and are provided without warranty as - described in the Simplified BSD License. - -Table of Contents - - 1. Structure of this document . . . . . . . . . . . . . . . . . 3 - 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 - 3. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 - 3.1. Data Planes supporting Segment Routing . . . . . . . . . 4 - 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4 - 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7 - 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8 - 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9 - 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10 - 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10 - 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10 - 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11 - 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11 - 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11 - 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . 
. . . 15 - 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15 - 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15 - 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16 - 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16 - 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18 - 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18 - 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18 - 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18 - 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18 - 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19 - 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21 - 10. Manageability Considerations . . . . . . . . . . . . . . . . 21 - 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21 - 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21 - - - -Previdi, et al. Expires June 12, 2015 [Page 2] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21 - 14. References . . . . . . . . . . . . . . . . . . . . . . . . . 21 - 14.1. Normative References . . . . . . . . . . . . . . . . . . 21 - 14.2. Informative References . . . . . . . . . . . . . . . . . 21 - Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22 - -1. Structure of this document - - Section 3 gives an introduction on SR for IPv6 networks. - - Section 4 describes the Segment Routing abstract model. - - Section 5 defines the Segment Routing Header (SRH) allowing - instantiation of SR over IPv6 dataplane. - - Section 6 details the procedures of the Segment Routing Header. - -2. Segment Routing Documents - - Segment Routing terminology is defined in - [I-D.filsfils-spring-segment-routing]. - - Segment Routing use cases are described in - [I-D.filsfils-spring-segment-routing-use-cases]. 
- - Segment Routing IPv6 use cases are described in - [I-D.ietf-spring-ipv6-use-cases]. - - Segment Routing protocol extensions are defined in - [I-D.ietf-isis-segment-routing-extensions], and - [I-D.psenak-ospf-segment-routing-ospfv3-extension]. - - The security mechanisms of the Segment Routing Header (SRH) are - described in [I-D.vyncke-6man-segment-routing-security]. - -3. Introduction - - Segment Routing (SR), defined in - [I-D.filsfils-spring-segment-routing], allows a node to steer a - packet through a controlled set of instructions, called segments, by - prepending a SR header to the packet. A segment can represent any - instruction, topological or service-based. SR allows to enforce a - flow through any path (topological or service/application based) - while maintaining per-flow state only at the ingress node to the SR - domain. Segments can be derived from different components: IGP, BGP, - Services, Contexts, Locators, etc. The list of segment forming the - path is called the Segment List and is encoded in the packet header. - - - - -Previdi, et al. Expires June 12, 2015 [Page 3] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - SR allows the use of strict and loose source based routing paradigms - without requiring any additional signaling protocols in the - infrastructure hence delivering an excellent scalability property. - - The source based routing model described in - [I-D.filsfils-spring-segment-routing] is inherited from the ones - proposed by [RFC1940] and [RFC2460]. The source based routing model - offers the support for explicit routing capability. - -3.1. Data Planes supporting Segment Routing - - Segment Routing (SR), can be instantiated over MPLS - ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document - defines its instantiation over the IPv6 data-plane based on the use- - cases defined in [I-D.ietf-spring-ipv6-use-cases]. 
- - Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS - data-plane is not used or, when combined with SR-MPLS, in networks - where MPLS is used in the core and IPv6 is used at the edge (home - networks, datacenters). - - This document defines a new type of Routing Header (originally - defined in [RFC2460]) called the Segment Routing Header (SRH) in - order to convey the Segment List in the packet header as defined in - [I-D.filsfils-spring-segment-routing]. Mechanisms through which - segments are known and advertised are outside the scope of this - document. - -3.2. Illustration - - In the context of Figure 1 where all the links have the same IGP - cost, let us assume that a packet P enters the SR domain at an - ingress edge router I and that the operator requests the following - requirements for packet P: - - The local service S offered by node B must be applied to packet P. - - The links AB and CE cannot be used to transport the packet P. - - Any node N along the journey of the packet should be able to - determine where the packet P entered the SR domain and where it - will exit. The intermediate node should be able to determine the - paths from the ingress edge router to itself, and from itself to - the egress edge router. - - Per-flow State for packet P should only be created at the ingress - edge router. - - - - -Previdi, et al. Expires June 12, 2015 [Page 4] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - The operator can forbid, for security reasons, anyone outside the - operator domain to exploit its intra-domain SR capabilities. - - I---A---B---C---E - \ | / \ / - \ | / F - \|/ - D - - Figure 1: An illustration of SR properties - - All these properties may be realized by instructing the ingress SR - edge router I to push the following abstract SR header on the packet - P.
- - +---------------------------------------------------------------+ - | | | - | Abstract SR Header | | - | | | - | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | - | ^ | | Packet | - | | | | P | - | +---------------------+ | | - | | | - +---------------------------------------------------------------+ - - Figure 2: Packet P at node I - - The abstract SR header contains a source route encoded as a list of - segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification - of the ingress and egress SR edge routers (segments SI and SE). - - A segment identifies a topological instruction or a service - instruction. A segment can either be global or local. The - instruction associated with a global segment is recognized and - executed by any SR-capable node in the domain. The instruction - associated with a local segment is only supported by the specific - node that originates it. - - Let us assume some IGP (i.e.: ISIS and OSPF) extensions to define a - "Node Segment" as a global instruction within the IGP domain to - forward a packet along the shortest path to the specified node. Let - us further assume that within the SR domain illustrated in Figure 1, - segments SI, SD, SB, SE and SF respectively identify IGP node - segments to I, D, B, E and F. - - Let us assume that node B identifies its local service S with local - segment SS. - - - -Previdi, et al. Expires June 12, 2015 [Page 5] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - With all of this in mind, let us describe the journey of the packet - P. - - The packet P reaches the ingress SR edge router. I pushes the SR - header illustrated in Figure 2 and sets the pointer to the first - segment of the list (SD). - - SD is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to D. - - Once at D, the pointer is incremented and the next segment is - executed (SB). 
- - SB is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to B. - - Once at B, the pointer is incremented and the next segment is - executed (SS). - - SS is an instruction only recognized by node B which causes the - packet to receive service S. - - Once the service applied, the next segment is executed (SF) which - causes the packet to be forwarded along the shortest path to F. - - Once at F, the pointer is incremented and the next segment is - executed (SE). - - SE is an instruction recognized by all the nodes in the SR domain - which causes the packet to be forwarded along the shortest path to E. - - E then removes the SR header and the packet continues its journey - outside the SR domain. - - All of the requirements are met. - - First, the packet P has not used links AB and CE: the shortest-path - from I to D is I-A-D, the shortest-path from D to B is D-B, the - shortest-path from B to F is B-C-F and the shortest-path from F to E - is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E - and the links AB and CE have been avoided. - - Second, the service S supported by B has been applied on packet P. - - Third, any node along the packet path is able to identify the service - and topological journey of the packet within the SR domain. For - example, node C receives the packet illustrated in Figure 3 and hence - is able to infer where the packet entered the SR domain (SI), how it - - - -Previdi, et al. Expires June 12, 2015 [Page 6] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - got up to itself {SD, SB, SS, SE}, where it will exit the SR domain - (SE) and how it will do so {SF, SE}. 
- - +---------------------------------------------------------------+ - | | | - | SR Header | | - | | | - | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | - | ^ | | Packet | - | | | | P | - | +--------+ | | - | | | - +---------------------------------------------------------------+ - - Figure 3: Packet P at node C - - Fourth, only node I maintains per-flow state for packet P. The - entire program of topological and service instructions to be executed - by the SR domain on packet P is encoded by the ingress edge router I - in the SR header in the form of a list of segments where each segment - identifies a specific instruction. No further per-flow state is - required along the packet path. The per-flow state is in the SR - header and travels with the packet. Intermediate nodes only hold - states related to the IGP global node segments and the local IGP - adjacency segments. These segments are not per-flow specific and - hence scale very well. Typically, an intermediate node would - maintain in the order of 100's to 1000's global node segments and in - the order of 10's to 100 of local adjacency segments. Typically the - SR IGP forwarding table is expected to be much less than 10000 - entries. - - Fifth, the SR header is inserted at the entrance to the domain and - removed at the exit of the operator domain. For security reasons, - the operator can forbid anyone outside its domain to use its intra- - domain SR capability. - -4. Abstract Routing Model - - At the entrance of the SR domain, the ingress SR edge router pushes - the SR header on top of the packet. At the exit of the SR domain, - the egress SR edge router removes the SR header. - - The abstract SR header contains an ordered list of segments, a - pointer identifying the next segment to process and the - identifications of the ingress and egress SR edge routers on the path - of this packet. The pointer identifies the segment that MUST be used - by the receiving router to process the packet. 
This segment is - called the active segment. - - - -Previdi, et al. Expires June 12, 2015 [Page 7] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - A property of SR is that the entire source route of the packet, - including the identity of the ingress and egress edge routers is - always available with the packet. This allows for interesting - accounting and service applications. - - We define three SR-header operations: - - "PUSH": an SR header is pushed on an IP packet, or additional - segments are added at the head of the segment list. The pointer - is moved to the first entry of the added segments. - - "NEXT": the active segment is completed, the pointer is moved to - the next segment in the list. - - "CONTINUE": the active segment is not completed, the pointer is - left unchanged. - - In the future, other SR-header management operations may be defined. - - As the packet travels through the SR domain, the pointer is - incremented through the ordered list of segments and the source route - encoded by the SR ingress edge node is executed. - - A node processes an incoming packet according to the instruction - associated with the active segment. - - Any instruction might be associated with a segment: for example, an - intra-domain topological strict or loose forwarding instruction, a - service instruction, etc. - - At minimum, a segment instruction must define two elements: the - identity of the next-hop to forward the packet to (this could be the - same node or a context within the node) and which SR-header - management operation to execute. - - Each segment is known in the network through a Segment Identifier - (SID). The terms "segment" and "SID" are interchangeable. - -4.1. Segment Routing Global Block (SRGB) - - In the SR abstract model, a segment is identified by a Segment - Routing Identifier (SID). The SR abstract model doesn't mandate a - specific format for the SID (IPv6 address or other formats). 
- - In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the - SRGB is materialized by the global IPv6 address space which - represents the set of IPv6 routable addresses in the SR domain. The - following rules apply: - - - -Previdi, et al. Expires June 12, 2015 [Page 8] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - o Each node of the SR domain MUST be configured with the Segment - Routing Global Block (SRGB). - - o All global segments must be allocated from the SRGB. Any SR - capable node MUST be able to process any global segment advertised - by any other node within the SR domain. - - o Any segment outside the SRGB has a local significance and is - called a "local segment". An SR-capable node MUST be able to - process the local segments it originates. An SR-capable node MUST - NOT support the instruction associated with a local segment - originated by a remote node. - -4.2. Traffic Engineering with SR - - An SR Traffic Engineering policy is composed of two elements: a flow - classification and a segment-list to prepend on the packets of the - flow. - - In SR, this per-flow state only exists at the ingress edge node where - the policy is defined and the SR header is pushed. - - It is outside the scope of the document to define the process that - leads to the instantiation at a node N of an SR Traffic Engineering - policy. - - [I-D.filsfils-spring-segment-routing-use-cases] illustrates various - alternatives: - - N is deriving this policy automatically (e.g. FRR). - - N is provisioned explicitly by the operator. - - N is provisioned by a controller or server (e.g.: SDN Controller). - - N is provisioned by the operator with a high-level policy which is - mapped into a path thanks to a local CSPF-based computation (e.g. - affinity/SRLG exclusion). - - N could also be provisioned by other means. 
- - [I-D.filsfils-spring-segment-routing-use-cases] explains why the - majority of use-cases require very short segment-lists, hence - minimizing the performance impact, if any, of inserting and - transporting the segment list. - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 9] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - A SDN controller, which desires to instantiate at node N an SR - Traffic Engineering policy, collects the SR capability of node N such - as to ensure that the policy meets its capability. - -4.3. Segment Routing Database - - The Segment routing Database (SRDB) is a set of entries where each - entry is identified by a SID. The instruction associated with each - entry at least defines the identity of the next-hop to which the - packet should be forwarded and what operation should be performed on - the SR header (PUSH, CONTINUE, NEXT). - - +---------+-----------+---------------------------------+ - | Segment | Next-Hop | SR Header operation | - +---------+-----------+---------------------------------+ - | Sk | M | CONTINUE | - | Sj | N | NEXT | - | Sl | NAT Srvc | NEXT | - | Sm | FW srvc | NEXT | - | Sn | Q | NEXT | - | etc. | etc. | etc. | - +---------+-----------+---------------------------------+ - - Figure 4: SR Database - - Each SR-capable node maintains its local SRDB. SRDB entries can - either derive from local policy or from protocol segment - advertisement. - -5. IPv6 Instantiation of Segment Routing - -5.1. Segment Identifiers (SIDs) and SRGB - - Segment Routing, as described in - [I-D.filsfils-spring-segment-routing], defines Node-SID and - Adjacency-SID. When SR is used over IPv6 data-plane the following - applies. - - The SRGB is the global IPv6 address space which represents the set of - IPv6 routable addresses in the SR domain. - - Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable - addresses). Adjacency-SIDs are IPv6 addresses which may not be part - of the global IPv6 address space. 
- - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 10] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -5.1.1. Node-SID - - The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6 - prefix that the operator configured on the node and that is used as - the node identifier. Typically, in case of a router, this is the - IPv6 address of the node loopback interface. Therefore, SR-IPv6 does - not require any additional SID advertisement for the Node Segment. - The Node-SID is in fact the IPv6 address of the node. - -5.1.2. Adjacency-SID - - In the SR architecture defined in - [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID) - identifies a given interface and may be local or global (depending on - how it is advertised). A node may advertise one (or more) Adj-SIDs - allocated to a given interface so to force the forwarding of the - packet (when received with that particular Adj-SID) into the - interface regardless the routing entry for the packet destination. - The semantic of the Adj-SID is: - - Send out the packet to the interface this prefix is allocated to. - - When SR is applied to IPv6, any SID is in a global IPv6 address and - therefore, an Adj-SID has a global significance (i.e.: the IPv6 - address representing the SID is a global address). In other words, a - node that advertises the Adj-SID in the form of a global IPv6 address - representing the link/adjacency the packet has to be forwarded to, - will apply to the Adj-SID a global significance. - - Advertisement of Adj-SID may be done using multiple mechanisms among - which the ones described in ISIS and OSPF protocol extensions: - [I-D.ietf-isis-segment-routing-extensions] and - [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction - between local and global significance of the Adj-SID is given in the - encoding of the Adj-SID advertisement. - -5.2. 
Segment Routing Extension Header (SRH) - - A new type of the Routing Header (originally defined in [RFC2460]) is - defined: the Segment Routing Header (SRH) which has a new Routing - Type, (suggested value 4) to be assigned by IANA. - - As an example, if an explicit path is to be constructed across a core - network running ISIS or OSPF, the segment list will contain SIDs - representing the nodes across the path (loose or strict) which, - usually, are the IPv6 loopback interface address of each node. If - the path is across service or application entities, the segment list - - - - -Previdi, et al. Expires June 12, 2015 [Page 11] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - contains the IPv6 addresses of these services or application - instances. - - The Segment Routing Header (SRH) is defined as follows: - - - 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next Header | Hdr Ext Len | Routing Type | Segments Left | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | First Segment | Flags | HMAC Key ID | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[0] (128 bits ipv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | | - ... 
- | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Segment List[n] (128 bits ipv6 address) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[0] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[1] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | Policy List[2] (optional) | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - | | - | | - | HMAC (256 bits) | - - - -Previdi, et al. Expires June 12, 2015 [Page 12] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - | (optional) | - | | - | | - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - where: - - o Next Header: 8-bit selector. Identifies the type of header - immediately following the SRH. - - o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH - header in 8-octet units, not including the first 8 octets. - - o Routing Type: TBD, to be assigned by IANA (suggested value: 4). - - o Segments Left. Defined in [RFC2460], it contains the index, in - the Segment List, of the next segment to inspect. Segments Left - is decremented at each segment and it is used as an index in the - segment list. - - o First Segment: offset in the SRH, not including the first 8 octets - and expressed in 16-octet units, pointing to the last element of - the segment list, which is in fact the first segment of the - segment routing path. - - o Flags: 16 bits of flags. Following flags are defined: - - 1 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |C|P|R|R| Policy Flags | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - C-flag: Clean-up flag. Set when the SRH has to be removed from - the packet when packet reaches the last segment. - - P-flag: Protected flag. 
Set when the packet has been rerouted - through FRR mechanism by a SR endpoint node. See Section 6.3 - for more details. - - R-flags. Reserved and for future use. - - Policy Flags. Define the type of the IPv6 addresses encoded - into the Policy List (see below). The following have been - defined: - - - - - -Previdi, et al. Expires June 12, 2015 [Page 13] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Bits 4-6: determine the type of the first element after the - segment list. - - Bits 7-9: determine the type of the second element. - - Bits 10-12: determine the type of the third element. - - Bits 13-15: determine the type of the fourth element. - - The following values are used for the type: - - 0x0: Not present. If value is set to 0x0, it means the - element represented by these bits is not present. - - 0x1: SR Ingress. - - 0x2: SR Egress. - - 0x3: Original Source Address. - - o HMAC Key ID and HMAC field, and their use are defined in - [I-D.vyncke-6man-segment-routing-security]. - - o Segment List[n]: 128 bit IPv6 addresses representing the nth - segment in the Segment List. The Segment List is encoded starting - from the last segment of the path. I.e., the first element of the - segment list (Segment List [0]) contains the last segment of the - path while the last segment of the Segment List (Segment List[n]) - contains the first segment of the path. The index contained in - "Segments Left" identifies the current active segment. - - o Policy List. Optional addresses representing specific nodes in - the SR path such as: - - SR Ingress: a 128 bit generic identifier representing the - ingress in the SR domain (i.e.: it needs not to be a valid IPv6 - address). - - SR Egress: a 128 bit generic identifier representing the egress - in the SR domain (i.e.: it needs not to be a valid IPv6 - address). - - Original Source Address: IPv6 address originally present in the - SA field of the packet. 
- - The segments in the Policy List are encoded after the segment list - and they are optional. If none are in the SRH, all bits of the - Policy List Flags MUST be set to 0x0. - - - -Previdi, et al. Expires June 12, 2015 [Page 14] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -5.2.1. SRH and RFC2460 behavior - - The SRH being a new type of the Routing Header, it also has the same - properties: - - SHOULD only appear once in the packet. - - Only the router whose address is in the DA field of the packet - header MUST inspect the SRH. - - Therefore, Segment Routing in IPv6 networks implies that the segment - identifier (i.e.: the IPv6 address of the segment) is moved into the - DA of the packet. - - The DA of the packet changes at each segment termination/completion - and therefore the original DA of the packet MUST be encoded as the - last segment of the path. - - As illustrated in Section 3.2, nodes that are within the path of a - segment will forward packets based on the DA of the packet without - inspecting the SRH. This ensures full interoperability between SR- - capable and non-SR-capable nodes. - -6. SRH Procedures - - In this section we describe the different procedures on the SRH. - -6.1. Segment Routing Operations - - When Segment Routing is instantiated over the IPv6 data plane the - following applies: - - o The segment list is encoded in the SRH. - - o The active segment is in the destination address of the packet. - - o The Segment Routing CONTINUE operation (as described in - [I-D.filsfils-spring-segment-routing]) is implemented as a - regular/plain IPv6 operation consisting of DA based forwarding. - - o The NEXT operation is implemented through the update of the DA - with the value represented by the Next Segment field in the SRH. - - o The PUSH operation is implemented through the insertion of the SRH - or the insertion of additional segments in the SRH segment list. - - - - - - -Previdi, et al. 
Expires June 12, 2015 [Page 15] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -6.2. Segment Routing Node Functions - - SR packets are forwarded to segments endpoints (i.e.: nodes whose - address is in the DA field of the packet). The segment endpoint, - when receiving a SR packet destined to itself, does: - - o Inspect the SRH. - - o Determine the next active segment. - - o Update the Segments Left field (or, if requested, remove the SRH - from the packet). - - o Update the DA. - - o Send the packet to the next segment. - - The procedures applied to the SRH are related to the node function. - Following nodes functions are defined: - - Ingress SR Node. - - Transit Non-SR Node. - - Transit SR Intra Segment Node. - - SR Endpoint Node. - -6.2.1. Ingress SR Node - - Ingress Node can be a router at the edge of the SR domain or a SR- - capable host. The ingress SR node may obtain the segment list by - either: - - Local path computation. - - Local configuration. - - Interaction with an SDN controller delivering the path as a - complete SRH. - - Any other mechanism (mechanisms through which the path is acquired - are outside the scope of this document). - - When creating the SRH (either at ingress node or in the SDN - controller) the following is done: - - Next Header and Hdr Ext Len fields are set according to [RFC2460]. - - - -Previdi, et al. Expires June 12, 2015 [Page 16] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Routing Type field is set as TBD (SRH). - - The Segment List is built with the FIRST segment of the path - encoded in the LAST element of the Segment List. Subsequent - segments are encoded on top of the first segment. Finally, the - LAST segment of the path is encoded in the FIRST element of the - Segment List. In other words, the Segment List is encoded in the - reverse order of the path. 
- - The original DA of the packet is encoded as the last segment of - the path (encoded in the first element of the Segment List). - - the DA of the packet is set with the value of the first segment - (found in the last element of the segment list). - - the Segments Left field is set to n-1 where n is the number of - elements in the Segment List. - - The packet is sent out towards the first segment (i.e.: - represented in the packet DA). - -6.2.1.1. Security at Ingress - - The procedures related to the Segment Routing security are detailed - in [I-D.vyncke-6man-segment-routing-security]. - - In the case where the SR domain boundaries are not under control of - the network operator (e.g.: when the SR domain edge is in a home - network), it is important to authenticate and validate the content of - any SRH being received by the network operator. In such case, the - security procedure described in - [I-D.vyncke-6man-segment-routing-security] is to be used. - - The ingress node (e.g.: the host in the home network) requests the - SRH from a control system (e.g.: an SDN controller) which delivers - the SRH with its HMAC signature on it. - - Then, the home network host can send out SR packets (with an SRH on - it) that will be validated at the ingress of the network operator - infrastructure. - - The ingress node of the network operator infrastructure, is - configured in order to validate the incoming SRH HMACs in order to - allow only packets having correct SRH according to their SA/DA - addresses. - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 17] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - -6.2.2. Transit Non-SR Capable Node - - SR is interoperable with plain IPv6 forwarding. Any non SR-capable - node will forward SR packets solely based on the DA. There's no SRH - inspection. This ensures full interoperability between SR and non-SR - nodes. - -6.2.3. 
SR Intra Segment Transit Node - - Only the node whose address is in DA inspects and processes the SRH - (according to [RFC2460]). An intra segment transit node is not in - the DA and its forwarding is based on DA and its SR-IPv6 FIB. - -6.2.4. SR Segment Endpoint Node - - The SR segment endpoint node is the node whose address is in the DA. - The segment endpoint node inspects the SRH and does: - - 1. IF DA = myself (segment endpoint) - 2. IF Segments Left > 0 THEN - decrement Segments Left - update DA with Segment List[Segments Left] - 3. ELSE IF Segments List[Segments Left] <> DA THEN - update DA with Segments List[Segments Left] - IF Clean-up bit is set THEN remove the SRH - 4. ELSE give the packet to next PID (application) - End of processing. - 5. Forward the packet out - -6.3. FRR Flag Settings - - A node supporting SR and doing Fast Reroute (as described in - [I-D.filsfils-spring-segment-routing-use-cases]), when rerouting - packets through FRR mechanisms, SHOULD inspect the rerouted packet - header and look for the SRH. If the SRH is present, the rerouting - node SHOULD set the Protected bit on all rerouted packets. - -7. SR and Tunneling - - Encapsulation can be realized in two different ways with SR-IPv6: - - Outer encapsulation. - - SRH with SA/DA original addresses. - - Outer encapsulation tunneling is the traditional method where an - additional IPv6 header is prepended to the packet. The original IPv6 - header being encapsulated, everything is preserved and the packet is - - - -Previdi, et al. Expires June 12, 2015 [Page 18] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - switched/routed according to the outer header (that could contain a - SRH). - - SRH allows encoding both original SA and DA, hence an operator may - decide to change the SA/DA at ingress and restore them at egress.
- This can be achieved without outer encapsulation, by changing SA/DA - and encoding the original SA in the Policy List and the original - DA in the Segment List. - -8. Example Use Case - - A more detailed description of use cases is available in - [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6 - example is illustrated. - - In the topology described in Figure 6 it is assumed an end-to-end SR - deployment. Therefore SR is supported by all nodes from A to J. - - Home Network | Backbone | Datacenter - | | - | +---+ +---+ +---+ | +---+ | - +---|---| C |---| D |---| E |---|---| I |---| - | | +---+ +---+ +---+ | +---+ | - | | | | | | | | +---+ - +---+ +---+ | | | | | | |--| X | - | A |---| B | | +---+ +---+ +---+ | +---+ | +---+ - +---+ +---+ | | F |---| G |---| H |---|---| J |---| - | +---+ +---+ +---+ | +---+ | - | | - | +-----------+ - | SDN | - | Orch/Ctlr | - +-----------+ - - Figure 6: Sample SR topology - - The following workflow applies to packets sent by host A and destined - to server X. - - - - - - - - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 19] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - . Host A sends a request for a path to server X to the SDN - controller or orchestration system. - - . The SDN controller/orchestrator builds a SRH with: - . Segment List: C, F, J, X - . HMAC - that satisfies the requirements expressed in the request - by host A and based on policies applicable to host A. - - . Host A receives the SRH and inserts it into the packet. - The packet has now: - . SA: A - . DA: C - . SRH with - . SL: X, J, F, C - . Segments Left: 3 (i.e.: Segment List size - 1) - . PL: C (ingress), J (egress) - Note that X is the last segment and C is the - first segment (i.e.: the SL is encoded in the reverse - path order). - . HMAC - - . When packet arrives in C (first segment), C does: - . Validate the HMAC of the SRH. - . Decrement Segments Left by one: 2 - .
Update the DA with the next segment found in - Segment List[2]. DA is set to F. - . Forward the packet to F. - - . When packet arrives in F (second segment), F does: - . Decrement Segments Left by one: 1 - . Update the DA with the next segment found in - Segment List[1]. DA is set to J. - . Forward the packet to J. - - . Packet travels across G and H nodes which do plain - IPv6 forwarding based on DA. No inspection of SRH needs - to be done in these nodes. However, any SR capable node - is allowed to set the Protected bit in case of FRR - protection. - - . When packet arrives in J (third segment), J does: - . Decrement Segments Left by one: 0 - . Update the DA with the next segment found in - Segment List[0]. DA is set to X. - . If the cleanup bit is set, then node J will strip out - the SRH from the packet. - . Forward the packet to X. - - - -Previdi, et al. Expires June 12, 2015 [Page 20] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - The packet arrives in the server that may or may not support SR. The - return traffic, from server to host, may be sent using the same - procedures. - -9. IANA Considerations - - TBD - -10. Manageability Considerations - - TBD - -11. Security Considerations - - Security mechanisms applied to Segment Routing over IPv6 networks are - detailed in [I-D.vyncke-6man-segment-routing-security]. - -12. Contributors - - The authors would like to thank Dave Barach, John Leddy, John - Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian - Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun - and Fred Baker for their contribution to this document. - -13. Acknowledgements - - TBD - -14. References - -14.1. Normative References - - [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate - Requirement Levels", BCP 14, RFC 2119, March 1997. - - [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 - (IPv6) Specification", RFC 2460, December 1998. - -14.2. 
Informative References - - [I-D.filsfils-spring-segment-routing] - Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, - "Segment Routing Architecture", draft-filsfils-spring- - segment-routing-04 (work in progress), July 2014. - - - - - -Previdi, et al. Expires June 12, 2015 [Page 21] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - [I-D.filsfils-spring-segment-routing-mpls] - Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, - "Segment Routing with MPLS data plane", draft-filsfils- - spring-segment-routing-mpls-03 (work in progress), August - 2014. - - [I-D.filsfils-spring-segment-routing-use-cases] - Filsfils, C., Francois, P., Previdi, S., Decraene, B., - Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., - Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E. - Crabbe, "Segment Routing Use Cases", draft-filsfils- - spring-segment-routing-use-cases-01 (work in progress), - October 2014. - - [I-D.ietf-isis-segment-routing-extensions] - Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., - Litkowski, S., Decraene, B., and J. Tantsura, "IS-IS - Extensions for Segment Routing", draft-ietf-isis-segment- - routing-extensions-03 (work in progress), October 2014. - - [I-D.ietf-spring-ipv6-use-cases] - Brzozowski, J., Leddy, J., Leung, I., Previdi, S., - Townsley, W., Martin, C., Filsfils, C., and R. Maglione, - "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use- - cases-03 (work in progress), November 2014. - - [I-D.psenak-ospf-segment-routing-ospfv3-extension] - Psenak, P., Previdi, S., Filsfils, C., Gredler, H., - Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 - Extensions for Segment Routing", draft-psenak-ospf- - segment-routing-ospfv3-extension-02 (work in progress), - July 2014. 
- - [I-D.vyncke-6man-segment-routing-security] - Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header - (SRH) Security Considerations", July 2014. - - [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. - Zappala, "Source Demand Routing: Packet Format and - Forwarding Specification (Version 1)", RFC 1940, May 1996. - -Authors' Addresses - - - - - - - -Previdi, et al. Expires June 12, 2015 [Page 22] - -Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 - - - Stefano Previdi (editor) - Cisco Systems, Inc. - Via Del Serafico, 200 - Rome 00142 - Italy - - Email: sprevidi@cisco.com - - - Clarence Filsfils - Cisco Systems, Inc. - Brussels - BE - - Email: cfilsfil@cisco.com - - - Brian Field - Comcast - 4100 East Dry Creek Road - Centennial, CO 80122 - US - - Email: Brian_Field@cable.comcast.com - - - Ida Leung - Rogers Communications - 8200 Dixie Road - Brampton, ON L6T 0C1 - CA - - Email: Ida.Leung@rci.rogers.com diff --git a/src/vnet/sr/sr.api b/src/vnet/sr/sr.api index 3d017ce5..5feadcb0 100644 --- a/src/vnet/sr/sr.api +++ b/src/vnet/sr/sr.api @@ -12,108 +12,207 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -/** \brief IPv6 segment routing tunnel add / del request + +/** \brief IPv6 SR LocalSID add/del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_del Boolean of whether its a delete instruction + @param localsid_addr IPv6 address of the localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param behavior Type of behavior (function) for this localsid + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. 
+*/ +define sr_localsid_add_del +{ + u32 client_index; + u32 context; + u8 is_del; + u8 localsid_addr[16]; + u8 end_psp; + u8 behavior; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +}; + +/** \brief IPv6 SR LocalSID add/del request response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_localsid_add_del_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 SR policy add + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param bsid is the bindingSID of the SR Policy + @param weight is the weight of the sid list. optional. + @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation) + @param type is the type of the SR policy. (0.Default // 1.Spray) + @param fib_table is the VRF where to install the FIB entry for the BSID + @param segments is a vector of IPv6 address composing the segment list +*/ +define sr_policy_add +{ + u32 client_index; + u32 context; + u8 bsid_addr[16]; + u32 weight; + u8 is_encap; + u8 type; + u32 fib_table; + u8 n_segments; + u8 segments[0]; +}; + +/** \brief IPv6 SR Policy add request response + @param context - sender context, to match reply w/ request + @param retval - return value for request +*/ +define sr_policy_add_reply +{ + u32 context; + i32 retval; +}; + +/** \brief IPv6 SR policy modification @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - tunnel name (len. 64) - @param src_address[] - - @param dst_address[] - - @param dst_mask_width - - @param inner_vrf_id - - @param outer_vrf_id - - @param flags_net_byte_order - - @param n_segments - - @param n_tags - - @param segs_and_tags[] - - @param policy_name[] - name of policy to associate this tunnel to (len. 
64) + @param bsid is the bindingSID of the SR Policy + @param sr_policy_index is the index of the SR policy + @param fib_table is the VRF where to install the FIB entry for the BSID + @param operation is the operation to perform (among the top ones) + @param segments is a vector of IPv6 address composing the segment list + @param sl_index is the index of the Segment List to modify/delete + @param weight is the weight of the sid list. optional. + @param is_encap Mode. Encapsulation or SRH insertion. */ -define sr_tunnel_add_del +define sr_policy_mod { u32 client_index; u32 context; - u8 is_add; - u8 name[64]; - u8 src_address[16]; - u8 dst_address[16]; - u8 dst_mask_width; - u32 inner_vrf_id; - u32 outer_vrf_id; - u16 flags_net_byte_order; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 fib_table; + u8 operation; + u32 sl_index; + u32 weight; u8 n_segments; - u8 n_tags; - u8 policy_name[64]; - u8 segs_and_tags[0]; + u8 segments[0]; }; -/** \brief IPv6 segment routing tunnel add / del response +/** \brief IPv6 SR Policy modification request response @param context - sender context, to match reply w/ request @param retval - return value for request */ -define sr_tunnel_add_del_reply +define sr_policy_mod_reply { u32 context; i32 retval; }; -/** \brief IPv6 segment routing policy add / del request +/** \brief IPv6 SR policy deletion @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param name[] - policy name (len. 
64) - @param tunnel_names[] - + @param bsid is the bindingSID of the SR Policy + @param index is the index of the SR policy */ -define sr_policy_add_del +define sr_policy_del { u32 client_index; u32 context; - u8 is_add; - u8 name[64]; - u8 tunnel_names[0]; + u8 bsid_addr[16]; + u32 sr_policy_index; }; -/** \brief IPv6 segment routing policy add / del response +/** \brief IPv6 SR Policy deletion request response @param context - sender context, to match reply w/ request - @param retval - return value for request - - + @param retval - return value for request */ -define sr_policy_add_del_reply +define sr_policy_del_reply { u32 context; i32 retval; }; -/** \brief IPv6 segment routing multicast map to policy add / del request +/** \brief IPv6 SR steering add/del @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param is_add - add the tunnel if non-zero, else delete it - @param multicast_address[] - IP6 multicast address - @param policy_name[] = policy name (len.64) + @param is_del + @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + @param sr_policy is the index of the SR Policy (alt to bsid) + @param table_id is the VRF where to install the FIB entry for the BSID + @param prefix is the IPv4/v6 address for L3 traffic type + @param mask_width is the mask for L3 traffic type + @param sw_if_index is the incoming interface for L2 traffic + @param traffic_type describes the type of traffic */ -define sr_multicast_map_add_del +define sr_steering_add_del { u32 client_index; u32 context; - u8 is_add; - u8 multicast_address[16]; - u8 policy_name[64]; + u8 is_del; + u8 bsid_addr[16]; + u32 sr_policy_index; + u32 table_id; + u8 prefix_addr[16]; + u32 mask_width; + u32 sw_if_index; + u8 traffic_type; }; -/** \brief IPv6 segment routing multicast map to policy add / del response +/** \brief IPv6 SR steering add/del request response @param context - sender context, to match reply w/ 
request @param retval - return value for request */ -define sr_multicast_map_add_del_reply +define sr_steering_add_del_reply { u32 context; i32 retval; }; +/** \brief Dump the list of SR LocalSIDs + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +/**define sr_localsids_dump +{ + u32 client_index; + u32 context; +};*/ + +/** \brief Details about a single SR LocalSID + @param context - returned sender context, to match reply w/ request + @param localsid_addr IPv6 address of the localsid + @param behavior Type of behavior (function) for this localsid + @param end_psp Boolean of whether decapsulation is allowed in this function + @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + @param fib_table FIB table in which we should install the localsid entry + @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. +*/ +/**manual_endian define sr_localsid_details +{ + u32 context; + u8 localsid_addr[16]; + u8 behavior; + u8 end_psp; + u32 sw_if_index; + u32 vlan_index; + u32 fib_table; + u8 nh_addr[16]; +};*/ + /* + * fd.io coding-style-patch-verification: ON * Local Variables: * eval: (c-set-style "gnu") * End: */ -
\ No newline at end of file diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c index 012d4542..34344fce 100644..100755 --- a/src/vnet/sr/sr.c +++ b/src/vnet/sr/sr.c @@ -17,9 +17,10 @@ /** * @file - * @brief Segment Routing main functions + * @brief Segment Routing initialization * */ + #include <vnet/vnet.h> #include <vnet/sr/sr.h> #include <vnet/fib/ip6_fib.h> @@ -27,1179 +28,13 @@ #include <vnet/dpo/dpo.h> #include <vnet/dpo/replicate_dpo.h> -#include <openssl/hmac.h> - ip6_sr_main_t sr_main; -static vlib_node_registration_t sr_local_node; - -/** - * @brief Dynamically added SR DPO type - */ -static dpo_type_t sr_dpo_type; - -/** - * @brief Dynamically added SR FIB Node type - */ -static fib_node_type_t sr_fib_node_type; - -/** - * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines - * - * @param sm ip6_sr_main_t * - * @param ip ip6_header_t * - * @param sr ip6_sr_header_t * - */ -void -sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) -{ - u32 key_index; - static u8 *keybuf; - u8 *copy_target; - int first_segment; - ip6_address_t *addrp; - int i; - ip6_sr_hmac_key_t *hmac_key; - u32 sig_len; - - key_index = sr->hmac_key; - - /* No signature? Pass... */ - if (key_index == 0) - return; - - /* We don't know about this key? Fail... */ - if (key_index >= vec_len (sm->hmac_keys)) - return; - - hmac_key = sm->hmac_keys + key_index; - - vec_reset_length (keybuf); - - /* pkt ip6 src address */ - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); - - /* first segment */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->first_segment; - - /* octet w/ bit 0 = "clean" flag */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] - = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) - ? 
0x80 : 0; - - /* hmac key id */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->hmac_key; - - first_segment = sr->first_segment; - - addrp = sr->segments; - - /* segments */ - for (i = 0; i <= first_segment; i++) - { - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - addrp++; - - HMAC_CTX_init (sm->hmac_ctx); - if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, - vec_len (hmac_key->shared_secret), sm->md)) - clib_warning ("barf1"); - if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) - clib_warning ("barf2"); - if (!HMAC_Final (sm->hmac_ctx, (unsigned char *) addrp, &sig_len)) - clib_warning ("barf3"); - HMAC_CTX_cleanup (sm->hmac_ctx); -} - -/** - * @brief Format function for decoding various SR flags - * - * @param s u8 * - formatted string - * @param args va_list * - u16 flags - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header_flags (u8 * s, va_list * args) -{ - u16 flags = (u16) va_arg (*args, int); - u8 pl_flag; - int bswap_needed = va_arg (*args, int); - int i; - - if (bswap_needed) - flags = clib_host_to_net_u16 (flags); - - if (flags & IP6_SR_HEADER_FLAG_CLEANUP) - s = format (s, "cleanup "); - - if (flags & IP6_SR_HEADER_FLAG_PROTECTED) - s = format (s, "reroute "); - - s = format (s, "pl: "); - for (i = 1; i <= 4; i++) - { - pl_flag = ip6_sr_policy_list_flags (flags, i); - s = format (s, "[%d] ", i); - - switch (pl_flag) - { - case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: - s = format (s, "NotPr "); - break; - case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: - s = format (s, "InPE "); - break; - case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: - s = format (s, "EgPE "); - break; - - case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: - s = format (s, "OrgSrc "); - break; - } - } - return s; -} - -/** - * @brief Format function for decoding ip6_sr_header_t - * - * @param s u8 * - formatted string - * @param args va_list * - 
ip6_sr_header_t - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header (u8 * s, va_list * args) -{ - ip6_sr_header_t *h = va_arg (*args, ip6_sr_header_t *); - ip6_address_t placeholder_addr = - { {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, - 254, 254} - }; - int print_hmac = va_arg (*args, int); - int i, pl_index, max_segs; - int flags_host_byte_order = clib_net_to_host_u16 (h->flags); - - s = format (s, "next proto %d, len %d, type %d", - h->protocol, (h->length << 3) + 8, h->type); - s = format (s, "\n segs left %d, first_segment %d, hmac key %d", - h->segments_left, h->first_segment, h->hmac_key); - s = format (s, "\n flags %U", format_ip6_sr_header_flags, - flags_host_byte_order, 0 /* bswap needed */ ); - - /* - * Header length is in 8-byte units (minus one), so - * divide by 2 to ascertain the number of ip6 addresses in the - * segment list - */ - max_segs = (h->length >> 1); - - if (!print_hmac && h->hmac_key) - max_segs -= 2; - - s = format (s, "\n Segments (in processing order):"); - - for (i = h->first_segment; i >= 1; i--) - s = format (s, "\n %U", format_ip6_address, h->segments + i); - if (ip6_address_is_equal (&placeholder_addr, h->segments)) - s = format (s, "\n (empty placeholder)"); - else - s = format (s, "\n %U", format_ip6_address, h->segments); - - s = format (s, "\n Policy List:"); - - pl_index = 1; /* to match the RFC text */ - for (i = (h->first_segment + 1); i < max_segs; i++, pl_index++) - { - char *tag; - char *tags[] = { " ", "InPE: ", "EgPE: ", "OrgSrc: " }; - - tag = tags[0]; - if (pl_index >= 1 && pl_index <= 4) - { - int this_pl_flag = ip6_sr_policy_list_flags - (flags_host_byte_order, pl_index); - tag = tags[this_pl_flag]; - } - - s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i); - } - - return s; -} - -/** - * @brief Format function for decoding ip6_sr_header_t with length - * - * @param s u8 * - formatted string - * @param args va_list * - ip6_header_t + 
ip6_sr_header_t - * - * @return formatted output string u8 * - */ -u8 * -format_ip6_sr_header_with_length (u8 * s, va_list * args) -{ - ip6_header_t *h = va_arg (*args, ip6_header_t *); - u32 max_header_bytes = va_arg (*args, u32); - uword header_bytes; - - header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t); - if (max_header_bytes != 0 && header_bytes > max_header_bytes) - return format (s, "ip6_sr header truncated"); - - s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes); - s = - format (s, "SR: %U\n", format_ip6_sr_header, (ip6_sr_header_t *) (h + 1), - 0 /* print_hmac */ , max_header_bytes); - return s; -} - -/** - * @brief Defined valid next nodes -*/ -#define foreach_sr_rewrite_next \ -_(ERROR, "error-drop") \ -_(SR_LOCAL, "sr-local") - -/** - * @brief Struct for defined valid next nodes -*/ -typedef enum -{ -#define _(s,n) SR_REWRITE_NEXT_##s, - foreach_sr_rewrite_next -#undef _ - SR_REWRITE_N_NEXT, -} sr_rewrite_next_t; - -/** - * @brief Struct for data for SR rewrite packet trace - */ -typedef struct -{ - ip6_address_t src, dst; - u16 length; - u32 next_index; - u32 tunnel_index; - u8 sr[256]; -} sr_rewrite_trace_t; - -/** - * @brief Error strings for SR rewrite - */ -static char *sr_rewrite_error_strings[] = { -#define sr_error(n,s) s, -#include "sr_error.def" -#undef sr_error -}; - -/** - * @brief Struct for SR rewrite error strings - */ -typedef enum -{ -#define sr_error(n,s) SR_REWRITE_ERROR_##n, -#include "sr_error.def" -#undef sr_error - SR_REWRITE_N_ERROR, -} sr_rewrite_error_t; - - -/** - * @brief Format function for SR rewrite trace. 
- */ -u8 * -format_sr_rewrite_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *); - ip6_sr_main_t *sm = &sr_main; - ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); - ip6_fib_t *rx_fib, *tx_fib; - - rx_fib = ip6_fib_get (tun->rx_fib_index); - tx_fib = ip6_fib_get (tun->tx_fib_index); - - s = format - (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n" - " rx-fib-id %d tx-fib-id %d\n%U", - (t->next_index == SR_REWRITE_NEXT_SR_LOCAL) - ? "sr-local" : "ip6-lookup", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, - rx_fib->table_id, tx_fib->table_id, - format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); - return s; -} - -/** - * @brief Main processing dual-loop for Segment Routing Rewrite - * @node sr-rewrite - * - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_rewrite (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, - vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); - sr_local_cb = sm->sr_local_cb; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - /* Note 2x loop disabled */ - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip6_header_t *ip0, *ip1; - ip6_sr_header_t *sr0, *sr1; - ip6_sr_tunnel_t *t0, *t1; - u32 next0; - u32 next1; - u16 new_l0 = 0; - u16 new_l1 = 0; - - /* Prefetch next 
iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - /* - * $$$ parse through header(s) to pick the point - * where we punch in the SR extention header - */ - t0 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - t1 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b1)->ip.adj_index[VLIB_TX]); - - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t0->rewrite)) + b0->current_data); - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t1->rewrite)) + b1->current_data); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index; - - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - - /* - * SR-unaware service chaining case: pkt coming back from - * service has the original dst address, and will already - * have an SR header. 
If so, send it to sr-local - */ - if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b0, sizeof (ip0)); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - new_l0 = clib_net_to_host_u16 (ip0->payload_length); - next0 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - /* Ignoring the sr_local for now, if RH follows HBH here */ - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - } - else - { - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ - } - /* - * Copy data before the punch-in point left by the - * required amount. Assume (for the moment) that only - * the main packet header needs to be copied. - */ - clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, len_bytes); - vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); - - /* Fix the next header chain */ - sr0->protocol = next_hdr; - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip0, sr0); - - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - t0->first_hop_dpo.dpoi_index; - next0 = t0->first_hop_dpo.dpoi_next_node; - next0 = (sr_local_cb ? 
- sr_local_cb (vm, node, b0, ip0, sr0) : next0); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) - b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b1, sizeof (ip1)); - sr1 = (ip6_sr_header_t *) (ip1 + 1); - new_l1 = clib_net_to_host_u16 (ip1->payload_length); - next1 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip1->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - /* Ignoring the sr_local for now, if RH follows HBH here */ - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - } - else - { - ip1->protocol = IPPROTO_IPV6_ROUTE; - } - /* - * Copy data before the punch-in point left by the - * required amount. Assume (for the moment) that only - * the main packet header needs to be copied. 
- */ - clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite), - ip1, len_bytes); - vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite)); - ip1 = vlib_buffer_get_current (b1); - sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes); - clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite)); - - sr1->protocol = next_hdr; - new_l1 = clib_net_to_host_u16 (ip1->payload_length) + - vec_len (t1->rewrite); - ip1->payload_length = clib_host_to_net_u16 (new_l1); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr1->segments, ip1->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip1->dst_address.as_u64, t1->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip1, sr1); - - vnet_buffer (b1)->ip.adj_index[VLIB_TX] = - t1->first_hop_dpo.dpoi_index; - next1 = t1->first_hop_dpo.dpoi_next_node; - next1 = (sr_local_cb ? - sr_local_cb (vm, node, b1, ip1, sr1) : next1); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next1 & 0x80000000)) - { - next1 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next1 == SR_REWRITE_NEXT_ERROR)) - b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = new_l0; - tr->next_index = next0; - if (sr0) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - tr->tunnel_index = t1 - sm->tunnels; - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof 
(tr->dst.as_u8)); - tr->length = new_l1; - tr->next_index = next1; - if (sr1) - clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); - } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0 = 0; - ip6_sr_tunnel_t *t0; - u32 next0; - u16 new_l0 = 0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - - /* - * $$$ parse through header(s) to pick the point - * where we punch in the SR extention header - */ - t0 = - pool_elt_at_index (sm->tunnels, - vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - - ASSERT (VLIB_BUFFER_PRE_DATA_SIZE - >= ((word) vec_len (t0->rewrite)) + b0->current_data); - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; - - ip0 = vlib_buffer_get_current (b0); - - /* - * SR-unaware service chaining case: pkt coming back from - * service has the original dst address, and will already - * have an SR header. 
If so, send it to sr-local - */ - if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) - { - vlib_buffer_advance (b0, sizeof (ip0)); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - new_l0 = clib_net_to_host_u16 (ip0->payload_length); - next0 = SR_REWRITE_NEXT_SR_LOCAL; - } - else - { - u32 len_bytes = sizeof (ip6_header_t); - u8 next_hdr = ip0->protocol; - - /* HBH must immediately follow ipv6 header */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - len_bytes += - ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); - next_hdr = ext_hdr->next_hdr; - ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; - /* Ignoring the sr_local for now, if RH follows HBH here */ - } - else - { - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ - } - /* - * Copy data before the punch-in point left by the - * required amount. Assume (for the moment) that only - * the main packet header needs to be copied. - */ - clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, len_bytes); - vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); - /* $$$ tune */ - clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); - - /* Fix the next header chain */ - sr0->protocol = next_hdr; - new_l0 = clib_net_to_host_u16 (ip0->payload_length) + - vec_len (t0->rewrite); - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - /* Copy dst address into the DA slot in the segment list */ - clib_memcpy (sr0->segments, ip0->dst_address.as_u64, - sizeof (ip6_address_t)); - /* Rewrite the ip6 dst address with the first hop */ - clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, - sizeof (ip6_address_t)); - - sr_fix_hmac (sm, ip0, sr0); - - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = - t0->first_hop_dpo.dpoi_index; - next0 = t0->first_hop_dpo.dpoi_next_node; - next0 = (sr_local_cb ? 
- sr_local_cb (vm, node, b0, ip0, sr0) : next0); - - /* - * Ignore "do not rewrite" shtik in this path - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) - b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; - } - } - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - tr->tunnel_index = t0 - sm->tunnels; - if (ip0) - { - memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - } - tr->length = new_l0; - tr->next_index = next0; - if (sr0) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_rewrite_node) = { - .function = sr_rewrite, - .name = "sr-rewrite", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - .format_trace = format_sr_rewrite_trace, - .format_buffer = format_ip6_sr_header_with_length, - - .n_errors = SR_REWRITE_N_ERROR, - .error_strings = sr_rewrite_error_strings, - - .runtime_data_bytes = 0, - - .n_next_nodes = SR_REWRITE_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_REWRITE_NEXT_##s] = n, - foreach_sr_rewrite_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) -/* *INDENT-ON* */ - -static int -ip6_routes_add_del (ip6_sr_tunnel_t * t, int is_del) -{ - ip6_sr_main_t *sm = &sr_main; - - /* - * the prefix for the tunnel's destination - */ - /* *INDENT-OFF* */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = t->dst_mask_width, - .fp_addr = { - .ip6 = t->key.dst, - } - }; - /* *INDENT-ON* */ - - if (is_del) - { - fib_table_entry_delete (t->rx_fib_index, &pfx, FIB_SOURCE_SR); - } - else - { - dpo_id_t dpo = DPO_INVALID; - - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); - fib_table_entry_special_dpo_add (t->rx_fib_index, - &pfx, - FIB_SOURCE_SR, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); - dpo_reset (&dpo); - } - - /* - * Track the first hop address so we don't need to perform an extra - * lookup in the data-path - */ - /* *INDENT-OFF* */ - const fib_prefix_t first_hop_pfx = { - .fp_len = 128, - .fp_proto = FIB_PROTOCOL_IP6, - .fp_addr = { - .ip6 = t->first_hop, - } - }; - /* *INDENT-ON* */ - - if (is_del) - { - fib_entry_child_remove (t->fib_entry_index, t->sibling_index); - fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR); - } - else - { - t->fib_entry_index = - fib_table_entry_special_add (t->rx_fib_index, - &first_hop_pfx, - FIB_SOURCE_RR, - FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID); - t->sibling_index = - fib_entry_child_add (t->fib_entry_index, - sr_fib_node_type, t - sm->tunnels); - } - - return 0; -} - -/** - * @brief Find or add if not found - HMAC shared secret - * - * @param sm ip6_sr_main_t * - * @param secret u8 * - * @param indexp u32 * - 
* - * @return ip6_sr_hmac_key_t * - */ -static ip6_sr_hmac_key_t * -find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp) -{ - uword *p; - ip6_sr_hmac_key_t *key = 0; - int i; - - p = hash_get_mem (sm->hmac_key_by_shared_secret, secret); - - if (p) - { - key = vec_elt_at_index (sm->hmac_keys, p[0]); - if (indexp) - *indexp = p[0]; - return (key); - } - - /* Specific key ID? */ - if (indexp && *indexp) - { - vec_validate (sm->hmac_keys, *indexp); - key = sm->hmac_keys + *indexp; - } - else - { - for (i = 0; i < vec_len (sm->hmac_keys); i++) - { - if (sm->hmac_keys[i].shared_secret == 0) - { - key = sm->hmac_keys + i; - goto found; - } - } - vec_validate (sm->hmac_keys, i); - key = sm->hmac_keys + i; - found: - ; - } - - key->shared_secret = vec_dup (secret); - - hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret, - key - sm->hmac_keys); - - if (indexp) - *indexp = key - sm->hmac_keys; - return (key); -} - -/** - * @brief Stack a tunnel on the forwarding chain of the first-hop - */ -static void -sr_tunnel_stack (ip6_sr_tunnel_t * st) -{ - dpo_stack (sr_dpo_type, - DPO_PROTO_IP6, - &st->first_hop_dpo, - fib_entry_contribute_ip_forwarding (st->fib_entry_index)); -} - -/** - * @brief Add or Delete a Segment Routing tunnel. 
- * - * @param a ip6_sr_add_del_tunnel_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) -{ - ip6_main_t *im = &ip6_main; - ip6_sr_tunnel_key_t key; - ip6_sr_tunnel_t *t; - uword *p, *n; - ip6_sr_header_t *h = 0; - u32 header_length; - ip6_address_t *addrp, *this_address; - ip6_sr_main_t *sm = &sr_main; - u8 *key_copy; - u32 rx_fib_index, tx_fib_index; - u32 hmac_key_index_u32; - u8 hmac_key_index = 0; - ip6_sr_policy_t *pt; - int i; - - /* Make sure that the rx FIB exists */ - p = hash_get (im->fib_index_by_table_id, a->rx_table_id); - - if (p == 0) - return -3; - - /* remember the FIB index */ - rx_fib_index = p[0]; - - /* Make sure that the supplied FIB exists */ - p = hash_get (im->fib_index_by_table_id, a->tx_table_id); - - if (p == 0) - return -4; - - /* remember the FIB index */ - tx_fib_index = p[0]; - - clib_memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src)); - clib_memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst)); - - /* When adding a tunnel: - * - If a "name" is given, it must not exist. - * - The "key" is always checked, and must not exist. - * When deleting a tunnel: - * - If the "name" is given, and it exists, then use it. - * - If the "name" is not given, use the "key". - * - If the "name" and the "key" are given, then both must point to the same - * thing. 
- */ - - /* Lookup the key */ - p = hash_get_mem (sm->tunnel_index_by_key, &key); - - /* If the name is given, look it up */ - if (a->name) - n = hash_get_mem (sm->tunnel_index_by_name, a->name); - else - n = 0; - - /* validate key/name parameters */ - if (!a->is_del) /* adding a tunnel */ - { - if (a->name && n) /* name given & exists already */ - return -1; - if (p) /* key exists already */ - return -1; - } - else /* deleting a tunnel */ - { - if (!p) /* key doesn't exist */ - return -2; - if (a->name && !n) /* name given & it doesn't exist */ - return -2; - - if (n) /* name given & found */ - { - if (n[0] != p[0]) /* name and key do not point to the same thing */ - return -2; - } - } - - - if (a->is_del) /* delete the tunnel */ - { - hash_pair_t *hp; - - /* Delete existing tunnel */ - t = pool_elt_at_index (sm->tunnels, p[0]); - - ip6_routes_add_del (t, 1); - - vec_free (t->rewrite); - /* Remove tunnel from any policy if associated */ - if (t->policy_index != ~0) - { - pt = pool_elt_at_index (sm->policies, t->policy_index); - for (i = 0; i < vec_len (pt->tunnel_indices); i++) - { - if (pt->tunnel_indices[i] == t - sm->tunnels) - { - vec_delete (pt->tunnel_indices, 1, i); - goto found; - } - } - clib_warning ("Tunnel index %d not found in policy_index %d", - t - sm->tunnels, pt - sm->policies); - found: - /* If this is last tunnel in the policy, clean up the policy too */ - if (vec_len (pt->tunnel_indices) == 0) - { - hash_unset_mem (sm->policy_index_by_policy_name, pt->name); - vec_free (pt->name); - pool_put (sm->policies, pt); - } - } - - /* Clean up the tunnel by name */ - if (t->name) - { - hash_unset_mem (sm->tunnel_index_by_name, t->name); - vec_free (t->name); - } - dpo_reset (&t->first_hop_dpo); - pool_put (sm->tunnels, t); - hp = hash_get_pair (sm->tunnel_index_by_key, &key); - key_copy = (void *) (hp->key); - hash_unset_mem (sm->tunnel_index_by_key, &key); - vec_free (key_copy); - return 0; - } - - /* create a new tunnel */ - pool_get (sm->tunnels, t); 
- memset (t, 0, sizeof (*t)); - t->policy_index = ~0; - fib_node_init (&t->node, sr_fib_node_type); - - clib_memcpy (&t->key, &key, sizeof (t->key)); - t->dst_mask_width = a->dst_mask_width; - t->rx_fib_index = rx_fib_index; - t->tx_fib_index = tx_fib_index; - - if (!vec_len (a->segments)) - /* there must be at least one segment... */ - return -4; - - /* The first specified hop goes right into the dst address */ - clib_memcpy (&t->first_hop, &a->segments[0], sizeof (ip6_address_t)); - - /* - * Create the sr header rewrite string - * The list of segments needs an extra slot for the ultimate destination - * which is taken from the packet we add the SRH to. - */ - header_length = sizeof (*h) + - sizeof (ip6_address_t) * (vec_len (a->segments) + 1 + vec_len (a->tags)); - - if (a->shared_secret) - { - /* Allocate a new key slot if we don't find the secret key */ - hmac_key_index_u32 = 0; - (void) find_or_add_shared_secret (sm, a->shared_secret, - &hmac_key_index_u32); - - /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! */ - if (hmac_key_index_u32 >= 256) - return -5; - hmac_key_index = hmac_key_index_u32; - header_length += SHA256_DIGEST_LENGTH; - } - - vec_validate (t->rewrite, header_length - 1); - - h = (ip6_sr_header_t *) t->rewrite; - - h->protocol = 0xFF; /* we don't know yet */ - - h->length = (header_length / 8) - 1; - h->type = ROUTING_HEADER_TYPE_SR; - - /* first_segment and segments_left need to have the index of the last - * element in the list; a->segments has one element less than ends up - * in the header (it does not have the DA in it), so vec_len(a->segments) - * is the value we want. - */ - h->first_segment = h->segments_left = vec_len (a->segments); - - if (a->shared_secret) - h->hmac_key = hmac_key_index & 0xFF; - - h->flags = a->flags_net_byte_order; - - /* Paint on the segment list, in reverse. - * This is offset by one to leave room at the start for the ultimate - * destination. 
- */ - addrp = h->segments + vec_len (a->segments); - - vec_foreach (this_address, a->segments) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp--; - } - - /* - * Since the ultimate destination address is not yet known, set that slot - * to a value we will instantly recognize as bogus. - */ - memset (h->segments, 0xfe, sizeof (ip6_address_t)); - - /* Paint on the tag list, not reversed */ - addrp = h->segments + vec_len (a->segments); - - vec_foreach (this_address, a->tags) - { - clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - key_copy = vec_new (ip6_sr_tunnel_key_t, 1); - clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t)); - hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels); - - /* - * Stick the tunnel index into the rewrite header. - * - * Unfortunately, inserting an SR header according to the various - * RFC's requires parsing through the ip6 header, perhaps consing a - * buffer onto the head of the vlib_buffer_t, etc. We don't use the - * normal reverse bcopy rewrite code. - * - * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain - * at some point... - */ - - /* - * Add the routes for the tunnel destination and first-hop, then stack - * the tunnel on the appropriate forwarding DPOs. - */ - ip6_routes_add_del (t, 0); - sr_tunnel_stack (t); - - if (a->policy_name) - { - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - if (p) - { - pt = pool_elt_at_index (sm->policies, p[0]); - } - else /* no policy, lets create one */ - { - pool_get (sm->policies, pt); - memset (pt, 0, sizeof (*pt)); - pt->name = format (0, "%s%c", a->policy_name, 0); - hash_set_mem (sm->policy_index_by_policy_name, pt->name, - pt - sm->policies); - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - } - vec_add1 (pt->tunnel_indices, t - sm->tunnels); - if (p == 0) - clib_warning ("p is NULL!"); - t->policy_index = p ? 
p[0] : ~0; /* equiv. to (pt - sm->policies) */ - } - - if (a->name) - { - t->name = format (0, "%s%c", a->name, 0); - hash_set_mem (sm->tunnel_index_by_name, t->name, t - sm->tunnels); - } - - return 0; -} /** * @brief no-op lock function. * The lifetime of the SR entry is managed by the control plane */ -static void +void sr_dpo_lock (dpo_id_t * dpo) { } @@ -1208,2227 +43,15 @@ sr_dpo_lock (dpo_id_t * dpo) * @brief no-op unlock function. * The lifetime of the SR entry is managed by the control plane */ -static void -sr_dpo_unlock (dpo_id_t * dpo) -{ -} - -u8 * -format_sr_dpo (u8 * s, va_list * args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED (u32 indent) = va_arg (*args, u32); - - return (format (s, "SR: tunnel:[%d]", index)); -} - -const static dpo_vft_t sr_dpo_vft = { - .dv_lock = sr_dpo_lock, - .dv_unlock = sr_dpo_unlock, - .dv_format = format_sr_dpo, -}; - -const static char *const sr_ip6_nodes[] = { - "sr-rewrite", - NULL, -}; - -const static char *const *const sr_nodes[DPO_PROTO_NUM] = { - [DPO_PROTO_IP6] = sr_ip6_nodes, -}; - -static ip6_sr_tunnel_t * -sr_tunnel_from_fib_node (fib_node_t * node) -{ -#if (CLIB_DEBUG > 0) - ASSERT (sr_fib_node_type == node->fn_type); -#endif - return ((ip6_sr_tunnel_t *) (((char *) node) - - STRUCT_OFFSET_OF (ip6_sr_tunnel_t, node))); -} - -/** - * Function definition to backwalk a FIB node - */ -static fib_node_back_walk_rc_t -sr_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) -{ - sr_tunnel_stack (sr_tunnel_from_fib_node (node)); - - return (FIB_NODE_BACK_WALK_CONTINUE); -} - -/** - * Function definition to get a FIB node from its index - */ -static fib_node_t * -sr_tunnel_fib_node_get (fib_node_index_t index) -{ - ip6_sr_tunnel_t *st; - ip6_sr_main_t *sm; - - sm = &sr_main; - st = pool_elt_at_index (sm->tunnels, index); - - return (&st->node); -} - -/** - * Function definition to inform the FIB node that its last lock has gone. 
- */ -static void -sr_tunnel_last_lock_gone (fib_node_t * node) -{ - /* - * The SR tunnel is a root of the graph. As such - * it never has children and thus is never locked. - */ - ASSERT (0); -} - -/* - * Virtual function table registered by SR tunnels - * for participation in the FIB object graph. - */ -const static fib_node_vft_t sr_fib_vft = { - .fnv_get = sr_tunnel_fib_node_get, - .fnv_last_lock = sr_tunnel_last_lock_gone, - .fnv_back_walk = sr_tunnel_back_walk, -}; - -/** - * @brief CLI parser for Add or Delete a Segment Routing tunnel. - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_tunnel_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - ip6_address_t src_address; - int src_address_set = 0; - ip6_address_t dst_address; - u32 dst_mask_width; - int dst_address_set = 0; - u16 flags = 0; - u8 *shared_secret = 0; - u8 *name = 0; - u8 *policy_name = 0; - u32 rx_table_id = 0; - u32 tx_table_id = 0; - ip6_address_t *segments = 0; - ip6_address_t *this_seg; - ip6_address_t *tags = 0; - ip6_address_t *this_tag; - ip6_sr_add_del_tunnel_args_t _a, *a = &_a; - ip6_address_t next_address, tag; - int pl_index; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "rx-fib-id %d", &rx_table_id)) - ; - else if (unformat (input, "tx-fib-id %d", &tx_table_id)) - ; - else if (unformat (input, "src %U", unformat_ip6_address, &src_address)) - src_address_set = 1; - else if (unformat (input, "name %s", &name)) - ; - else if (unformat (input, "policy %s", &policy_name)) - ; - else if (unformat (input, "dst %U/%d", - unformat_ip6_address, &dst_address, &dst_mask_width)) - dst_address_set = 1; - else if (unformat (input, "next %U", unformat_ip6_address, - &next_address)) - { - vec_add2 (segments, 
this_seg, 1); - clib_memcpy (this_seg->as_u8, next_address.as_u8, - sizeof (*this_seg)); - } - else if (unformat (input, "tag %U", unformat_ip6_address, &tag)) - { - vec_add2 (tags, this_tag, 1); - clib_memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag)); - } - else if (unformat (input, "clean")) - flags |= IP6_SR_HEADER_FLAG_CLEANUP; - else if (unformat (input, "protected")) - flags |= IP6_SR_HEADER_FLAG_PROTECTED; - else if (unformat (input, "key %s", &shared_secret)) - /* Do not include the trailing NULL byte. Guaranteed interop issue */ - _vec_len (shared_secret) -= 1; - else if (unformat (input, "InPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - { - pl_index_range_error: - return clib_error_return - (0, "Policy List Element Index %d out of range (1-4)", - pl_index); - - } - flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else if (unformat (input, "EgPE %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else if (unformat (input, "OrgSrc %d", &pl_index)) - { - if (pl_index <= 0 || pl_index > 4) - goto pl_index_range_error; - flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR - << ip6_sr_policy_list_shift_from_index (pl_index); - } - else - break; - } - - if (!src_address_set) - return clib_error_return (0, "src address required"); - - if (!dst_address_set) - return clib_error_return (0, "dst address required"); - - if (!segments) - return clib_error_return (0, "at least one sr segment required"); - - memset (a, 0, sizeof (*a)); - a->src_address = &src_address; - a->dst_address = &dst_address; - a->dst_mask_width = dst_mask_width; - a->segments = segments; - a->tags = tags; - a->flags_net_byte_order = clib_host_to_net_u16 (flags); - a->is_del = is_del; - a->rx_table_id = rx_table_id; - a->tx_table_id = tx_table_id; - a->shared_secret = shared_secret; 
- - if (vec_len (name)) - a->name = name; - else - a->name = 0; - - if (vec_len (policy_name)) - a->policy_name = policy_name; - else - a->policy_name = 0; - - rv = ip6_sr_add_del_tunnel (a); - - vec_free (segments); - vec_free (tags); - vec_free (shared_secret); - - switch (rv) - { - case 0: - break; - - case -1: - return clib_error_return (0, "SR tunnel src %U dst %U already exists", - format_ip6_address, &src_address, - format_ip6_address, &dst_address); - - case -2: - return clib_error_return (0, "SR tunnel src %U dst %U does not exist", - format_ip6_address, &src_address, - format_ip6_address, &dst_address); - - case -3: - return clib_error_return (0, "FIB table %d does not exist", - rx_table_id); - - case -4: - return clib_error_return (0, "At least one segment is required"); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d", - rv); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_tunnel_command, static) = { - .path = "sr tunnel", - .short_help = - "sr tunnel [del] [name <name>] src <addr> dst <addr> [next <addr>] " - "[clean] [reroute] [key <secret>] [policy <policy_name>]" - "[rx-fib-id <fib_id>] [tx-fib-id <fib_id>]", - .function = sr_add_del_tunnel_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Display Segment Routing tunnel - * - * @param vm vlib_main_t * - * @param t ip6_sr_tunnel_t * - * - */ void -ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t) -{ - ip6_sr_main_t *sm = &sr_main; - ip6_fib_t *rx_fib, *tx_fib; - ip6_sr_policy_t *pt; - - rx_fib = ip6_fib_get (t->rx_fib_index); - tx_fib = ip6_fib_get (t->tx_fib_index); - - if (t->name) - vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name); - - vlib_cli_output (vm, "src %U dst %U first hop %U", - format_ip6_address, &t->key.src, - format_ip6_address, &t->key.dst, - format_ip6_address, &t->first_hop); - vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d", - rx_fib->table_id, tx_fib->table_id); - vlib_cli_output (vm, " sr: %U", 
format_ip6_sr_header, t->rewrite, - 0 /* print_hmac */ ); - - if (t->policy_index != ~0) - { - pt = pool_elt_at_index (sm->policies, t->policy_index); - vlib_cli_output (vm, "sr policy: %s", (char *) pt->name); - } - vlib_cli_output (vm, "-------"); - - return; -} - -/** - * @brief CLI Parser for Display Segment Routing tunnel - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_tunnel_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - static ip6_sr_tunnel_t **tunnels; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - int i; - uword *p = 0; - u8 *name = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "name %s", &name)) - { - p = hash_get_mem (sm->tunnel_index_by_name, name); - if (!p) - vlib_cli_output (vm, "No SR tunnel with name: %s. Showing all.", - name); - } - else - break; - } - - vec_reset_length (tunnels); - - if (!p) /* Either name parm not passed or no tunnel with that name found, show all */ - { - /* *INDENT-OFF* */ - pool_foreach (t, sm->tunnels, - ({ - vec_add1 (tunnels, t); - })); - /* *INDENT-ON* */ - } - else /* Just show the one tunnel by name */ - vec_add1 (tunnels, &sm->tunnels[p[0]]); - - if (vec_len (tunnels) == 0) - vlib_cli_output (vm, "No SR tunnels configured"); - - for (i = 0; i < vec_len (tunnels); i++) - { - t = tunnels[i]; - ip6_sr_tunnel_display (vm, t); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = { - .path = "show sr tunnel", - .short_help = "show sr tunnel [name <sr-tunnel-name>]", - .function = show_sr_tunnel_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or Delete a Segment Routing policy - * - * @param a ip6_sr_add_del_policy_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a) -{ - ip6_sr_main_t *sm = &sr_main; - uword 
*p; - ip6_sr_tunnel_t *t = 0; - ip6_sr_policy_t *policy; - u32 *tunnel_indices = 0; - int i; - - - - if (a->is_del) - { - p = hash_get_mem (sm->policy_index_by_policy_name, a->name); - if (!p) - return -6; /* policy name not found */ - - policy = pool_elt_at_index (sm->policies, p[0]); - - vec_foreach_index (i, policy->tunnel_indices) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); - t->policy_index = ~0; - } - hash_unset_mem (sm->policy_index_by_policy_name, a->name); - pool_put (sm->policies, policy); - return 0; - } - - - if (!vec_len (a->tunnel_names)) - return -3; /*tunnel name is required case */ - - vec_reset_length (tunnel_indices); - /* Check tunnel names, add tunnel_index to policy */ - for (i = 0; i < vec_len (a->tunnel_names); i++) - { - p = hash_get_mem (sm->tunnel_index_by_name, a->tunnel_names[i]); - if (!p) - return -4; /* tunnel name not found case */ - - t = pool_elt_at_index (sm->tunnels, p[0]); - /* - No need to check t==0. -3 condition above ensures name - */ - if (t->policy_index != ~0) - return -5; /* tunnel name already associated with a policy */ - - /* Add to tunnel indicies */ - vec_add1 (tunnel_indices, p[0]); - } - - /* Add policy to ip6_sr_main_t */ - pool_get (sm->policies, policy); - policy->name = a->name; - policy->tunnel_indices = tunnel_indices; - hash_set_mem (sm->policy_index_by_policy_name, policy->name, - policy - sm->policies); - - /* Yes, this could be construed as overkill but the last thing you should do is set - the policy_index on the tunnel after everything is set in ip6_sr_main_t. - If this is deemed overly cautious, could set this in the vec_len(tunnel_names) loop. 
- */ - for (i = 0; i < vec_len (policy->tunnel_indices); i++) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); - t->policy_index = policy - sm->policies; - } - - return 0; -} - -/** - * @brief CLI Parser for Add or Delete a Segment Routing policy - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_policy_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - u8 **tunnel_names = 0; - u8 *tunnel_name = 0; - u8 *name = 0; - ip6_sr_add_del_policy_args_t _a, *a = &_a; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "name %s", &name)) - ; - else if (unformat (input, "tunnel %s", &tunnel_name)) - { - if (tunnel_name) - { - vec_add1 (tunnel_names, tunnel_name); - tunnel_name = 0; - } - } - else - break; - } - - if (!name) - return clib_error_return (0, "name of SR policy required"); - - - memset (a, 0, sizeof (*a)); - - a->is_del = is_del; - a->name = name; - a->tunnel_names = tunnel_names; - - rv = ip6_sr_add_del_policy (a); - - vec_free (tunnel_names); - - switch (rv) - { - case 0: - break; - - case -3: - return clib_error_return (0, - "tunnel name to associate to SR policy is required"); - - case -4: - return clib_error_return (0, "tunnel name not found"); - - case -5: - return clib_error_return (0, "tunnel already associated with policy"); - - case -6: - return clib_error_return (0, "policy name %s not found", name); - - case -7: - return clib_error_return (0, "TODO: deleting policy name %s", name); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", - rv); - - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_policy_command, static) = { - .path = "sr policy", - .short_help = - "sr policy [del] name <policy-name> tunnel 
<sr-tunnel-name> [tunnel <sr-tunnel-name>]*", - .function = sr_add_del_policy_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI Parser for Displaying Segment Routing policy - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_policy_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - static ip6_sr_policy_t **policies; - ip6_sr_policy_t *policy; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - int i, j; - uword *p = 0; - u8 *name = 0; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "name %s", &name)) - { - p = hash_get_mem (sm->policy_index_by_policy_name, name); - if (!p) - vlib_cli_output (vm, - "policy with name %s not found. Showing all.", - name); - } - else - break; - } - - vec_reset_length (policies); - - if (!p) /* Either name parm not passed or no policy with that name found, show all */ - { - /* *INDENT-OFF* */ - pool_foreach (policy, sm->policies, - ({ - vec_add1 (policies, policy); - })); - /* *INDENT-ON* */ - } - else /* Just show the one policy by name and a summary of tunnel names */ - { - policy = pool_elt_at_index (sm->policies, p[0]); - vec_add1 (policies, policy); - } - - if (vec_len (policies) == 0) - vlib_cli_output (vm, "No SR policies configured"); - - for (i = 0; i < vec_len (policies); i++) - { - policy = policies[i]; - - if (policy->name) - vlib_cli_output (vm, "SR policy name: %s", (char *) policy->name); - for (j = 0; j < vec_len (policy->tunnel_indices); j++) - { - t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[j]); - ip6_sr_tunnel_display (vm, t); - } - } - - return 0; - -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_policy_command, static) = { - .path = "show sr policy", - .short_help = "show sr policy [name <sr-policy-name>]", - .function = show_sr_policy_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or 
Delete a mapping of IP6 multicast address - * to Segment Routing policy. - * - * @param a ip6_sr_add_del_multicastmap_args_t * - * - * @return retval int - */ -int -ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) -{ - uword *p; - ip6_sr_tunnel_t *t; - ip6_sr_main_t *sm = &sr_main; - ip6_sr_policy_t *pt; - index_t rep; - u32 ii; - - if (a->is_del) - { - /* clean up the adjacency */ - p = - hash_get_mem (sm->policy_index_by_multicast_address, - a->multicast_address); - } - else - { - /* Get our policy by policy_name */ - p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); - - } - if (!p) - return -1; - - pt = pool_elt_at_index (sm->policies, p[0]); - - /* - Get the first tunnel associated with policy populate the fib adjacency. - From there, since this tunnel will have it's policy_index != ~0 it will - be the trigger in the dual_loop to pull up the policy and make a copy-rewrite - for each tunnel in the policy - */ - - t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]); - - /* - * Stick the tunnel index into the rewrite header. - * - * Unfortunately, inserting an SR header according to the various - * RFC's requires parsing through the ip6 header, perhaps consing a - * buffer onto the head of the vlib_buffer_t, etc. We don't use the - * normal reverse bcopy rewrite code. - * - * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain - * at some point... - */ - - /* - * Construct an mFIB entry for the multicast address, - * using the rx/tx fib from the first tunnel. - * There is no RPF information for this address (I need to discuss this with - * Pablo), so for now accept from anywhere... 
- */ - /* *INDENT-OFF* */ - mfib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - .fp_grp_addr = { - .ip6 = *a->multicast_address, - } - }; - /* *INDENT-ON* */ - - if (a->is_del) - mfib_table_entry_delete (t->rx_fib_index, &pfx, MFIB_SOURCE_SRv6); - else - { - /* - * Construct a replicate DPO that will replicate received packets over - * each tunnel in the policy - */ - dpo_id_t dpo = DPO_INVALID; - - rep = replicate_create (vec_len (pt->tunnel_indices), DPO_PROTO_IP6); - - vec_foreach_index (ii, pt->tunnel_indices) - { - dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, pt->tunnel_indices[ii]); - - replicate_set_bucket (rep, ii, &dpo); - } - - mfib_table_entry_special_add (t->rx_fib_index, - &pfx, - MFIB_SOURCE_SRv6, - MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF, rep); - - dpo_reset (&dpo); - } - - u8 *mcast_copy = 0; - mcast_copy = vec_new (ip6_address_t, 1); - memcpy (mcast_copy, a->multicast_address, sizeof (ip6_address_t)); - - if (a->is_del) - { - hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy); - vec_free (mcast_copy); - } - else - { - hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy, - pt - sm->policies); - } - - return 0; -} - -/** - * @brief CLI Parser for Adding or Delete a mapping of IP6 multicast address - * to Segment Routing policy. 
- * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -sr_add_del_multicast_map_command_fn (vlib_main_t * vm, - unformat_input_t * input, - vlib_cli_command_t * cmd) -{ - int is_del = 0; - ip6_address_t multicast_address; - u8 *policy_name = 0; - int multicast_address_set = 0; - ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; - int rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else - if (unformat - (input, "address %U", unformat_ip6_address, &multicast_address)) - multicast_address_set = 1; - else if (unformat (input, "sr-policy %s", &policy_name)) - ; - else - break; - } - - if (!is_del && !policy_name) - return clib_error_return (0, "name of sr policy required"); - - if (!multicast_address_set) - return clib_error_return (0, "multicast address required"); - - memset (a, 0, sizeof (*a)); - - a->is_del = is_del; - a->multicast_address = &multicast_address; - a->policy_name = policy_name; - - rv = ip6_sr_add_del_multicastmap (a); - - switch (rv) - { - case 0: - break; - case -1: - return clib_error_return (0, "no policy with name: %s", policy_name); - - case -2: - return clib_error_return (0, "multicast map someting "); - - case -3: - return clib_error_return (0, - "tunnel name to associate to SR policy is required"); - - case -7: - return clib_error_return (0, "TODO: deleting policy name %s", - policy_name); - - default: - return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", - rv); - - } - return 0; - -} - - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_multicast_map_command, static) = { - .path = "sr multicast-map", - .short_help = - "sr multicast-map address <multicast-ip6-address> sr-policy <sr-policy-name> [del]", - .function = sr_add_del_multicast_map_command_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI Parser for Displaying a mapping of IP6 multicast 
address - * to Segment Routing policy. - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_multicast_map_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - u8 *key = 0; - u32 value; - ip6_address_t multicast_address; - ip6_sr_policy_t *pt; - - /* pull all entries from the hash table into vector for display */ - - /* *INDENT-OFF* */ - hash_foreach_mem (key, value, sm->policy_index_by_multicast_address, - ({ - if (!key) - vlib_cli_output (vm, "no multicast maps configured"); - else - { - multicast_address = *((ip6_address_t *)key); - pt = pool_elt_at_index (sm->policies, value); - if (pt) - { - vlib_cli_output (vm, "address: %U policy: %s", - format_ip6_address, &multicast_address, - pt->name); - } - else - vlib_cli_output (vm, "BUG: policy not found for address: %U with policy index %d", - format_ip6_address, &multicast_address, - value); - - } - - })); - /* *INDENT-ON* */ - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_multicast_map_command, static) = { - .path = "show sr multicast-map", - .short_help = "show sr multicast-map", - .function = show_sr_multicast_map_fn, -}; -/* *INDENT-ON* */ - - -#define foreach_sr_fix_dst_addr_next \ -_(DROP, "error-drop") - -/** - * @brief Struct for valid next-nodes for SR fix destination address node - */ -typedef enum -{ -#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s, - foreach_sr_fix_dst_addr_next -#undef _ - SR_FIX_DST_ADDR_N_NEXT, -} sr_fix_dst_addr_next_t; - -/** - * @brief Error strings for SR Fix Destination rewrite - */ -static char *sr_fix_dst_error_strings[] = { -#define sr_fix_dst_error(n,s) s, -#include "sr_fix_dst_error.def" -#undef sr_fix_dst_error -}; - -/** - * @brief Struct for errors for SR Fix Destination rewrite - */ -typedef enum -{ -#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n, -#include 
"sr_fix_dst_error.def" -#undef sr_fix_dst_error - SR_FIX_DST_N_ERROR, -} sr_fix_dst_error_t; - -/** - * @brief Information for fix address trace - */ -typedef struct -{ - ip6_address_t src, dst; - u32 next_index; - u32 adj_index; - u8 sr[256]; -} sr_fix_addr_trace_t; - -/** - * @brief Formatter for fix address trace - */ -u8 * -format_sr_fix_addr_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_fix_addr_trace_t *t = va_arg (*args, sr_fix_addr_trace_t *); - vnet_hw_interface_t *hi = 0; - ip_adjacency_t *adj; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - vnet_main_t *vnm = vnet_get_main (); - - if (t->adj_index != ~0) - { - adj = ip_get_adjacency (lm, t->adj_index); - hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); - } - - s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n", - (t->next_index == SR_FIX_DST_ADDR_NEXT_DROP) - ? "drop" : "output", - format_ip6_address, &t->src, format_ip6_address, &t->dst); - if (t->next_index != SR_FIX_DST_ADDR_NEXT_DROP) - { - s = - format (s, "%U\n", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); - s = - format (s, " output via %s", - hi ? 
(char *) (hi->name) : "Invalid adj"); - } - return s; -} - -/** - * @brief Fix SR destination address - dual-loop - * - * @node sr-fix-dst-addr - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_fix_dst_addr (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - -#if 0 - while (0 && n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - __attribute__ ((unused)) vlib_buffer_t *b0, *b1; - u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; - u32 next1 = SR_FIX_DST_ADDR_NEXT_DROP; - - /* Prefetch next iteration. 
*/ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } -#endif - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0; - ip_adjacency_t *adj0; - ip6_sr_header_t *sr0; - u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; - ip6_address_t *new_dst0; - ethernet_header_t *eh0; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - - adj0 = - ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - next0 = adj0->if_address_index; - - /* We should be pointing at an Ethernet header... */ - eh0 = vlib_buffer_get_current (b0); - ip0 = (ip6_header_t *) (eh0 + 1); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - /* We'd better find an SR header... */ - if (PREDICT_FALSE (ip0->protocol != IPPROTO_IPV6_ROUTE)) - { - b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER]; - goto do_trace0; - } - else - { - /* - * We get here from sr_rewrite or sr_local, with - * sr->segments_left pointing at the (copy of the original) dst - * address. Use it, then increment sr0->segments_left. - */ - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS]; - goto do_trace0; - } - - /* - * Rewrite the packet with the original dst address - * We assume that the last segment (in processing order) contains - * the original dst address. 
The list is reversed, so sr0->segments - * contains the original dst address. - */ - new_dst0 = sr0->segments; - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - } - - do_trace0: - - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_fix_addr_trace_t *t = vlib_add_trace (vm, node, - b0, sizeof (*t)); - t->next_index = next0; - t->adj_index = ~0; - - if (next0 != SR_FIX_DST_ADDR_NEXT_DROP) - { - t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; - clib_memcpy (t->src.as_u8, ip0->src_address.as_u8, - sizeof (t->src.as_u8)); - clib_memcpy (t->dst.as_u8, ip0->dst_address.as_u8, - sizeof (t->dst.as_u8)); - clib_memcpy (t->sr, sr0, sizeof (t->sr)); - } - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - return from_frame->n_vectors; -} - - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = { - .function = sr_fix_dst_addr, - .name = "sr-fix-dst-addr", - /* Takes a vector of packets. 
*/ - .vector_size = sizeof (u32), - .format_trace = format_sr_fix_addr_trace, - .format_buffer = format_ip6_sr_header_with_length, - - .runtime_data_bytes = 0, - - .n_errors = SR_FIX_DST_N_ERROR, - .error_strings = sr_fix_dst_error_strings, - - .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n, - foreach_sr_fix_dst_addr_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) -/* *INDENT-ON* */ - -static clib_error_t * -sr_init (vlib_main_t * vm) -{ - ip6_sr_main_t *sm = &sr_main; - clib_error_t *error = 0; - vlib_node_t *ip6_lookup_node, *ip6_rewrite_node; - - if ((error = vlib_call_init_function (vm, ip_main_init))) - return error; - - if ((error = vlib_call_init_function (vm, ip6_lookup_init))) - return error; - - sm->vlib_main = vm; - sm->vnet_main = vnet_get_main (); - - vec_validate (sm->hmac_keys, 0); - sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef; - - sm->tunnel_index_by_key = - hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword)); - - sm->tunnel_index_by_name = hash_create_string (0, sizeof (uword)); - - sm->policy_index_by_policy_name = hash_create_string (0, sizeof (uword)); - - sm->policy_index_by_multicast_address = - hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); - - sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof (uword)); - - ip6_register_protocol (IPPROTO_IPV6_ROUTE, sr_local_node.index); - - ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup"); - ASSERT (ip6_lookup_node); - - ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); - ASSERT (ip6_rewrite_node); - - /* Add a disposition to ip6_rewrite for the sr dst address hack node */ - sm->ip6_rewrite_sr_next_index = - vlib_node_add_next (vm, ip6_rewrite_node->index, - sr_fix_dst_addr_node.index); - - OpenSSL_add_all_digests (); - - sm->md = (void *) EVP_get_digestbyname ("sha1"); - sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); 
- - sr_dpo_type = dpo_register_new_type (&sr_dpo_vft, sr_nodes); - sr_fib_node_type = fib_node_register_new_type (&sr_fib_vft); - - return error; -} - -VLIB_INIT_FUNCTION (sr_init); - -/** - * @brief Definition of next-nodes for SR local - */ -#define foreach_sr_local_next \ - _ (ERROR, "error-drop") \ - _ (IP6_LOOKUP, "ip6-lookup") - -/** - * @brief Struct for definition of next-nodes for SR local - */ -typedef enum -{ -#define _(s,n) SR_LOCAL_NEXT_##s, - foreach_sr_local_next -#undef _ - SR_LOCAL_N_NEXT, -} sr_local_next_t; - -/** - * @brief Struct for packet trace of SR local - */ -typedef struct -{ - u8 next_index; - u8 sr_valid; - ip6_address_t src, dst; - u16 length; - u8 sr[256]; -} sr_local_trace_t; - -/** - * @brief Definition of SR local error-strings - */ -static char *sr_local_error_strings[] = { -#define sr_error(n,s) s, -#include "sr_error.def" -#undef sr_error -}; - -/** - * @brief Struct for definition of SR local error-strings - */ -typedef enum -{ -#define sr_error(n,s) SR_LOCAL_ERROR_##n, -#include "sr_error.def" -#undef sr_error - SR_LOCAL_N_ERROR, -} sr_local_error_t; - -/** - * @brief Format SR local trace - * - * @param s u8 * - * @param args va_list * - * - * @return s u8 * - */ -u8 * -format_sr_local_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - sr_local_trace_t *t = va_arg (*args, sr_local_trace_t *); - - s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d", - format_ip6_address, &t->src, - format_ip6_address, &t->dst, t->length, t->next_index); - if (t->sr_valid) - s = - format (s, "\n %U", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); - else - s = format (s, "\n popped SR header"); - - return s; -} - - -/* $$$$ fixme: smp, don't copy data, cache input, output (maybe) */ -/** - * @brief Validate the SR HMAC - * - * @param sm ip6_sr_main_t * - * @param ip ip6_header_t * - * @param sr ip6_sr_header_t * - 
* - * @return retval int - */ -static int -sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) -{ - u32 key_index; - static u8 *keybuf; - u8 *copy_target; - int first_segment; - ip6_address_t *addrp; - int i; - ip6_sr_hmac_key_t *hmac_key; - static u8 *signature; - u32 sig_len; - - key_index = sr->hmac_key; - - /* No signature? Pass... */ - if (key_index == 0) - return 0; - - /* We don't know about this key? Fail... */ - if (key_index >= vec_len (sm->hmac_keys)) - return 1; - - vec_validate (signature, SHA256_DIGEST_LENGTH - 1); - - hmac_key = sm->hmac_keys + key_index; - - vec_reset_length (keybuf); - - /* pkt ip6 src address */ - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); - - /* last segment */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->first_segment; - - /* octet w/ bit 0 = "clean" flag */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] - = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) - ? 
0x80 : 0; - - /* hmac key id */ - vec_add2 (keybuf, copy_target, 1); - copy_target[0] = sr->hmac_key; - - first_segment = sr->first_segment; - - addrp = sr->segments; - - /* segments */ - for (i = 0; i <= first_segment; i++) - { - vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); - clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); - addrp++; - } - - if (sm->is_debug) - clib_warning ("verify key index %d keybuf: %U", key_index, - format_hex_bytes, keybuf, vec_len (keybuf)); - - /* shared secret */ - - /* SHA1 is shorter than SHA-256 */ - memset (signature, 0, vec_len (signature)); - - HMAC_CTX_init (sm->hmac_ctx); - if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, - vec_len (hmac_key->shared_secret), sm->md)) - clib_warning ("barf1"); - if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) - clib_warning ("barf2"); - if (!HMAC_Final (sm->hmac_ctx, signature, &sig_len)) - clib_warning ("barf3"); - HMAC_CTX_cleanup (sm->hmac_ctx); - - if (sm->is_debug) - clib_warning ("computed signature len %d, value %U", sig_len, - format_hex_bytes, signature, vec_len (signature)); - - /* Point at the SHA signature in the packet */ - addrp++; - if (sm->is_debug) - clib_warning ("read signature %U", format_hex_bytes, addrp, - SHA256_DIGEST_LENGTH); - - return memcmp (signature, addrp, SHA256_DIGEST_LENGTH); -} - -/** - * @brief SR local node - * @node sr-local - * - * @param vm vlib_main_t * - * @param node vlib_node_runtime_t * - * @param from_frame vlib_frame_t * - * - * @return from_frame->n_vectors uword - */ -static uword -sr_local (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * from_frame) -{ - u32 n_left_from, next_index, *from, *to_next; - ip6_sr_main_t *sm = &sr_main; - u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, - vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); - sr_local_cb = sm->sr_local_cb; - - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - - next_index = 
node->cached_next_index; - - while (n_left_from > 0) - { - u32 n_left_to_next; - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - u32 bi0, bi1; - vlib_buffer_t *b0, *b1; - ip6_header_t *ip0, *ip1; - ip6_sr_header_t *sr0, *sr1; - ip6_address_t *new_dst0, *new_dst1; - u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; - u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, LOAD); - vlib_prefetch_buffer_header (p3, LOAD); - - CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); - } - - bi0 = from[0]; - bi1 = from[1]; - to_next[0] = bi0; - to_next[1] = bi1; - from += 2; - to_next += 2; - n_left_to_next -= 2; - n_left_from -= 2; - - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - sr0 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - - if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace0; - } - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace0; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip0, sr0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace0; - } - } - - next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : next0; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) - b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index0; - - segment_index0 = sr0->segments_left - 1; - - /* Rewrite the packet */ - new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - - if (PREDICT_TRUE (sr0->segments_left > 0)) - sr0->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr0->segments_left == 0 && - (sr0->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst0, *copy_src0; - u16 new_l0; - u32 copy_len_u64s0 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - copy_len_u64s0 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr0->protocol; - } - else - { - ip0->protocol = sr0->protocol; - } - vlib_buffer_advance (b0, (sr0->length + 1) * 8); - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - - (sr0->length + 1) * 8; - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - copy_src0 = (u64 *) ip0; - copy_dst0 = copy_src0 + (sr0->length + 1); - - copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; - copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; - copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; - copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; - copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - - for (i = copy_len_u64s0 - 1; i >= 0; i--) - { - copy_dst0[i] = copy_src0[i]; - } - - 
sr0 = 0; - } - - do_trace0: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b0); - tr->next_index = next0; - tr->sr_valid = sr0 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - - b1 = vlib_get_buffer (vm, bi1); - ip1 = vlib_buffer_get_current (b1); - sr1 = (ip6_sr_header_t *) (ip1 + 1); - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - sr1 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - - if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace1; - } - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr1->segments_left == 0)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace1; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip1, sr1)) - { - next1 = SR_LOCAL_NEXT_ERROR; - b1->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace1; - } - } - - next1 = sr_local_cb ? 
sr_local_cb (vm, node, b1, ip1, sr1) : next1; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next1 & 0x80000000)) - { - next1 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next1 == SR_LOCAL_NEXT_ERROR)) - b1->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index1; - - segment_index1 = sr1->segments_left - 1; - - /* Rewrite the packet */ - new_dst1 = (ip6_address_t *) (sr1->segments + segment_index1); - ip1->dst_address.as_u64[0] = new_dst1->as_u64[0]; - ip1->dst_address.as_u64[1] = new_dst1->as_u64[1]; - - if (PREDICT_TRUE (sr1->segments_left > 0)) - sr1->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr1->segments_left == 0 && - (sr1->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst1, *copy_src1; - u16 new_l1; - u32 copy_len_u64s1 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); - copy_len_u64s1 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr1->protocol; - } - else - { - ip1->protocol = sr1->protocol; - } - vlib_buffer_advance (b1, (sr1->length + 1) * 8); - - new_l1 = clib_net_to_host_u16 (ip1->payload_length) - - (sr1->length + 1) * 8; - ip1->payload_length = clib_host_to_net_u16 (new_l1); - - copy_src1 = (u64 *) ip1; - copy_dst1 = copy_src1 + (sr1->length + 1); - - copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1]; - copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1]; - copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1]; - copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1]; - copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1]; - - for (i = copy_len_u64s1 - 1; i >= 0; i--) - { - copy_dst1[i] = copy_src1[i]; - } - - 
sr1 = 0; - } - - do_trace1: - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b1, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b1); - tr->next_index = next1; - tr->sr_valid = sr1 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); - } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, - to_next, n_left_to_next, - bi0, bi1, next0, next1); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 bi0; - vlib_buffer_t *b0; - ip6_header_t *ip0 = 0; - ip6_sr_header_t *sr0; - ip6_address_t *new_dst0; - u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; - - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); - - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - sr0 = - (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) - ext_hdr); - } - if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = - node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; - goto do_trace; - } - - /* Out of segments? Turf the packet */ - if (PREDICT_FALSE (sr0->segments_left == 0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; - goto do_trace; - } - - if (PREDICT_FALSE (sm->validate_hmac)) - { - if (sr_validate_hmac (sm, ip0, sr0)) - { - next0 = SR_LOCAL_NEXT_ERROR; - b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; - goto do_trace; - } - } - - next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : next0; - - /* - * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx - */ - if (PREDICT_FALSE (next0 & 0x80000000)) - { - next0 ^= 0xFFFFFFFF; - if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) - b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; - } - else - { - u32 segment_index0; - - segment_index0 = sr0->segments_left - 1; - - /* Rewrite the packet */ - new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); - ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; - ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; - - if (PREDICT_TRUE (sr0->segments_left > 0)) - sr0->segments_left -= 1; - } - - /* End of the path. Clean up the SR header, or not */ - if (PREDICT_FALSE - (sr0->segments_left == 0 && - (sr0->flags & - clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) - { - u64 *copy_dst0, *copy_src0; - u16 new_l0; - u32 copy_len_u64s0 = 0; - int i; - - /* - * Copy the ip6 header right by the (real) length of the - * sr header. - */ - if (PREDICT_FALSE - (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) - { - ip6_hop_by_hop_ext_t *ext_hdr = - (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); - copy_len_u64s0 = - (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; - ext_hdr->next_hdr = sr0->protocol; - } - else - { - ip0->protocol = sr0->protocol; - } - - vlib_buffer_advance (b0, (sr0->length + 1) * 8); - - new_l0 = clib_net_to_host_u16 (ip0->payload_length) - - (sr0->length + 1) * 8; - ip0->payload_length = clib_host_to_net_u16 (new_l0); - - copy_src0 = (u64 *) ip0; - copy_dst0 = copy_src0 + (sr0->length + 1); - copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; - copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; - copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; - copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; - copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - - for (i = copy_len_u64s0 - 1; i >= 0; i--) - { - copy_dst0[i] = copy_src0[i]; - } - - 
sr0 = 0; - } - - do_trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - sr_local_trace_t *tr = vlib_add_trace (vm, node, - b0, sizeof (*tr)); - clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, - sizeof (tr->src.as_u8)); - clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, - sizeof (tr->dst.as_u8)); - tr->length = vlib_buffer_length_in_chain (vm, b0); - tr->next_index = next0; - tr->sr_valid = sr0 != 0; - if (tr->sr_valid) - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - vlib_node_increment_counter (vm, sr_local_node.index, - SR_LOCAL_ERROR_PKTS_PROCESSED, - from_frame->n_vectors); - return from_frame->n_vectors; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (sr_local_node, static) = { - .function = sr_local, - .name = "sr-local", - /* Takes a vector of packets. */ - .vector_size = sizeof (u32), - .format_trace = format_sr_local_trace, - - .runtime_data_bytes = 0, - - .n_errors = SR_LOCAL_N_ERROR, - .error_strings = sr_local_error_strings, - - .n_next_nodes = SR_LOCAL_N_NEXT, - .next_nodes = { -#define _(s,n) [SR_LOCAL_NEXT_##s] = n, - foreach_sr_local_next -#undef _ - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (sr_local_node, sr_local) -/* *INDENT-ON* */ - -ip6_sr_main_t * -sr_get_main (vlib_main_t * vm) -{ - vlib_call_init_function (vm, sr_init); - ASSERT (sr_local_node.index); - return &sr_main; -} - -/** - * @brief CLI parser for SR fix destination rewrite node - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -set_ip6_sr_rewrite_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = 128, - }; - u32 fib_index = 0; - u32 fib_id = 0; - u32 adj_index; - ip_adjacency_t 
*adj; - vnet_hw_interface_t *hi; - u32 sw_if_index; - ip6_sr_main_t *sm = &sr_main; - vnet_main_t *vnm = vnet_get_main (); - fib_node_index_t fei; - - if (!unformat (input, "%U", unformat_ip6_address, &pfx.fp_addr.ip6)) - return clib_error_return (0, "ip6 address missing in '%U'", - format_unformat_error, input); - - if (unformat (input, "rx-table-id %d", &fib_id)) - { - fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_id); - if (fib_index == ~0) - return clib_error_return (0, "fib-id %d not found", fib_id); - } - - fei = fib_table_lookup_exact_match (fib_index, &pfx); - - if (FIB_NODE_INDEX_INVALID == fei) - return clib_error_return (0, "no match for %U", - format_ip6_address, &pfx.fp_addr.ip6); - - adj_index = fib_entry_get_adj_for_source (fei, FIB_SOURCE_SR); - - if (ADJ_INDEX_INVALID == adj_index) - return clib_error_return (0, "%U not SR sourced", - format_ip6_address, &pfx.fp_addr.ip6); - - adj = adj_get (adj_index); - - if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) - return clib_error_return (0, "%U unresolved (not a rewrite adj)", - format_ip6_address, &pfx.fp_addr.ip6); - - adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index; - - sw_if_index = adj->rewrite_header.sw_if_index; - hi = vnet_get_sup_hw_interface (vnm, sw_if_index); - adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; - - /* $$$$$ hack... 
steal the interface address index */ - adj->if_address_index = - vlib_node_add_next (vm, sr_fix_dst_addr_node.index, - hi->output_node_index); - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = { - .path = "set ip6 sr rewrite", - .short_help = "set ip6 sr rewrite <ip6-address> [fib-id <id>]", - .function = set_ip6_sr_rewrite_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Register a callback routine to set next0 in sr_local - * - * @param cb void * - */ -void -vnet_register_sr_app_callback (void *cb) -{ - ip6_sr_main_t *sm = &sr_main; - - sm->sr_local_cb = cb; -} - -/** - * @brief Test routine for validation of HMAC - */ -static clib_error_t * -test_sr_hmac_validate_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - - if (unformat (input, "validate on")) - sm->validate_hmac = 1; - else if (unformat (input, "chunk-offset off")) - sm->validate_hmac = 0; - else - return clib_error_return (0, "expected validate on|off in '%U'", - format_unformat_error, input); - - vlib_cli_output (vm, "hmac signature validation %s", - sm->validate_hmac ? "on" : "off"); - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = { - .path = "test sr hmac", - .short_help = "test sr hmac validate [on|off]", - .function = test_sr_hmac_validate_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Add or Delete HMAC key - * - * @param sm ip6_sr_main_t * - * @param key_id u32 - * @param shared_secret u8 * - * @param is_del u8 - * - * @return retval i32 - */ -// $$$ fixme shouldn't return i32 -i32 -sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret, - u8 is_del) -{ - u32 index; - ip6_sr_hmac_key_t *key; - - if (is_del == 0) - { - /* Specific key in use? Fail. 
*/ - if (key_id && vec_len (sm->hmac_keys) > key_id - && sm->hmac_keys[key_id].shared_secret) - return -2; - - index = key_id; - key = find_or_add_shared_secret (sm, shared_secret, &index); - ASSERT (index == key_id); - return 0; - } - - /* delete */ - - if (key_id) /* delete by key ID */ - { - if (vec_len (sm->hmac_keys) <= key_id) - return -3; - - key = sm->hmac_keys + key_id; - - hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); - vec_free (key->shared_secret); - return 0; - } - - index = 0; - key = find_or_add_shared_secret (sm, shared_secret, &index); - hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); - vec_free (key->shared_secret); - return 0; -} - - -static clib_error_t * -sr_hmac_add_del_key_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - u8 is_del = 0; - u32 key_id = 0; - u8 key_id_set = 0; - u8 *shared_secret = 0; - i32 rv; - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "del")) - is_del = 1; - else if (unformat (input, "id %d", &key_id)) - key_id_set = 1; - else if (unformat (input, "key %s", &shared_secret)) - { - /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ - _vec_len (shared_secret) -= 1; - } - else - break; - } - - if (is_del == 0 && shared_secret == 0) - return clib_error_return (0, "shared secret must be set to add a key"); - - if (shared_secret == 0 && key_id_set == 0) - return clib_error_return (0, "shared secret and key id both unset"); - - rv = sr_hmac_add_del_key (sm, key_id, shared_secret, is_del); - - vec_free (shared_secret); - - switch (rv) - { - case 0: - break; - - default: - return clib_error_return (0, "sr_hmac_add_del_key returned %d", rv); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (sr_hmac, static) = { - .path = "sr hmac", - .short_help = "sr hmac [del] id <nn> key <str>", - .function = sr_hmac_add_del_key_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief CLI parser for show HMAC key shared secrets - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -show_sr_hmac_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - ip6_sr_main_t *sm = &sr_main; - int i; - - for (i = 1; i < vec_len (sm->hmac_keys); i++) - { - if (sm->hmac_keys[i].shared_secret) - vlib_cli_output (vm, "[%d]: %v", i, sm->hmac_keys[i].shared_secret); - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (show_sr_hmac, static) = { - .path = "show sr hmac", - .short_help = "show sr hmac", - .function = show_sr_hmac_fn, -}; -/* *INDENT-ON* */ - -/** - * @brief Test for SR debug flag - * - * @param vm vlib_main_t * - * @param input unformat_input_t * - * @param cmd vlib_cli_command_t * - * - * @return error clib_error_t * - */ -static clib_error_t * -test_sr_debug_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) +sr_dpo_unlock (dpo_id_t * dpo) { - ip6_sr_main_t *sm = &sr_main; - - if (unformat (input, "on")) - sm->is_debug = 1; - else if (unformat (input, "off")) - sm->is_debug = 0; - else - return clib_error_return (0, 
"expected on|off in '%U'", - format_unformat_error, input); - - vlib_cli_output (vm, "debug trace now %s", sm->is_debug ? "on" : "off"); - - return 0; } -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (test_sr_debug, static) = { - .path = "test sr debug", - .short_help = "test sr debug on|off", - .function = test_sr_debug_fn, -}; -/* *INDENT-ON* */ - /* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h index 3c50b735..eb781e4b 100644..100755 --- a/src/vnet/sr/sr.h +++ b/src/vnet/sr/sr.h @@ -12,256 +12,283 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + /** * @file - * @brief Segment Routing header + * @brief Segment Routing data structures definitions + * */ + #ifndef included_vnet_sr_h #define included_vnet_sr_h #include <vnet/vnet.h> #include <vnet/sr/sr_packet.h> #include <vnet/ip/ip6_packet.h> +#include <vnet/ethernet/ethernet.h> -#include <openssl/opensslconf.h> #include <stdlib.h> #include <string.h> -#include <openssl/crypto.h> -#include <openssl/sha.h> -#include <openssl/opensslv.h> -#include <openssl/hmac.h> +#define IPv6_DEFAULT_HEADER_LENGTH 40 +#define IPv6_DEFAULT_HOP_LIMIT 64 +#define IPv6_DEFAULT_MAX_MASK_WIDTH 128 -/** - * @brief Segment Route tunnel key - */ -typedef struct -{ - ip6_address_t src; - ip6_address_t dst; -} ip6_sr_tunnel_key_t; +#define SR_BEHAVIOR_END 1 +#define SR_BEHAVIOR_X 2 +#define SR_BEHAVIOR_D_FIRST 3 /* Unused. 
Separator in between regular and D */ +#define SR_BEHAVIOR_DX2 4 +#define SR_BEHAVIOR_DX6 5 +#define SR_BEHAVIOR_DX4 6 +#define SR_BEHAVIOR_DT6 7 +#define SR_BEHAVIOR_DT4 8 +#define SR_BEHAVIOR_LAST 9 /* Must always be the last one */ + +#define SR_STEER_L2 2 +#define SR_STEER_IPV4 4 +#define SR_STEER_IPV6 6 + +#define SR_FUNCTION_SIZE 4 +#define SR_ARGUMENT_SIZE 4 + +#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1 /** - * @brief Segment Route tunnel + * @brief SR Segment List (SID list) */ typedef struct { - /** src, dst address */ - ip6_sr_tunnel_key_t key; - - /** Pptional tunnel name */ - u8 *name; + ip6_address_t *segments; /**< SIDs (key) */ - /** Mask width for FIB entry */ - u32 dst_mask_width; + u32 weight; /**< SID list weight (wECMP / UCMP) */ - /** First hop, to save 1 elt in the segment list */ - ip6_address_t first_hop; + u8 *rewrite; /**< Precomputed rewrite header */ + u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */ - /** RX Fib index */ - u32 rx_fib_index; - /** TX Fib index */ - u32 tx_fib_index; + dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */ + dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */ + dpo_id_t ip4_dpo; /**< DPO for Encaps IPv6 */ +} ip6_sr_sl_t; - /** The actual ip6 SR header */ - u8 *rewrite; +/* SR policy types */ +#define SR_POLICY_TYPE_DEFAULT 0 +#define SR_POLICY_TYPE_SPRAY 1 +/** + * @brief SR Policy + */ +typedef struct +{ + u32 *segments_lists; /**< SID lists indexes (vector) */ - /** Indicates that this tunnel is part of a policy comprising - of multiple tunnels. If == ~0 tunnel is not part of a policy */ - u32 policy_index; + ip6_address_t bsid; /**< BindingSID (key) */ - /** - * The FIB node graph linkage - */ - fib_node_t node; + u8 type; /**< Type (default is 0) */ - /** - * The FIB entry index for the first hop. 
We track this so we - * don't need an extra lookup for it in the data plane - */ - fib_node_index_t fib_entry_index; + /* SR Policy specific DPO */ + /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */ + /* IF Type = SPRAY then Spray DPO with all SID lists */ + dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */ + dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv6 */ + dpo_id_t ip6_dpo; /**< SR Policy specific DPO - IPv4 */ - /** - * This tunnel's sibling index in the children of the FIB entry - */ - u32 sibling_index; + u32 fib_table; /**< FIB table */ - /** - * The DPO contributed by the first-hop FIB entry. - */ - dpo_id_t first_hop_dpo; -} ip6_sr_tunnel_t; + u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */ +} ip6_sr_policy_t; /** - * @brief Shared secret for keyed-hash message authentication code (HMAC). + * @brief SR LocalSID */ typedef struct { - u8 *shared_secret; -} ip6_sr_hmac_key_t; + ip6_address_t localsid; /**< LocalSID IPv6 address */ -/** - * @brief Args required for add/del tunnel. - * - * Else we end up passing a LOT of parameters around. - */ -typedef struct -{ - /** Key (header imposition case) */ - ip6_address_t *src_address; - ip6_address_t *dst_address; - u32 dst_mask_width; - u32 rx_table_id; - u32 tx_table_id; + char end_psp; /**< Combined with End.PSP? 
*/ - /** optional name argument - for referencing SR tunnel/policy by name */ - u8 *name; + u16 behavior; /**< Behavior associated to this localsid */ - /** optional policy name */ - u8 *policy_name; + union + { + u32 sw_if_index; /**< xconnect only */ + u32 vrf_index; /**< vrf only */ + }; - /** segment list, when inserting an ip6 SR header */ - ip6_address_t *segments; + u32 fib_table; /**< FIB table where localsid is registered */ - /** - * "Tag" list, aka segments inserted at the end of the list, - * past last_seg - */ - ip6_address_t *tags; + u32 vlan_index; /**< VLAN tag (not an index) */ - /** Shared secret => generate SHA-256 HMAC security fields */ - u8 *shared_secret; + ip46_address_t next_hop; /**< Next_hop for xconnect usage only */ - /** Flags, e.g. cleanup, policy-list flags */ - u16 flags_net_byte_order; + u32 nh_adj; /**< Next_adj for xconnect usage only */ - /** Delete the tunnnel? */ - u8 is_del; -} ip6_sr_add_del_tunnel_args_t; + void *plugin_mem; /**< Memory to be used by the plugin callback functions */ +} ip6_sr_localsid_t; + +typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid); /** - * @brief Args for creating a policy. - * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. + * @brief SR LocalSID behavior registration */ typedef struct { - /** policy name */ - u8 *name; + u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */ - /** tunnel names */ - u8 **tunnel_names; + u8 *function_name; /**< Function name. (key). */ - /** Delete the policy? */ - u8 is_del; -} ip6_sr_add_del_policy_args_t; + u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */ + + u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */ + + u8 *params_str; /**< Behavior parameters (i.e. 
<oif> <IP46next_hop>) */ + + dpo_type_t dpo; /**< DPO type registration */ + + format_function_t *ls_format; /**< LocalSID format function */ + + unformat_function_t *ls_unformat; /**< LocalSID unformat function */ + + sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/ + + sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */ +} sr_localsid_fn_registration_t; /** - * @brief Segment Routing policy. + * @brief Steering db key * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. + * L3 is IPv4/IPv6 + mask + * L2 is sf_if_index + vlan */ typedef struct { - /** name of policy */ - u8 *name; - - /** vector to SR tunnel index */ - u32 *tunnel_indices; + union + { + struct + { + ip46_address_t prefix; /**< IP address of the prefix */ + u32 mask_width; /**< Mask width of the prefix */ + u32 fib_table; /**< VRF of the prefix */ + } l3; + struct + { + u32 sw_if_index; /**< Incoming software interface */ + } l2; + }; + u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */ +} sr_steering_key_t; -} ip6_sr_policy_t; +typedef struct +{ + sr_steering_key_t classify; /**< Traffic classification */ + u32 sr_policy; /**< SR Policy index */ +} ip6_sr_steering_policy_t; /** - * @brief Args for mapping of multicast address to policy name. - * - * Typically used for multicast replication. - * ie a multicast address can be associated with a policy, - * then replicated across a number of unicast SR tunnels. 
+ * @brief Segment Routing main datastructure */ typedef struct { - /** multicast IP6 address */ - ip6_address_t *multicast_address; + /* ip6-lookup next index for imposition FIB entries */ + u32 ip6_lookup_sr_next_index; - /** name of policy to map to */ - u8 *policy_name; + /* ip6-replicate next index for multicast tunnel */ + u32 ip6_lookup_sr_spray_index; - /** Delete the mapping */ - u8 is_del; + /* IP4-lookup -> SR rewrite next index */ + u32 ip4_lookup_sr_policy_rewrite_encaps_index; + u32 ip4_lookup_sr_policy_rewrite_insert_index; -} ip6_sr_add_del_multicastmap_args_t; + /* IP6-lookup -> SR rewrite next index */ + u32 ip6_lookup_sr_policy_rewrite_encaps_index; + u32 ip6_lookup_sr_policy_rewrite_insert_index; -/** - * @brief Segment Routing state. - */ -typedef struct -{ - /** pool of tunnel instances, sr entry only */ - ip6_sr_tunnel_t *tunnels; + /* L2-input -> SR rewrite next index */ + u32 l2_sr_policy_rewrite_index; + + /* IP6-lookup -> SR LocalSID (SR End processing) index */ + u32 ip6_lookup_sr_localsid_index; - /** find an sr "tunnel" by its outer-IP src/dst */ - uword *tunnel_index_by_key; + /* SR SID lists */ + ip6_sr_sl_t *sid_lists; - /** find an sr "tunnel" by its name */ - uword *tunnel_index_by_name; + /* SR policies */ + ip6_sr_policy_t *sr_policies; - /** policy pool */ - ip6_sr_policy_t *policies; + /* Find an SR policy by its BindingSID */ + ip6_address_t *sr_policy_index_by_key; - /** find a policy by name */ - uword *policy_index_by_policy_name; + /* Pool of SR localsid instances */ + ip6_sr_localsid_t *localsids; - /** multicast address to policy mapping */ - uword *policy_index_by_multicast_address; + /* Find a SR localsid instance based on its functionID */ + ip6_address_t *localsids_index_by_key; - /** hmac key id by shared secret */ - uword *hmac_key_by_shared_secret; + /* Pool of SR steer policies instances */ + ip6_sr_steering_policy_t *steer_policies; - /** ip6-rewrite next index for reinstalling the original dst address */ - u32 
ip6_rewrite_sr_next_index; + /* Find a steer policy based on its classifier */ + sr_steering_key_t *steer_policies_index_by_key; - /** application API callback */ - void *sr_local_cb; + /* L2 steering ifaces - sr_policies */ + u32 *sw_iface_sr_policies; - /** validate hmac keys */ - u8 validate_hmac; + /* Spray DPO */ + dpo_type_t sr_pr_spray_dpo_type; - /** pool of hmac keys */ - ip6_sr_hmac_key_t *hmac_keys; + /* Plugin functions */ + sr_localsid_fn_registration_t *plugin_functions; - /** Openssl var */ - EVP_MD *md; - /** Openssl var */ - HMAC_CTX *hmac_ctx; + /* Find plugin function by name */ + uword *plugin_functions_by_key; - /** enable debug spew */ - u8 is_debug; + /* Counters */ + vlib_combined_counter_main_t sr_ls_valid_counters; + vlib_combined_counter_main_t sr_ls_invalid_counters; - /** convenience */ + /* SR Policies FIBs */ + u32 fib_table_ip6; + u32 fib_table_ip4; + + /* convenience */ vlib_main_t *vlib_main; - /** convenience */ vnet_main_t *vnet_main; } ip6_sr_main_t; ip6_sr_main_t sr_main; -format_function_t format_ip6_sr_header; -format_function_t format_ip6_sr_header_with_length; - -vlib_node_registration_t ip6_sr_input_node; - -int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); -int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a); -int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a); - -void vnet_register_sr_app_callback (void *cb); - -void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, - ip6_sr_header_t * sr); +extern vlib_node_registration_t sr_policy_rewrite_encaps_node; +extern vlib_node_registration_t sr_policy_rewrite_insert_node; +extern vlib_node_registration_t sr_localsid_node; +extern vlib_node_registration_t sr_localsid_d_node; + +void sr_dpo_lock (dpo_id_t * dpo); +void sr_dpo_unlock (dpo_id_t * dpo); + +int sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, + format_function_t * ls_format, + 
unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn); + +int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap); +int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight); +int sr_policy_del (ip6_address_t * bsid, u32 index); + +int sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, + u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr, + void *ls_plugin_mem); + +int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type); #endif /* included_vnet_sr_h */ diff --git a/src/vnet/sr/sr_api.c b/src/vnet/sr/sr_api.c index bab0fc84..f4e1c346 100644 --- a/src/vnet/sr/sr_api.c +++ b/src/vnet/sr/sr_api.c @@ -18,6 +18,7 @@ */ #include <vnet/vnet.h> +#include <vnet/sr/sr.h> #include <vlibmemory/api.h> #include <vnet/interface.h> @@ -43,159 +44,129 @@ #include <vlibapi/api_helper_macros.h> #define foreach_vpe_api_msg \ -_(SR_MULTICAST_MAP_ADD_DEL, sr_multicast_map_add_del) - -static void vl_api_sr_tunnel_add_del_t_handler - (vl_api_sr_tunnel_add_del_t * mp) +_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \ +_(SR_POLICY_DEL, sr_policy_del) \ +_(SR_STEERING_ADD_DEL, sr_steering_add_del) +//_(SR_LOCALSIDS, sr_localsids_dump) +//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump) + +static void vl_api_sr_localsid_add_del_t_handler + (vl_api_sr_localsid_add_del_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_tunnel_args_t _a, *a = &_a; + vl_api_sr_localsid_add_del_reply_t *rmp; int rv = 0; - vl_api_sr_tunnel_add_del_reply_t *rmp; - ip6_address_t *segments = 0, *seg; - ip6_address_t *tags = 0, *tag; - ip6_address_t *this_address; - int i; +/* + * int sr_cli_localsid (char 
is_del, ip6_address_t *localsid_addr, + * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table, + * ip46_address_t *nh_addr, void *ls_plugin_mem) + */ + rv = sr_cli_localsid (mp->is_del, + (ip6_address_t *) & mp->localsid_addr, + mp->end_psp, + mp->behavior, + ntohl (mp->sw_if_index), + ntohl (mp->vlan_index), + ntohl (mp->fib_table), + (ip46_address_t *) & mp->nh_addr, NULL); + + REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY); +} - if (mp->n_segments == 0) - { - rv = -11; - goto out; - } +static void +vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp) +{ + vl_api_sr_policy_add_reply_t *rmp; + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; - memset (a, 0, sizeof (*a)); - a->src_address = (ip6_address_t *) & mp->src_address; - a->dst_address = (ip6_address_t *) & mp->dst_address; - a->dst_mask_width = mp->dst_mask_width; - a->flags_net_byte_order = mp->flags_net_byte_order; - a->is_del = (mp->is_add == 0); - a->rx_table_id = ntohl (mp->outer_vrf_id); - a->tx_table_id = ntohl (mp->inner_vrf_id); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - a->name = 0; - - a->policy_name = format (0, "%s", mp->policy_name); - if (!(vec_len (a->policy_name))) - a->policy_name = 0; - - /* Yank segments and tags out of the API message */ - this_address = (ip6_address_t *) mp->segs_and_tags; + int i; for (i = 0; i < mp->n_segments; i++) { vec_add2 (segments, seg, 1); clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); this_address++; } - for (i = 0; i < mp->n_tags; i++) - { - vec_add2 (tags, tag, 1); - clib_memcpy (tag->as_u8, this_address->as_u8, sizeof (*this_address)); - this_address++; - } - a->segments = segments; - a->tags = tags; - - rv = ip6_sr_add_del_tunnel (a); - -out: +/* + * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments, + * u32 weight, u8 behavior, u32 fib_table, u8 is_encap) + */ + int rv = 0; + rv = sr_policy_add ((ip6_address_t *) 
& mp->bsid_addr, + segments, + ntohl (mp->weight), + mp->type, ntohl (mp->fib_table), mp->is_encap); - REPLY_MACRO (VL_API_SR_TUNNEL_ADD_DEL_REPLY); -#endif + REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY); } -static void vl_api_sr_policy_add_del_t_handler - (vl_api_sr_policy_add_del_t * mp) +static void +vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_policy_args_t _a, *a = &_a; - int rv = 0; - vl_api_sr_policy_add_del_reply_t *rmp; - int i; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->name = format (0, "%s", mp->name); - if (!(vec_len (a->name))) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } - - if (!(mp->tunnel_names[0])) - { - rv = VNET_API_ERROR_NO_SUCH_NODE2; - goto out; - } + vl_api_sr_policy_mod_reply_t *rmp; - // start deserializing tunnel_names - int num_tunnels = mp->tunnel_names[0]; //number of tunnels - u8 *deser_tun_names = mp->tunnel_names; - deser_tun_names += 1; //moving along - - u8 *tun_name = 0; - int tun_name_len = 0; + ip6_address_t *segments = 0, *seg; + ip6_address_t *this_address = (ip6_address_t *) mp->segments; - for (i = 0; i < num_tunnels; i++) + int i; + for (i = 0; i < mp->n_segments; i++) { - tun_name_len = *deser_tun_names; - deser_tun_names += 1; - vec_resize (tun_name, tun_name_len); - memcpy (tun_name, deser_tun_names, tun_name_len); - vec_add1 (a->tunnel_names, tun_name); - deser_tun_names += tun_name_len; - tun_name = 0; + vec_add2 (segments, seg, 1); + clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address)); + this_address++; } - rv = ip6_sr_add_del_policy (a); - -out: + int rv = 0; +/* + * int + * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table, + * u8 operation, ip6_address_t *segments, u32 sl_index, + * u32 weight, u8 is_encap) + */ + rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->fib_table), + mp->operation, + segments, ntohl 
(mp->sl_index), ntohl (mp->weight)); - REPLY_MACRO (VL_API_SR_POLICY_ADD_DEL_REPLY); -#endif + REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY); } -static void vl_api_sr_multicast_map_add_del_t_handler - (vl_api_sr_multicast_map_add_del_t * mp) +static void +vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp) { -#if IP6SR == 0 - clib_warning ("unimplemented"); -#else - ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; + vl_api_sr_policy_del_reply_t *rmp; int rv = 0; - vl_api_sr_multicast_map_add_del_reply_t *rmp; - - memset (a, 0, sizeof (*a)); - a->is_del = (mp->is_add == 0); - - a->multicast_address = (ip6_address_t *) & mp->multicast_address; - a->policy_name = format (0, "%s", mp->policy_name); - - if (a->multicast_address == 0) - { - rv = -1; - goto out; - } - - if (!(a->policy_name)) - { - rv = -2; - goto out; - } - - rv = ip6_sr_add_del_multicastmap (a); +/* + * int + * sr_policy_del (ip6_address_t *bsid, u32 index) + */ + rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index)); -out: + REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY); +} - REPLY_MACRO (VL_API_SR_MULTICAST_MAP_ADD_DEL_REPLY); -#endif +static void vl_api_sr_steering_add_del_t_handler + (vl_api_sr_steering_add_del_t * mp) +{ + vl_api_sr_steering_add_del_reply_t *rmp; + int rv = 0; +/* + * int + * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index, + * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index, + * u8 traffic_type) + */ + rv = sr_steering_policy (mp->is_del, + (ip6_address_t *) & mp->bsid_addr, + ntohl (mp->sr_policy_index), + ntohl (mp->table_id), + (ip46_address_t *) & mp->prefix_addr, + ntohl (mp->mask_width), + ntohl (mp->sw_if_index), mp->traffic_type); + + REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY); } /* @@ -233,27 +204,26 @@ sr_api_hookup (vlib_main_t * vm) #undef _ /* - * Manually register the sr tunnel add del msg, so we trace + * Manually register the sr policy add msg, so we trace * enough bytes to capture a 
typical segment list */ - vl_msg_api_set_handlers (VL_API_SR_TUNNEL_ADD_DEL, - "sr_tunnel_add_del", - vl_api_sr_tunnel_add_del_t_handler, + vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD, + "sr_policy_add", + vl_api_sr_policy_add_t_handler, vl_noop_handler, - vl_api_sr_tunnel_add_del_t_endian, - vl_api_sr_tunnel_add_del_t_print, 256, 1); - + vl_api_sr_policy_add_t_endian, + vl_api_sr_policy_add_t_print, 256, 1); /* - * Manually register the sr policy add del msg, so we trace - * enough bytes to capture a typical tunnel name list + * Manually register the sr policy mod msg, so we trace + * enough bytes to capture a typical segment list */ - vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD_DEL, - "sr_policy_add_del", - vl_api_sr_policy_add_del_t_handler, + vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD, + "sr_policy_mod", + vl_api_sr_policy_mod_t_handler, vl_noop_handler, - vl_api_sr_policy_add_del_t_endian, - vl_api_sr_policy_add_del_t_print, 256, 1); + vl_api_sr_policy_mod_t_endian, + vl_api_sr_policy_mod_t_print, 256, 1); /* * Set up the (msg_name, crc, message-id) table diff --git a/src/vnet/sr/sr_doc.md b/src/vnet/sr/sr_doc.md new file mode 100644 index 00000000..a7220630 --- /dev/null +++ b/src/vnet/sr/sr_doc.md @@ -0,0 +1,161 @@ +# SRv6: Segment Routing for IPv6 {#sr_doc} + +This is a memo intended to contain documentation of the VPP SRv6 implementation +Everything that is not directly obvious should come here. +For any feedback on content that should be explained please mailto:pcamaril@cisco.com + +## Segment Routing + +Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior. 
+ +Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era. + +Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements. + +Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design. + +Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network. + +**The implementation of Segment Routing in VPP only covers the IPv6 data plane (SRv6).** + +## Segment Routing terminology + +* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05) +* SegmentID (SID): is an IPv6 address. +* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse. 
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights. +* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed. +* BindingSID: a BindingSID is a SID (only one) associated one-to-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to that packet. + +## Creating an SR LocalSID + +A local SID is associated with a Segment Routing behavior -or function- on the current node. + +The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA. + +A local END SID is instantiated using the following CLI: + + sr localsid (del) address XX::YY behavior end + +This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
+ +Other examples of local SIDs are the following: + + sr localsid (del) address XX::YY behavior end (psp) + sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a (psp) + sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a + sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1 + sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0 + sr localsid (del) address XX::YY behavior end.dt6 5 + sr localsid (del) address XX::YY behavior end.dt4 5 + +Note that all of these behaviors match the specifications in **TODO REF NET PGM**. Please refer to this document for a detailed description of each behavior. + +Help on the available local SID behaviors and their usage can be obtained with: + + help sr localsid + +Alternatively they can be obtained using: + + show sr localsids behavior + +The difference between those two commands is that the first one will only display the SR LocalSID behaviors that are built into VPP, while the latter will display those behaviors plus the ones added with the SR LocalSID Development Framework. + + +VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
+ +The contents of the 'My LocalSID Table' are shown with: + + vpp# show sr localsid + SRv6 - My LocalSID Table: + ========================= + Address: c3::1 + Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect) + Iface: GigabitEthernet0/5/0 + Next hop: b:c3::b + Good traffic: [51277 packets : 5332808 bytes] + Bad traffic: [0 packets : 0 bytes] + -------------------- + +The traffic counters can be reset with: + + vpp# clear sr localsid counters + +## Creating a SR Policy + +An SR Policy is defined by a Binding SID and a weighted set of Segment Lists. + +A new SR policy is created with a first SID list using: + + sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3) + +* The weight parameter is only used if more than one SID list is associated with the policy. +* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed. + +An SR policy is deleted with: + + sr policy del bsid 2001::1 + sr policy del index 1 + +The existing SR policies are listed with: + + show sr policies + +### Adding/Removing SID Lists from an SR policy + +An additional SID list is associated with an existing SR policy with: + + sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3) + sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3) + +Conversely, a SID list can be removed from an SR policy with: + + sr policy mod bsid 2001::1 del sl index 1 + sr policy mod index 3 del sl index 1 + +Note that this cannot be used to remove the last SID list of a policy. + +The weight of a SID list can also be modified with: + + sr policy mod bsid 2001::1 mod sl index 1 weight 4 + sr policy mod index 3 mod sl index 1 weight 4 + +### SR Policies: Spray policies + +Spray policies are a specific type of SR policy where the packet is replicated on all the SID lists, rather than load-balanced among them. + +SID list weights are ignored with this type of policy.
+ +A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in: + + sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray + +Spray policies are used for removing multicast state from a network core domain, and instead sending a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node. + +### Encapsulation SR policies + +In case the user decides to create an encapsulation SR policy, an IPv6 Source Address must be specified for the encapsulated traffic. In order to do so the user might use the following command: + + set sr encaps source addr XXXX::YYYY + +## Steering packets into a SR Policy + +To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'. + + sr steer l3 2001::/64 via sr policy index 1 + sr steer l3 2001::/64 via sr policy bsid cafe::1 + sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3 + sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1 + sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1 + +Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will automatically *put the interface into promiscuous mode*. + +## SR LocalSID development framework + +One of the *key* concepts of SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function. + +However, the true way to enable network programmability is to allow any developer to **easily** create their own SRv6 LocalSID function. That is the reason why we have added some API calls such that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code. + +The principle is that the developer only codes the behavior -the graph node-. However all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
+ +For more information please refer to the documentation *SRv6 Sample SR LocalSID plugin*. diff --git a/src/vnet/sr/sr_error.def b/src/vnet/sr/sr_error.def deleted file mode 100644 index 62d021fd..00000000 --- a/src/vnet/sr/sr_error.def +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -sr_error (NONE, "no error") -sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)") -sr_error (NO_MORE_SEGMENTS, "out of SR segment drops") -sr_error (PKTS_PROCESSED, "SR packets processed") -sr_error (APP_CALLBACK, "SR application callback errors") -sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures") diff --git a/src/vnet/sr/sr_fix_dst_error.def b/src/vnet/sr/sr_fix_dst_error.def deleted file mode 100644 index 48fe7af6..00000000 --- a/src/vnet/sr/sr_fix_dst_error.def +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -sr_fix_dst_error (NONE, "no error") -sr_fix_dst_error (NO_SR_HEADER, "no SR header present") -sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments") diff --git a/src/vnet/sr/sr_localsid.c b/src/vnet/sr/sr_localsid.c new file mode 100755 index 00000000..407491ce --- /dev/null +++ b/src/vnet/sr/sr_localsid.c @@ -0,0 +1,1478 @@ +/* + * sr_localsid.c: ipv6 segment routing Endpoint behaviors + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Processing of packets with a SRH + * + * CLI to define new Segment Routing End processing functions. + * Graph node to support such functions. + * + * Each function associates an SRv6 segment (IPv6 address) with an specific + * Segment Routing function. + * + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/sr/sr.h> +#include <vnet/ip/ip.h> +#include <vnet/sr/sr_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/dpo.h> +#include <vnet/adj/adj.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +/** + * @brief Dynamically added SR localsid DPO type + */ +static dpo_type_t sr_localsid_dpo_type; +static dpo_type_t sr_localsid_d_dpo_type; + +/** + * @brief SR localsid add/del + * + * Function to add or delete SR LocalSIDs. 
+ * + * @param is_del Boolean of whether its a delete instruction + * @param localsid_addr IPv6 address of the localsid + * @param is_decap Boolean of whether decapsulation is allowed in this function + * @param behavior Type of behavior (function) for this localsid + * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table. + * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag. + * @param fib_table FIB table in which we should install the localsid entry + * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect. + * + * @return 0 on success, error otherwise. + */ +int +sr_cli_localsid (char is_del, ip6_address_t * localsid_addr, + char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, + u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + int rv; + + ip6_sr_localsid_t *ls = 0; + ip6_address_t *key_copy; + + dpo_id_t dpo = DPO_INVALID; + + /* Search for the item */ + p = hash_get_mem (sm->localsids_index_by_key, localsid_addr); + + if (p) + { + if (is_del) + { + hash_pair_t *hp; + /* Retrieve localsid */ + ls = pool_elt_at_index (sm->localsids, p[0]); + /* Delete FIB entry */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *localsid_addr, + } + }; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, fib_table), &pfx, + FIB_SOURCE_SR); + + /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */ + if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6 + || ls->behavior == SR_BEHAVIOR_DX4) + adj_unlock (ls->nh_adj); + + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + /* Callback plugin removal function */ + rv = plugin->removal (ls); + } + + /* Delete localsid registry */ + pool_put (sm->localsids, ls); + hp = hash_get_pair 
(sm->localsids_index_by_key, localsid_addr); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->localsids_index_by_key, localsid_addr); + vec_free (key_copy); + return 1; + } + else /* create with function already existing; complain */ + return -1; + } + else + /* delete; localsid does not exist; complain */ + if (is_del) + return -2; + + /* Check whether there exists a FIB entry with such address */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + }; + + pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0]; + pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1]; + + /* Lookup the FIB index associated to the table id provided */ + u32 fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_table); + if (fib_index == ~0) + return -3; + + /* Lookup the localsid in such FIB table */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -4; //There is an entry for such address (the localsid addr) + + /* Create a new localsid registry */ + pool_get (sm->localsids, ls); + memset (ls, 0, sizeof (*ls)); + + clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t)); + ls->end_psp = end_psp; + ls->behavior = behavior; + ls->nh_adj = (u32) ~ 0; + ls->fib_table = fib_table; + switch (behavior) + { + case SR_BEHAVIOR_END: + break; + case SR_BEHAVIOR_X: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DX4: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t)); + break; + case SR_BEHAVIOR_DX6: + ls->sw_if_index = sw_if_index; + clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t)); + break; + case SR_BEHAVIOR_DT6: + ls->vrf_index = sw_if_index; + break; + case SR_BEHAVIOR_DX2: + ls->sw_if_index = sw_if_index; + ls->vlan_index = vlan_index; + break; + } + + /* Figure out the adjacency magic for Xconnect variants */ + if 
(ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4 + || ls->behavior == SR_BEHAVIOR_DX6) + { + adj_index_t nh_adj_index = ADJ_INDEX_INVALID; + + /* Retrieve the adjacency corresponding to the (OIF, next_hop) */ + if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6, + nh_addr, sw_if_index); + + else if (ls->behavior == SR_BEHAVIOR_DX4) + nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4, + nh_addr, sw_if_index); + + /* Check for ADJ creation error. If so panic */ + if (nh_adj_index == ADJ_INDEX_INVALID) + { + pool_put (sm->localsids, ls); + return -5; + } + + ls->nh_adj = nh_adj_index; + } + + /* Set DPO */ + if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X) + dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior > SR_BEHAVIOR_D_FIRST + && ls->behavior < SR_BEHAVIOR_LAST) + dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids); + else if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = 0; + plugin = pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + /* Copy the unformat memory result */ + ls->plugin_mem = ls_plugin_mem; + /* Callback plugin creation function */ + rv = plugin->creation (ls); + if (rv) + { + pool_put (sm->localsids, ls); + return -6; + } + dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids); + } + + /* Set hash key for searching localsid by address */ + key_copy = vec_new (ip6_address_t, 1); + clib_memcpy (key_copy, localsid_addr, sizeof (ip6_address_t)); + hash_set_mem (sm->localsids_index_by_key, key_copy, ls - sm->localsids); + + fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + /* Set counter to zero */ + vlib_validate_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + 
vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + vlib_zero_combined_counter (&(sm->sr_ls_valid_counters), + ls - sm->localsids); + vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters), + ls - sm->localsids); + + return 0; +} + +/** + * @brief SR LocalSID CLI function. + * + * @see sr_cli_localsid + */ +static clib_error_t * +sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0; + int is_del = 0; + int end_psp = 0; + ip6_address_t resulting_address; + ip46_address_t next_hop; + char address_set = 0; + char behavior = 0; + void *ls_plugin_mem = 0; + + int rv; + + memset (&resulting_address, 0, sizeof (ip6_address_t)); + ip46_address_reset (&next_hop); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!address_set + && unformat (input, "address %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (!address_set + && unformat (input, "addr %U", unformat_ip6_address, + &resulting_address)) + address_set = 1; + else if (unformat (input, "fib-table %u", &fib_index)); + else if (vlan_index == (u32) ~ 0 + && unformat (input, "vlan %u", &vlan_index)); + else if (!behavior && unformat (input, "behavior")) + { + if (unformat (input, "end.x %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_X; + else if (unformat (input, "end.dx6 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &next_hop.ip6)) + behavior = SR_BEHAVIOR_DX6; + else if (unformat (input, "end.dx4 %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip4_address, &next_hop.ip4)) + behavior = SR_BEHAVIOR_DX4; + else if (unformat (input, "end.dx2 %U", + 
unformat_vnet_sw_interface, vnm, &sw_if_index)) + behavior = SR_BEHAVIOR_DX2; + else if (unformat (input, "end.dt6 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT6; + else if (unformat (input, "end.dt4 %u", &sw_if_index)) + behavior = SR_BEHAVIOR_DT4; + else + { + /* Loop over all the plugin behavior format functions */ + sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0; + sr_localsid_fn_registration_t **plugin_it = 0; + + /* Create a vector out of the plugin pool as recommended */ + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + { + vec_add1 (vec_plugins, plugin); + }); + /* *INDENT-ON* */ + + vec_foreach (plugin_it, vec_plugins) + { + if (unformat + (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem)) + { + behavior = (*plugin_it)->sr_localsid_function_number; + break; + } + } + } + + if (!behavior) + { + if (unformat (input, "end")) + behavior = SR_BEHAVIOR_END; + else + break; + } + } + else if (!end_psp && unformat (input, "psp")) + end_psp = 1; + else + break; + } + + if (!behavior && end_psp) + behavior = SR_BEHAVIOR_END; + + if (!address_set) + return clib_error_return (0, + "Error: SRv6 LocalSID address is mandatory."); + if (!is_del && !behavior) + return clib_error_return (0, + "Error: SRv6 LocalSID behavior is mandatory."); + if (vlan_index != (u32) ~ 0) + return clib_error_return (0, + "Error: SRv6 End.DX2 with rewrite VLAN tag not supported by now."); + if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X)) + return clib_error_return (0, + "Error: SRv6 PSP only compatible with End and End.X"); + + rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior, + sw_if_index, vlan_index, fib_index, &next_hop, + ls_plugin_mem); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, + "Identical localsid already exists. Requested localsid not created."); + case -2: + return clib_error_return (0, + "The requested localsid could not be deleted. 
SR localsid not found"); + case -3: + return clib_error_return (0, "FIB table %u does not exist", fib_index); + case -4: + return clib_error_return (0, "There is already one FIB entry for the" + "requested localsid non segment routing related"); + case -5: + return clib_error_return (0, + "Could not create ARP/ND entry for such next_hop. Internal error."); + case -6: + return clib_error_return (0, + "Error on the plugin based localsid creation."); + default: + return clib_error_return (0, "BUG: sr localsid returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_localsid_command, static) = { + .path = "sr localsid", + .short_help = "sr localsid (del) address XX:XX::YY:YY" + "(fib-table 8) behavior STRING", + .long_help = + "Create SR LocalSID and binds it to a particular behavior\n" + "Arguments:\n" + "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n" + "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n" + "\tbehavior STRING Specifies the behavior\n" + "\n\tBehaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3 cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: '<iface>'\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: '<iface> <ip4_next_hop>'\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: '<ip6_fib_table>'\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: '<ip4_fib_table>'\n", + .function = sr_cli_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI function to 'show' all SR LocalSIDs on console. 
+ */ +static clib_error_t * +show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_sr_main_t *sm = &sr_main; + ip6_sr_localsid_t **localsid_list = 0; + ip6_sr_localsid_t *ls; + int i; + + vlib_cli_output (vm, "SRv6 - My LocalSID Table:"); + vlib_cli_output (vm, "========================="); + /* *INDENT-OFF* */ + pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); })); + /* *INDENT-ON* */ + for (i = 0; i < vec_len (localsid_list); i++) + { + ls = localsid_list[i]; + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd", + format_ip6_address, &ls->localsid); + break; + case SR_BEHAVIOR_X: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip4_address, &ls->next_hop.ip4); + break; + case SR_BEHAVIOR_DX6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)" + "\n\tIface: \t%U\n\tNext hop: \t%U", + format_ip6_address, &ls->localsid, + format_vnet_sw_if_index_name, vnm, ls->sw_if_index, + format_ip6_address, &ls->next_hop.ip6); + break; + case SR_BEHAVIOR_DX2: + if (ls->vlan_index == (u32) ~ 0) + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapulation and Layer-2 cross-connect)" + "\n\tIface: \t%U", format_ip6_address, + &ls->localsid, format_vnet_sw_if_index_name, vnm, + ls->sw_if_index); + else + vlib_cli_output (vm, + 
"Unsupported yet. (DX2 with egress VLAN rewrite)"); + break; + case SR_BEHAVIOR_DT6: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)" + "\n\tTable: %u", format_ip6_address, &ls->localsid, + ls->fib_table); + break; + case SR_BEHAVIOR_DT4: + vlib_cli_output (vm, + "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)" + "\n\tTable: \t%u", format_ip6_address, + &ls->localsid, ls->fib_table); + break; + default: + if (ls->behavior >= SR_BEHAVIOR_LAST) + { + sr_localsid_fn_registration_t *plugin = + pool_elt_at_index (sm->plugin_functions, + ls->behavior - SR_BEHAVIOR_LAST); + + vlib_cli_output (vm, "\tAddress: \t%U\n" + "\tBehavior: \t%s (%s)\n\t%U", + format_ip6_address, &ls->localsid, + plugin->keyword_str, plugin->def_str, + plugin->ls_format, ls->plugin_mem); + } + else + //Should never get here... + vlib_cli_output (vm, "Internal error"); + break; + } + if (ls->end_psp) + vlib_cli_output (vm, "\tPSP: \tTrue\n"); + + /* Print counters */ + vlib_counter_t valid, invalid; + vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid); + vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid); + vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n", + valid.packets, valid.bytes); + vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n", + invalid.packets, invalid.bytes); + vlib_cli_output (vm, "--------------------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_command, static) = { + .path = "show sr localsids", + .short_help = "show sr localsids", + .function = show_sr_localsid_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Function to 'clear' ALL SR localsid counters + */ +static clib_error_t * +clear_sr_localsid_counters_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + + 
vlib_clear_combined_counters (&(sm->sr_ls_valid_counters)); + vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters)); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = { + .path = "clear sr localsid counters", + .short_help = "clear sr localsid counters", + .function = clear_sr_localsid_counters_command_fn, +}; +/* *INDENT-ON* */ + +/************************ SR LocalSID graphs node ****************************/ +/** + * @brief SR localsid node trace + */ +typedef struct +{ + u32 localsid_index; + ip6_address_t src, out_dst; + u8 sr[256]; + u8 num_segments; + u8 segments_left; + //With SRv6 header update include flags here. +} sr_localsid_trace_t; + +#define foreach_sr_localsid_error \ +_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \ +_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \ +_(NO_SRH, "(SR-Error) No SR header") \ +_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \ +_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \ +_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest") + +typedef enum +{ +#define _(sym,str) SR_LOCALSID_ERROR_##sym, + foreach_sr_localsid_error +#undef _ + SR_LOCALSID_N_ERROR, +} sr_localsid_error_t; + +static char *sr_localsid_error_strings[] = { +#define _(sym,string) string, + foreach_sr_localsid_error +#undef _ +}; + +#define foreach_sr_localsid_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(IP4_LOOKUP, "ip4-lookup") \ +_(IP6_REWRITE, "ip6-rewrite") \ +_(IP4_REWRITE, "ip4-rewrite") \ +_(INTERFACE_OUTPUT, "interface-output") + +typedef enum +{ +#define _(s,n) SR_LOCALSID_NEXT_##s, + foreach_sr_localsid_next +#undef _ + SR_LOCALSID_N_NEXT, +} sr_localsid_next_t; + +/** + * @brief SR LocalSID graph node trace function + * + * @see sr_localsid + */ +u8 * +format_sr_localsid_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg 
(*args, vlib_node_t *); + ip6_sr_main_t *sm = &sr_main; + sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *); + + ip6_sr_localsid_t *ls = + pool_elt_at_index (sm->localsids, t->localsid_index); + + s = + format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address, + &ls->localsid); + switch (ls->behavior) + { + case SR_BEHAVIOR_END: + s = format (s, "\tBehavior: End\n"); + break; + case SR_BEHAVIOR_DX6: + s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n"); + break; + case SR_BEHAVIOR_DX4: + s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n"); + break; + case SR_BEHAVIOR_X: + s = format (s, "\tBehavior: IPv6 L3 xconnect\n"); + break; + case SR_BEHAVIOR_DT6: + s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n"); + break; + case SR_BEHAVIOR_DT4: + s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n"); + break; + case SR_BEHAVIOR_DX2: + s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n"); + break; + default: + s = format (s, "\tBehavior: defined in plugin\n"); //TODO + break; + } + /* num_segments == 0xFF marks a trace taken after the outer header was popped */ + if (t->num_segments != 0xFF) + { + if (t->num_segments > 0) + { + /* Print the SRH Segments Left value saved in the trace, not the SID count */ + s = format (s, "\tSegments left: %d\n", t->segments_left); + s = format (s, "\tSID list: [in ietf order]"); + int i = 0; + for (i = 0; i < t->num_segments; i++) + { + s = format (s, "\n\t-> %U", format_ip6_address, + (ip6_address_t *) & t->sr[i * + sizeof (ip6_address_t)]); + } + } + } + return s; +} + +/** + * @brief Function doing End processing.
+ */ +static_always_inline void +end_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + } + else + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS]; + } + } + else + { + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH]; + } +} + +/* + * @brief Function doing SRH processing for D* variants + */ +//FixME. I must crosscheck that next_proto matches the localsid +static_always_inline void +end_decaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + /* Compute the size of the IPv6 header with all Ext. headers */ + u8 next_proto; + ip6_ext_header_t *next_ext_header; + u16 total_size = 0; + + next_proto = ip0->protocol; + next_ext_header = (void *) (ip0 + 1); + total_size = sizeof (ip6_header_t); + while (ip6_ext_hdr (next_proto)) + { + total_size += ip6_ext_header_len (next_ext_header); + next_proto = next_ext_header->next_hdr; + next_ext_header = ip6_ext_next_header (next_ext_header); + } + + /* Ensure this is the last segment. Otherwise drop. 
*/ + if (sr0 && sr0->segments_left != 0) + { + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS]; + return; + } + + switch (next_proto) + { + case IP_PROTOCOL_IPV6: + /* Encap-End IPv6. Pop outer IPv6 header. */ + if (ls0->behavior == SR_BEHAVIOR_DX6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT6) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + return; + } + break; + case IP_PROTOCOL_IP_IN_IP: + /* Encap-End IPv4. Pop outer IPv6 header */ + if (ls0->behavior == SR_BEHAVIOR_DX4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP4_REWRITE; + return; + } + else if (ls0->behavior == SR_BEHAVIOR_DT4) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table; + *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP; + return; + } + break; + case IP_PROTOCOL_IP6_NONXT: + /* L2 encaps */ + if (ls0->behavior == SR_BEHAVIOR_DX2) + { + vlib_buffer_advance (b0, total_size); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index; + *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT; + return; + } + break; + } + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER]; + return; +} + +/** + * @brief Function doing End processing with PSP + */ +static_always_inline void +end_psp_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_ext_header_t * prev0, + ip6_sr_header_t * sr0, + ip6_sr_localsid_t * ls0, u32 * next0) +{ + u32 new_l0, sr_len; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left == 1)) + { + ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0]; + ip0->dst_address.as_u64[1] = 
sr0->segments->as_u64[1]; + + /* Remove the SRH taking care of the rest of IPv6 ext header */ + if (prev0) + prev0->next_hdr = sr0->protocol; + else + ip0->protocol = sr0->protocol; + + sr_len = ip6_ext_header_len (sr0); + vlib_buffer_advance (b0, sr_len); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + clib_memcpy ((void *) ip0 + sr_len, ip0, + (void *) sr0 - (void *) ip0); + + if (ls0->behavior == SR_BEHAVIOR_X) + { + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj; + *next0 = SR_LOCALSID_NEXT_IP6_REWRITE; + } + return; + } + } + /* Error. Routing header of type != SR */ + *next0 = SR_LOCALSID_NEXT_ERROR; + b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP]; +} + +/** + * @brief SR LocalSID graph node. Supports all default SR Endpoint variants + */ +static uword +sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+4 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + /* Each packet resolves its localsid from its own buffer metadata */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1); + end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2); +
end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* Find SRH as well as previous header */ + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* SRH processing and End variants */ + end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + 
sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); + tr->segments_left = sr0->segments_left; + } + } + else + tr->num_segments = 0xFF; + } + + /* Increase the counters */ + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_d_node) = { + .function = sr_localsid_d_fn, + .name = "sr-localsid-d", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID graph node. 
Supports all default SR Endpoint variants + */ +static uword +sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + u32 cpu_index = os_get_cpu_number (); + + while (n_left_from > 0) + { + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, *prev2, *prev3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + ip0 = 
vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE); + + /* Lookup the SR End behavior of each packet from its own buffer (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ls1 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + ls2 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + ls3 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (ls1->end_psp) + end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1); + else + end_srh_processing (node, b1, ip1, sr1, ls1, &next1); + + if (ls2->end_psp) + end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2); + else + end_srh_processing (node, b2, ip2, sr2, ls2, &next2); + + if (ls3->end_psp) + end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3); + else + end_srh_processing (node, b3, ip3, sr3, ls3, &next3); + + //TODO: proper trace. + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_increment_combined_counter + (((next1 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls1 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b1)); + + vlib_increment_combined_counter + (((next2 == + SR_LOCALSID_NEXT_ERROR) ?
&(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls2 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b2)); + + vlib_increment_combined_counter + (((next3 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls3 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP; + ip6_sr_localsid_t *ls0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE); + + /* Lookup the SR End behavior based on IP DA (adj) */ + ls0 = + pool_elt_at_index (sm->localsids, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + /* SRH processing */ + if (ls0->end_psp) + end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0); + else + end_srh_processing (node, b0, ip0, sr0, ls0, &next0); + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_localsid_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->num_segments = 0; + tr->localsid_index = ls0 - sm->localsids; + + if (ip0 == vlib_buffer_get_current (b0)) + { + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->out_dst.as_u8)); + if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE + && sr0->type == ROUTING_HEADER_TYPE_SR) + { + clib_memcpy (tr->sr, sr0->segments, sr0->length * 8); + tr->num_segments = + sr0->length * 8 / sizeof (ip6_address_t); 
+ tr->segments_left = sr0->segments_left; + } + } + else + { + tr->num_segments = 0xFF; + } + } + + vlib_increment_combined_counter + (((next0 == + SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) : + &(sm->sr_ls_valid_counters)), cpu_index, ls0 - sm->localsids, 1, + vlib_buffer_length_in_chain (vm, b0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_localsid_node) = { + .function = sr_localsid_fn, + .name = "sr-localsid", + .vector_size = sizeof (u32), + .format_trace = format_sr_localsid_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_LOCALSID_N_ERROR, + .error_strings = sr_localsid_error_strings, + .n_next_nodes = SR_LOCALSID_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCALSID_NEXT_##s] = n, + foreach_sr_localsid_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +static u8 * +format_sr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: localsid_index:[%d]", index)); +} + +const static dpo_vft_t sr_loc_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_dpo, +}; + +const static char *const sr_loc_ip6_nodes[] = { + "sr-localsid", + NULL, +}; + +const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_ip6_nodes, +}; + +const static char *const sr_loc_d_ip6_nodes[] = { + "sr-localsid-d", + NULL, +}; + +const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes, +}; + + +/*************************** SR LocalSID plugins ******************************/ +/** + * @brief SR LocalSID plugin registry + */ +int +sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name, + u8 * keyword_str, u8 * def_str, + u8 * params_str, dpo_type_t * dpo, 
+ format_function_t * ls_format, + unformat_function_t * ls_unformat, + sr_plugin_callback_t * creation_fn, + sr_plugin_callback_t * removal_fn) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + + sr_localsid_fn_registration_t *plugin; + /* Strings owned by a registration that is being replaced (if any) */ + u8 *old_function_name = 0, *old_keyword_str = 0; + u8 *old_def_str = 0, *old_params_str = 0; + + /* Did this function exist? If so update it */ + p = hash_get_mem (sm->plugin_functions_by_key, fn_name); + if (p) + { + plugin = pool_elt_at_index (sm->plugin_functions, p[0]); + /* Remember the old strings: the memset below would otherwise leak them */ + old_function_name = plugin->function_name; + old_keyword_str = plugin->keyword_str; + old_def_str = plugin->def_str; + old_params_str = plugin->params_str; + } + /* Else create a new one and set hash key */ + else + { + pool_get (sm->plugin_functions, plugin); + hash_set_mem (sm->plugin_functions_by_key, fn_name, + plugin - sm->plugin_functions); + } + + memset (plugin, 0, sizeof (*plugin)); + + plugin->sr_localsid_function_number = (plugin - sm->plugin_functions); + plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST; + plugin->ls_format = ls_format; + plugin->ls_unformat = ls_unformat; + plugin->creation = creation_fn; + plugin->removal = removal_fn; + clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t)); + plugin->function_name = format (0, "%s%c", fn_name, 0); + plugin->keyword_str = format (0, "%s%c", keyword_str, 0); + plugin->def_str = format (0, "%s%c", def_str, 0); + plugin->params_str = format (0, "%s%c", params_str, 0); + + /* Free the replaced strings only after the new copies exist, in case the + caller passed in pointers to the old ones */ + vec_free (old_function_name); + vec_free (old_keyword_str); + vec_free (old_def_str); + vec_free (old_params_str); + + return plugin->sr_localsid_function_number; +} + +/** + * @brief CLI function to 'show' all available SR LocalSID behaviors + */ +static clib_error_t * +show_sr_localsid_behaviors_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + sr_localsid_fn_registration_t *plugin; + sr_localsid_fn_registration_t **plugins_vec = 0; + int i; + + vlib_cli_output (vm, + "SR LocalSIDs behaviors:\n-----------------------\n\n"); + + /* *INDENT-OFF* */ + pool_foreach (plugin, sm->plugin_functions, + ({ vec_add1 (plugins_vec, plugin); })); + /* *INDENT-ON* */ + + /* Print static behaviors */ + vlib_cli_output (vm, "Default behaviors:\n" + "\tEnd\t-> Endpoint.\n" + "\tEnd.X\t-> Endpoint with decapsulation and Layer-3
cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n" + "\t\tParameters: '<iface>'\n" + "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n" + "\t\tParameters: '<iface> <ip6_next_hop>'\n" + "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n" + "\t\tParameters: '<iface> <ip4_next_hop>'\n" + "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n" + "\t\tParameters: '<ip6_fib_table>'\n" + "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n" + "\t\tParameters: '<ip4_fib_table>'\n"); + vlib_cli_output (vm, "Plugin behaviors:\n"); + for (i = 0; i < vec_len (plugins_vec); i++) + { + plugin = plugins_vec[i]; + vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str, + plugin->def_str); + vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str); + } + /* Release the temporary vector of plugin pointers; the plugins themselves + stay in the pool */ + vec_free (plugins_vec); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = { + .path = "show sr localsids behaviors", + .short_help = "show sr localsids behaviors", + .function = show_sr_localsid_behaviors_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief SR LocalSID initialization + */ +clib_error_t * +sr_localsids_init (vlib_main_t * vm) +{ + /* Init memory for function keys */ + ip6_sr_main_t *sm = &sr_main; + sm->localsids_index_by_key = + hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); + /* Init SR behaviors DPO type */ + sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes); + /* Init SR behaviors DPO type */ + sr_localsid_d_dpo_type = + dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes); + /* Init memory for localsid plugins */ + sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword)); + return 0; +} + +VLIB_INIT_FUNCTION (sr_localsids_init); +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git
a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h index 179b94c2..7af4ad4d 100644..100755 --- a/src/vnet/sr/sr_packet.h +++ b/src/vnet/sr/sr_packet.h @@ -20,60 +20,36 @@ * limitations under the License. */ -/** - * @file - * @brief The Segment Routing Header (SRH). - * - * The Segment Routing Header (SRH) is defined in the diagram below. - * - * - * 0 1 2 3 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Next Header | Hdr Ext Len | Routing Type | Segments Left | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | First Segment | Flags | HMAC Key ID | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[0] (128 bits ipv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | | - * ... - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Segment List[n] (128 bits ipv6 address) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[0] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[1] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | Policy List[2] (optional) | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | | - * | | - * | | - * | HMAC (256 bits) | - * | (optional) | - * | | - * | | - * | | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +/* + * The Segment Routing Header (SRH) is defined as follows: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Next Header | Hdr Ext Len | Routing Type | Segments Left | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | First Segment | Flags | RESERVED | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[0] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * ... + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[n] (128 bits IPv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * // // + * // Optional Type Length Value objects (variable) // + * // // + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * where: * @@ -87,57 +63,39 @@ * * o Segments Left. Defined in [RFC2460], it contains the index, in * the Segment List, of the next segment to inspect. Segments Left - * is decremented at each segment and it is used as an index in the - * segment list. - * - * o First Segment: offset in the SRH, not including the first 8 octets - * and expressed in 16-octet units, pointing to the last element of - * the segment list, which is in fact the first segment of the - * segment routing path. - * - * o Flags: 16 bits of flags. Following flags are defined: - * - * 1 - * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |C|P|R|R| Policy Flags | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * C-flag: Clean-up flag. Set when the SRH has to be removed from - * the packet when packet reaches the last segment. - * - * P-flag: Protected flag. Set when the packet has been rerouted - * through FRR mechanism by a SR endpoint node. See Section 6.3 - * for more details. - * - * R-flags. Reserved and for future use. - * - * Policy Flags. Define the type of the IPv6 addresses encoded - * into the Policy List (see below). The following have been - * defined: + * is decremented at each segment. 
* - * Bits 4-6: determine the type of the first element after the - * segment list. + * o First Segment: contains the index, in the Segment List, of the + * first segment of the path which is in fact the last element of the + * Segment List. * - * Bits 7-9: determine the type of the second element. + * o Flags: 8 bits of flags. Following flags are defined: * - * Bits 10-12: determine the type of the third element. + * 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |U|P|O|A|H| U | + * +-+-+-+-+-+-+-+-+ * - * Bits 13-15: determine the type of the fourth element. + * U: Unused and for future use. SHOULD be unset on transmission + * and MUST be ignored on receipt. * - * The following values are used for the type: + * P-flag: Protected flag. Set when the packet has been rerouted + * through FRR mechanism by an SR endpoint node. * - * 0x0: Not present. If value is set to 0x0, it means the - * element represented by these bits is not present. + * O-flag: OAM flag. When set, it indicates that this packet is + * an operations and management (OAM) packet. * - * 0x1: SR Ingress. + * A-flag: Alert flag. If present, it means important Type Length + * Value (TLV) objects are present. See Section 3.1 for details + * on TLVs objects. * - * 0x2: SR Egress. + * H-flag: HMAC flag. If set, the HMAC TLV is present and is + * encoded as the last TLV of the SRH. In other words, the last + * 36 octets of the SRH represent the HMAC information. See + * Section 3.1.5 for details on the HMAC TLV. * - * 0x3: Original Source Address. - * - * o HMAC Key ID and HMAC field, and their use are defined in - * [I-D.vyncke-6man-segment-routing-security]. + * o RESERVED: SHOULD be unset on transmission and MUST be ignored on + * receipt. * * o Segment List[n]: 128 bit IPv6 addresses representing the nth * segment in the Segment List. The Segment List is encoded starting @@ -147,23 +105,8 @@ * contains the first segment of the path. 
The index contained in * "Segments Left" identifies the current active segment. * - * o Policy List. Optional addresses representing specific nodes in - * the SR path such as: - * - * SR Ingress: a 128 bit generic identifier representing the - * ingress in the SR domain (i.e.: it needs not to be a valid IPv6 - * address). - * - * SR Egress: a 128 bit generic identifier representing the egress - * in the SR domain (i.e.: it needs not to be a valid IPv6 - * address). - * - * Original Source Address: IPv6 address originally present in the - * SA field of the packet. + * o Type Length Value (TLV) are described in Section 3.1. * - * The segments in the Policy List are encoded after the segment list - * and they are optional. If none are in the SRH, all bits of the - * Policy List Flags MUST be set to 0x0. */ #ifndef IPPROTO_IPV6_ROUTE @@ -171,81 +114,46 @@ #endif #define ROUTING_HEADER_TYPE_SR 4 -/** - @brief SR header struct. -*/ + typedef struct { - /** Protocol for next header. */ + /* Protocol for next header. */ u8 protocol; - - /** + /* * Length of routing header in 8 octet units, * not including the first 8 octets */ u8 length; - /** Type of routing header; type 4 = segement routing */ + /* Type of routing header; type 4 = segement routing */ u8 type; - /** Next segment in the segment list */ + /* Next segment in the segment list */ u8 segments_left; - /** - * Policy list pointer: offset in the SRH of the policy - * list - in 16-octet units - not including the first 8 octets. 
- */ + /* Pointer to the first segment in the header */ u8 first_segment; - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_RESERVED (0x3000) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2) - /** Flag bits */ -#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3) - /** values 0x4 - 0x7 are reserved */ - u16 flags; - u8 hmac_key; + /* Flag bits, laid out as |U|P|O|A|H| U | with bit 0 the most significant */ +#define IP6_SR_HEADER_FLAG_PROTECTED (0x40) +#define IP6_SR_HEADER_FLAG_OAM (0x20) +#define IP6_SR_HEADER_FLAG_ALERT (0x10) +#define IP6_SR_HEADER_FLAG_HMAC (0x08) + + /* The remaining bits (0x80 and 0x07) are unused */ + u8 flags; + u16 reserved; - /** The segment + policy list elts */ + /* The segment elts */ ip6_address_t segments[0]; } __attribute__ ((packed)) ip6_sr_header_t; -static inline int -ip6_sr_policy_list_shift_from_index (int pl_index) -{ - return (-3 * pl_index) + 12; -} - -/** pl_index is one-origined */ -static inline int -ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index) -{ - int shift; - - if (pl_index <= 0 || pl_index > 4) - return 0; - - shift = (-3 * pl_index) + 12; - flags_host_byte_order >>= shift; - - return (flags_host_byte_order & 7); -} +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ #endif /* included_vnet_sr_packet_h */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/sr/sr_policy_rewrite.c b/src/vnet/sr/sr_policy_rewrite.c new file mode 100755 index 00000000..1f8bdca5 --- /dev/null +++ b/src/vnet/sr/sr_policy_rewrite.c @@ -0,0 +1,3253 @@ +/* + * sr_policy_rewrite.c: ipv6 sr policy creation + * + * Copyright (c) 2016 Cisco and/or its
affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief SR policy creation and application + * + * Create an SR policy. + * An SR policy can be either of 'default' type or 'spray' type. + * An SR policy has a list of SID lists attached to it. + * In case the SR policy is a default one it will load balance among them. + * An SR policy has an associated BindingSID. + * In case any packet arrives with IPv6 DA == BindingSID then the SR policy + * associated to such BindingSID will be applied to such packet. + * + * SR policies can be applied either by using IPv6 encapsulation or + * SRH insertion. Both methods can be found in this file. + * + * Traffic input usually is IPv6 packets. However, it is possible to have + * IPv4 packets or L2 frames (which are encapsulated into IPv6 with SRH). + * + * This file provides the appropriate VPP graph nodes to do any of these + * methods.
+ * + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/sr/sr.h> +#include <vnet/ip/ip.h> +#include <vnet/sr/sr_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/dpo.h> +#include <vnet/dpo/replicate_dpo.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +/** + * @brief SR policy rewrite trace + */ +typedef struct +{ + ip6_address_t src, dst; +} sr_policy_rewrite_trace_t; + +/* Graph arcs */ +#define foreach_sr_policy_rewrite_next \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(ERROR, "error-drop") + +typedef enum +{ +#define _(s,n) SR_POLICY_REWRITE_NEXT_##s, + foreach_sr_policy_rewrite_next +#undef _ + SR_POLICY_REWRITE_N_NEXT, +} sr_policy_rewrite_next_t; + +/* SR rewrite errors */ +#define foreach_sr_policy_rewrite_error \ +_(INTERNAL_ERROR, "Segment Routing undefined error") \ +_(BSID_ZERO, "BSID with SL = 0") \ +_(COUNTER_TOTAL, "SR steered IPv6 packets") \ +_(COUNTER_ENCAP, "SR: Encaps packets") \ +_(COUNTER_INSERT, "SR: SRH inserted packets") \ +_(COUNTER_BSID, "SR: BindingSID steered packets") + +typedef enum +{ +#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym, + foreach_sr_policy_rewrite_error +#undef _ + SR_POLICY_REWRITE_N_ERROR, +} sr_policy_rewrite_error_t; + +static char *sr_policy_rewrite_error_strings[] = { +#define _(sym,string) string, + foreach_sr_policy_rewrite_error +#undef _ +}; + +/** + * @brief Dynamically added SR SL DPO type + */ +static dpo_type_t sr_pr_encaps_dpo_type; +static dpo_type_t sr_pr_insert_dpo_type; +static dpo_type_t sr_pr_bsid_encaps_dpo_type; +static dpo_type_t sr_pr_bsid_insert_dpo_type; + +/** + * @brief IPv6 SA for encapsulated packets + */ +static ip6_address_t sr_pr_encaps_src; + +/******************* SR rewrite set encaps IPv6 source addr *******************/ +/* Note: This is temporal. 
We don't know whether to follow this path or + take the ip address of a loopback interface or even the OIF */ + +static clib_error_t * +set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat + (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src)) + return 0; + else + return clib_error_return (0, "No address specified"); + } + return clib_error_return (0, "No address specified"); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_sr_src_command, static) = { + .path = "set sr encaps source", + .short_help = "set sr encaps source addr <ip6_addr>", + .function = set_sr_src_command_fn, +}; +/* *INDENT-ON* */ + +/*********************** SR rewrite string computation ************************/ +/** + * @brief SR rewrite string computation for IPv6 encapsulation (inline) + * + * The rewrite holds an outer IPv6 header, followed by an SRH carrying the + * segment list only when the list has more than one segment. + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for encapsulation + */ +static inline u8 * +compute_rewrite_encaps (ip6_address_t * sl) +{ + ip6_header_t *iph; + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += IPv6_DEFAULT_HEADER_LENGTH; + if (vec_len (sl) > 1) + { + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + } + + vec_validate (rs, header_length - 1); + + iph = (ip6_header_t *) rs; + iph->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28)); + iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0]; + iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1]; + iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH; + iph->protocol = IP_PROTOCOL_IPV6; + iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT; + + /* Room for the SRH was only reserved above when the list has more than one + segment; writing it otherwise would run past the end of rs */ + if (vec_len (sl) > 1) + { + srh = (ip6_sr_header_t *) (iph + 1); + iph->protocol = IP_PROTOCOL_IPV6_ROUTE; + srh->protocol = IP_PROTOCOL_IPV6; + 
srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x00; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + } + iph->dst_address.as_u64[0] = sl->as_u64[0]; + iph->dst_address.as_u64[1] = sl->as_u64[1]; + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion + */ +static inline u8 * +compute_rewrite_insert (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl); + srh->first_segment = vec_len (sl); + srh->length = ((sizeof (ip6_sr_header_t) + + ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl); + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + return rs; +} + +/** + * @brief SR rewrite string computation for SRH insertion with BSID (inline) + * + * @param sl is a vector of IPv6 addresses composing the Segment List + * + * @return precomputed rewrite string for SRH insertion with BSID + */ +static inline u8 * +compute_rewrite_bsid (ip6_address_t * sl) +{ + ip6_sr_header_t *srh; + ip6_address_t *addrp, *this_address; + u32 header_length = 0; + u8 *rs = NULL; + + 
header_length = 0; + header_length += sizeof (ip6_sr_header_t); + header_length += vec_len (sl) * sizeof (ip6_address_t); + + vec_validate (rs, header_length - 1); + + srh = (ip6_sr_header_t *) rs; + srh->type = ROUTING_HEADER_TYPE_SR; + srh->segments_left = vec_len (sl) - 1; + srh->first_segment = vec_len (sl) - 1; + srh->length = ((sizeof (ip6_sr_header_t) + + (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1; + srh->flags = 0x00; + srh->reserved = 0x0000; + addrp = srh->segments + vec_len (sl) - 1; + vec_foreach (this_address, sl) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + return rs; +} + +/*************************** SR LB helper functions **************************/ +/** + * @brief Creates a Segment List and adds it to an SR policy + * + * Creates a Segment List and adds it to the SR policy. Notice that the SL are + * not necessarily unique. Hence there might be two Segment List within the + * same SR Policy with exactly the same segments and same weight. + * + * @param sr_policy is the SR policy where the SL will be added + * @param sl is a vector of IPv6 addresses composing the Segment List + * @param weight is the weight of the SegmentList (for load-balancing purposes) + * @param is_encap represents the mode (SRH insertion vs Encapsulation) + * + * @return pointer to the just created segment list + */ +static inline ip6_sr_sl_t * +create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight, + u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_sl_t *segment_list; + + pool_get (sm->sid_lists, segment_list); + memset (segment_list, 0, sizeof (*segment_list)); + + vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists); + + /* Fill in segment list */ + segment_list->weight = + (weight != (u32) ~ 0 ? 
weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT); + segment_list->segments = vec_dup (sl); + + if (is_encap) + { + segment_list->rewrite = compute_rewrite_encaps (sl); + segment_list->rewrite_bsid = segment_list->rewrite; + } + else + { + segment_list->rewrite = compute_rewrite_insert (sl); + segment_list->rewrite_bsid = compute_rewrite_bsid (sl); + } + + /* Create DPO */ + dpo_reset (&segment_list->bsid_dpo); + dpo_reset (&segment_list->ip6_dpo); + dpo_reset (&segment_list->ip4_dpo); + + if (is_encap) + { + dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + else + { + dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6, + segment_list - sm->sid_lists); + dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type, + DPO_PROTO_IP6, segment_list - sm->sid_lists); + } + + return segment_list; +} + +/** + * @brief Updates the Load Balancer after an SR Policy change + * + * @param sr_policy is the modified SR Policy + */ +static inline void +update_lb (ip6_sr_policy_t * sr_policy) +{ + flow_hash_config_t fhc; + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + load_balance_path_t *ip4_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *b_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + + /* Add FIB entry for BSID */ + fhc = fib_table_get_flow_hash_config (sr_policy->fib_table, + dpo_proto_to_fib (DPO_PROTO_IP6)); + + dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, 
DPO_PROTO_IP6, fhc)); + + dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6, + load_balance_create (0, DPO_PROTO_IP6, fhc)); + + /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */ + fib_table_entry_special_dpo_update (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + sr_policy->fib_table), &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4, + load_balance_create (0, DPO_PROTO_IP4, fhc)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the LB path vector */ + //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists)); + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + path.path_weight = segment_list->weight; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update LB multipath */ + load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector, + LOAD_BALANCE_FLAG_NONE); + load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector, + LOAD_BALANCE_FLAG_NONE); + if (sr_policy->is_encap) + load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector, + LOAD_BALANCE_FLAG_NONE); + + /* Cleanup */ + vec_free (b_path_vector); + vec_free (ip6_path_vector); + vec_free (ip4_path_vector); + +} + +/** + * @brief Updates the Replicate DPO after an SR Policy change + * + * @param sr_policy is the modified SR Policy (type spray) + */ +static 
inline void +update_replicate (ip6_sr_policy_t * sr_policy) +{ + u32 *sl_index; + ip6_sr_sl_t *segment_list; + ip6_sr_main_t *sm = &sr_main; + load_balance_path_t path; + load_balance_path_t *b_path_vector = 0; + load_balance_path_t *ip6_path_vector = 0; + load_balance_path_t *ip4_path_vector = 0; + + /* In case LB does not exist, create it */ + if (!dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE, + DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6)); + + /* Update FIB entry's DPO to point to SR without LB */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + }; + fib_table_entry_special_dpo_update (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + sr_policy->fib_table), &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->bsid_dpo); + + fib_table_entry_special_dpo_update (sm->fib_table_ip6, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip6_dpo); + + if (sr_policy->is_encap) + { + dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4, + replicate_create (0, DPO_PROTO_IP4)); + + fib_table_entry_special_dpo_update (sm->fib_table_ip4, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, + &sr_policy->ip4_dpo); + } + + } + + /* Create the replicate path vector */ + path.path_weight = 1; + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + path.path_dpo = segment_list->bsid_dpo; + vec_add1 (b_path_vector, path); + path.path_dpo = segment_list->ip6_dpo; + vec_add1 (ip6_path_vector, path); + if (sr_policy->is_encap) + { + path.path_dpo = segment_list->ip4_dpo; + vec_add1 (ip4_path_vector, path); + } + } + + /* Update replicate multipath */ + replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector); + replicate_multipath_update 
(&sr_policy->ip6_dpo, ip6_path_vector); + if (sr_policy->is_encap) + replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector); + + /* Cleanup */ + vec_free (b_path_vector); + vec_free (ip6_path_vector); + vec_free (ip4_path_vector); +} + +/******************************* SR rewrite API *******************************/ +/* Three functions for handling sr policies: + * -> sr_policy_add + * -> sr_policy_del + * -> sr_policy_mod + * All of them are API. CLI function on sr_policy_command_fn */ + +/** + * @brief Create a new SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param segments is a vector of IPv6 address composing the segment list + * @param weight is the weight of the sid list. optional. + * @param behavior is the behavior of the SR policy. (default//spray) + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion + * + * @return 0 if correct, else error + */ +int +sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments, + u32 weight, u8 behavior, u32 fib_table, u8 is_encap) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_address_t *key_copy; + uword *p; + + /* Search for existing keys (BSID) */ + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + { + /* Add SR policy that already exists; complain */ + return -12; + } + + /* Search collision in FIB entries */ + /* Explanation: It might be possible that some other entity has already + * created a route for the BSID. This in theory is impossible, but in + * practise we could see it. Assert it and scream if needed */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *bsid, + } + }; + + /* Lookup the FIB index associated to the table selected */ + u32 fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + (fib_table != + (u32) ~ 0 ? 
fib_table : 0)); + if (fib_index == ~0) + return -13; + + /* Lookup whether there exists an entry for the BSID */ + fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx); + if (FIB_NODE_INDEX_INVALID != fei) + return -12; //There is an entry for such lookup + + /* Add an SR policy object */ + pool_get (sm->sr_policies, sr_policy); + memset (sr_policy, 0, sizeof (*sr_policy)); + clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t)); + sr_policy->type = behavior; + sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ? + sr_policy->is_encap = is_encap; + + /* Copy the key */ + key_copy = vec_new (ip6_address_t, 1); + clib_memcpy (key_copy, bsid, sizeof (ip6_address_t)); + hash_set_mem (sm->sr_policy_index_by_key, key_copy, + sr_policy - sm->sr_policies); + + /* Create a segment list and add the index to the SR policy */ + create_sl (sr_policy, segments, weight, is_encap); + + /* If FIB doesnt exist, create them */ + if (sm->fib_table_ip6 == (u32) ~ 0) + { + sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP6 prefixes through BSIDs"); + sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6, + "SRv6 steering of IP4 prefixes through BSIDs"); + fib_table_flush (sm->fib_table_ip6, FIB_PROTOCOL_IP6, + FIB_SOURCE_SPECIAL); + fib_table_flush (sm->fib_table_ip4, FIB_PROTOCOL_IP6, + FIB_SOURCE_SPECIAL); + } + + /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + return 0; +} + +/** + * @brief Delete a SR policy + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * + * @return 0 if correct, else error + */ +int +sr_policy_del (ip6_address_t * bsid, u32 index) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t 
*segment_list; + ip6_address_t *key_copy; + u32 *sl_index; + uword *p; + + hash_pair_t *hp; + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + /* Remove BindingSID FIB entry */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = sr_policy->bsid, + } + , + }; + + fib_table_entry_special_remove (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, sr_policy->fib_table), + &pfx, FIB_SOURCE_SR); + + fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR); + + if (sr_policy->is_encap) + fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR); + + if (dpo_id_is_valid (&sr_policy->bsid_dpo)) + { + dpo_reset (&sr_policy->bsid_dpo); + dpo_reset (&sr_policy->ip4_dpo); + dpo_reset (&sr_policy->ip6_dpo); + } + + /* Clean SID Lists */ + vec_foreach (sl_index, sr_policy->segments_lists) + { + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, *sl_index); + } + + /* Remove SR policy entry */ + hp = hash_get_pair (sm->sr_policy_index_by_key, &sr_policy->bsid); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->sr_policy_index_by_key, &sr_policy->bsid); + vec_free (key_copy); + pool_put (sm->sr_policies, sr_policy); + + /* If FIB empty unlock it */ + if (!pool_elts (sm->sr_policies)) + { + fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6); + fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6); + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + } + + return 0; +} + +/** + * @brief Modify an existing SR policy + * + * The possible modifications are adding a new Segment List, modifying an + * existing Segment List 
(modify the weight only) and delete a given + * Segment List from the SR Policy. + * + * @param bsid is the bindingSID of the SR Policy + * @param index is the index of the SR policy + * @param fib_table is the VRF where to install the FIB entry for the BSID + * @param operation is the operation to perform (among the top ones) + * @param segments is a vector of IPv6 address composing the segment list + * @param sl_index is the index of the Segment List to modify/delete + * @param weight is the weight of the sid list. optional. + * @param is_encap Mode. Encapsulation or SRH insertion. + * + * @return 0 if correct, else error + */ +int +sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table, + u8 operation, ip6_address_t * segments, u32 sl_index, + u32 weight) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_sl_t *segment_list; + u32 *sl_index_iterate; + uword *p; + + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -1; + } + else + { + sr_policy = pool_elt_at_index (sm->sr_policies, index); + if (!sr_policy) + return -1; + } + + if (operation == 1) /* Add SR List to an existing SR policy */ + { + /* Create the new SL */ + segment_list = + create_sl (sr_policy, segments, weight, sr_policy->is_encap); + + /* Create a new LB DPO */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 2) /* Delete SR List from an existing SR policy */ + { + /* Check that currently there are more than one SID list */ + if (vec_len (sr_policy->segments_lists) == 1) + return -21; + + /* Check that the SR list does exist and is assigned to the sr policy */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -22; + + /* 
Remove the lucky SR list that is being kicked out */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + vec_free (segment_list->segments); + vec_free (segment_list->rewrite); + vec_free (segment_list->rewrite_bsid); + pool_put_index (sm->sid_lists, sl_index); + vec_del1 (sr_policy->segments_lists, + sl_index_iterate - sr_policy->segments_lists); + + /* Create a new LB DPO */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + else if (sr_policy->type == SR_POLICY_TYPE_SPRAY) + update_replicate (sr_policy); + } + else if (operation == 3) /* Modify the weight of an existing SR List */ + { + /* Find the corresponding SL */ + vec_foreach (sl_index_iterate, sr_policy->segments_lists) + if (*sl_index_iterate == sl_index) + break; + + if (*sl_index_iterate != sl_index) + return -32; + + /* Change the weight */ + segment_list = pool_elt_at_index (sm->sid_lists, sl_index); + segment_list->weight = weight; + + /* Update LB */ + if (sr_policy->type == SR_POLICY_TYPE_DEFAULT) + update_lb (sr_policy); + } + else /* Incorrect op. 
*/ + return -1; + + return 0; +} + +/** + * @brief CLI for 'sr policies' command family + */ +static clib_error_t * +sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int rv = -1; + char is_del = 0, is_add = 0, is_mod = 0; + char policy_set = 0; + ip6_address_t bsid, next_address; + u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0; + u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0; + ip6_address_t *segments = 0, *this_seg; + u8 operation = 0; + char is_encap = 1; + char is_spray = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (!is_add && !is_mod && !is_del && unformat (input, "add")) + is_add = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "del")) + is_del = 1; + else if (!is_add && !is_mod && !is_del && unformat (input, "mod")) + is_mod = 1; + else if (!policy_set + && unformat (input, "bsid %U", unformat_ip6_address, &bsid)) + policy_set = 1; + else if (!is_add && !policy_set + && unformat (input, "index %d", &sr_policy_index)) + policy_set = 1; + else if (unformat (input, "weight %d", &weight)); + else + if (unformat (input, "next %U", unformat_ip6_address, &next_address)) + { + vec_add2 (segments, this_seg, 1); + clib_memcpy (this_seg->as_u8, next_address.as_u8, + sizeof (*this_seg)); + } + else if (unformat (input, "add sl")) + operation = 1; + else if (unformat (input, "del sl index %d", &sl_index)) + operation = 2; + else if (unformat (input, "mod sl index %d", &sl_index)) + operation = 3; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else if (unformat (input, "encap")) + is_encap = 1; + else if (unformat (input, "insert")) + is_encap = 0; + else if (unformat (input, "spray")) + is_spray = 1; + else + break; + } + + if (!is_add && !is_mod && !is_del) + return clib_error_return (0, "Incorrect CLI"); + + if (!policy_set) + return clib_error_return (0, "No SR policy BSID or index specified"); + + if (is_add) + { + if 
(vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + rv = sr_policy_add (&bsid, segments, weight, + (is_spray ? SR_POLICY_TYPE_SPRAY : + SR_POLICY_TYPE_DEFAULT), fib_table, is_encap); + } + else if (is_del) + rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + sr_policy_index); + else if (is_mod) + { + if (!operation) + return clib_error_return (0, "No SL modification specified"); + if (operation != 1 && sl_index == (u32) ~ 0) + return clib_error_return (0, "No Segment List index specified"); + if (operation == 1 && vec_len (segments) == 0) + return clib_error_return (0, "No Segment List specified"); + if (operation == 3 && weight == (u32) ~ 0) + return clib_error_return (0, "No new weight for the SL specified"); + rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid), + sr_policy_index, fib_table, operation, segments, + sl_index, weight); + } + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -12: + return clib_error_return (0, + "There is already a FIB entry for the BindingSID address.\n" + "The SR policy could not be created."); + case -13: + return clib_error_return (0, "The specified FIB table does not exist."); + case -21: + return clib_error_return (0, + "The selected SR policy only contains ONE segment list. " + "Please remove the SR policy instead"); + case -22: + return clib_error_return (0, + "Could not delete the segment list. " + "It is not associated with that SR policy."); + case -32: + return clib_error_return (0, + "Could not modify the segment list. 
" + "The given SL is not associated with such SR policy."); + default: + return clib_error_return (0, "BUG: sr policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_policy_command, static) = { + .path = "sr policy", + .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] " + "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)", + .long_help = + "Manipulation of SR policies.\n" + "A Segment Routing policy may contain several SID lists. Each SID list has\n" + "an associated weight (default 1), which will result in wECMP (uECMP).\n" + "Segment Routing policies might be of type encapsulation or srh insertion\n" + "Each SR policy will be associated with a unique BindingSID.\n" + "A BindingSID is a locally allocated SegmentID. For every packet that arrives\n" + "with IPv6_DA:BSID such traffic will be steered into the SR policy.\n" + "The add command will create a SR policy with its first segment list (sl)\n" + "The mod command allows you to add, remove, or modify the existing segment lists\n" + "within an SR policy.\n" + "The del command allows you to delete a SR policy along with all its associated\n" + "SID lists.\n", + .function = sr_policy_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI to display onscreen all the SR policies + */ +static clib_error_t * +show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + u32 *sl_index; + ip6_sr_sl_t *segment_list = 0; + ip6_sr_policy_t *sr_policy = 0; + ip6_sr_policy_t **vec_policies = 0; + ip6_address_t *addr; + u8 *s; + int i = 0; + + vlib_cli_output (vm, "SR policies:"); + + /* *INDENT-OFF* */ + pool_foreach (sr_policy, sm->sr_policies, + {vec_add1 (vec_policies, sr_policy); } ); + /* *INDENT-ON* */ + + vec_foreach_index (i, vec_policies) + { + sr_policy = vec_policies[i]; + vlib_cli_output (vm, "[%u].-\tBSID: %U", + (u32) (sr_policy - sm->sr_policies), + format_ip6_address, 
&sr_policy->bsid); + vlib_cli_output (vm, "\tBehavior: %s", + (sr_policy->is_encap ? "Encapsulation" : + "SRH insertion")); + vlib_cli_output (vm, "\tType: %s", + (sr_policy->type == + SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray")); + vlib_cli_output (vm, "\tFIB table: %u", + (sr_policy->fib_table != + (u32) ~ 0 ? sr_policy->fib_table : 0)); + vlib_cli_output (vm, "\tSegment Lists:"); + vec_foreach (sl_index, sr_policy->segments_lists) + { + s = NULL; + s = format (s, "\t[%u].- ", *sl_index); + segment_list = pool_elt_at_index (sm->sid_lists, *sl_index); + s = format (s, "< "); + vec_foreach (addr, segment_list->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > "); + s = format (s, "weight: %u", segment_list->weight); + vlib_cli_output (vm, " %s", s); + } + vlib_cli_output (vm, "-----------"); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_policies_command, static) = { + .path = "show sr policies", + .short_help = "show sr policies", + .function = show_sr_policies_command_fn, +}; +/* *INDENT-ON* */ + +/*************************** SR rewrite graph node ****************************/ +/** + * @brief Trace for the SR Policy Rewrite graph node + */ +static u8 * +format_sr_policy_rewrite_trace (u8 * s, va_list * args) +{ + //TODO + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *); + + s = format + (s, "SR-policy-rewrite: src %U dst %U", + format_ip6_address, &t->src, format_ip6_address, &t->dst); + + return s; +} + +/** + * @brief IPv6 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v6 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip6_header_t * ip0_encap) +{ + u32 new_l0; + + ip0_encap->hop_limit -= 1; + new_l0 = + ip0->payload_length + sizeof (ip6_header_t) + + 
clib_net_to_host_u16 (ip0_encap->payload_length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + ip0_encap->ip_version_traffic_class_and_flow_label; +} + +/** + * @brief Graph node for applying a SR policy into an IPv6 packet. Encapsulation + */ +static uword +sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + 
clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + 
vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, 
sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = { + .function = sr_policy_rewrite_encaps, + .name = "sr-pl-rewrite-encaps", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief IPv4 encapsulation processing as per RFC2473 + */ +static_always_inline void +encaps_processing_v4 (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, ip4_header_t * ip0_encap) +{ + u32 new_l0; + ip6_sr_header_t *sr0; + + u32 checksum0; + + /* Inner IPv4: Decrement TTL & update checksum */ + ip0_encap->ttl -= 1; + checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100); + checksum0 += checksum0 >= 0xffff; + ip0_encap->checksum = checksum0; + + /* Outer IPv6: Update length, FL, proto */ + new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) | + ((ip0_encap->tos & 0xFF) << 20)); + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP_IN_IP; +} + +/** + * @brief Graph node for applying a SR policy into an IPv4 packet. 
Encapsulation + */ +static uword +sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer 
(b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + encaps_processing_v4 (node, b1, ip1, ip1_encap); + encaps_processing_v4 (node, b2, ip2, ip2_encap); + encaps_processing_v4 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + 
vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip4_header_t *ip0_encap = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len 
(sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v4 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = { + .function = sr_policy_rewrite_encaps_v4, + .name = "sr-pl-rewrite-encaps-v4", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +always_inline u32 +ip_flow_hash (void *data) +{ + ip4_header_t *iph = (ip4_header_t *) data; + + if ((iph->ip_version_and_header_length & 0xF0) == 0x40) + return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT); + else + return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT); +} + +always_inline u64 +mac_to_u64 (u8 * m) +{ + return (*((u64 
*) m) & 0xffffffffffff); +} + +always_inline u32 +l2_flow_hash (vlib_buffer_t * b0) +{ + ethernet_header_t *eh; + u64 a, b, c; + uword is_ip, eh_size; + u16 eh_type; + + eh = vlib_buffer_get_current (b0); + eh_type = clib_net_to_host_u16 (eh->type); + eh_size = ethernet_buffer_header_size (b0); + + is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6); + + /* since we have 2 cache lines, use them */ + if (is_ip) + a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size); + else + a = eh->type; + + b = mac_to_u64 ((u8 *) eh->dst_address); + c = mac_to_u64 ((u8 *) eh->src_address); + hash_mix64 (a, b, c); + + return (u32) c; +} + +/** + * @brief Graph node for applying a SR policy into a L2 frame + */ +static uword +sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ethernet_header_t *en0, *en1, *en2, *en3; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + sp1 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b1)->sw_if_index + [VLIB_RX]]); + + sp2 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b2)->sw_if_index + [VLIB_RX]]); + + sp3 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b3)->sw_if_index + [VLIB_RX]]); + + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + + if (vec_len (sp1->segments_lists) == 1) + vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[1]; + else + { + vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1); + 
vnet_buffer (b1)->ip.adj_index[VLIB_TX] = + sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash & + (vec_len (sp1->segments_lists) - 1))]; + } + + if (vec_len (sp2->segments_lists) == 1) + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[2]; + else + { + vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2); + vnet_buffer (b2)->ip.adj_index[VLIB_TX] = + sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash & + (vec_len (sp2->segments_lists) - 1))]; + } + + if (vec_len (sp3->segments_lists) == 1) + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[3]; + else + { + vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3); + vnet_buffer (b3)->ip.adj_index[VLIB_TX] = + sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash & + (vec_len (sp3->segments_lists) - 1))]; + } + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + en0 = vlib_buffer_get_current (b0); + en1 = vlib_buffer_get_current (b1); + en2 = vlib_buffer_get_current (b2); + en3 = vlib_buffer_get_current (b3); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite, 
+ vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + ip1->payload_length = + clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t)); + ip2->payload_length = + clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t)); + ip3->payload_length = + clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr1 = (void *) (ip1 + 1); + sr2 = (void *) (ip2 + 1); + sr3 = (void *) (ip3 + 1); + + sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol = + IP_PROTOCOL_IP6_NONXT; + + /* Which Traffic class and flow label do I set ? 
*/ + //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20)); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0; + ethernet_header_t *en0; + ip6_sr_policy_t *sp0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + 
n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + /* Find the SR policy */ + sp0 = pool_elt_at_index (sm->sr_policies, + sm->sw_iface_sr_policies[vnet_buffer + (b0)->sw_if_index + [VLIB_RX]]); + + /* In case there is more than one SL, LB among them */ + if (vec_len (sp0->segments_lists) == 1) + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0]; + else + { + vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0); + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = + sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash & + (vec_len (sp0->segments_lists) - 1))]; + } + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + en0 = vlib_buffer_get_current (b0); + + clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + ip0->payload_length = + clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t)); + + sr0 = (void *) (ip0 + 1); + sr0->protocol = IP_PROTOCOL_IP6_NONXT; + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + 
return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = { + .function = sr_policy_rewrite_encaps_l2, + .name = "sr-pl-rewrite-encaps-l2", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. SRH insertion. + */ +static uword +sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + 
ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3, + (void *) sr3 - (void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite, + vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite, + vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite, + vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 
(ip0->payload_length) + + vec_len (sl0->rewrite); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite); + + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0]; + sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1]; + sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0]; + sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1]; + sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0]; + sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + 
ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof 
(*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite, + vec_len (sl0->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite); + sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0]; + sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1]; + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + 
sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = { + .function = sr_policy_rewrite_insert, + .name = "sr-pl-rewrite-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion. 
+ */ +static uword +sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int insert_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + u16 new_l0, new_l1, new_l2, new_l3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + 
vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite_bsid) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite_bsid) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite_bsid) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite_bsid) + b3->current_data)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr1 = + (ip6_sr_header_t *) (((void *) (ip1 + 1)) + + ip6_ext_header_len (ip1 + 1)); + else + sr1 = (ip6_sr_header_t *) (ip1 + 1); + + if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr2 = + (ip6_sr_header_t *) (((void *) (ip2 + 1)) + + ip6_ext_header_len (ip2 + 1)); + else + sr2 = (ip6_sr_header_t *) (ip2 + 1); + + if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr3 = + (ip6_sr_header_t *) (((void *) (ip3 + 1)) + + ip6_ext_header_len (ip3 + 1)); + else + sr3 = (ip6_sr_header_t *) (ip3 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1, + (void *) sr1 - (void *) ip1); + clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2, + (void *) sr2 - (void *) ip2); + clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3, + (void *) sr3 - 
(void *) ip3); + + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid)); + clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)), + sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid)); + clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)), + sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid)); + clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)), + sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid); + ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid); + ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid); + + ip0->hop_limit -= 1; + ip1->hop_limit -= 1; + ip2->hop_limit -= 1; + ip3->hop_limit -= 1; + + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + new_l1 = + clib_net_to_host_u16 (ip1->payload_length) + + vec_len (sl1->rewrite_bsid); + new_l2 = + clib_net_to_host_u16 (ip2->payload_length) + + vec_len (sl2->rewrite_bsid); + new_l3 = + clib_net_to_host_u16 (ip3->payload_length) + + vec_len (sl3->rewrite_bsid); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + ip2->payload_length = clib_host_to_net_u16 (new_l2); + ip3->payload_length = clib_host_to_net_u16 (new_l3); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid); + sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid); + sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + 
ip1->dst_address.as_u64[0] = + (sr1->segments + sr1->segments_left)->as_u64[0]; + ip1->dst_address.as_u64[1] = + (sr1->segments + sr1->segments_left)->as_u64[1]; + ip2->dst_address.as_u64[0] = + (sr2->segments + sr2->segments_left)->as_u64[0]; + ip2->dst_address.as_u64[1] = + (sr2->segments + sr2->segments_left)->as_u64[1]; + ip3->dst_address.as_u64[0] = + (sr3->segments + sr3->segments_left)->as_u64[0]; + ip3->dst_address.as_u64[1] = + (sr3->segments + sr3->segments_left)->as_u64[1]; + + ip6_ext_header_t *ip_ext; + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip1 + 1 == (void *) sr1) + { + sr1->protocol = ip1->protocol; + ip1->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip2 + 1 == (void *) sr2) + { + sr2->protocol = ip2->protocol; + ip2->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip2 + 1); + sr2->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (ip3 + 1 == (void *) sr3) + { + sr3->protocol = ip3->protocol; + ip3->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip_ext = (void *) (ip3 + 1); + sr3->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + insert_pkts += 4; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace 
(vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite_bsid) + b0->current_data)); + + ip0 = vlib_buffer_get_current (b0); + + if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + sr0 = + (ip6_sr_header_t *) (((void *) (ip0 + 1)) + + ip6_ext_header_len (ip0 + 1)); + else + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0, + (void *) sr0 - (void *) ip0); + clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)), + sl0->rewrite_bsid, vec_len 
(sl0->rewrite_bsid)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid)); + + ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid); + ip0->hop_limit -= 1; + new_l0 = + clib_net_to_host_u16 (ip0->payload_length) + + vec_len (sl0->rewrite_bsid); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid); + + ip0->dst_address.as_u64[0] = + (sr0->segments + sr0->segments_left)->as_u64[0]; + ip0->dst_address.as_u64[1] = + (sr0->segments + sr0->segments_left)->as_u64[1]; + + if (ip0 + 1 == (void *) sr0) + { + sr0->protocol = ip0->protocol; + ip0->protocol = IP_PROTOCOL_IPV6_ROUTE; + } + else + { + ip6_ext_header_t *ip_ext = (void *) (ip0 + 1); + sr0->protocol = ip_ext->next_hdr; + ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE; + } + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + insert_pkts++; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + insert_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = { + .function = sr_policy_rewrite_b_insert, + .name = "sr-pl-rewrite-b-insert", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings 
= sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/** + * @brief Function BSID encapsulation + */ +static_always_inline void +end_bsid_encaps_srh_processing (vlib_node_runtime_t * node, + vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_sr_header_t * sr0, u32 * next0) +{ + ip6_address_t *new_dst0; + + if (PREDICT_FALSE (!sr0)) + goto error_bsid_encaps; + + if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR)) + { + if (PREDICT_TRUE (sr0->segments_left != 0)) + { + sr0->segments_left -= 1; + new_dst0 = (ip6_address_t *) (sr0->segments); + new_dst0 += sr0->segments_left; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + return; + } + } + +error_bsid_encaps: + *next0 = SR_POLICY_REWRITE_NEXT_ERROR; + b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO]; +} + +/** + * @brief Graph node for applying a SR policy BSID - Encapsulation + */ +static uword +sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + ip6_sr_main_t *sm = &sr_main; + u32 n_left_from, next_index, *from, *to_next; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + int encap_pkts = 0, bsid_pkts = 0; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Quad - Loop */ + while (n_left_from >= 8 && n_left_to_next >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t *b0, *b1, *b2, *b3; + u32 next0, next1, next2, next3; + next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + ip6_header_t *ip0, *ip1, *ip2, *ip3; + ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap; + ip6_sr_header_t *sr0, *sr1, *sr2, *sr3; + ip6_ext_header_t *prev0, *prev1, 
*prev2, *prev3; + ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + /* Prefetch the buffer header and packet for the N+2 loop iteration */ + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + to_next[2] = bi2 = from[2]; + to_next[3] = bi3 = from[3]; + from += 4; + to_next += 4; + n_left_from -= 4; + n_left_to_next -= 4; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + sl1 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + sl2 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b2)->ip.adj_index[VLIB_TX]); + sl3 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b3)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl1->rewrite) + b1->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl2->rewrite) + b2->current_data)); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl3->rewrite) + b3->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip1_encap = vlib_buffer_get_current (b1); + ip2_encap = vlib_buffer_get_current (b2); + ip3_encap = vlib_buffer_get_current (b3); + + 
ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip1_encap, prev1, sr1, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip2_encap, prev2, sr2, + IP_PROTOCOL_IPV6_ROUTE); + ip6_ext_header_find_t (ip3_encap, prev3, sr3, + IP_PROTOCOL_IPV6_ROUTE); + + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1); + end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2); + end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite), + sl1->rewrite, vec_len (sl1->rewrite)); + clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite), + sl2->rewrite, vec_len (sl2->rewrite)); + clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite), + sl3->rewrite, vec_len (sl3->rewrite)); + + vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite)); + vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite)); + vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + ip2 = vlib_buffer_get_current (b2); + ip3 = vlib_buffer_get_current (b3); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + encaps_processing_v6 (node, b1, ip1, ip1_encap); + encaps_processing_v6 (node, b2, ip2, ip2_encap); + encaps_processing_v6 (node, b3, ip3, ip3_encap); + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b1->flags & 
VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b2, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b3, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + } + + encap_pkts += 4; + vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, bi2, bi3, + next0, next1, next2, next3); + } + + /* Single loop for potentially the last three packets */ + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0, *ip0_encap = 0; + ip6_ext_header_t *prev0; + ip6_sr_header_t *sr0; + ip6_sr_sl_t *sl0; + u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); + + sl0 = + pool_elt_at_index (sm->sid_lists, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= + (vec_len (sl0->rewrite) + b0->current_data)); + + ip0_encap = vlib_buffer_get_current (b0); + ip6_ext_header_find_t (ip0_encap, prev0, sr0, + IP_PROTOCOL_IPV6_ROUTE); + end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0); + + clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite), + sl0->rewrite, vec_len (sl0->rewrite)); + 
vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite)); + + ip0 = vlib_buffer_get_current (b0); + + encaps_processing_v6 (node, b0, ip0, ip0_encap); + + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) && + PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_policy_rewrite_trace_t *tr = + vlib_add_trace (vm, node, b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + + encap_pkts++; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Update counters */ + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL, + encap_pkts); + vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index, + SR_POLICY_REWRITE_ERROR_COUNTER_BSID, + bsid_pkts); + + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = { + .function = sr_policy_rewrite_b_encaps, + .name = "sr-pl-rewrite-b-encaps", + .vector_size = sizeof (u32), + .format_trace = format_sr_policy_rewrite_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = SR_POLICY_REWRITE_N_ERROR, + .error_strings = sr_policy_rewrite_error_strings, + .n_next_nodes = SR_POLICY_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n, + foreach_sr_policy_rewrite_next +#undef _ + }, +}; +/* *INDENT-ON* */ + +/*************************** SR Segment Lists DPOs ****************************/ +static u8 * +format_sr_segment_list_dpo (u8 * s, va_list * args) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_address_t *addr; + ip6_sr_sl_t *sl; + + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + s = format (s, "SR: Segment List index:[%d]", index); + s = format (s, "\n\tSegments:"); + + sl = 
pool_elt_at_index (sm->sid_lists, index); + + s = format (s, "< "); + vec_foreach (addr, sl->segments) + { + s = format (s, "%U, ", format_ip6_address, addr); + } + s = format (s, "\b\b > - "); + s = format (s, "Weight: %u", sl->weight); + + return s; +} + +const static dpo_vft_t sr_policy_rewrite_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_segment_list_dpo, +}; + +const static char *const sr_pr_encaps_ip6_nodes[] = { + "sr-pl-rewrite-encaps", + NULL, +}; + +const static char *const sr_pr_encaps_ip4_nodes[] = { + "sr-pl-rewrite-encaps-v4", + NULL, +}; + +const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes, + [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes, +}; + +const static char *const sr_pr_insert_ip6_nodes[] = { + "sr-pl-rewrite-insert", + NULL, +}; + +const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_insert_ip6_nodes[] = { + "sr-pl-rewrite-b-insert", + NULL, +}; + +const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes, +}; + +const static char *const sr_pr_bsid_encaps_ip6_nodes[] = { + "sr-pl-rewrite-b-encaps", + NULL, +}; + +const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes, +}; + +/********************* SR Policy Rewrite initialization ***********************/ +/** + * @brief SR Policy Rewrite initialization + */ +clib_error_t * +sr_policy_rewrite_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for sr policy keys (bsid <-> ip6_address_t) */ + sm->sr_policy_index_by_key = hash_create_mem (0, sizeof (ip6_address_t), + sizeof (uword)); + + /* Init SR VPO DPOs type */ + sr_pr_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes); + + sr_pr_insert_dpo_type = + 
dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes); + + sr_pr_bsid_encaps_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes); + + sr_pr_bsid_insert_dpo_type = + dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes); + + /* Register the L2 encaps node used in HW redirect */ + sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index; + + sm->fib_table_ip6 = (u32) ~ 0; + sm->fib_table_ip4 = (u32) ~ 0; + + return 0; +} + +VLIB_INIT_FUNCTION (sr_policy_rewrite_init); + + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sr/sr_steering.c b/src/vnet/sr/sr_steering.c new file mode 100755 index 00000000..86d6f27c --- /dev/null +++ b/src/vnet/sr/sr_steering.c @@ -0,0 +1,568 @@ +/* + * sr_steering.c: ipv6 segment routing steering into SR policy + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Packet steering into SR Policies + * + * This file is in charge of handling the FIB appropriately to steer packets + * through SR Policies as defined in 'sr_policy_rewrite.c'. Notice that here + * we are only doing steering. 
SR policy application is done in + * sr_policy_rewrite.c + * + * Supports: + * - Steering of IPv6 traffic Destination Address based + * - Steering of IPv4 traffic Destination Address based + * - Steering of L2 frames, interface based (sw interface) + */ + +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/sr/sr.h> +#include <vnet/ip/ip.h> +#include <vnet/sr/sr_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/dpo.h> + +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +/** + * @brief Steer L2 and L3 traffic through a given SR policy + * + * @param is_del non-zero to delete the steering entry, zero to add or update it + * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index) + * @param sr_policy_index is the index of the SR Policy (alt to bsid) + * @param table_id is the VRF where to install the FIB entry for the BSID + * @param prefix is the IPv4/v6 address for L3 traffic type + * @param mask_width is the mask for L3 traffic type + * @param sw_if_index is the incoming interface for L2 traffic + * @param traffic_type describes the type of traffic + * + * @return 0 on successful add/update, 1 on successful delete, negative value on error + */ +int +sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index, + u32 table_id, ip46_address_t * prefix, u32 mask_width, + u32 sw_if_index, u8 traffic_type) +{ + ip6_sr_main_t *sm = &sr_main; + sr_steering_key_t key, *key_copy; + ip6_sr_steering_policy_t *steer_pl; + fib_prefix_t pfx = { 0 }; + + ip6_sr_policy_t *sr_policy = 0; + uword *p = 0; + + hash_pair_t *hp; + + /* Compute the steer policy key */ + if (prefix) + { + key.l3.prefix.as_u64[0] = prefix->as_u64[0]; + key.l3.prefix.as_u64[1] = prefix->as_u64[1]; + key.l3.mask_width = mask_width; + key.l3.fib_table = (table_id != (u32) ~ 0 ? 
table_id : 0); + } + else + { + key.l2.sw_if_index = sw_if_index; + + /* Sanitise the SW_IF_INDEX */ + if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces, + sw_if_index)) + return -3; + + vnet_sw_interface_t *sw = + vnet_get_sw_interface (sm->vnet_main, sw_if_index); + if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE) + return -3; + } + + key.traffic_type = traffic_type; + + /* Search for the item */ + p = hash_get_mem (sm->steer_policies_index_by_key, &key); + + if (p) + { + /* Retrieve Steer Policy function */ + steer_pl = pool_elt_at_index (sm->steer_policies, p[0]); + + if (is_del) + { + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + } + else if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + /* Remove HW redirection */ + vnet_feature_enable_disable ("device-input", + "sr-policy-rewrite-encaps-l2", + sw_if_index, 0, 0, 0); + sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0; + + /* Remove promiscuous mode from interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = &ethernet_main; + ethernet_interface_t *eif = + ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, 0); + } + + /* Delete SR steering policy entry */ + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair 
(sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return 1; + } + else /* It means user requested to update an existing SR steering policy */ + { + /* Retrieve SR steering policy */ + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + if (!sr_policy) + return -2; + + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Remove old FIB/hw redirection and create a new one */ + if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP6, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + /* Remove FIB entry */ + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_delete (fib_table_id_find_fib_index + (FIB_PROTOCOL_IP4, + steer_pl->classify.l3.fib_table), &pfx, + FIB_SOURCE_SR); + + /* Create a new one */ + goto update_fib; + } + else if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + /* Update L2-HW redirection */ + goto update_fib; + } + } + } + else + /* delete; steering policy does not exist; complain */ + if (is_del) + return -4; + + /* Retrieve SR policy */ + if (bsid) + { + p = hash_get_mem (sm->sr_policy_index_by_key, bsid); + if (p) + sr_policy = pool_elt_at_index (sm->sr_policies, p[0]); + else + return -2; + } + else + sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index); + + /* Create a 
new steering policy */ + pool_get (sm->steer_policies, steer_pl); + memset (steer_pl, 0, sizeof (*steer_pl)); + + if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6) + { + clib_memcpy (&steer_pl->classify.l3.prefix, prefix, + sizeof (ip46_address_t)); + steer_pl->classify.l3.mask_width = mask_width; + steer_pl->classify.l3.fib_table = + (table_id != (u32) ~ 0 ? table_id : 0); + steer_pl->classify.traffic_type = traffic_type; + } + else if (traffic_type == SR_STEER_L2) + { + steer_pl->classify.l2.sw_if_index = sw_if_index; + steer_pl->classify.traffic_type = traffic_type; + } + else + { + /* Incorrect API usage. Should never get here */ + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -1; + } + steer_pl->sr_policy = sr_policy - sm->sr_policies; + + /* Create and store key */ + key_copy = vec_new (sr_steering_key_t, 1); + clib_memcpy (key_copy, &key, sizeof (sr_steering_key_t)); + hash_set_mem (sm->steer_policies_index_by_key, + key_copy, steer_pl - sm->steer_policies); + + if (traffic_type == SR_STEER_L2) + { + if (!sr_policy->is_encap) + goto cleanup_error_encap; + + if (vnet_feature_enable_disable + ("device-input", "sr-policy-rewrite-encaps-l2", sw_if_index, 1, 0, + 0)) + goto cleanup_error_redirection; + + /* Set promiscuous mode on interface */ + vnet_main_t *vnm = vnet_get_main (); + ethernet_main_t *em = &ethernet_main; + ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index); + + if (!eif) + goto cleanup_error_redirection; + + ethernet_set_flags (vnm, sw_if_index, + ETHERNET_INTERFACE_FLAG_ACCEPT_ALL); + } + else if (traffic_type == SR_STEER_IPV4) + if (!sr_policy->is_encap) + goto cleanup_error_encap; + +update_fib: + /* FIB API calls - Recursive route through the BindingSID */ + if (traffic_type == SR_STEER_IPV6) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + 
pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6; + + fib_table_entry_path_add (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + (table_id != + (u32) ~ 0 ? + table_id : 0)), + &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip6, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_IPV4) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = steer_pl->classify.l3.mask_width; + pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4; + + fib_table_entry_path_add (fib_table_id_find_fib_index (FIB_PROTOCOL_IP4, + (table_id != + (u32) ~ 0 ? + table_id : 0)), + &pfx, FIB_SOURCE_CLI, FIB_ENTRY_FLAG_NONE, + FIB_PROTOCOL_IP6, + (ip46_address_t *) & sr_policy->bsid, ~0, + sm->fib_table_ip4, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else if (traffic_type == SR_STEER_L2) + { + if (sw_if_index < vec_len (sm->sw_iface_sr_policies)) + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + else + { + vec_resize (sm->sw_iface_sr_policies, + (pool_len (sm->vnet_main->interface_main.sw_interfaces) + - vec_len (sm->sw_iface_sr_policies))); + sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy; + } + } + + return 0; + +cleanup_error_encap: + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -5; + +cleanup_error_redirection: + pool_put (sm->steer_policies, steer_pl); + hp = hash_get_pair (sm->steer_policies_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem (sm->steer_policies_index_by_key, &key); + vec_free (key_copy); + return -3; +} + +static clib_error_t * +sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + + int is_del = 0; + + 
ip46_address_t prefix; + u32 dst_mask_width = 0; + u32 sw_if_index = (u32) ~ 0; + u8 traffic_type = 0; + u32 fib_table = (u32) ~ 0; + + ip6_address_t bsid; + u32 sr_policy_index = (u32) ~ 0; + + u8 sr_policy_set = 0; + + int rv; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip6_address, + &prefix.ip6, &dst_mask_width)) + traffic_type = SR_STEER_IPV6; + else if (!traffic_type + && unformat (input, "l3 %U/%d", unformat_ip4_address, + &prefix.ip4, &dst_mask_width)) + traffic_type = SR_STEER_IPV4; + else if (!traffic_type + && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + traffic_type = SR_STEER_L2; + else if (!sr_policy_set + && unformat (input, "via sr policy index %d", + &sr_policy_index)) + sr_policy_set = 1; + else if (!sr_policy_set + && unformat (input, "via sr policy bsid %U", + unformat_ip6_address, &bsid)) + sr_policy_set = 1; + else if (fib_table == (u32) ~ 0 + && unformat (input, "fib-table %d", &fib_table)); + else + break; + } + + if (!traffic_type) + return clib_error_return (0, "No L2/L3 traffic specified"); + if (!sr_policy_set) + return clib_error_return (0, "No SR policy specified"); + + /* Make sure that the prefixes are clean */ + if (traffic_type == SR_STEER_IPV4) + { + u32 mask = + (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0); + prefix.ip4.as_u32 &= mask; + } + else if (traffic_type == SR_STEER_IPV6) + { + ip6_address_t mask; + ip6_address_mask_from_width (&mask, dst_mask_width); + ip6_address_mask (&prefix.ip6, &mask); + } + + rv = + sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? 
&bsid : NULL), + sr_policy_index, fib_table, &prefix, dst_mask_width, + sw_if_index, traffic_type); + + switch (rv) + { + case 0: + break; + case 1: + return 0; + case -1: + return clib_error_return (0, "Incorrect API usage."); + case -2: + return clib_error_return (0, + "The requested SR policy could not be located. Review the BSID/index."); + case -3: + return clib_error_return (0, + "Unable to do SW redirect. Incorrect interface."); + case -4: + return clib_error_return (0, + "The requested SR policy could not be deleted. Review the BSID/index."); + case -5: + return clib_error_return (0, + "The SR policy is not an encapsulation one."); + default: + return clib_error_return (0, "BUG: sr steer policy returns %d", rv); + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_steer_policy_command, static) = { + .path = "sr steer", + .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>]" + "via sr policy [index <sr_policy_index>|bsid <bsid_ip6_addr>]" + "(fib-table <fib_table_index>)", + .long_help = + "\tSteer a L2 or L3 traffic through an existing SR policy.\n" + "\tExamples:\n" + "\t\tsr steer l3 2001::/64 via sr_policy index 5\n" + "\t\tsr steer l3 2001::/64 via sr_policy bsid 2010::9999:1\n" + "\t\tsr steer l2 GigabitEthernet0/5/0 via sr_policy index 5\n" + "\t\tsr steer del l3 2001::/64 via sr_policy index 5\n", + .function = sr_steer_policy_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_sr_steering_policies_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_sr_steering_policy_t **steer_policies = 0; + ip6_sr_steering_policy_t *steer_pl; + + vnet_main_t *vnm = vnet_get_main (); + + ip6_sr_policy_t *pl = 0; + int i; + + vlib_cli_output (vm, "SR steering policies:"); + /* *INDENT-OFF* */ + pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);})); + /* *INDENT-ON* */ + vlib_cli_output (vm, "Traffic\t\tSR policy BSID"); + for (i = 
0; i < vec_len (steer_policies); i++) + { + steer_pl = steer_policies[i]; + pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy); + if (steer_pl->classify.traffic_type == SR_STEER_L2) + { + vlib_cli_output (vm, "L2 %U\t%U", + format_vnet_sw_if_index_name, vnm, + steer_pl->classify.l2.sw_if_index, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV4) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip4_address, + &steer_pl->classify.l3.prefix.ip4, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + else if (steer_pl->classify.traffic_type == SR_STEER_IPV6) + { + vlib_cli_output (vm, "L3 %U/%d\t%U", + format_ip6_address, + &steer_pl->classify.l3.prefix.ip6, + steer_pl->classify.l3.mask_width, + format_ip6_address, &pl->bsid); + } + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = { + .path = "show sr steering policies", + .short_help = "show sr steering policies", + .function = show_sr_steering_policies_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +sr_steering_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + + /* Init memory for function keys */ + sm->steer_policies_index_by_key = + hash_create_mem (0, sizeof (sr_steering_key_t), sizeof (uword)); + + sm->sw_iface_sr_policies = 0; + + sm->vnet_main = vnet_get_main (); + + return 0; +} + +VLIB_INIT_FUNCTION (sr_steering_init); + +VNET_FEATURE_INIT (sr_policy_rewrite_encaps_l2, static) = +{ +.arc_name = "device-input",.node_name = + "sr-pl-rewrite-encaps-l2",.runs_before = + VNET_FEATURES ("ethernet-input"),}; + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ |