diff options
Diffstat (limited to 'src/vnet/sr')
-rw-r--r-- | src/vnet/sr/dir.dox | 25 | ||||
-rw-r--r-- | src/vnet/sr/examples/sr_multicastmap.script | 4 | ||||
-rw-r--r-- | src/vnet/sr/rfc_draft_05.txt | 1265 | ||||
-rw-r--r-- | src/vnet/sr/sr.c | 3333 | ||||
-rw-r--r-- | src/vnet/sr/sr.h | 262 | ||||
-rw-r--r-- | src/vnet/sr/sr_error.def | 20 | ||||
-rw-r--r-- | src/vnet/sr/sr_fix_dst_error.def | 17 | ||||
-rw-r--r-- | src/vnet/sr/sr_packet.h | 251 | ||||
-rw-r--r-- | src/vnet/sr/sr_replicate.c | 490 |
9 files changed, 5667 insertions, 0 deletions
diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox new file mode 100644 index 00000000000..a98b202c93e --- /dev/null +++ b/src/vnet/sr/dir.dox @@ -0,0 +1,25 @@ +/* + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + @dir + @brief Segment Routing code + + An implementation of Segment Routing as per: + draft-previdi-6man-segment-routing-header-05 + + See file: rfc_draft_05.txt + +*/
\ No newline at end of file diff --git a/src/vnet/sr/examples/sr_multicastmap.script b/src/vnet/sr/examples/sr_multicastmap.script new file mode 100644 index 00000000000..20bf7dc0eb7 --- /dev/null +++ b/src/vnet/sr/examples/sr_multicastmap.script @@ -0,0 +1,4 @@ +sr_tunnel_add_del name sr2 src ::a:1:1:0:6 dst ff15::2/128 next ::a:1:1:0:f next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean +sr_tunnel_add_del name sr3 src ::b:1:1:0:6 dst ff16::2/128 next ::a:1:1:0:13 next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean +sr_policy_add_del name pol1 tunnel sr2 tunnel sr3 +sr_multicast_map_add_del address ff15::1 sr-policy pol1 diff --git a/src/vnet/sr/rfc_draft_05.txt b/src/vnet/sr/rfc_draft_05.txt new file mode 100644 index 00000000000..bc41c181ea4 --- /dev/null +++ b/src/vnet/sr/rfc_draft_05.txt @@ -0,0 +1,1265 @@ +Network Working Group S. Previdi, Ed. +Internet-Draft C. Filsfils +Intended status: Standards Track Cisco Systems, Inc. +Expires: June 12, 2015 B. Field + Comcast + I. Leung + Rogers Communications + December 9, 2014 + + + IPv6 Segment Routing Header (SRH) + draft-previdi-6man-segment-routing-header-05 + +Abstract + + Segment Routing (SR) allows a node to steer a packet through a + controlled set of instructions, called segments, by prepending a SR + header to the packet. A segment can represent any instruction, + topological or service-based. SR allows to enforce a flow through + any path (topological, or application/service based) while + maintaining per-flow state only at the ingress node to the SR domain. + + Segment Routing can be applied to the IPv6 data plane with the + addition of a new type of Routing Extension Header. This draft + describes the Segment Routing Extension Header Type and how it is + used by SR capable nodes. 
+ +Requirements Language + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +Status of This Memo + + This Internet-Draft is submitted in full conformance with the + provisions of BCP 78 and BCP 79. + + Internet-Drafts are working documents of the Internet Engineering + Task Force (IETF). Note that other groups may also distribute + working documents as Internet-Drafts. The list of current Internet- + Drafts is at http://datatracker.ietf.org/drafts/current/. + + Internet-Drafts are draft documents valid for a maximum of six months + and may be updated, replaced, or obsoleted by other documents at any + time. It is inappropriate to use Internet-Drafts as reference + material or to cite them other than as "work in progress." + + + + +Previdi, et al. Expires June 12, 2015 [Page 1] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + This Internet-Draft will expire on June 12, 2015. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + +Table of Contents + + 1. Structure of this document . . . . . . . . . . . . . . . . . 3 + 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3 + 3. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3 + 3.1. 
Data Planes supporting Segment Routing . . . . . . . . . 4 + 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4 + 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7 + 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8 + 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9 + 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10 + 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10 + 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10 + 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11 + 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11 + 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11 + 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . . . . 15 + 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15 + 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15 + 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16 + 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16 + 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18 + 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18 + 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18 + 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18 + 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18 + 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19 + 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21 + 10. Manageability Considerations . . . . . . . . . . . . . . . . 21 + 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21 + 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21 + + + +Previdi, et al. Expires June 12, 2015 [Page 2] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21 + 14. References . . . . . . . . . . . . . . . . . . . . 
. . . . . 21 + 14.1. Normative References . . . . . . . . . . . . . . . . . . 21 + 14.2. Informative References . . . . . . . . . . . . . . . . . 21 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22 + +1. Structure of this document + + Section 3 gives an introduction on SR for IPv6 networks. + + Section 4 describes the Segment Routing abstract model. + + Section 5 defines the Segment Routing Header (SRH) allowing + instantiation of SR over IPv6 dataplane. + + Section 6 details the procedures of the Segment Routing Header. + +2. Segment Routing Documents + + Segment Routing terminology is defined in + [I-D.filsfils-spring-segment-routing]. + + Segment Routing use cases are described in + [I-D.filsfils-spring-segment-routing-use-cases]. + + Segment Routing IPv6 use cases are described in + [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing protocol extensions are defined in + [I-D.ietf-isis-segment-routing-extensions], and + [I-D.psenak-ospf-segment-routing-ospfv3-extension]. + + The security mechanisms of the Segment Routing Header (SRH) are + described in [I-D.vyncke-6man-segment-routing-security]. + +3. Introduction + + Segment Routing (SR), defined in + [I-D.filsfils-spring-segment-routing], allows a node to steer a + packet through a controlled set of instructions, called segments, by + prepending a SR header to the packet. A segment can represent any + instruction, topological or service-based. SR allows to enforce a + flow through any path (topological or service/application based) + while maintaining per-flow state only at the ingress node to the SR + domain. Segments can be derived from different components: IGP, BGP, + Services, Contexts, Locators, etc. The list of segment forming the + path is called the Segment List and is encoded in the packet header. + + + + +Previdi, et al. 
Expires June 12, 2015 [Page 3] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + SR allows the use of strict and loose source based routing paradigms + without requiring any additional signaling protocols in the + infrastructure hence delivering an excellent scalability property. + + The source based routing model described in + [I-D.filsfils-spring-segment-routing] is inherited from the ones + proposed by [RFC1940] and [RFC2460]. The source based routing model + offers the support for explicit routing capability. + +3.1. Data Planes supporting Segment Routing + + Segment Routing (SR) can be instantiated over MPLS + ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document + defines its instantiation over the IPv6 data-plane based on the use- + cases defined in [I-D.ietf-spring-ipv6-use-cases]. + + Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS + data-plane is not used or, when combined with SR-MPLS, in networks + where MPLS is used in the core and IPv6 is used at the edge (home + networks, datacenters). + + This document defines a new type of Routing Header (originally + defined in [RFC2460]) called the Segment Routing Header (SRH) in + order to convey the Segment List in the packet header as defined in + [I-D.filsfils-spring-segment-routing]. Mechanisms through which + segments are known and advertised are outside the scope of this + document. + +3.2. Illustration + + In the context of Figure 1 where all the links have the same IGP + cost, let us assume that a packet P enters the SR domain at an + ingress edge router I and that the operator requests the following + requirements for packet P: + + The local service S offered by node B must be applied to packet P. + + The links AB and CE cannot be used to transport the packet P. + + Any node N along the journey of the packet should be able to + determine where the packet P entered the SR domain and where it + will exit. 
The intermediate node should be able to determine the + paths from the ingress edge router to itself, and from itself to + the egress edge router. + + Per-flow State for packet P should only be created at the ingress + edge router. + + + + +Previdi, et al. Expires June 12, 2015 [Page 4] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + The operator can forbid, for security reasons, anyone outside the + operator domain to exploit its intra-domain SR capabilities. + + I---A---B---C---E + \ | / \ / + \ | / F + \|/ + D + + Figure 1: An illustration of SR properties + + All these properties may be realized by instructing the ingress SR + edge router I to push the following abstract SR header on the packet + P. + + +---------------------------------------------------------------+ + | | | + | Abstract SR Header | | + | | | + | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | + | ^ | | Packet | + | | | | P | + | +---------------------+ | | + | | | + +---------------------------------------------------------------+ + + Figure 2: Packet P at node I + + The abstract SR header contains a source route encoded as a list of + segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification + of the ingress and egress SR edge routers (segments SI and SE). + + A segment identifies a topological instruction or a service + instruction. A segment can either be global or local. The + instruction associated with a global segment is recognized and + executed by any SR-capable node in the domain. The instruction + associated with a local segment is only supported by the specific + node that originates it. + + Let us assume some IGP (i.e.: ISIS and OSPF) extensions to define a + "Node Segment" as a global instruction within the IGP domain to + forward a packet along the shortest path to the specified node. 
Let + us further assume that within the SR domain illustrated in Figure 1, + segments SI, SD, SB, SE and SF respectively identify IGP node + segments to I, D, B, E and F. + + Let us assume that node B identifies its local service S with local + segment SS. + + + +Previdi, et al. Expires June 12, 2015 [Page 5] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + With all of this in mind, let us describe the journey of the packet + P. + + The packet P reaches the ingress SR edge router. I pushes the SR + header illustrated in Figure 2 and sets the pointer to the first + segment of the list (SD). + + SD is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to D. + + Once at D, the pointer is incremented and the next segment is + executed (SB). + + SB is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to B. + + Once at B, the pointer is incremented and the next segment is + executed (SS). + + SS is an instruction only recognized by node B which causes the + packet to receive service S. + + Once the service is applied, the next segment is executed (SF) which + causes the packet to be forwarded along the shortest path to F. + + Once at F, the pointer is incremented and the next segment is + executed (SE). + + SE is an instruction recognized by all the nodes in the SR domain + which causes the packet to be forwarded along the shortest path to E. + + E then removes the SR header and the packet continues its journey + outside the SR domain. + + All of the requirements are met. + + First, the packet P has not used links AB and CE: the shortest-path + from I to D is I-A-D, the shortest-path from D to B is D-B, the + shortest-path from B to F is B-C-F and the shortest-path from F to E + is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E + and the links AB and CE have been avoided. 
+ + Second, the service S supported by B has been applied on packet P. + + Third, any node along the packet path is able to identify the service + and topological journey of the packet within the SR domain. For + example, node C receives the packet illustrated in Figure 3 and hence + is able to infer where the packet entered the SR domain (SI), how it + + + +Previdi, et al. Expires June 12, 2015 [Page 6] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + got up to itself {SD, SB, SS, SE}, where it will exit the SR domain + (SE) and how it will do so {SF, SE}. + + +---------------------------------------------------------------+ + | | | + | SR Header | | + | | | + | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported | + | ^ | | Packet | + | | | | P | + | +--------+ | | + | | | + +---------------------------------------------------------------+ + + Figure 3: Packet P at node C + + Fourth, only node I maintains per-flow state for packet P. The + entire program of topological and service instructions to be executed + by the SR domain on packet P is encoded by the ingress edge router I + in the SR header in the form of a list of segments where each segment + identifies a specific instruction. No further per-flow state is + required along the packet path. The per-flow state is in the SR + header and travels with the packet. Intermediate nodes only hold + states related to the IGP global node segments and the local IGP + adjacency segments. These segments are not per-flow specific and + hence scale very well. Typically, an intermediate node would + maintain in the order of 100's to 1000's global node segments and in + the order of 10's to 100 of local adjacency segments. Typically the + SR IGP forwarding table is expected to be much less than 10000 + entries. + + Fifth, the SR header is inserted at the entrance to the domain and + removed at the exit of the operator domain. 
For security reasons, + the operator can forbid anyone outside its domain to use its intra- + domain SR capability. + +4. Abstract Routing Model + + At the entrance of the SR domain, the ingress SR edge router pushes + the SR header on top of the packet. At the exit of the SR domain, + the egress SR edge router removes the SR header. + + The abstract SR header contains an ordered list of segments, a + pointer identifying the next segment to process and the + identifications of the ingress and egress SR edge routers on the path + of this packet. The pointer identifies the segment that MUST be used + by the receiving router to process the packet. This segment is + called the active segment. + + + +Previdi, et al. Expires June 12, 2015 [Page 7] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + A property of SR is that the entire source route of the packet, + including the identity of the ingress and egress edge routers is + always available with the packet. This allows for interesting + accounting and service applications. + + We define three SR-header operations: + + "PUSH": an SR header is pushed on an IP packet, or additional + segments are added at the head of the segment list. The pointer + is moved to the first entry of the added segments. + + "NEXT": the active segment is completed, the pointer is moved to + the next segment in the list. + + "CONTINUE": the active segment is not completed, the pointer is + left unchanged. + + In the future, other SR-header management operations may be defined. + + As the packet travels through the SR domain, the pointer is + incremented through the ordered list of segments and the source route + encoded by the SR ingress edge node is executed. + + A node processes an incoming packet according to the instruction + associated with the active segment. 
+ + Any instruction might be associated with a segment: for example, an + intra-domain topological strict or loose forwarding instruction, a + service instruction, etc. + + At minimum, a segment instruction must define two elements: the + identity of the next-hop to forward the packet to (this could be the + same node or a context within the node) and which SR-header + management operation to execute. + + Each segment is known in the network through a Segment Identifier + (SID). The terms "segment" and "SID" are interchangeable. + +4.1. Segment Routing Global Block (SRGB) + + In the SR abstract model, a segment is identified by a Segment + Routing Identifier (SID). The SR abstract model doesn't mandate a + specific format for the SID (IPv6 address or other formats). + + In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the + SRGB is materialized by the global IPv6 address space which + represents the set of IPv6 routable addresses in the SR domain. The + following rules apply: + + + +Previdi, et al. Expires June 12, 2015 [Page 8] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + o Each node of the SR domain MUST be configured with the Segment + Routing Global Block (SRGB). + + o All global segments must be allocated from the SRGB. Any SR + capable node MUST be able to process any global segment advertised + by any other node within the SR domain. + + o Any segment outside the SRGB has a local significance and is + called a "local segment". An SR-capable node MUST be able to + process the local segments it originates. An SR-capable node MUST + NOT support the instruction associated with a local segment + originated by a remote node. + +4.2. Traffic Engineering with SR + + An SR Traffic Engineering policy is composed of two elements: a flow + classification and a segment-list to prepend on the packets of the + flow. 
+ + In SR, this per-flow state only exists at the ingress edge node where + the policy is defined and the SR header is pushed. + + It is outside the scope of the document to define the process that + leads to the instantiation at a node N of an SR Traffic Engineering + policy. + + [I-D.filsfils-spring-segment-routing-use-cases] illustrates various + alternatives: + + N is deriving this policy automatically (e.g. FRR). + + N is provisioned explicitly by the operator. + + N is provisioned by a controller or server (e.g.: SDN Controller). + + N is provisioned by the operator with a high-level policy which is + mapped into a path thanks to a local CSPF-based computation (e.g. + affinity/SRLG exclusion). + + N could also be provisioned by other means. + + [I-D.filsfils-spring-segment-routing-use-cases] explains why the + majority of use-cases require very short segment-lists, hence + minimizing the performance impact, if any, of inserting and + transporting the segment list. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 9] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + A SDN controller, which desires to instantiate at node N an SR + Traffic Engineering policy, collects the SR capability of node N such + as to ensure that the policy meets its capability. + +4.3. Segment Routing Database + + The Segment routing Database (SRDB) is a set of entries where each + entry is identified by a SID. The instruction associated with each + entry at least defines the identity of the next-hop to which the + packet should be forwarded and what operation should be performed on + the SR header (PUSH, CONTINUE, NEXT). + + +---------+-----------+---------------------------------+ + | Segment | Next-Hop | SR Header operation | + +---------+-----------+---------------------------------+ + | Sk | M | CONTINUE | + | Sj | N | NEXT | + | Sl | NAT Srvc | NEXT | + | Sm | FW srvc | NEXT | + | Sn | Q | NEXT | + | etc. | etc. | etc. 
| + +---------+-----------+---------------------------------+ + + Figure 4: SR Database + + Each SR-capable node maintains its local SRDB. SRDB entries can + either derive from local policy or from protocol segment + advertisement. + +5. IPv6 Instantiation of Segment Routing + +5.1. Segment Identifiers (SIDs) and SRGB + + Segment Routing, as described in + [I-D.filsfils-spring-segment-routing], defines Node-SID and + Adjacency-SID. When SR is used over IPv6 data-plane the following + applies. + + The SRGB is the global IPv6 address space which represents the set of + IPv6 routable addresses in the SR domain. + + Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable + addresses). Adjacency-SIDs are IPv6 addresses which may not be part + of the global IPv6 address space. + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 10] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +5.1.1. Node-SID + + The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6 + prefix that the operator configured on the node and that is used as + the node identifier. Typically, in case of a router, this is the + IPv6 address of the node loopback interface. Therefore, SR-IPv6 does + not require any additional SID advertisement for the Node Segment. + The Node-SID is in fact the IPv6 address of the node. + +5.1.2. Adjacency-SID + + In the SR architecture defined in + [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID) + identifies a given interface and may be local or global (depending on + how it is advertised). A node may advertise one (or more) Adj-SIDs + allocated to a given interface so to force the forwarding of the + packet (when received with that particular Adj-SID) into the + interface regardless the routing entry for the packet destination. + The semantic of the Adj-SID is: + + Send out the packet to the interface this prefix is allocated to. 
+ + When SR is applied to IPv6, any SID is in a global IPv6 address and + therefore, an Adj-SID has a global significance (i.e.: the IPv6 + address representing the SID is a global address). In other words, a + node that advertises the Adj-SID in the form of a global IPv6 address + representing the link/adjacency the packet has to be forwarded to, + will apply to the Adj-SID a global significance. + + Advertisement of Adj-SID may be done using multiple mechanisms among + which the ones described in ISIS and OSPF protocol extensions: + [I-D.ietf-isis-segment-routing-extensions] and + [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction + between local and global significance of the Adj-SID is given in the + encoding of the Adj-SID advertisement. + +5.2. Segment Routing Extension Header (SRH) + + A new type of the Routing Header (originally defined in [RFC2460]) is + defined: the Segment Routing Header (SRH) which has a new Routing + Type, (suggested value 4) to be assigned by IANA. + + As an example, if an explicit path is to be constructed across a core + network running ISIS or OSPF, the segment list will contain SIDs + representing the nodes across the path (loose or strict) which, + usually, are the IPv6 loopback interface address of each node. If + the path is across service or application entities, the segment list + + + + +Previdi, et al. Expires June 12, 2015 [Page 11] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + contains the IPv6 addresses of these services or application + instances. 
+ + The Segment Routing Header (SRH) is defined as follows: + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next Header | Hdr Ext Len | Routing Type | Segments Left | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | First Segment | Flags | HMAC Key ID | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[0] (128 bits ipv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + ... + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Segment List[n] (128 bits ipv6 address) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[0] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[1] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Policy List[2] (optional) | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | | + | | + | HMAC (256 bits) | + + + +Previdi, et al. Expires June 12, 2015 [Page 12] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + | (optional) | + | | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + where: + + o Next Header: 8-bit selector. Identifies the type of header + immediately following the SRH. + + o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + header in 8-octet units, not including the first 8 octets. + + o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + + o Segments Left. Defined in [RFC2460], it contains the index, in + the Segment List, of the next segment to inspect. Segments Left + is decremented at each segment and it is used as an index in the + segment list. 
+ + o First Segment: offset in the SRH, not including the first 8 octets + and expressed in 16-octet units, pointing to the last element of + the segment list, which is in fact the first segment of the + segment routing path. + + o Flags: 16 bits of flags. Following flags are defined: + + 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|P|R|R| Policy Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + C-flag: Clean-up flag. Set when the SRH has to be removed from + the packet when packet reaches the last segment. + + P-flag: Protected flag. Set when the packet has been rerouted + through FRR mechanism by a SR endpoint node. See Section 6.3 + for more details. + + R-flags. Reserved and for future use. + + Policy Flags. Define the type of the IPv6 addresses encoded + into the Policy List (see below). The following have been + defined: + + + + + +Previdi, et al. Expires June 12, 2015 [Page 13] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Bits 4-6: determine the type of the first element after the + segment list. + + Bits 7-9: determine the type of the second element. + + Bits 10-12: determine the type of the third element. + + Bits 13-15: determine the type of the fourth element. + + The following values are used for the type: + + 0x0: Not present. If value is set to 0x0, it means the + element represented by these bits is not present. + + 0x1: SR Ingress. + + 0x2: SR Egress. + + 0x3: Original Source Address. + + o HMAC Key ID and HMAC field, and their use are defined in + [I-D.vyncke-6man-segment-routing-security]. + + o Segment List[n]: 128 bit IPv6 addresses representing the nth + segment in the Segment List. The Segment List is encoded starting + from the last segment of the path. I.e., the first element of the + segment list (Segment List [0]) contains the last segment of the + path while the last segment of the Segment List (Segment List[n]) + contains the first segment of the path. 
The index contained in + "Segments Left" identifies the current active segment. + + o Policy List. Optional addresses representing specific nodes in + the SR path such as: + + SR Ingress: a 128 bit generic identifier representing the + ingress in the SR domain (i.e.: it needs not to be a valid IPv6 + address). + + SR Egress: a 128 bit generic identifier representing the egress + in the SR domain (i.e.: it needs not to be a valid IPv6 + address). + + Original Source Address: IPv6 address originally present in the + SA field of the packet. + + The segments in the Policy List are encoded after the segment list + and they are optional. If none are in the SRH, all bits of the + Policy List Flags MUST be set to 0x0. + + + +Previdi, et al. Expires June 12, 2015 [Page 14] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +5.2.1. SRH and RFC2460 behavior + + The SRH being a new type of the Routing Header, it also has the same + properties: + + SHOULD only appear once in the packet. + + Only the router whose address is in the DA field of the packet + header MUST inspect the SRH. + + Therefore, Segment Routing in IPv6 networks implies that the segment + identifier (i.e.: the IPv6 address of the segment) is moved into the + DA of the packet. + + The DA of the packet changes at each segment termination/completion + and therefore the original DA of the packet MUST be encoded as the + last segment of the path. + + As illustrated in Section 3.2, nodes that are within the path of a + segment will forward packets based on the DA of the packet without + inspecting the SRH. This ensures full interoperability between SR- + capable and non-SR-capable nodes. + +6. SRH Procedures + + In this section we describe the different procedures on the SRH. + +6.1. Segment Routing Operations + + When Segment Routing is instantiated over the IPv6 data plane the + following applies: + + o The segment list is encoded in the SRH. 
+ + o The active segment is in the destination address of the packet. + + o The Segment Routing CONTINUE operation (as described in + [I-D.filsfils-spring-segment-routing]) is implemented as a + regular/plain IPv6 operation consisting of DA based forwarding. + + o The NEXT operation is implemented through the update of the DA + with the value represented by the Next Segment field in the SRH. + + o The PUSH operation is implemented through the insertion of the SRH + or the insertion of additional segments in the SRH segment list. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 15] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +6.2. Segment Routing Node Functions + + SR packets are forwarded to segment endpoints (i.e.: nodes whose + address is in the DA field of the packet). The segment endpoint, + when receiving a SR packet destined to itself, does: + + o Inspect the SRH. + + o Determine the next active segment. + + o Update the Segments Left field (or, if requested, remove the SRH + from the packet). + + o Update the DA. + + o Send the packet to the next segment. + + The procedures applied to the SRH are related to the node function. + The following node functions are defined: + + Ingress SR Node. + + Transit Non-SR Node. + + Transit SR Intra Segment Node. + + SR Endpoint Node. + +6.2.1. Ingress SR Node + + Ingress Node can be a router at the edge of the SR domain or a SR- + capable host. The ingress SR node may obtain the segment list by + either: + + Local path computation. + + Local configuration. + + Interaction with an SDN controller delivering the path as a + complete SRH. + + Any other mechanism (mechanisms through which the path is acquired + are outside the scope of this document). + + When creating the SRH (either at ingress node or in the SDN + controller) the following is done: + + Next Header and Hdr Ext Len fields are set according to [RFC2460]. + + + +Previdi, et al. 
Expires June 12, 2015 [Page 16] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Routing Type field is set as TBD (SRH). + + The Segment List is built with the FIRST segment of the path + encoded in the LAST element of the Segment List. Subsequent + segments are encoded on top of the first segment. Finally, the + LAST segment of the path is encoded in the FIRST element of the + Segment List. In other words, the Segment List is encoded in the + reverse order of the path. + + The original DA of the packet is encoded as the last segment of + the path (encoded in the first element of the Segment List). + + the DA of the packet is set with the value of the first segment + (found in the last element of the segment list). + + the Segments Left field is set to n-1 where n is the number of + elements in the Segment List. + + The packet is sent out towards the first segment (i.e.: + represented in the packet DA). + +6.2.1.1. Security at Ingress + + The procedures related to the Segment Routing security are detailed + in [I-D.vyncke-6man-segment-routing-security]. + + In the case where the SR domain boundaries are not under control of + the network operator (e.g.: when the SR domain edge is in a home + network), it is important to authenticate and validate the content of + any SRH being received by the network operator. In such case, the + security procedure described in + [I-D.vyncke-6man-segment-routing-security] is to be used. + + The ingress node (e.g.: the host in the home network) requests the + SRH from a control system (e.g.: an SDN controller) which delivers + the SRH with its HMAC signature on it. + + Then, the home network host can send out SR packets (with an SRH on + it) that will be validated at the ingress of the network operator + infrastructure. 
+ + The ingress node of the network operator infrastructure is + configured in order to validate the incoming SRH HMACs in order to + allow only packets having correct SRH according to their SA/DA + addresses. + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 17] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + +6.2.2. Transit Non-SR Capable Node + + SR is interoperable with plain IPv6 forwarding. Any non SR-capable + node will forward SR packets solely based on the DA. There's no SRH + inspection. This ensures full interoperability between SR and non-SR + nodes. + +6.2.3. SR Intra Segment Transit Node + + Only the node whose address is in DA inspects and processes the SRH + (according to [RFC2460]). An intra segment transit node is not in + the DA and its forwarding is based on DA and its SR-IPv6 FIB. + +6.2.4. SR Segment Endpoint Node + + The SR segment endpoint node is the node whose address is in the DA. + The segment endpoint node inspects the SRH and does: + + 1. IF DA = myself (segment endpoint) + 2. IF Segments Left > 0 THEN + decrement Segments Left + update DA with Segment List[Segments Left] + 3. ELSE IF Segment List[Segments Left] <> DA THEN + update DA with Segment List[Segments Left] + IF Clean-up bit is set THEN remove the SRH + 4. ELSE give the packet to next PID (application) + End of processing. + 5. Forward the packet out + +6.3. FRR Flag Settings + + A node supporting SR and doing Fast Reroute (as described in + [I-D.filsfils-spring-segment-routing-use-cases]), when rerouting + packets through FRR mechanisms, SHOULD inspect the rerouted packet + header and look for the SRH. If the SRH is present, the rerouting + node SHOULD set the Protected bit on all rerouted packets. + +7. SR and Tunneling + + Encapsulation can be realized in two different ways with SR-IPv6: + + Outer encapsulation. + + SRH with SA/DA original addresses. 
+ + Outer encapsulation tunneling is the traditional method where an + additional IPv6 header is prepended to the packet. The original IPv6 + header being encapsulated, everything is preserved and the packet is + + + +Previdi, et al. Expires June 12, 2015 [Page 18] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + switched/routed according to the outer header (that could contain a + SRH). + + SRH allows encoding both original SA and DA, hence an operator may + decide to change the SA/DA at ingress and restore them at egress. + This can be achieved without outer encapsulation, by changing SA/DA + and encoding the original SA in the Policy List and in the original + DA in the Segment List. + +8. Example Use Case + + A more detailed description of use cases are available in + [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6 + example is illustrated. + + In the topology described in Figure 6 it is assumed an end-to-end SR + deployment. Therefore SR is supported by all nodes from A to J. + + Home Network | Backbone | Datacenter + | | + | +---+ +---+ +---+ | +---+ | + +---|---| C |---| D |---| E |---|---| I |---| + | | +---+ +---+ +---+ | +---+ | + | | | | | | | | +---+ + +---+ +---+ | | | | | | |--| X | + | A |---| B | | +---+ +---+ +---+ | +---+ | +---+ + +---+ +---+ | | F |---| G |---| H |---|---| J |---| + | +---+ +---+ +---+ | +---+ | + | | + | +-----------+ + | SDN | + | Orch/Ctlr | + +-----------+ + + Figure 6: Sample SR topology + + The following workflow applies to packets sent by host A and destined + to server X. + + + + + + + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 19] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + . Host A sends a request for a path to server X to the SDN + controller or orchestration system. + + . The SDN controller/orchestrator builds a SRH with: + . Segment List: C, F, J, X + . 
HMAC + that satisfies the requirements expressed in the request + by host A and based on policies applicable to host A. + + . Host A receives the SRH and inserts it into the packet. + The packet has now: + . SA: A + . DA: C + . SRH with + . SL: X, J, F, C + . Segments Left: 3 (i.e.: Segment List size - 1) + . PL: C (ingress), J (egress) + Note that X is the last segment and C is the + first segment (i.e.: the SL is encoded in the reverse + path order). + . HMAC + + . When packet arrives in C (first segment), C does: + . Validate the HMAC of the SRH. + . Decrement Segments Left by one: 2 + . Update the DA with the next segment found in + Segment List[2]. DA is set to F. + . Forward the packet to F. + + . When packet arrives in F (second segment), F does: + . Decrement Segments Left by one: 1 + . Update the DA with the next segment found in + Segment List[1]. DA is set to J. + . Forward the packet to J. + + . Packet travels across G and H nodes which do plain + IPv6 forwarding based on DA. No inspection of SRH needs + to be done in these nodes. However, any SR capable node + is allowed to set the Protected bit in case of FRR + protection. + + . When packet arrives in J (third segment), J does: + . Decrement Segments Left by one: 0 + . Update the DA with the next segment found in + Segment List[0]. DA is set to X. + . If the cleanup bit is set, then node J will strip out + the SRH from the packet. + . Forward the packet to X. + + + +Previdi, et al. Expires June 12, 2015 [Page 20] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + The packet arrives in the server that may or may not support SR. The + return traffic, from server to host, may be sent using the same + procedures. + +9. IANA Considerations + + TBD + +10. Manageability Considerations + + TBD + +11. Security Considerations + + Security mechanisms applied to Segment Routing over IPv6 networks are + detailed in [I-D.vyncke-6man-segment-routing-security]. + +12. 
Contributors + + The authors would like to thank Dave Barach, John Leddy, John + Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian + Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun + and Fred Baker for their contribution to this document. + +13. Acknowledgements + + TBD + +14. References + +14.1. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + +14.2. Informative References + + [I-D.filsfils-spring-segment-routing] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, + "Segment Routing Architecture", draft-filsfils-spring- + segment-routing-04 (work in progress), July 2014. + + + + + +Previdi, et al. Expires June 12, 2015 [Page 21] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + [I-D.filsfils-spring-segment-routing-mpls] + Filsfils, C., Previdi, S., Bashandy, A., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe, + "Segment Routing with MPLS data plane", draft-filsfils- + spring-segment-routing-mpls-03 (work in progress), August + 2014. + + [I-D.filsfils-spring-segment-routing-use-cases] + Filsfils, C., Francois, P., Previdi, S., Decraene, B., + Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R., + Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E. + Crabbe, "Segment Routing Use Cases", draft-filsfils- + spring-segment-routing-use-cases-01 (work in progress), + October 2014. + + [I-D.ietf-isis-segment-routing-extensions] + Previdi, S., Filsfils, C., Bashandy, A., Gredler, H., + Litkowski, S., Decraene, B., and J. 
Tantsura, "IS-IS + Extensions for Segment Routing", draft-ietf-isis-segment- + routing-extensions-03 (work in progress), October 2014. + + [I-D.ietf-spring-ipv6-use-cases] + Brzozowski, J., Leddy, J., Leung, I., Previdi, S., + Townsley, W., Martin, C., Filsfils, C., and R. Maglione, + "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use- + cases-03 (work in progress), November 2014. + + [I-D.psenak-ospf-segment-routing-ospfv3-extension] + Psenak, P., Previdi, S., Filsfils, C., Gredler, H., + Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3 + Extensions for Segment Routing", draft-psenak-ospf- + segment-routing-ospfv3-extension-02 (work in progress), + July 2014. + + [I-D.vyncke-6man-segment-routing-security] + Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header + (SRH) Security Considerations", July 2014. + + [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D. + Zappala, "Source Demand Routing: Packet Format and + Forwarding Specification (Version 1)", RFC 1940, May 1996. + +Authors' Addresses + + + + + + + +Previdi, et al. Expires June 12, 2015 [Page 22] + +Internet-Draft IPv6 Segment Routing Header (SRH) December 2014 + + + Stefano Previdi (editor) + Cisco Systems, Inc. + Via Del Serafico, 200 + Rome 00142 + Italy + + Email: sprevidi@cisco.com + + + Clarence Filsfils + Cisco Systems, Inc. + Brussels + BE + + Email: cfilsfil@cisco.com + + + Brian Field + Comcast + 4100 East Dry Creek Road + Centennial, CO 80122 + US + + Email: Brian_Field@cable.comcast.com + + + Ida Leung + Rogers Communications + 8200 Dixie Road + Brampton, ON L6T 0C1 + CA + + Email: Ida.Leung@rci.rogers.com diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c new file mode 100644 index 00000000000..5d0275d992a --- /dev/null +++ b/src/vnet/sr/sr.c @@ -0,0 +1,3333 @@ +/* + * sr.c: ipv6 segment routing + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief Segment Routing main functions + * + */ +#include <vnet/vnet.h> +#include <vnet/sr/sr.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/dpo/dpo.h> + +#include <openssl/hmac.h> + +ip6_sr_main_t sr_main; +static vlib_node_registration_t sr_local_node; + +/** + * @brief Dynamically added SR DPO type + */ +static dpo_type_t sr_dpo_type; + +/** + * @brief Use passed HMAC key in ip6_sr_header_t in OpenSSL HMAC routines + * + * @param sm ip6_sr_main_t * + * @param ip ip6_header_t * + * @param sr ip6_sr_header_t * + */ +void +sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) +{ + u32 key_index; + static u8 *keybuf; + u8 *copy_target; + int first_segment; + ip6_address_t *addrp; + int i; + ip6_sr_hmac_key_t *hmac_key; + u32 sig_len; + + key_index = sr->hmac_key; + + /* No signature? Pass... */ + if (key_index == 0) + return; + + /* We don't know about this key? Fail... 
*/ + if (key_index >= vec_len (sm->hmac_keys)) + return; + + hmac_key = sm->hmac_keys + key_index; + + vec_reset_length (keybuf); + + /* pkt ip6 src address */ + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); + + /* first segment */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->first_segment; + + /* octet w/ bit 0 = "clean" flag */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] + = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) + ? 0x80 : 0; + + /* hmac key id */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->hmac_key; + + first_segment = sr->first_segment; + + addrp = sr->segments; + + /* segments */ + for (i = 0; i <= first_segment; i++) + { + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + addrp++; + + HMAC_CTX_init (sm->hmac_ctx); + if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, + vec_len (hmac_key->shared_secret), sm->md)) + clib_warning ("barf1"); + if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) + clib_warning ("barf2"); + if (!HMAC_Final (sm->hmac_ctx, (unsigned char *) addrp, &sig_len)) + clib_warning ("barf3"); + HMAC_CTX_cleanup (sm->hmac_ctx); +} + +/** + * @brief Format function for decoding various SR flags + * + * @param s u8 * - formatted string + * @param args va_list * - u16 flags + * + * @return formatted output string u8 * + */ +u8 * +format_ip6_sr_header_flags (u8 * s, va_list * args) +{ + u16 flags = (u16) va_arg (*args, int); + u8 pl_flag; + int bswap_needed = va_arg (*args, int); + int i; + + if (bswap_needed) + flags = clib_host_to_net_u16 (flags); + + if (flags & IP6_SR_HEADER_FLAG_CLEANUP) + s = format (s, "cleanup "); + + if (flags & IP6_SR_HEADER_FLAG_PROTECTED) + s = format (s, "reroute "); + + s = format (s, "pl: "); + for (i = 1; i <= 4; i++) + { + pl_flag = ip6_sr_policy_list_flags 
(flags, i); + s = format (s, "[%d] ", i); + + switch (pl_flag) + { + case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT: + s = format (s, "NotPr "); + break; + case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE: + s = format (s, "InPE "); + break; + case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE: + s = format (s, "EgPE "); + break; + + case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR: + s = format (s, "OrgSrc "); + break; + } + } + return s; +} + +/** + * @brief Format function for decoding ip6_sr_header_t + * + * @param s u8 * - formatted string + * @param args va_list * - ip6_sr_header_t + * + * @return formatted output string u8 * + */ +u8 * +format_ip6_sr_header (u8 * s, va_list * args) +{ + ip6_sr_header_t *h = va_arg (*args, ip6_sr_header_t *); + ip6_address_t placeholder_addr = + { {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, + 254, 254} + }; + int print_hmac = va_arg (*args, int); + int i, pl_index, max_segs; + int flags_host_byte_order = clib_net_to_host_u16 (h->flags); + + s = format (s, "next proto %d, len %d, type %d", + h->protocol, (h->length << 3) + 8, h->type); + s = format (s, "\n segs left %d, first_segment %d, hmac key %d", + h->segments_left, h->first_segment, h->hmac_key); + s = format (s, "\n flags %U", format_ip6_sr_header_flags, + flags_host_byte_order, 0 /* bswap needed */ ); + + /* + * Header length is in 8-byte units (minus one), so + * divide by 2 to ascertain the number of ip6 addresses in the + * segment list + */ + max_segs = (h->length >> 1); + + if (!print_hmac && h->hmac_key) + max_segs -= 2; + + s = format (s, "\n Segments (in processing order):"); + + for (i = h->first_segment; i >= 1; i--) + s = format (s, "\n %U", format_ip6_address, h->segments + i); + if (ip6_address_is_equal (&placeholder_addr, h->segments)) + s = format (s, "\n (empty placeholder)"); + else + s = format (s, "\n %U", format_ip6_address, h->segments); + + s = format (s, "\n Policy List:"); + + pl_index = 1; /* to match the RFC text */ + for (i = 
(h->first_segment + 1); i < max_segs; i++, pl_index++) + { + char *tag; + char *tags[] = { " ", "InPE: ", "EgPE: ", "OrgSrc: " }; + + tag = tags[0]; + if (pl_index >= 1 && pl_index <= 4) + { + int this_pl_flag = ip6_sr_policy_list_flags + (flags_host_byte_order, pl_index); + tag = tags[this_pl_flag]; + } + + s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i); + } + + return s; +} + +/** + * @brief Format function for decoding ip6_sr_header_t with length + * + * @param s u8 * - formatted string + * @param args va_list * - ip6_header_t + ip6_sr_header_t + * + * @return formatted output string u8 * + */ +u8 * +format_ip6_sr_header_with_length (u8 * s, va_list * args) +{ + ip6_header_t *h = va_arg (*args, ip6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + uword header_bytes; + + header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t); + if (max_header_bytes != 0 && header_bytes > max_header_bytes) + return format (s, "ip6_sr header truncated"); + + s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes); + s = + format (s, "SR: %U\n", format_ip6_sr_header, (ip6_sr_header_t *) (h + 1), + 0 /* print_hmac */ , max_header_bytes); + return s; +} + +/** + * @brief Defined valid next nodes + * @note Cannot call replicate yet without DPDK +*/ +#if DPDK > 0 +#define foreach_sr_rewrite_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(SR_LOCAL, "sr-local") \ +_(SR_REPLICATE,"sr-replicate") +#else +#define foreach_sr_rewrite_next \ +_(ERROR, "error-drop") \ +_(IP6_LOOKUP, "ip6-lookup") \ +_(SR_LOCAL, "sr-local") +#endif /* DPDK */ + +/** + * @brief Struct for defined valid next nodes +*/ +typedef enum +{ +#define _(s,n) SR_REWRITE_NEXT_##s, + foreach_sr_rewrite_next +#undef _ + SR_REWRITE_N_NEXT, +} sr_rewrite_next_t; + +/** + * @brief Struct for data for SR rewrite packet trace + */ +typedef struct +{ + ip6_address_t src, dst; + u16 length; + u32 next_index; + u32 tunnel_index; + u8 sr[256]; +} sr_rewrite_trace_t; + 
+/** + * @brief Error strings for SR rewrite + */ +static char *sr_rewrite_error_strings[] = { +#define sr_error(n,s) s, +#include "sr_error.def" +#undef sr_error +}; + +/** + * @brief Struct for SR rewrite error strings + */ +typedef enum +{ +#define sr_error(n,s) SR_REWRITE_ERROR_##n, +#include "sr_error.def" +#undef sr_error + SR_REWRITE_N_ERROR, +} sr_rewrite_error_t; + + +/** + * @brief Format function for SR rewrite trace. + */ +u8 * +format_sr_rewrite_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *); + ip6_sr_main_t *sm = &sr_main; + ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); + ip6_fib_t *rx_fib, *tx_fib; + + rx_fib = ip6_fib_get (tun->rx_fib_index); + tx_fib = ip6_fib_get (tun->tx_fib_index); + + s = format + (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n" + " rx-fib-id %d tx-fib-id %d\n%U", + (t->next_index == SR_REWRITE_NEXT_SR_LOCAL) + ? 
"sr-local" : "ip6-lookup", + format_ip6_address, &t->src, + format_ip6_address, &t->dst, t->length, + rx_fib->table_id, tx_fib->table_id, + format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); + return s; +} + +/** + * @brief Main processing dual-loop for Segment Routing Rewrite + * @node sr-rewrite + * + * @param vm vlib_main_t * + * @param node vlib_node_runtime_t * + * @param from_frame vlib_frame_t * + * + * @return from_frame->n_vectors uword + */ +static uword +sr_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, + vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); + sr_local_cb = sm->sr_local_cb; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Note 2x loop disabled */ + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ip6_header_t *ip0, *ip1; + ip6_sr_header_t *sr0, *sr1; + ip6_sr_tunnel_t *t0, *t1; + u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; + u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + u16 new_l1 = 0; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* + * $$$ parse through header(s) to pick the point + * where we punch in the SR extention header + */ + t0 = + pool_elt_at_index (sm->tunnels, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + t1 = + pool_elt_at_index (sm->tunnels, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t0->rewrite)) + b0->current_data); + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t1->rewrite)) + b1->current_data); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; + vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); +#if DPDK > 0 /* Cannot call replication node yet without DPDK */ + /* add a replication node */ + if (PREDICT_FALSE (t0->policy_index != ~0)) + { + vnet_buffer (b0)->ip.save_protocol = t0->policy_index; + next0 = SR_REWRITE_NEXT_SR_REPLICATE; + sr0 = (ip6_sr_header_t *) (t0->rewrite); + goto processnext; + } +#endif /* DPDK */ + + /* + * SR-unaware service chaining case: pkt coming back from + * service has the original dst address, and will already + * have an SR header. 
If so, send it to sr-local + */ + if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) + { + vlib_buffer_advance (b0, sizeof (ip0)); + sr0 = (ip6_sr_header_t *) (ip0 + 1); + new_l0 = clib_net_to_host_u16 (ip0->payload_length); + next0 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + /* Ignoring the sr_local for now, if RH follows HBH here */ + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + } + else + { + ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + } + /* + * Copy data before the punch-in point left by the + * required amount. Assume (for the moment) that only + * the main packet header needs to be copied. + */ + clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), + ip0, len_bytes); + vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); + /* $$$ tune */ + clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); + + /* Fix the next header chain */ + sr0->protocol = next_hdr; + + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + + vec_len (t0->rewrite); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (sr0->segments, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + /* Rewrite the ip6 dst address with the first hop */ + clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, + sizeof (ip6_address_t)); + + sr_fix_hmac (sm, ip0, sr0); + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) + b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } +#if DPDK > 0 /* Cannot call replication node yet without DPDK */ + processnext: + /* add a replication node */ + if (PREDICT_FALSE (t1->policy_index != ~0)) + { + vnet_buffer (b1)->ip.save_protocol = t1->policy_index; + next1 = SR_REWRITE_NEXT_SR_REPLICATE; + sr1 = (ip6_sr_header_t *) (t1->rewrite); + goto trace00; + } +#endif /* DPDK */ + if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE)) + { + vlib_buffer_advance (b1, sizeof (ip1)); + sr1 = (ip6_sr_header_t *) (ip1 + 1); + new_l1 = clib_net_to_host_u16 (ip1->payload_length); + next1 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip1->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + /* Ignoring the sr_local for now, if RH follows HBH here */ + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + } + else + { + ip1->protocol = IPPROTO_IPV6_ROUTE; + } + /* + * Copy data before the punch-in point left by the + * required amount. Assume (for the moment) that only + * the main packet header needs to be copied. 
+ */ + clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite), + ip1, len_bytes); + vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite)); + ip1 = vlib_buffer_get_current (b1); + sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes); + clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite)); + + sr1->protocol = next_hdr; + new_l1 = clib_net_to_host_u16 (ip1->payload_length) + + vec_len (t1->rewrite); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (sr1->segments, ip1->dst_address.as_u64, + sizeof (ip6_address_t)); + /* Rewrite the ip6 dst address with the first hop */ + clib_memcpy (ip1->dst_address.as_u64, t1->first_hop.as_u64, + sizeof (ip6_address_t)); + + sr_fix_hmac (sm, ip1, sr1); + + next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) : + next1; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next1 & 0x80000000)) + { + next1 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next1 == SR_REWRITE_NEXT_ERROR)) + b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } +#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ + trace00: +#endif /* DPDK */ + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_index = t0 - sm->tunnels; + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = new_l0; + tr->next_index = next0; + if (sr0) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + tr->tunnel_index = t1 - sm->tunnels; + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); 
+ tr->length = new_l1; + tr->next_index = next1; + if (sr1) + clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); + } + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0 = 0; + ip6_sr_tunnel_t *t0; + u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; + u16 new_l0 = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + + /* + * $$$ parse through header(s) to pick the point + * where we punch in the SR extention header + */ + t0 = + pool_elt_at_index (sm->tunnels, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); +#if DPDK > 0 /* Cannot call replication node yet without DPDK */ + /* add a replication node */ + if (PREDICT_FALSE (t0->policy_index != ~0)) + { + vnet_buffer (b0)->ip.save_protocol = t0->policy_index; + next0 = SR_REWRITE_NEXT_SR_REPLICATE; + sr0 = (ip6_sr_header_t *) (t0->rewrite); + goto trace0; + } +#endif /* DPDK */ + + ASSERT (VLIB_BUFFER_PRE_DATA_SIZE + >= ((word) vec_len (t0->rewrite)) + b0->current_data); + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; + + ip0 = vlib_buffer_get_current (b0); + + /* + * SR-unaware service chaining case: pkt coming back from + * service has the original dst address, and will already + * have an SR header. 
If so, send it to sr-local + */ + if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE)) + { + vlib_buffer_advance (b0, sizeof (ip0)); + sr0 = (ip6_sr_header_t *) (ip0 + 1); + new_l0 = clib_net_to_host_u16 (ip0->payload_length); + next0 = SR_REWRITE_NEXT_SR_LOCAL; + } + else + { + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + /* Ignoring the sr_local for now, if RH follows HBH here */ + } + else + { + ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + } + /* + * Copy data before the punch-in point left by the + * required amount. Assume (for the moment) that only + * the main packet header needs to be copied. + */ + clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), + ip0, len_bytes); + vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); + /* $$$ tune */ + clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); + + /* Fix the next header chain */ + sr0->protocol = next_hdr; + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + + vec_len (t0->rewrite); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (sr0->segments, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + /* Rewrite the ip6 dst address with the first hop */ + clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64, + sizeof (ip6_address_t)); + + sr_fix_hmac (sm, ip0, sr0); + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : + next0; + + /* + * Ignore "do not rewrite" shtik in this path + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR)) + b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; + } + } +#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ + trace0: +#endif /* DPDK */ + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_index = t0 - sm->tunnels; + if (ip0) + { + memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + } + tr->length = new_l0; + tr->next_index = next0; + if (sr0) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_rewrite_node) = { + .function = sr_rewrite, + .name = "sr-rewrite", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .format_trace = format_sr_rewrite_trace, + .format_buffer = format_ip6_sr_header_with_length, + + .n_errors = SR_REWRITE_N_ERROR, + .error_strings = sr_rewrite_error_strings, + + .runtime_data_bytes = 0, + + .n_next_nodes = SR_REWRITE_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_REWRITE_NEXT_##s] = n, + foreach_sr_rewrite_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite) +/* *INDENT-ON* */ + +static int +ip6_delete_route_no_next_hop (ip6_address_t * dst_address_arg, + u32 dst_address_length, u32 rx_table_id) +{ + fib_prefix_t pfx = { + .fp_len = dst_address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = *dst_address_arg, + } + }; + + fib_table_entry_delete (fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, + rx_table_id), + &pfx, FIB_SOURCE_SR); + + return 0; +} + +/** + * @brief Find or add if not found - HMAC shared secret + * + * @param sm ip6_sr_main_t * + * @param secret u8 * + * @param indexp u32 * + * + * @return ip6_sr_hmac_key_t * + */ +static ip6_sr_hmac_key_t * +find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp) +{ + uword *p; + ip6_sr_hmac_key_t *key = 0; + int i; + + p = hash_get_mem (sm->hmac_key_by_shared_secret, secret); + + if (p) + { + key = vec_elt_at_index (sm->hmac_keys, p[0]); + if (indexp) + *indexp = p[0]; + return (key); + } + + /* Specific key ID? 
*/ + if (indexp && *indexp) + { + vec_validate (sm->hmac_keys, *indexp); + key = sm->hmac_keys + *indexp; + } + else + { + for (i = 0; i < vec_len (sm->hmac_keys); i++) + { + if (sm->hmac_keys[i].shared_secret == 0) + { + key = sm->hmac_keys + i; + goto found; + } + } + vec_validate (sm->hmac_keys, i); + key = sm->hmac_keys + i; + found: + ; + } + + key->shared_secret = vec_dup (secret); + + hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret, + key - sm->hmac_keys); + + if (indexp) + *indexp = key - sm->hmac_keys; + return (key); +} + +/** + * @brief Add or Delete a Segment Routing tunnel. + * + * @param a ip6_sr_add_del_tunnel_args_t * + * + * @return retval int + */ +int +ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a) +{ + ip6_main_t *im = &ip6_main; + ip6_sr_tunnel_key_t key; + ip6_sr_tunnel_t *t; + uword *p, *n; + ip6_sr_header_t *h = 0; + u32 header_length; + ip6_address_t *addrp, *this_address; + ip6_sr_main_t *sm = &sr_main; + u8 *key_copy; + u32 rx_fib_index, tx_fib_index; + u32 hmac_key_index_u32; + u8 hmac_key_index = 0; + ip6_sr_policy_t *pt; + int i; + dpo_id_t dpo = DPO_INVALID; + + /* Make sure that the rx FIB exists */ + p = hash_get (im->fib_index_by_table_id, a->rx_table_id); + + if (p == 0) + return -3; + + /* remember the FIB index */ + rx_fib_index = p[0]; + + /* Make sure that the supplied FIB exists */ + p = hash_get (im->fib_index_by_table_id, a->tx_table_id); + + if (p == 0) + return -4; + + /* remember the FIB index */ + tx_fib_index = p[0]; + + clib_memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src)); + clib_memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst)); + + /* When adding a tunnel: + * - If a "name" is given, it must not exist. + * - The "key" is always checked, and must not exist. + * When deleting a tunnel: + * - If the "name" is given, and it exists, then use it. + * - If the "name" is not given, use the "key". 
+ * - If the "name" and the "key" are given, then both must point to the same + * thing. + */ + + /* Lookup the key */ + p = hash_get_mem (sm->tunnel_index_by_key, &key); + + /* If the name is given, look it up */ + if (a->name) + n = hash_get_mem (sm->tunnel_index_by_name, a->name); + else + n = 0; + + /* validate key/name parameters */ + if (!a->is_del) /* adding a tunnel */ + { + if (a->name && n) /* name given & exists already */ + return -1; + if (p) /* key exists already */ + return -1; + } + else /* deleting a tunnel */ + { + if (!p) /* key doesn't exist */ + return -2; + if (a->name && !n) /* name given & it doesn't exist */ + return -2; + + if (n) /* name given & found */ + { + if (n[0] != p[0]) /* name and key do not point to the same thing */ + return -2; + } + } + + + if (a->is_del) /* delete the tunnel */ + { + hash_pair_t *hp; + + /* Delete existing tunnel */ + t = pool_elt_at_index (sm->tunnels, p[0]); + + ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width, + a->rx_table_id); + vec_free (t->rewrite); + /* Remove tunnel from any policy if associated */ + if (t->policy_index != ~0) + { + pt = pool_elt_at_index (sm->policies, t->policy_index); + for (i = 0; i < vec_len (pt->tunnel_indices); i++) + { + if (pt->tunnel_indices[i] == t - sm->tunnels) + { + vec_delete (pt->tunnel_indices, 1, i); + goto found; + } + } + clib_warning ("Tunnel index %d not found in policy_index %d", + t - sm->tunnels, pt - sm->policies); + found: + /* If this is last tunnel in the policy, clean up the policy too */ + if (vec_len (pt->tunnel_indices) == 0) + { + hash_unset_mem (sm->policy_index_by_policy_name, pt->name); + vec_free (pt->name); + pool_put (sm->policies, pt); + } + } + + /* Clean up the tunnel by name */ + if (t->name) + { + hash_unset_mem (sm->tunnel_index_by_name, t->name); + vec_free (t->name); + } + pool_put (sm->tunnels, t); + hp = hash_get_pair (sm->tunnel_index_by_key, &key); + key_copy = (void *) (hp->key); + hash_unset_mem 
(sm->tunnel_index_by_key, &key); + vec_free (key_copy); + return 0; + } + + /* create a new tunnel */ + pool_get (sm->tunnels, t); + memset (t, 0, sizeof (*t)); + t->policy_index = ~0; + + clib_memcpy (&t->key, &key, sizeof (t->key)); + t->dst_mask_width = a->dst_mask_width; + t->rx_fib_index = rx_fib_index; + t->tx_fib_index = tx_fib_index; + + if (!vec_len (a->segments)) + /* there must be at least one segment... */ + return -4; + + /* The first specified hop goes right into the dst address */ + clib_memcpy (&t->first_hop, &a->segments[0], sizeof (ip6_address_t)); + + /* + * Create the sr header rewrite string + * The list of segments needs an extra slot for the ultimate destination + * which is taken from the packet we add the SRH to. + */ + header_length = sizeof (*h) + + sizeof (ip6_address_t) * (vec_len (a->segments) + 1 + vec_len (a->tags)); + + if (a->shared_secret) + { + /* Allocate a new key slot if we don't find the secret key */ + hmac_key_index_u32 = 0; + (void) find_or_add_shared_secret (sm, a->shared_secret, + &hmac_key_index_u32); + + /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! */ + if (hmac_key_index_u32 >= 256) + return -5; + hmac_key_index = hmac_key_index_u32; + header_length += SHA256_DIGEST_LENGTH; + } + + vec_validate (t->rewrite, header_length - 1); + + h = (ip6_sr_header_t *) t->rewrite; + + h->protocol = 0xFF; /* we don't know yet */ + + h->length = (header_length / 8) - 1; + h->type = ROUTING_HEADER_TYPE_SR; + + /* first_segment and segments_left need to have the index of the last + * element in the list; a->segments has one element less than ends up + * in the header (it does not have the DA in it), so vec_len(a->segments) + * is the value we want. + */ + h->first_segment = h->segments_left = vec_len (a->segments); + + if (a->shared_secret) + h->hmac_key = hmac_key_index & 0xFF; + + h->flags = a->flags_net_byte_order; + + /* Paint on the segment list, in reverse. 
+ * This is offset by one to leave room at the start for the ultimate + * destination. + */ + addrp = h->segments + vec_len (a->segments); + + vec_foreach (this_address, a->segments) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp--; + } + + /* + * Since the ultimate destination address is not yet known, set that slot + * to a value we will instantly recognize as bogus. + */ + memset (h->segments, 0xfe, sizeof (ip6_address_t)); + + /* Paint on the tag list, not reversed */ + addrp = h->segments + vec_len (a->segments); + + vec_foreach (this_address, a->tags) + { + clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + key_copy = vec_new (ip6_sr_tunnel_key_t, 1); + clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t)); + hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels); + + /* + * Stick the tunnel index into the rewrite header. + * + * Unfortunately, inserting an SR header according to the various + * RFC's requires parsing through the ip6 header, perhaps consing a + * buffer onto the head of the vlib_buffer_t, etc. We don't use the + * normal reverse bcopy rewrite code. + * + * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain + * at some point... 
+ */ + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); + + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = a->dst_mask_width, + .fp_addr = { + .ip6 = *a->dst_address, + } + }; + fib_table_entry_special_dpo_add (rx_fib_index, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + if (a->policy_name) + { + p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); + if (p) + { + pt = pool_elt_at_index (sm->policies, p[0]); + } + else /* no policy, lets create one */ + { + pool_get (sm->policies, pt); + memset (pt, 0, sizeof (*pt)); + pt->name = format (0, "%s%c", a->policy_name, 0); + hash_set_mem (sm->policy_index_by_policy_name, pt->name, + pt - sm->policies); + p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); + } + vec_add1 (pt->tunnel_indices, t - sm->tunnels); + if (p == 0) + clib_warning ("p is NULL!"); + t->policy_index = p ? p[0] : ~0; /* equiv. to (pt - sm->policies) */ + } + + if (a->name) + { + t->name = format (0, "%s%c", a->name, 0); + hash_set_mem (sm->tunnel_index_by_name, t->name, t - sm->tunnels); + } + + return 0; +} + +/** + * @brief no-op lock function. + * The lifetime of the SR entry is managed by the control plane + */ +static void +sr_dpo_lock (dpo_id_t * dpo) +{ +} + +/** + * @brief no-op unlock function. 
+ * The lifetime of the SR entry is managed by the control plane + */ +static void +sr_dpo_unlock (dpo_id_t * dpo) +{ +} + +u8 * +format_sr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + return (format (s, "SR: tunnel:[%d]", index)); +} + +const static dpo_vft_t sr_vft = { + .dv_lock = sr_dpo_lock, + .dv_unlock = sr_dpo_unlock, + .dv_format = format_sr_dpo, +}; + +const static char *const sr_ip6_nodes[] = { + "sr-rewrite", + NULL, +}; + +const static char *const *const sr_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP6] = sr_ip6_nodes, +}; + +/** + * @brief CLI parser for Add or Delete a Segment Routing tunnel. + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +sr_add_del_tunnel_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + ip6_address_t src_address; + int src_address_set = 0; + ip6_address_t dst_address; + u32 dst_mask_width; + int dst_address_set = 0; + u16 flags = 0; + u8 *shared_secret = 0; + u8 *name = 0; + u8 *policy_name = 0; + u32 rx_table_id = 0; + u32 tx_table_id = 0; + ip6_address_t *segments = 0; + ip6_address_t *this_seg; + ip6_address_t *tags = 0; + ip6_address_t *this_tag; + ip6_sr_add_del_tunnel_args_t _a, *a = &_a; + ip6_address_t next_address, tag; + int pl_index; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "rx-fib-id %d", &rx_table_id)) + ; + else if (unformat (input, "tx-fib-id %d", &tx_table_id)) + ; + else if (unformat (input, "src %U", unformat_ip6_address, &src_address)) + src_address_set = 1; + else if (unformat (input, "name %s", &name)) + ; + else if (unformat (input, "policy %s", &policy_name)) + ; + else if (unformat (input, "dst %U/%d", + unformat_ip6_address, &dst_address, 
&dst_mask_width)) + dst_address_set = 1; + else if (unformat (input, "next %U", unformat_ip6_address, + &next_address)) + { + vec_add2 (segments, this_seg, 1); + clib_memcpy (this_seg->as_u8, next_address.as_u8, + sizeof (*this_seg)); + } + else if (unformat (input, "tag %U", unformat_ip6_address, &tag)) + { + vec_add2 (tags, this_tag, 1); + clib_memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag)); + } + else if (unformat (input, "clean")) + flags |= IP6_SR_HEADER_FLAG_CLEANUP; + else if (unformat (input, "protected")) + flags |= IP6_SR_HEADER_FLAG_PROTECTED; + else if (unformat (input, "key %s", &shared_secret)) + /* Do not include the trailing NULL byte. Guaranteed interop issue */ + _vec_len (shared_secret) -= 1; + else if (unformat (input, "InPE %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + { + pl_index_range_error: + return clib_error_return + (0, "Policy List Element Index %d out of range (1-4)", + pl_index); + + } + flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else if (unformat (input, "EgPE %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + goto pl_index_range_error; + flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else if (unformat (input, "OrgSrc %d", &pl_index)) + { + if (pl_index <= 0 || pl_index > 4) + goto pl_index_range_error; + flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR + << ip6_sr_policy_list_shift_from_index (pl_index); + } + else + break; + } + + if (!src_address_set) + return clib_error_return (0, "src address required"); + + if (!dst_address_set) + return clib_error_return (0, "dst address required"); + + if (!segments) + return clib_error_return (0, "at least one sr segment required"); + + memset (a, 0, sizeof (*a)); + a->src_address = &src_address; + a->dst_address = &dst_address; + a->dst_mask_width = dst_mask_width; + a->segments = segments; + a->tags = tags; + a->flags_net_byte_order = 
clib_host_to_net_u16 (flags); + a->is_del = is_del; + a->rx_table_id = rx_table_id; + a->tx_table_id = tx_table_id; + a->shared_secret = shared_secret; + + if (vec_len (name)) + a->name = name; + else + a->name = 0; + + if (vec_len (policy_name)) + a->policy_name = policy_name; + else + a->policy_name = 0; + + rv = ip6_sr_add_del_tunnel (a); + + vec_free (segments); + vec_free (tags); + vec_free (shared_secret); + + switch (rv) + { + case 0: + break; + + case -1: + return clib_error_return (0, "SR tunnel src %U dst %U already exists", + format_ip6_address, &src_address, + format_ip6_address, &dst_address); + + case -2: + return clib_error_return (0, "SR tunnel src %U dst %U does not exist", + format_ip6_address, &src_address, + format_ip6_address, &dst_address); + + case -3: + return clib_error_return (0, "FIB table %d does not exist", + rx_table_id); + + case -4: + return clib_error_return (0, "At least one segment is required"); + + default: + return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d", + rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_tunnel_command, static) = { + .path = "sr tunnel", + .short_help = + "sr tunnel [del] [name <name>] src <addr> dst <addr> [next <addr>] " + "[clean] [reroute] [key <secret>] [policy <policy_name>]" + "[rx-fib-id <fib_id>] [tx-fib-id <fib_id>]", + .function = sr_add_del_tunnel_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Display Segment Routing tunnel + * + * @param vm vlib_main_t * + * @param t ip6_sr_tunnel_t * + * + */ +void +ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t) +{ + ip6_sr_main_t *sm = &sr_main; + ip6_fib_t *rx_fib, *tx_fib; + ip6_sr_policy_t *pt; + + rx_fib = ip6_fib_get (t->rx_fib_index); + tx_fib = ip6_fib_get (t->tx_fib_index); + + if (t->name) + vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name); + + vlib_cli_output (vm, "src %U dst %U first hop %U", + format_ip6_address, &t->key.src, + format_ip6_address, &t->key.dst, + 
format_ip6_address, &t->first_hop); + vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d", + rx_fib->table_id, tx_fib->table_id); + vlib_cli_output (vm, " sr: %U", format_ip6_sr_header, t->rewrite, + 0 /* print_hmac */ ); + + if (t->policy_index != ~0) + { + pt = pool_elt_at_index (sm->policies, t->policy_index); + vlib_cli_output (vm, "sr policy: %s", (char *) pt->name); + } + vlib_cli_output (vm, "-------"); + + return; +} + +/** + * @brief CLI Parser for Display Segment Routing tunnel + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +show_sr_tunnel_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + static ip6_sr_tunnel_t **tunnels; + ip6_sr_tunnel_t *t; + ip6_sr_main_t *sm = &sr_main; + int i; + uword *p = 0; + u8 *name = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "name %s", &name)) + { + p = hash_get_mem (sm->tunnel_index_by_name, name); + if (!p) + vlib_cli_output (vm, "No SR tunnel with name: %s. 
Showing all.", + name); + } + else + break; + } + + vec_reset_length (tunnels); + + if (!p) /* Either name parm not passed or no tunnel with that name found, show all */ + { + /* *INDENT-OFF* */ + pool_foreach (t, sm->tunnels, + ({ + vec_add1 (tunnels, t); + })); + /* *INDENT-ON* */ + } + else /* Just show the one tunnel by name */ + vec_add1 (tunnels, &sm->tunnels[p[0]]); + + if (vec_len (tunnels) == 0) + vlib_cli_output (vm, "No SR tunnels configured"); + + for (i = 0; i < vec_len (tunnels); i++) + { + t = tunnels[i]; + ip6_sr_tunnel_display (vm, t); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = { + .path = "show sr tunnel", + .short_help = "show sr tunnel [name <sr-tunnel-name>]", + .function = show_sr_tunnel_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Add or Delete a Segment Routing policy + * + * @param a ip6_sr_add_del_policy_args_t * + * + * @return retval int + */ +int +ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a) +{ + ip6_sr_main_t *sm = &sr_main; + uword *p; + ip6_sr_tunnel_t *t = 0; + ip6_sr_policy_t *policy; + u32 *tunnel_indices = 0; + int i; + + + + if (a->is_del) + { + p = hash_get_mem (sm->policy_index_by_policy_name, a->name); + if (!p) + return -6; /* policy name not found */ + + policy = pool_elt_at_index (sm->policies, p[0]); + + vec_foreach_index (i, policy->tunnel_indices) + { + t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); + t->policy_index = ~0; + } + hash_unset_mem (sm->policy_index_by_policy_name, a->name); + pool_put (sm->policies, policy); + return 0; + } + + + if (!vec_len (a->tunnel_names)) + return -3; /*tunnel name is required case */ + + vec_reset_length (tunnel_indices); + /* Check tunnel names, add tunnel_index to policy */ + for (i = 0; i < vec_len (a->tunnel_names); i++) + { + p = hash_get_mem (sm->tunnel_index_by_name, a->tunnel_names[i]); + if (!p) + return -4; /* tunnel name not found case */ + + t = pool_elt_at_index (sm->tunnels, p[0]); + /* + 
No need to check t==0. -3 condition above ensures name + */ + if (t->policy_index != ~0) + return -5; /* tunnel name already associated with a policy */ + + /* Add to tunnel indicies */ + vec_add1 (tunnel_indices, p[0]); + } + + /* Add policy to ip6_sr_main_t */ + pool_get (sm->policies, policy); + policy->name = a->name; + policy->tunnel_indices = tunnel_indices; + hash_set_mem (sm->policy_index_by_policy_name, policy->name, + policy - sm->policies); + + /* Yes, this could be construed as overkill but the last thing you should do is set + the policy_index on the tunnel after everything is set in ip6_sr_main_t. + If this is deemed overly cautious, could set this in the vec_len(tunnel_names) loop. + */ + for (i = 0; i < vec_len (policy->tunnel_indices); i++) + { + t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]); + t->policy_index = policy - sm->policies; + } + + return 0; +} + +/** + * @brief CLI Parser for Add or Delete a Segment Routing policy + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +sr_add_del_policy_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + u8 **tunnel_names = 0; + u8 *tunnel_name = 0; + u8 *name = 0; + ip6_sr_add_del_policy_args_t _a, *a = &_a; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "name %s", &name)) + ; + else if (unformat (input, "tunnel %s", &tunnel_name)) + { + if (tunnel_name) + { + vec_add1 (tunnel_names, tunnel_name); + tunnel_name = 0; + } + } + else + break; + } + + if (!name) + return clib_error_return (0, "name of SR policy required"); + + + memset (a, 0, sizeof (*a)); + + a->is_del = is_del; + a->name = name; + a->tunnel_names = tunnel_names; + + rv = ip6_sr_add_del_policy (a); + + vec_free (tunnel_names); + + switch (rv) + { + 
case 0: + break; + + case -3: + return clib_error_return (0, + "tunnel name to associate to SR policy is required"); + + case -4: + return clib_error_return (0, "tunnel name not found"); + + case -5: + return clib_error_return (0, "tunnel already associated with policy"); + + case -6: + return clib_error_return (0, "policy name %s not found", name); + + case -7: + return clib_error_return (0, "TODO: deleting policy name %s", name); + + default: + return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", + rv); + + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_policy_command, static) = { + .path = "sr policy", + .short_help = + "sr policy [del] name <policy-name> tunnel <sr-tunnel-name> [tunnel <sr-tunnel-name>]*", + .function = sr_add_del_policy_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI Parser for Displaying Segment Routing policy + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +show_sr_policy_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + static ip6_sr_policy_t **policies; + ip6_sr_policy_t *policy; + ip6_sr_tunnel_t *t; + ip6_sr_main_t *sm = &sr_main; + int i, j; + uword *p = 0; + u8 *name = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "name %s", &name)) + { + p = hash_get_mem (sm->policy_index_by_policy_name, name); + if (!p) + vlib_cli_output (vm, + "policy with name %s not found. 
Showing all.", + name); + } + else + break; + } + + vec_reset_length (policies); + + if (!p) /* Either name parm not passed or no policy with that name found, show all */ + { + /* *INDENT-OFF* */ + pool_foreach (policy, sm->policies, + ({ + vec_add1 (policies, policy); + })); + /* *INDENT-ON* */ + } + else /* Just show the one policy by name and a summary of tunnel names */ + { + policy = pool_elt_at_index (sm->policies, p[0]); + vec_add1 (policies, policy); + } + + if (vec_len (policies) == 0) + vlib_cli_output (vm, "No SR policies configured"); + + for (i = 0; i < vec_len (policies); i++) + { + policy = policies[i]; + + if (policy->name) + vlib_cli_output (vm, "SR policy name: %s", (char *) policy->name); + for (j = 0; j < vec_len (policy->tunnel_indices); j++) + { + t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[j]); + ip6_sr_tunnel_display (vm, t); + } + } + + return 0; + +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_policy_command, static) = { + .path = "show sr policy", + .short_help = "show sr policy [name <sr-policy-name>]", + .function = show_sr_policy_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Add or Delete a mapping of IP6 multicast address + * to Segment Routing policy. + * + * @param a ip6_sr_add_del_multicastmap_args_t * + * + * @return retval int + */ +int +ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a) +{ + uword *p; + ip6_sr_tunnel_t *t; + ip6_sr_main_t *sm = &sr_main; + ip6_sr_policy_t *pt; + + if (a->is_del) + { + /* clean up the adjacency */ + p = + hash_get_mem (sm->policy_index_by_multicast_address, + a->multicast_address); + } + else + { + /* Get our policy by policy_name */ + p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name); + + } + if (!p) + return -1; + + pt = pool_elt_at_index (sm->policies, p[0]); + + /* + Get the first tunnel associated with policy populate the fib adjacency. 
+ From there, since this tunnel will have it's policy_index != ~0 it will + be the trigger in the dual_loop to pull up the policy and make a copy-rewrite + for each tunnel in the policy + */ + + t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]); + + /* + * Stick the tunnel index into the rewrite header. + * + * Unfortunately, inserting an SR header according to the various + * RFC's requires parsing through the ip6 header, perhaps consing a + * buffer onto the head of the vlib_buffer_t, etc. We don't use the + * normal reverse bcopy rewrite code. + * + * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain + * at some point... + */ + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels); + + /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + .fp_addr = { + .ip6 = *a->multicast_address, + } + }; + fib_table_entry_special_dpo_add (t->rx_fib_index, + &pfx, + FIB_SOURCE_SR, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + + u8 *mcast_copy = 0; + mcast_copy = vec_new (ip6_address_t, 1); + memcpy (mcast_copy, a->multicast_address, sizeof (ip6_address_t)); + + if (a->is_del) + { + hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy); + vec_free (mcast_copy); + return 0; + } + /* else */ + + hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy, + pt - sm->policies); + + + return 0; +} + +/** + * @brief CLI Parser for Adding or Delete a mapping of IP6 multicast address + * to Segment Routing policy. 
+ * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +sr_add_del_multicast_map_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_del = 0; + ip6_address_t multicast_address; + u8 *policy_name = 0; + int multicast_address_set = 0; + ip6_sr_add_del_multicastmap_args_t _a, *a = &_a; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else + if (unformat + (input, "address %U", unformat_ip6_address, &multicast_address)) + multicast_address_set = 1; + else if (unformat (input, "sr-policy %s", &policy_name)) + ; + else + break; + } + + if (!is_del && !policy_name) + return clib_error_return (0, "name of sr policy required"); + + if (!multicast_address_set) + return clib_error_return (0, "multicast address required"); + + memset (a, 0, sizeof (*a)); + + a->is_del = is_del; + a->multicast_address = &multicast_address; + a->policy_name = policy_name; + +#if DPDK > 0 /*Cannot call replicate or configure multicast map yet without DPDK */ + rv = ip6_sr_add_del_multicastmap (a); +#else + return clib_error_return (0, + "cannot use multicast replicate spray case without DPDK installed"); +#endif /* DPDK */ + + switch (rv) + { + case 0: + break; + case -1: + return clib_error_return (0, "no policy with name: %s", policy_name); + + case -2: + return clib_error_return (0, "multicast map someting "); + + case -3: + return clib_error_return (0, + "tunnel name to associate to SR policy is required"); + + case -7: + return clib_error_return (0, "TODO: deleting policy name %s", + policy_name); + + default: + return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d", + rv); + + } + return 0; + +} + + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_multicast_map_command, static) = { + .path = "sr multicast-map", + .short_help = + "sr multicast-map 
address <multicast-ip6-address> sr-policy <sr-policy-name> [del]", + .function = sr_add_del_multicast_map_command_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI Parser for Displaying a mapping of IP6 multicast address + * to Segment Routing policy. + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +show_sr_multicast_map_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + u8 *key = 0; + u32 value; + ip6_address_t multicast_address; + ip6_sr_policy_t *pt; + + /* pull all entries from the hash table into vector for display */ + + /* *INDENT-OFF* */ + hash_foreach_mem (key, value, sm->policy_index_by_multicast_address, + ({ + if (!key) + vlib_cli_output (vm, "no multicast maps configured"); + else + { + multicast_address = *((ip6_address_t *)key); + pt = pool_elt_at_index (sm->policies, value); + if (pt) + { + vlib_cli_output (vm, "address: %U policy: %s", + format_ip6_address, &multicast_address, + pt->name); + } + else + vlib_cli_output (vm, "BUG: policy not found for address: %U with policy index %d", + format_ip6_address, &multicast_address, + value); + + } + + })); + /* *INDENT-ON* */ + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_multicast_map_command, static) = { + .path = "show sr multicast-map", + .short_help = "show sr multicast-map", + .function = show_sr_multicast_map_fn, +}; +/* *INDENT-ON* */ + + +#define foreach_sr_fix_dst_addr_next \ +_(DROP, "error-drop") + +/** + * @brief Struct for valid next-nodes for SR fix destination address node + */ +typedef enum +{ +#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s, + foreach_sr_fix_dst_addr_next +#undef _ + SR_FIX_DST_ADDR_N_NEXT, +} sr_fix_dst_addr_next_t; + +/** + * @brief Error strings for SR Fix Destination rewrite + */ +static char *sr_fix_dst_error_strings[] = { +#define sr_fix_dst_error(n,s) s, +#include 
"sr_fix_dst_error.def" +#undef sr_fix_dst_error +}; + +/** + * @brief Struct for errors for SR Fix Destination rewrite + */ +typedef enum +{ +#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n, +#include "sr_fix_dst_error.def" +#undef sr_fix_dst_error + SR_FIX_DST_N_ERROR, +} sr_fix_dst_error_t; + +/** + * @brief Information for fix address trace + */ +typedef struct +{ + ip6_address_t src, dst; + u32 next_index; + u32 adj_index; + u8 sr[256]; +} sr_fix_addr_trace_t; + +/** + * @brief Formatter for fix address trace + */ +u8 * +format_sr_fix_addr_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_fix_addr_trace_t *t = va_arg (*args, sr_fix_addr_trace_t *); + vnet_hw_interface_t *hi = 0; + ip_adjacency_t *adj; + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + vnet_main_t *vnm = vnet_get_main (); + + if (t->adj_index != ~0) + { + adj = ip_get_adjacency (lm, t->adj_index); + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + } + + s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n", + (t->next_index == SR_FIX_DST_ADDR_NEXT_DROP) + ? "drop" : "output", + format_ip6_address, &t->src, format_ip6_address, &t->dst); + if (t->next_index != SR_FIX_DST_ADDR_NEXT_DROP) + { + s = + format (s, "%U\n", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); + s = + format (s, " output via %s", + hi ? 
(char *) (hi->name) : "Invalid adj"); + } + return s; +} + +/** + * @brief Fix SR destination address - dual-loop + * + * @node sr-fix-dst-addr + * @param vm vlib_main_t * + * @param node vlib_node_runtime_t * + * @param from_frame vlib_frame_t * + * + * @return from_frame->n_vectors uword + */ +static uword +sr_fix_dst_addr (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + +#if 0 + while (0 && n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + __attribute__ ((unused)) vlib_buffer_t *b0, *b1; + u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; + u32 next1 = SR_FIX_DST_ADDR_NEXT_DROP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0; + ip_adjacency_t *adj0; + ip6_sr_header_t *sr0; + u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP; + ip6_address_t *new_dst0; + ethernet_header_t *eh0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + adj0 = + ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); + next0 = adj0->mcast_group_index; + + /* We should be pointing at an Ethernet header... */ + eh0 = vlib_buffer_get_current (b0); + ip0 = (ip6_header_t *) (eh0 + 1); + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + /* We'd better find an SR header... */ + if (PREDICT_FALSE (ip0->protocol != IPPROTO_IPV6_ROUTE)) + { + b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER]; + goto do_trace0; + } + else + { + /* + * We get here from sr_rewrite or sr_local, with + * sr->segments_left pointing at the (copy of the original) dst + * address. Use it, then increment sr0->segments_left. + */ + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS]; + goto do_trace0; + } + + /* + * Rewrite the packet with the original dst address + * We assume that the last segment (in processing order) contains + * the original dst address. 
The list is reversed, so sr0->segments + * contains the original dst address. + */ + new_dst0 = sr0->segments; + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + } + + do_trace0: + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_fix_addr_trace_t *t = vlib_add_trace (vm, node, + b0, sizeof (*t)); + t->next_index = next0; + t->adj_index = ~0; + + if (next0 != SR_FIX_DST_ADDR_NEXT_DROP) + { + t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + clib_memcpy (t->src.as_u8, ip0->src_address.as_u8, + sizeof (t->src.as_u8)); + clib_memcpy (t->dst.as_u8, ip0->dst_address.as_u8, + sizeof (t->dst.as_u8)); + clib_memcpy (t->sr, sr0, sizeof (t->sr)); + } + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = { + .function = sr_fix_dst_addr, + .name = "sr-fix-dst-addr", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .format_trace = format_sr_fix_addr_trace, + .format_buffer = format_ip6_sr_header_with_length, + + .runtime_data_bytes = 0, + + .n_errors = SR_FIX_DST_N_ERROR, + .error_strings = sr_fix_dst_error_strings, + + .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n, + foreach_sr_fix_dst_addr_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr) +/* *INDENT-ON* */ + +static clib_error_t * +sr_init (vlib_main_t * vm) +{ + ip6_sr_main_t *sm = &sr_main; + clib_error_t *error = 0; + vlib_node_t *ip6_lookup_node, *ip6_rewrite_node; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + sm->vlib_main = vm; + sm->vnet_main = vnet_get_main (); + + vec_validate (sm->hmac_keys, 0); + sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef; + + sm->tunnel_index_by_key = + hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword)); + + sm->tunnel_index_by_name = hash_create_string (0, sizeof (uword)); + + sm->policy_index_by_policy_name = hash_create_string (0, sizeof (uword)); + + sm->policy_index_by_multicast_address = + hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword)); + + sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof (uword)); + + ip6_register_protocol (IPPROTO_IPV6_ROUTE, sr_local_node.index); + + ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup"); + ASSERT (ip6_lookup_node); + + ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite"); + ASSERT (ip6_rewrite_node); + +#if DPDK > 0 /* Cannot run replicate without DPDK */ + /* Add a disposition to sr_replicate for the sr multicast replicate node */ + sm->ip6_lookup_sr_replicate_index = + vlib_node_add_next (vm, ip6_lookup_node->index, sr_replicate_node.index); +#endif /* DPDK */ + + /* Add a disposition to ip6_rewrite for the sr dst address 
hack node */ + sm->ip6_rewrite_sr_next_index = + vlib_node_add_next (vm, ip6_rewrite_node->index, + sr_fix_dst_addr_node.index); + + OpenSSL_add_all_digests (); + + sm->md = (void *) EVP_get_digestbyname ("sha1"); + sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX)); + + sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes); + + return error; +} + +VLIB_INIT_FUNCTION (sr_init); + +/** + * @brief Definition of next-nodes for SR local + */ +#define foreach_sr_local_next \ + _ (ERROR, "error-drop") \ + _ (IP6_LOOKUP, "ip6-lookup") + +/** + * @brief Struct for definition of next-nodes for SR local + */ +typedef enum +{ +#define _(s,n) SR_LOCAL_NEXT_##s, + foreach_sr_local_next +#undef _ + SR_LOCAL_N_NEXT, +} sr_local_next_t; + +/** + * @brief Struct for packet trace of SR local + */ +typedef struct +{ + u8 next_index; + u8 sr_valid; + ip6_address_t src, dst; + u16 length; + u8 sr[256]; +} sr_local_trace_t; + +/** + * @brief Definition of SR local error-strings + */ +static char *sr_local_error_strings[] = { +#define sr_error(n,s) s, +#include "sr_error.def" +#undef sr_error +}; + +/** + * @brief Struct for definition of SR local error-strings + */ +typedef enum +{ +#define sr_error(n,s) SR_LOCAL_ERROR_##n, +#include "sr_error.def" +#undef sr_error + SR_LOCAL_N_ERROR, +} sr_local_error_t; + +/** + * @brief Format SR local trace + * + * @param s u8 * + * @param args va_list * + * + * @return s u8 * + */ +u8 * +format_sr_local_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_local_trace_t *t = va_arg (*args, sr_local_trace_t *); + + s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d", + format_ip6_address, &t->src, + format_ip6_address, &t->dst, t->length, t->next_index); + if (t->sr_valid) + s = + format (s, "\n %U", format_ip6_sr_header, t->sr, 1 /* print_hmac */ ); + else + s = format (s, "\n popped SR header"); + + return s; +} + + +/* 
$$$$ fixme: smp, don't copy data, cache input, output (maybe) */ +/** + * @brief Validate the SR HMAC + * + * @param sm ip6_sr_main_t * + * @param ip ip6_header_t * + * @param sr ip6_sr_header_t * + * + * @return retval int + */ +static int +sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr) +{ + u32 key_index; + static u8 *keybuf; + u8 *copy_target; + int first_segment; + ip6_address_t *addrp; + int i; + ip6_sr_hmac_key_t *hmac_key; + static u8 *signature; + u32 sig_len; + + key_index = sr->hmac_key; + + /* No signature? Pass... */ + if (key_index == 0) + return 0; + + /* We don't know about this key? Fail... */ + if (key_index >= vec_len (sm->hmac_keys)) + return 1; + + vec_validate (signature, SHA256_DIGEST_LENGTH - 1); + + hmac_key = sm->hmac_keys + key_index; + + vec_reset_length (keybuf); + + /* pkt ip6 src address */ + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t)); + + /* last segment */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->first_segment; + + /* octet w/ bit 0 = "clean" flag */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] + = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)) + ? 
0x80 : 0; + + /* hmac key id */ + vec_add2 (keybuf, copy_target, 1); + copy_target[0] = sr->hmac_key; + + first_segment = sr->first_segment; + + addrp = sr->segments; + + /* segments */ + for (i = 0; i <= first_segment; i++) + { + vec_add2 (keybuf, copy_target, sizeof (ip6_address_t)); + clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t)); + addrp++; + } + + if (sm->is_debug) + clib_warning ("verify key index %d keybuf: %U", key_index, + format_hex_bytes, keybuf, vec_len (keybuf)); + + /* shared secret */ + + /* SHA1 is shorter than SHA-256 */ + memset (signature, 0, vec_len (signature)); + + HMAC_CTX_init (sm->hmac_ctx); + if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret, + vec_len (hmac_key->shared_secret), sm->md)) + clib_warning ("barf1"); + if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf))) + clib_warning ("barf2"); + if (!HMAC_Final (sm->hmac_ctx, signature, &sig_len)) + clib_warning ("barf3"); + HMAC_CTX_cleanup (sm->hmac_ctx); + + if (sm->is_debug) + clib_warning ("computed signature len %d, value %U", sig_len, + format_hex_bytes, signature, vec_len (signature)); + + /* Point at the SHA signature in the packet */ + addrp++; + if (sm->is_debug) + clib_warning ("read signature %U", format_hex_bytes, addrp, + SHA256_DIGEST_LENGTH); + + return memcmp (signature, addrp, SHA256_DIGEST_LENGTH); +} + +/** + * @brief SR local node + * @node sr-local + * + * @param vm vlib_main_t * + * @param node vlib_node_runtime_t * + * @param from_frame vlib_frame_t * + * + * @return from_frame->n_vectors uword + */ +static uword +sr_local (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + u32 n_left_from, next_index, *from, *to_next; + ip6_sr_main_t *sm = &sr_main; + u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, + vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); + sr_local_cb = sm->sr_local_cb; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ip6_header_t *ip0, *ip1; + ip6_sr_header_t *sr0, *sr1; + ip6_address_t *new_dst0, *new_dst1; + u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; + u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + bi0 = from[0]; + bi1 = from[1]; + to_next[0] = bi0; + to_next[1] = bi1; + from += 2; + to_next += 2; + n_left_to_next -= 2; + n_left_from -= 2; + + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) (ip0 + 1); + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + sr0 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } + + if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = + node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace0; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace0; + } + + if (PREDICT_FALSE (sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip0, sr0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace0; + } + } + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : next0; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) + b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index0; + + segment_index0 = sr0->segments_left - 1; + + /* Rewrite the packet */ + new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (PREDICT_TRUE (sr0->segments_left > 0)) + sr0->segments_left -= 1; + } + + /* End of the path. Clean up the SR header, or not */ + if (PREDICT_FALSE + (sr0->segments_left == 0 && + (sr0->flags & + clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) + { + u64 *copy_dst0, *copy_src0; + u16 new_l0; + u32 copy_len_u64s0 = 0; + int i; + + /* + * Copy the ip6 header right by the (real) length of the + * sr header. + */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + copy_len_u64s0 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr0->protocol; + } + else + { + ip0->protocol = sr0->protocol; + } + vlib_buffer_advance (b0, (sr0->length + 1) * 8); + + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + (sr0->length + 1) * 8; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + copy_src0 = (u64 *) ip0; + copy_dst0 = copy_src0 + (sr0->length + 1); + + copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; + copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; + copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; + copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; + copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; + + for (i = copy_len_u64s0 - 1; i >= 0; i--) + { + copy_dst0[i] = copy_src0[i]; + } + + 
sr0 = 0; + } + + do_trace0: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b0); + tr->next_index = next0; + tr->sr_valid = sr0 != 0; + if (tr->sr_valid) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + + b1 = vlib_get_buffer (vm, bi1); + ip1 = vlib_buffer_get_current (b1); + sr1 = (ip6_sr_header_t *) (ip1 + 1); + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + sr1 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } + + if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = + node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace1; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr1->segments_left == 0)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace1; + } + + if (PREDICT_FALSE (sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip1, sr1)) + { + next1 = SR_LOCAL_NEXT_ERROR; + b1->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace1; + } + } + + next1 = sr_local_cb ? 
sr_local_cb (vm, node, b1, ip1, sr1) : next1; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next1 & 0x80000000)) + { + next1 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next1 == SR_LOCAL_NEXT_ERROR)) + b1->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index1; + + segment_index1 = sr1->segments_left - 1; + + /* Rewrite the packet */ + new_dst1 = (ip6_address_t *) (sr1->segments + segment_index1); + ip1->dst_address.as_u64[0] = new_dst1->as_u64[0]; + ip1->dst_address.as_u64[1] = new_dst1->as_u64[1]; + + if (PREDICT_TRUE (sr1->segments_left > 0)) + sr1->segments_left -= 1; + } + + /* End of the path. Clean up the SR header, or not */ + if (PREDICT_FALSE + (sr1->segments_left == 0 && + (sr1->flags & + clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) + { + u64 *copy_dst1, *copy_src1; + u16 new_l1; + u32 copy_len_u64s1 = 0; + int i; + + /* + * Copy the ip6 header right by the (real) length of the + * sr header. + */ + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + copy_len_u64s1 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr1->protocol; + } + else + { + ip1->protocol = sr1->protocol; + } + vlib_buffer_advance (b1, (sr1->length + 1) * 8); + + new_l1 = clib_net_to_host_u16 (ip1->payload_length) - + (sr1->length + 1) * 8; + ip1->payload_length = clib_host_to_net_u16 (new_l1); + + copy_src1 = (u64 *) ip1; + copy_dst1 = copy_src1 + (sr1->length + 1); + + copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1]; + copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1]; + copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1]; + copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1]; + copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1]; + + for (i = copy_len_u64s1 - 1; i >= 0; i--) + { + copy_dst1[i] = copy_src1[i]; + } + + 
sr1 = 0; + } + + do_trace1: + if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b1, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b1); + tr->next_index = next1; + tr->sr_valid = sr1 != 0; + if (tr->sr_valid) + clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_header_t *ip0 = 0; + ip6_sr_header_t *sr0; + ip6_address_t *new_dst0; + u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + sr0 = (ip6_sr_header_t *) (ip0 + 1); + + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + sr0 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } + if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = + node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE]; + goto do_trace; + } + + /* Out of segments? Turf the packet */ + if (PREDICT_FALSE (sr0->segments_left == 0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS]; + goto do_trace; + } + + if (PREDICT_FALSE (sm->validate_hmac)) + { + if (sr_validate_hmac (sm, ip0, sr0)) + { + next0 = SR_LOCAL_NEXT_ERROR; + b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID]; + goto do_trace; + } + } + + next0 = sr_local_cb ? 
sr_local_cb (vm, node, b0, ip0, sr0) : next0; + + /* + * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx + */ + if (PREDICT_FALSE (next0 & 0x80000000)) + { + next0 ^= 0xFFFFFFFF; + if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR)) + b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK]; + } + else + { + u32 segment_index0; + + segment_index0 = sr0->segments_left - 1; + + /* Rewrite the packet */ + new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0); + ip0->dst_address.as_u64[0] = new_dst0->as_u64[0]; + ip0->dst_address.as_u64[1] = new_dst0->as_u64[1]; + + if (PREDICT_TRUE (sr0->segments_left > 0)) + sr0->segments_left -= 1; + } + + /* End of the path. Clean up the SR header, or not */ + if (PREDICT_FALSE + (sr0->segments_left == 0 && + (sr0->flags & + clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP)))) + { + u64 *copy_dst0, *copy_src0; + u16 new_l0; + u32 copy_len_u64s0 = 0; + int i; + + /* + * Copy the ip6 header right by the (real) length of the + * sr header. + */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + copy_len_u64s0 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr0->protocol; + } + else + { + ip0->protocol = sr0->protocol; + } + + vlib_buffer_advance (b0, (sr0->length + 1) * 8); + + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + (sr0->length + 1) * 8; + ip0->payload_length = clib_host_to_net_u16 (new_l0); + + copy_src0 = (u64 *) ip0; + copy_dst0 = copy_src0 + (sr0->length + 1); + copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; + copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; + copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; + copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; + copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; + + for (i = copy_len_u64s0 - 1; i >= 0; i--) + { + copy_dst0[i] = copy_src0[i]; + } + + 
sr0 = 0; + } + + do_trace: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_local_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + tr->length = vlib_buffer_length_in_chain (vm, b0); + tr->next_index = next0; + tr->sr_valid = sr0 != 0; + if (tr->sr_valid) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter (vm, sr_local_node.index, + SR_LOCAL_ERROR_PKTS_PROCESSED, + from_frame->n_vectors); + return from_frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_local_node, static) = { + .function = sr_local, + .name = "sr-local", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .format_trace = format_sr_local_trace, + + .runtime_data_bytes = 0, + + .n_errors = SR_LOCAL_N_ERROR, + .error_strings = sr_local_error_strings, + + .n_next_nodes = SR_LOCAL_N_NEXT, + .next_nodes = { +#define _(s,n) [SR_LOCAL_NEXT_##s] = n, + foreach_sr_local_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (sr_local_node, sr_local) +/* *INDENT-ON* */ + +ip6_sr_main_t * +sr_get_main (vlib_main_t * vm) +{ + vlib_call_init_function (vm, sr_init); + ASSERT (sr_local_node.index); + return &sr_main; +} + +/** + * @brief CLI parser for SR fix destination rewrite node + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +set_ip6_sr_rewrite_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = 128, + }; + u32 fib_index = 0; + u32 fib_id = 0; + u32 adj_index; + ip_adjacency_t 
*adj; + vnet_hw_interface_t *hi; + u32 sw_if_index; + ip6_sr_main_t *sm = &sr_main; + vnet_main_t *vnm = vnet_get_main (); + fib_node_index_t fei; + + if (!unformat (input, "%U", unformat_ip6_address, &pfx.fp_addr.ip6)) + return clib_error_return (0, "ip6 address missing in '%U'", + format_unformat_error, input); + + if (unformat (input, "rx-table-id %d", &fib_id)) + { + fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, fib_id); + if (fib_index == ~0) + return clib_error_return (0, "fib-id %d not found", fib_id); + } + + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + return clib_error_return (0, "no match for %U", + format_ip6_address, &pfx.fp_addr.ip6); + + adj_index = fib_entry_get_adj_for_source (fei, FIB_SOURCE_SR); + + if (ADJ_INDEX_INVALID == adj_index) + return clib_error_return (0, "%U not SR sourced", + format_ip6_address, &pfx.fp_addr.ip6); + + adj = adj_get (adj_index); + + if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE) + return clib_error_return (0, "%U unresolved (not a rewrite adj)", + format_ip6_address, &pfx.fp_addr.ip6); + + adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index; + + sw_if_index = adj->rewrite_header.sw_if_index; + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + adj->rewrite_header.node_index = sr_fix_dst_addr_node.index; + + /* $$$$$ hack... 
steal the mcast group index */ + adj->mcast_group_index = + vlib_node_add_next (vm, sr_fix_dst_addr_node.index, + hi->output_node_index); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = { + .path = "set ip6 sr rewrite", + .short_help = "set ip6 sr rewrite <ip6-address> [fib-id <id>]", + .function = set_ip6_sr_rewrite_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Register a callback routine to set next0 in sr_local + * + * @param cb void * + */ +void +vnet_register_sr_app_callback (void *cb) +{ + ip6_sr_main_t *sm = &sr_main; + + sm->sr_local_cb = cb; +} + +/** + * @brief Test routine for validation of HMAC + */ +static clib_error_t * +test_sr_hmac_validate_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + + if (unformat (input, "validate on")) + sm->validate_hmac = 1; + else if (unformat (input, "chunk-offset off")) + sm->validate_hmac = 0; + else + return clib_error_return (0, "expected validate on|off in '%U'", + format_unformat_error, input); + + vlib_cli_output (vm, "hmac signature validation %s", + sm->validate_hmac ? "on" : "off"); + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = { + .path = "test sr hmac", + .short_help = "test sr hmac validate [on|off]", + .function = test_sr_hmac_validate_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Add or Delete HMAC key + * + * @param sm ip6_sr_main_t * + * @param key_id u32 + * @param shared_secret u8 * + * @param is_del u8 + * + * @return retval i32 + */ +// $$$ fixme shouldn't return i32 +i32 +sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret, + u8 is_del) +{ + u32 index; + ip6_sr_hmac_key_t *key; + + if (is_del == 0) + { + /* Specific key in use? Fail. 
*/ + if (key_id && vec_len (sm->hmac_keys) > key_id + && sm->hmac_keys[key_id].shared_secret) + return -2; + + index = key_id; + key = find_or_add_shared_secret (sm, shared_secret, &index); + ASSERT (index == key_id); + return 0; + } + + /* delete */ + + if (key_id) /* delete by key ID */ + { + if (vec_len (sm->hmac_keys) <= key_id) + return -3; + + key = sm->hmac_keys + key_id; + + hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); + vec_free (key->shared_secret); + return 0; + } + + index = 0; + key = find_or_add_shared_secret (sm, shared_secret, &index); + hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret); + vec_free (key->shared_secret); + return 0; +} + + +static clib_error_t * +sr_hmac_add_del_key_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + u8 is_del = 0; + u32 key_id = 0; + u8 key_id_set = 0; + u8 *shared_secret = 0; + i32 rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_del = 1; + else if (unformat (input, "id %d", &key_id)) + key_id_set = 1; + else if (unformat (input, "key %s", &shared_secret)) + { + /* Do not include the trailing NULL byte. 
Guaranteed interop issue */ + _vec_len (shared_secret) -= 1; + } + else + break; + } + + if (is_del == 0 && shared_secret == 0) + return clib_error_return (0, "shared secret must be set to add a key"); + + if (shared_secret == 0 && key_id_set == 0) + return clib_error_return (0, "shared secret and key id both unset"); + + rv = sr_hmac_add_del_key (sm, key_id, shared_secret, is_del); + + vec_free (shared_secret); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return (0, "sr_hmac_add_del_key returned %d", rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (sr_hmac, static) = { + .path = "sr hmac", + .short_help = "sr hmac [del] id <nn> key <str>", + .function = sr_hmac_add_del_key_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief CLI parser for show HMAC key shared secrets + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +show_sr_hmac_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + int i; + + for (i = 1; i < vec_len (sm->hmac_keys); i++) + { + if (sm->hmac_keys[i].shared_secret) + vlib_cli_output (vm, "[%d]: %v", i, sm->hmac_keys[i].shared_secret); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sr_hmac, static) = { + .path = "show sr hmac", + .short_help = "show sr hmac", + .function = show_sr_hmac_fn, +}; +/* *INDENT-ON* */ + +/** + * @brief Test for SR debug flag + * + * @param vm vlib_main_t * + * @param input unformat_input_t * + * @param cmd vlib_cli_command_t * + * + * @return error clib_error_t * + */ +static clib_error_t * +test_sr_debug_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_sr_main_t *sm = &sr_main; + + if (unformat (input, "on")) + sm->is_debug = 1; + else if (unformat (input, "off")) + sm->is_debug = 0; + else + return clib_error_return (0, "expected on|off in '%U'", + 
format_unformat_error, input); + + vlib_cli_output (vm, "debug trace now %s", sm->is_debug ? "on" : "off"); + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_sr_debug, static) = { + .path = "test sr debug", + .short_help = "test sr debug on|off", + .function = test_sr_debug_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h new file mode 100644 index 00000000000..610b36996f3 --- /dev/null +++ b/src/vnet/sr/sr.h @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * @brief Segment Route tunnel
 */
typedef struct
{
  /** src, dst address */
  ip6_sr_tunnel_key_t key;

  /** Optional tunnel name */
  u8 *name;

  /** Mask width for FIB entry */
  u32 dst_mask_width;

  /** First hop, to save 1 elt in the segment list */
  ip6_address_t first_hop;

  /** RX Fib index */
  u32 rx_fib_index;
  /** TX Fib index */
  u32 tx_fib_index;

  /** The actual ip6 SR header */
  u8 *rewrite;

  /** Indicates that this tunnel is part of a policy comprising
     of multiple tunnels. If == ~0 tunnel is not part of a policy */
  u32 policy_index;
} ip6_sr_tunnel_t;
+ */ +typedef struct +{ + /** Key (header imposition case) */ + ip6_address_t *src_address; + ip6_address_t *dst_address; + u32 dst_mask_width; + u32 rx_table_id; + u32 tx_table_id; + + /** optional name argument - for referencing SR tunnel/policy by name */ + u8 *name; + + /** optional policy name */ + u8 *policy_name; + + /** segment list, when inserting an ip6 SR header */ + ip6_address_t *segments; + + /** + * "Tag" list, aka segments inserted at the end of the list, + * past last_seg + */ + ip6_address_t *tags; + + /** Shared secret => generate SHA-256 HMAC security fields */ + u8 *shared_secret; + + /** Flags, e.g. cleanup, policy-list flags */ + u16 flags_net_byte_order; + + /** Delete the tunnel? */ + u8 is_del; +} ip6_sr_add_del_tunnel_args_t; + +/** + * @brief Args for creating a policy. + * + * Typically used for multicast replication, + * i.e. a multicast address can be associated with a policy, + * then replicated across a number of unicast SR tunnels. + */ +typedef struct +{ + /** policy name */ + u8 *name; + + /** tunnel names */ + u8 **tunnel_names; + + /** Delete the policy? */ + u8 is_del; +} ip6_sr_add_del_policy_args_t; + +/** + * @brief Segment Routing policy. + * + * Typically used for multicast replication, + * i.e. a multicast address can be associated with a policy, + * then replicated across a number of unicast SR tunnels. + */ +typedef struct +{ + /** name of policy */ + u8 *name; + + /** vector of SR tunnel indices */ + u32 *tunnel_indices; + +} ip6_sr_policy_t; + +/** + * @brief Args for mapping of multicast address to policy name. + * + * Typically used for multicast replication. + * ie a multicast address can be associated with a policy, + * then replicated across a number of unicast SR tunnels.
+ */ +typedef struct +{ + /** multicast IP6 address */ + ip6_address_t *multicast_address; + + /** name of policy to map to */ + u8 *policy_name; + + /** Delete the mapping */ + u8 is_del; + +} ip6_sr_add_del_multicastmap_args_t; + +/** + * @brief Segment Routing state. + */ +typedef struct +{ + /** pool of tunnel instances, sr entry only */ + ip6_sr_tunnel_t *tunnels; + + /** find an sr "tunnel" by its outer-IP src/dst */ + uword *tunnel_index_by_key; + + /** find an sr "tunnel" by its name */ + uword *tunnel_index_by_name; + + /** policy pool */ + ip6_sr_policy_t *policies; + + /** find a policy by name */ + uword *policy_index_by_policy_name; + + /** multicast address to policy mapping */ + uword *policy_index_by_multicast_address; + + /** hmac key id by shared secret */ + uword *hmac_key_by_shared_secret; + + /** ip6-rewrite next index for reinstalling the original dst address */ + u32 ip6_rewrite_sr_next_index; + + /** ip6-replicate next index for multicast tunnel */ + u32 ip6_lookup_sr_replicate_index; + + /** application API callback */ + void *sr_local_cb; + + /** validate hmac keys */ + u8 validate_hmac; + + /** pool of hmac keys */ + ip6_sr_hmac_key_t *hmac_keys; + + /** Openssl var */ + EVP_MD *md; + /** Openssl var */ + HMAC_CTX *hmac_ctx; + + /** enable debug spew */ + u8 is_debug; + + /** convenience */ + vlib_main_t *vlib_main; + /** convenience */ + vnet_main_t *vnet_main; +} ip6_sr_main_t; + +ip6_sr_main_t sr_main; + +format_function_t format_ip6_sr_header; +format_function_t format_ip6_sr_header_with_length; + +vlib_node_registration_t ip6_sr_input_node; + +#if DPDK > 0 +extern vlib_node_registration_t sr_replicate_node; +#endif /* DPDK */ + +int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); +int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a); +int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a); + +void vnet_register_sr_app_callback (void *cb); + +void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, 
+ ip6_sr_header_t * sr); + +#endif /* included_vnet_sr_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr_error.def b/src/vnet/sr/sr_error.def new file mode 100644 index 00000000000..62d021fd47b --- /dev/null +++ b/src/vnet/sr/sr_error.def @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +sr_error (NONE, "no error") +sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)") +sr_error (NO_MORE_SEGMENTS, "out of SR segment drops") +sr_error (PKTS_PROCESSED, "SR packets processed") +sr_error (APP_CALLBACK, "SR application callback errors") +sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures") diff --git a/src/vnet/sr/sr_fix_dst_error.def b/src/vnet/sr/sr_fix_dst_error.def new file mode 100644 index 00000000000..48fe7af6c98 --- /dev/null +++ b/src/vnet/sr/sr_fix_dst_error.def @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +sr_fix_dst_error (NONE, "no error") +sr_fix_dst_error (NO_SR_HEADER, "no SR header present") +sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments") diff --git a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h new file mode 100644 index 00000000000..179b94c2dc7 --- /dev/null +++ b/src/vnet/sr/sr_packet.h @@ -0,0 +1,251 @@ +#ifndef included_vnet_sr_packet_h +#define included_vnet_sr_packet_h + +#include <vnet/ip/ip.h> + +/* + * ipv6 segment-routing header format + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * @brief The Segment Routing Header (SRH). + * + * The Segment Routing Header (SRH) is defined in the diagram below. 
+ * + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Next Header | Hdr Ext Len | Routing Type | Segments Left | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | First Segment | Flags | HMAC Key ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[0] (128 bits ipv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * ... + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Segment List[n] (128 bits ipv6 address) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[0] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[1] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | Policy List[2] (optional) | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | | + * | | + * | HMAC (256 bits) | + * | (optional) | + * | | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * where: + * + * o Next Header: 8-bit selector. Identifies the type of header + * immediately following the SRH. + * + * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH + * header in 8-octet units, not including the first 8 octets. + * + * o Routing Type: TBD, to be assigned by IANA (suggested value: 4). + * + * o Segments Left. Defined in [RFC2460], it contains the index, in + * the Segment List, of the next segment to inspect. Segments Left + * is decremented at each segment and it is used as an index in the + * segment list. 
+ * + * o First Segment: offset in the SRH, not including the first 8 octets + * and expressed in 16-octet units, pointing to the last element of + * the segment list, which is in fact the first segment of the + * segment routing path. + * + * o Flags: 16 bits of flags. Following flags are defined: + * + * 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |C|P|R|R| Policy Flags | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * C-flag: Clean-up flag. Set when the SRH has to be removed from + * the packet when packet reaches the last segment. + * + * P-flag: Protected flag. Set when the packet has been rerouted + * through FRR mechanism by a SR endpoint node. See Section 6.3 + * for more details. + * + * R-flags. Reserved and for future use. + * + * Policy Flags. Define the type of the IPv6 addresses encoded + * into the Policy List (see below). The following have been + * defined: + * + * Bits 4-6: determine the type of the first element after the + * segment list. + * + * Bits 7-9: determine the type of the second element. + * + * Bits 10-12: determine the type of the third element. + * + * Bits 13-15: determine the type of the fourth element. + * + * The following values are used for the type: + * + * 0x0: Not present. If value is set to 0x0, it means the + * element represented by these bits is not present. + * + * 0x1: SR Ingress. + * + * 0x2: SR Egress. + * + * 0x3: Original Source Address. + * + * o HMAC Key ID and HMAC field, and their use are defined in + * [I-D.vyncke-6man-segment-routing-security]. + * + * o Segment List[n]: 128 bit IPv6 addresses representing the nth + * segment in the Segment List. The Segment List is encoded starting + * from the last segment of the path. I.e., the first element of the + * segment list (Segment List [0]) contains the last segment of the + * path while the last segment of the Segment List (Segment List[n]) + * contains the first segment of the path. 
The index contained in + * "Segments Left" identifies the current active segment. + * + * o Policy List. Optional addresses representing specific nodes in + * the SR path such as: + * + * SR Ingress: a 128 bit generic identifier representing the + * ingress in the SR domain (i.e.: it needs not to be a valid IPv6 + * address). + * + * SR Egress: a 128 bit generic identifier representing the egress + * in the SR domain (i.e.: it needs not to be a valid IPv6 + * address). + * + * Original Source Address: IPv6 address originally present in the + * SA field of the packet. + * + * The segments in the Policy List are encoded after the segment list + * and they are optional. If none are in the SRH, all bits of the + * Policy List Flags MUST be set to 0x0. + */ + +#ifndef IPPROTO_IPV6_ROUTE +#define IPPROTO_IPV6_ROUTE 43 +#endif + +#define ROUTING_HEADER_TYPE_SR 4 +/** + @brief SR header struct. +*/ +typedef struct +{ + /** Protocol for next header. */ + u8 protocol; + + /** + * Length of routing header in 8 octet units, + * not including the first 8 octets + */ + u8 length; + + /** Type of routing header; type 4 = segment routing */ + u8 type; + + /** Next segment in the segment list */ + u8 segments_left; + + /** + * First Segment: offset in the SRH of the last element of the segment + * list - in 16-octet units - not including the first 8 octets. NOTE(review): older text called this the policy list pointer; the wording now follows the First Segment definition in the header description of this file - confirm against the code that sets the field.
+ */ + u8 first_segment; + + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_RESERVED (0x3000) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2) + /** Flag bits */ +#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3) + /** Flags field; policy list element type values 0x4 - 0x7 are reserved */ + u16 flags; + u8 hmac_key; + + /** The segment + policy list elts */ + ip6_address_t segments[0]; +} __attribute__ ((packed)) ip6_sr_header_t; + +static inline int +ip6_sr_policy_list_shift_from_index (int pl_index) +{ /* right-shift that brings the 3-bit type of one-origined pl_index to the low bits: 1 -> 9, 2 -> 6, 3 -> 3, 4 -> 0 */ + return (-3 * pl_index) + 12; +} + +/** Return the 3-bit policy list element type for pl_index (one-origined, 1..4) taken from host-byte-order flags; returns 0 when pl_index is out of range */ +static inline int +ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index) +{ + int shift; + + if (pl_index <= 0 || pl_index > 4) + return 0; + + shift = (-3 * pl_index) + 12; + flags_host_byte_order >>= shift; + + return (flags_host_byte_order & 7); +} + +#endif /* included_vnet_sr_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c new file mode 100644 index 00000000000..5f9de5042af --- /dev/null +++ b/src/vnet/sr/sr_replicate.c @@ -0,0 +1,490 @@ +/* + * sr_replicate.c: ipv6 segment routing replicator for multicast + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief Functions for replicating packets across SR tunnels. + * + * Leverages rte_pktmbuf_clone() so there is no memcpy for + * invariant parts of the packet. + * + * @note Currently requires DPDK +*/ + +#if DPDK > 0 /* Cannot run replicate without DPDK */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vnet/sr/sr.h> +#include <vnet/devices/dpdk/dpdk.h> +#include <vnet/ip/ip.h> +#include <vnet/fib/ip6_fib.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +/** + * @brief sr_replicate state. + * +*/ +typedef struct +{ + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} sr_replicate_main_t; + +sr_replicate_main_t sr_replicate_main; + +/** + * @brief Information to display in packet trace. + * +*/ +typedef struct +{ + ip6_address_t src, dst; + u16 length; + u32 next_index; + u32 tunnel_index; + u8 sr[256]; +} sr_replicate_trace_t; + +/** + * @brief packet trace format function. 
+ * + * @param *s u8 used for string output + * @param *args va_list structured input to va_arg to output @ref sr_replicate_trace_t + * @return *s u8 - formatted trace output +*/ +static u8 * +format_sr_replicate_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *); + ip6_sr_main_t *sm = &sr_main; + ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index); + ip6_fib_t *rx_fib, *tx_fib; + + rx_fib = ip6_fib_get (tun->rx_fib_index); + tx_fib = ip6_fib_get (tun->tx_fib_index); + + s = format + (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n" + " rx-fib-id %d tx-fib-id %d\n%U", + "ip6-lookup", + format_ip6_address, &t->src, + format_ip6_address, &t->dst, t->length, + rx_fib->table_id, tx_fib->table_id, + format_ip6_sr_header, t->sr, 0 /* print_hmac */ ); + return s; + +} + +#define foreach_sr_replicate_error \ +_(REPLICATED, "sr packets replicated") \ +_(NO_BUFFERS, "error allocating buffers for replicas") \ +_(NO_REPLICAS, "no replicas were needed") \ +_(NO_BUFFER_DROPS, "sr no buffer drops") + +/** + * @brief Enum of SR replicate error codes, one per foreach_sr_replicate_error entry + */ +typedef enum +{ +#define _(sym,str) SR_REPLICATE_ERROR_##sym, + foreach_sr_replicate_error +#undef _ + SR_REPLICATE_N_ERROR, +} sr_replicate_error_t; + +/** + * @brief Error strings for SR replicate, in the same order as the enum + */ +static char *sr_replicate_error_strings[] = { +#define _(sym,string) string, + foreach_sr_replicate_error +#undef _ +}; + +/** + * @brief Defines next-nodes for packet processing. + * +*/ +typedef enum +{ + SR_REPLICATE_NEXT_IP6_LOOKUP, + SR_REPLICATE_N_NEXT, +} sr_replicate_next_t; + +/** + * @brief Single loop packet replicator.
+ * + * @node sr-replicate + * @param vm vlib_main_t + * @return frame->n_vectors uword +*/ +static uword +sr_replicate_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + sr_replicate_next_t next_index; + int pkts_replicated = 0; + ip6_sr_main_t *sm = &sr_main; + int no_buffer_drops = 0; + vlib_buffer_free_list_t *fl; + unsigned socket_id = rte_socket_id (); + vlib_buffer_main_t *bm = vm->buffer_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0, hdr_bi0; + vlib_buffer_t *b0, *orig_b0; + struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0; + struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0; + ip6_sr_policy_t *pol0 = 0; + ip6_sr_tunnel_t *t0 = 0; + ip6_sr_header_t *hdr_sr0 = 0; + ip6_header_t *ip0 = 0, *hdr_ip0 = 0; + int num_replicas = 0; + int i; + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + orig_b0 = b0; + + pol0 = pool_elt_at_index (sm->policies, + vnet_buffer (b0)->ip.save_protocol); + + ip0 = vlib_buffer_get_current (b0); + /* Skip forward to the punch-in point, i.e. past the ip6 header */ + vlib_buffer_advance (b0, sizeof (*ip0)); + next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header, so the SR header is chained after it */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + u32 ext_hdr_len = 0; + ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + len_bytes += ext_hdr_len; + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + ip_next_hdr =
IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; + /* Skip forward to the punch-in point, i.e. past the HBH header as well */ + vlib_buffer_advance (b0, ext_hdr_len); + + } + + orig_mb0 = rte_mbuf_from_vlib_buffer (b0); + + i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0) + - (i16) orig_mb0->pkt_len; + + u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0); + u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0); + + orig_mb0->data_len = new_data_len0; + orig_mb0->pkt_len = new_pkt_len0; + orig_mb0->data_off += (u16) (b0->current_data); + + /* + Before entering loop determine if we can allocate: + - all the new HEADER RTE_MBUFs and assign them to a vector + - all the clones + + if successful, then iterate over vectors of resources + + */ + num_replicas = vec_len (pol0->tunnel_indices); + + if (PREDICT_FALSE (num_replicas == 0)) + { + b0->error = node->errors[SR_REPLICATE_ERROR_NO_REPLICAS]; + goto do_trace0; + } + + vec_reset_length (hdr_vec); + vec_reset_length (rte_mbuf_vec); + + for (i = 0; i < num_replicas; i++) + { + uint8_t nb_seg; + struct rte_mbuf *clone0i; + vlib_buffer_t *clone0_c, *clone_b0; + + t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); + hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); + + if (i < (num_replicas - 1)) + { + /* Not the last tunnel to process: clone the payload and re-initialise the vlib buffer of every clone segment */ + clone0 = rte_pktmbuf_clone + (orig_mb0, bm->pktmbuf_pools[socket_id]); + if (clone0 == 0) + goto clone_fail; + nb_seg = 0; + clone0i = clone0; + clone0_c = NULL; + while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs)) + { + + clone_b0 = vlib_buffer_from_rte_mbuf (clone0i); + vlib_buffer_init_for_free_list (clone_b0, fl); + + ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == + 0); + ASSERT (clone_b0->current_data == 0); + + clone_b0->current_data = + (clone0i->buf_addr + clone0i->data_off) - + (void *) clone_b0->data; + + clone_b0->current_length = clone0i->data_len; + if (PREDICT_FALSE (clone0_c != NULL)) + { + clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT; + clone0_c->next_buffer
= + vlib_get_buffer_index (vm, clone_b0); + } + clone0_c = clone_b0; + clone0i = clone0i->next; + nb_seg++; + } + } + else + /* Last tunnel to process, use the original mbuf instead of a clone */ + clone0 = orig_mb0; + + + if (PREDICT_FALSE (!clone0 || !hdr_mb0)) + { + clone_fail: /* NOTE(review): whatever hdr_mb0 or clone was allocated in the failing iteration has not been added to the vectors freed below - possible leak, confirm */ + b0->error = node->errors[SR_REPLICATE_ERROR_NO_BUFFERS]; + + vec_foreach_index (i, rte_mbuf_vec) + { + rte_pktmbuf_free (rte_mbuf_vec[i]); + } + vec_free (rte_mbuf_vec); + + vec_foreach_index (i, hdr_vec) + { + rte_pktmbuf_free (hdr_vec[i]); + } + vec_free (hdr_vec); + + goto do_trace0; + } + + vec_add1 (hdr_vec, hdr_mb0); + vec_add1 (rte_mbuf_vec, clone0); + + } + + for (i = 0; i < num_replicas; i++) + { + vlib_buffer_t *hdr_b0; + u16 new_l0 = 0; + + t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); + /* Our replicas: header mbuf plus payload mbuf (clone or original) collected by the loop above */ + hdr_mb0 = hdr_vec[i]; + clone0 = rte_mbuf_vec[i]; + + hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite); + hdr_mb0->pkt_len = hdr_mb0->data_len + + vlib_buffer_length_in_chain (vm, orig_b0); + + hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0); + + vlib_buffer_init_for_free_list (hdr_b0, fl); + + memcpy (hdr_b0->data, ip0, len_bytes); + memcpy (hdr_b0->data + len_bytes, t0->rewrite, + vec_len (t0->rewrite)); + + hdr_b0->current_data = 0; + hdr_b0->current_length = len_bytes + vec_len (t0->rewrite); + hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT; + hdr_b0->trace_index = orig_b0->trace_index; + vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0; + + hdr_b0->total_length_not_including_first_buffer = + hdr_mb0->pkt_len - hdr_b0->current_length; + vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; + + hdr_ip0 = (ip6_header_t *) hdr_b0->data; + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + + vec_len (t0->rewrite); + hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0); + hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes); + /* $$$ tune */ + clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite)); + hdr_sr0->protocol = next_hdr; +
hdr_ip0->protocol = ip_next_hdr; + + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); + + /* Rewrite the ip6 dst address */ + hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; + hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1]; + + sr_fix_hmac (sm, hdr_ip0, hdr_sr0); + + /* prepend new header to invariant piece */ + hdr_mb0->next = clone0; + hdr_b0->next_buffer = + vlib_get_buffer_index (vm, + vlib_buffer_from_rte_mbuf (clone0)); + + /* update header's fields */ + hdr_mb0->pkt_len = + (uint16_t) (hdr_mb0->data_len + clone0->pkt_len); + hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1); + + /* copy metadata from source packet */ + hdr_mb0->port = clone0->port; + hdr_mb0->vlan_tci = clone0->vlan_tci; + hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer; + hdr_mb0->tx_offload = clone0->tx_offload; + hdr_mb0->hash = clone0->hash; + + hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF); + + __rte_mbuf_sanity_check (hdr_mb0, 1); + + hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0); + + to_next[0] = hdr_bi0; + to_next += 1; + n_left_to_next -= 1; + + if (n_left_to_next == 0) + { + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + } + pkts_replicated++; + } + + /* NOTE(review): hdr_vec and rte_mbuf_vec are released with vec_free only on the clone_fail path - confirm they are not leaked per packet here */ from += 1; + n_left_from -= 1; + + do_trace0: /* NOTE(review): both goto do_trace0 paths skip the from/n_left_from update just above, so the loop appears to look at the same buffer again without having enqueued or freed it; on the no-replicas path t0 is still 0 when tunnel_index is computed below - confirm */ + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sr_replicate_trace_t *tr = vlib_add_trace (vm, node, + b0, sizeof (*tr)); + tr->tunnel_index = t0 - sm->tunnels; + tr->length = 0; + if (hdr_ip0) + { + memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8, + sizeof (tr->src.as_u8)); + memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8, + sizeof (tr->dst.as_u8)); + if (hdr_ip0->payload_length) + tr->length = clib_net_to_host_u16 + (hdr_ip0->payload_length); + } + tr->next_index = next_index; + if (hdr_sr0) + memcpy (tr->sr, hdr_sr0, sizeof (tr->sr)); /* NOTE(review): always copies sizeof (tr->sr) bytes, independent of the SR header length - confirm this cannot read past the buffer */ + } + + } + +
vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, sr_replicate_node.index, + SR_REPLICATE_ERROR_REPLICATED, + pkts_replicated); + + vlib_node_increment_counter (vm, sr_replicate_node.index, + SR_REPLICATE_ERROR_NO_BUFFER_DROPS, + no_buffer_drops); /* NOTE(review): no_buffer_drops is never incremented in this function, so this always adds 0 */ + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sr_replicate_node) = { + .function = sr_replicate_node_fn, + .name = "sr-replicate", + .vector_size = sizeof (u32), + .format_trace = format_sr_replicate_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN(sr_replicate_error_strings), + .error_strings = sr_replicate_error_strings, + + .n_next_nodes = SR_REPLICATE_N_NEXT, + + .next_nodes = { + [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn) +/* *INDENT-ON* */ + +clib_error_t * +sr_replicate_init (vlib_main_t * vm) +{ + sr_replicate_main_t *msm = &sr_replicate_main; + + msm->vlib_main = vm; + msm->vnet_main = vnet_get_main (); + + return 0; +} + +VLIB_INIT_FUNCTION (sr_replicate_init); + +#endif /* DPDK */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |