summaryrefslogtreecommitdiffstats
path: root/vnet/vnet/sr
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/sr')
-rw-r--r--vnet/vnet/sr/rfc_draft_05.txt1265
-rw-r--r--vnet/vnet/sr/sr.c2228
-rw-r--r--vnet/vnet/sr/sr.h132
-rw-r--r--vnet/vnet/sr/sr_error.def20
-rw-r--r--vnet/vnet/sr/sr_fix_dst_error.def17
-rw-r--r--vnet/vnet/sr/sr_packet.h227
6 files changed, 3889 insertions, 0 deletions
diff --git a/vnet/vnet/sr/rfc_draft_05.txt b/vnet/vnet/sr/rfc_draft_05.txt
new file mode 100644
index 00000000000..bc41c181ea4
--- /dev/null
+++ b/vnet/vnet/sr/rfc_draft_05.txt
@@ -0,0 +1,1265 @@
+Network Working Group S. Previdi, Ed.
+Internet-Draft C. Filsfils
+Intended status: Standards Track Cisco Systems, Inc.
+Expires: June 12, 2015 B. Field
+ Comcast
+ I. Leung
+ Rogers Communications
+ December 9, 2014
+
+
+ IPv6 Segment Routing Header (SRH)
+ draft-previdi-6man-segment-routing-header-05
+
+Abstract
+
+ Segment Routing (SR) allows a node to steer a packet through a
+ controlled set of instructions, called segments, by prepending a SR
+ header to the packet. A segment can represent any instruction,
+ topological or service-based. SR allows to enforce a flow through
+ any path (topological, or application/service based) while
+ maintaining per-flow state only at the ingress node to the SR domain.
+
+ Segment Routing can be applied to the IPv6 data plane with the
+ addition of a new type of Routing Extension Header. This draft
+ describes the Segment Routing Extension Header Type and how it is
+ used by SR capable nodes.
+
+Requirements Language
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [RFC2119].
+
+Status of This Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 1]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ This Internet-Draft will expire on June 12, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+Table of Contents
+
+ 1. Structure of this document . . . . . . . . . . . . . . . . . 3
+ 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3
+ 3. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3
+ 3.1. Data Planes supporting Segment Routing . . . . . . . . . 4
+ 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4
+ 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7
+ 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8
+ 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9
+ 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10
+ 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10
+ 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10
+ 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11
+ 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11
+ 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11
+ 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . . . . 15
+ 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15
+ 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15
+ 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16
+ 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16
+ 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18
+ 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18
+ 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18
+ 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18
+ 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18
+ 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19
+ 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21
+ 10. Manageability Considerations . . . . . . . . . . . . . . . . 21
+ 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21
+ 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 2]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21
+ 14. References . . . . . . . . . . . . . . . . . . . . . . . . . 21
+ 14.1. Normative References . . . . . . . . . . . . . . . . . . 21
+ 14.2. Informative References . . . . . . . . . . . . . . . . . 21
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22
+
+1. Structure of this document
+
+ Section 3 gives an introduction on SR for IPv6 networks.
+
+ Section 4 describes the Segment Routing abstract model.
+
+ Section 5 defines the Segment Routing Header (SRH) allowing
+ instantiation of SR over IPv6 dataplane.
+
+ Section 6 details the procedures of the Segment Routing Header.
+
+2. Segment Routing Documents
+
+ Segment Routing terminology is defined in
+ [I-D.filsfils-spring-segment-routing].
+
+ Segment Routing use cases are described in
+ [I-D.filsfils-spring-segment-routing-use-cases].
+
+ Segment Routing IPv6 use cases are described in
+ [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing protocol extensions are defined in
+ [I-D.ietf-isis-segment-routing-extensions], and
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension].
+
+ The security mechanisms of the Segment Routing Header (SRH) are
+ described in [I-D.vyncke-6man-segment-routing-security].
+
+3. Introduction
+
+ Segment Routing (SR), defined in
+ [I-D.filsfils-spring-segment-routing], allows a node to steer a
+ packet through a controlled set of instructions, called segments, by
+ prepending a SR header to the packet. A segment can represent any
+ instruction, topological or service-based. SR allows to enforce a
+ flow through any path (topological or service/application based)
+ while maintaining per-flow state only at the ingress node to the SR
+ domain. Segments can be derived from different components: IGP, BGP,
+ Services, Contexts, Locators, etc. The list of segment forming the
+ path is called the Segment List and is encoded in the packet header.
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 3]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ SR allows the use of strict and loose source based routing paradigms
+ without requiring any additional signaling protocols in the
+ infrastructure hence delivering an excellent scalability property.
+
+ The source based routing model described in
+ [I-D.filsfils-spring-segment-routing] is inherited from the ones
+ proposed by [RFC1940] and [RFC2460]. The source based routing model
+ offers the support for explicit routing capability.
+
+3.1. Data Planes supporting Segment Routing
+
+ Segment Routing (SR), can be instantiated over MPLS
+ ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document
+ defines its instantiation over the IPv6 data-plane based on the use-
+ cases defined in [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS
+ data-plane is not used or, when combined with SR-MPLS, in networks
+ where MPLS is used in the core and IPv6 is used at the edge (home
+ networks, datacenters).
+
+ This document defines a new type of Routing Header (originally
+ defined in [RFC2460]) called the Segment Routing Header (SRH) in
+ order to convey the Segment List in the packet header as defined in
+ [I-D.filsfils-spring-segment-routing]. Mechanisms through which
+ segment are known and advertised are outside the scope of this
+ document.
+
+3.2. Illustration
+
+ In the context of Figure 1 where all the links have the same IGP
+ cost, let us assume that a packet P enters the SR domain at an
+ ingress edge router I and that the operator requests the following
+ requirements for packet P:
+
+ The local service S offered by node B must be applied to packet P.
+
+ The links AB and CE cannot be used to transport the packet P.
+
+ Any node N along the journey of the packet should be able to
+ determine where the packet P entered the SR domain and where it
+ will exit. The intermediate node should be able to determine the
+ paths from the ingress edge router to itself, and from itself to
+ the egress edge router.
+
+ Per-flow State for packet P should only be created at the ingress
+ edge router.
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 4]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ The operator can forbid, for security reasons, anyone outside the
+ operator domain to exploit its intra-domain SR capabilities.
+
+ I---A---B---C---E
+ \ | / \ /
+ \ | / F
+ \|/
+ D
+
+ Figure 1: An illustration of SR properties
+
+ All these properties may be realized by instructing the ingress SR
+ edge router I to push the following abstract SR header on the packet
+ P.
+
+ +---------------------------------------------------------------+
+ | | |
+ | Abstract SR Header | |
+ | | |
+ | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported |
+ | ^ | | Packet |
+ | | | | P |
+ | +---------------------+ | |
+ | | |
+ +---------------------------------------------------------------+
+
+ Figure 2: Packet P at node I
+
+ The abstract SR header contains a source route encoded as a list of
+ segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification
+ of the ingress and egress SR edge routers (segments SI and SE).
+
+ A segment identifies a topological instruction or a service
+ instruction. A segment can either be global or local. The
+ instruction associated with a global segment is recognized and
+ executed by any SR-capable node in the domain. The instruction
+ associated with a local segment is only supported by the specific
+ node that originates it.
+
+ Let us assume some IGP (i.e.: ISIS and OSPF) extensions to define a
+ "Node Segment" as a global instruction within the IGP domain to
+ forward a packet along the shortest path to the specified node. Let
+ us further assume that within the SR domain illustrated in Figure 1,
+ segments SI, SD, SB, SE and SF respectively identify IGP node
+ segments to I, D, B, E and F.
+
+ Let us assume that node B identifies its local service S with local
+ segment SS.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 5]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ With all of this in mind, let us describe the journey of the packet
+ P.
+
+ The packet P reaches the ingress SR edge router. I pushes the SR
+ header illustrated in Figure 2 and sets the pointer to the first
+ segment of the list (SD).
+
+ SD is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to D.
+
+ Once at D, the pointer is incremented and the next segment is
+ executed (SB).
+
+ SB is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to B.
+
+ Once at B, the pointer is incremented and the next segment is
+ executed (SS).
+
+ SS is an instruction only recognized by node B which causes the
+ packet to receive service S.
+
+ Once the service applied, the next segment is executed (SF) which
+ causes the packet to be forwarded along the shortest path to F.
+
+ Once at F, the pointer is incremented and the next segment is
+ executed (SE).
+
+ SE is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to E.
+
+ E then removes the SR header and the packet continues its journey
+ outside the SR domain.
+
+ All of the requirements are met.
+
+ First, the packet P has not used links AB and CE: the shortest-path
+ from I to D is I-A-D, the shortest-path from D to B is D-B, the
+ shortest-path from B to F is B-C-F and the shortest-path from F to E
+ is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E
+ and the links AB and CE have been avoided.
+
+ Second, the service S supported by B has been applied on packet P.
+
+ Third, any node along the packet path is able to identify the service
+ and topological journey of the packet within the SR domain. For
+ example, node C receives the packet illustrated in Figure 3 and hence
+ is able to infer where the packet entered the SR domain (SI), how it
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 6]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ got up to itself {SD, SB, SS, SE}, where it will exit the SR domain
+ (SE) and how it will do so {SF, SE}.
+
+ +---------------------------------------------------------------+
+ | | |
+ | SR Header | |
+ | | |
+ | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported |
+ | ^ | | Packet |
+ | | | | P |
+ | +--------+ | |
+ | | |
+ +---------------------------------------------------------------+
+
+ Figure 3: Packet P at node C
+
+ Fourth, only node I maintains per-flow state for packet P. The
+ entire program of topological and service instructions to be executed
+ by the SR domain on packet P is encoded by the ingress edge router I
+ in the SR header in the form of a list of segments where each segment
+ identifies a specific instruction. No further per-flow state is
+ required along the packet path. The per-flow state is in the SR
+ header and travels with the packet. Intermediate nodes only hold
+ states related to the IGP global node segments and the local IGP
+ adjacency segments. These segments are not per-flow specific and
+ hence scale very well. Typically, an intermediate node would
+ maintain in the order of 100's to 1000's global node segments and in
+ the order of 10's to 100 of local adjacency segments. Typically the
+ SR IGP forwarding table is expected to be much less than 10000
+ entries.
+
+ Fifth, the SR header is inserted at the entrance to the domain and
+ removed at the exit of the operator domain. For security reasons,
+ the operator can forbid anyone outside its domain to use its intra-
+ domain SR capability.
+
+4. Abstract Routing Model
+
+ At the entrance of the SR domain, the ingress SR edge router pushes
+ the SR header on top of the packet. At the exit of the SR domain,
+ the egress SR edge router removes the SR header.
+
+ The abstract SR header contains an ordered list of segments, a
+ pointer identifying the next segment to process and the
+ identifications of the ingress and egress SR edge routers on the path
+ of this packet. The pointer identifies the segment that MUST be used
+ by the receiving router to process the packet. This segment is
+ called the active segment.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 7]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ A property of SR is that the entire source route of the packet,
+ including the identity of the ingress and egress edge routers is
+ always available with the packet. This allows for interesting
+ accounting and service applications.
+
+ We define three SR-header operations:
+
+ "PUSH": an SR header is pushed on an IP packet, or additional
+ segments are added at the head of the segment list. The pointer
+ is moved to the first entry of the added segments.
+
+ "NEXT": the active segment is completed, the pointer is moved to
+ the next segment in the list.
+
+ "CONTINUE": the active segment is not completed, the pointer is
+ left unchanged.
+
+ In the future, other SR-header management operations may be defined.
+
+ As the packet travels through the SR domain, the pointer is
+ incremented through the ordered list of segments and the source route
+ encoded by the SR ingress edge node is executed.
+
+ A node processes an incoming packet according to the instruction
+ associated with the active segment.
+
+ Any instruction might be associated with a segment: for example, an
+ intra-domain topological strict or loose forwarding instruction, a
+ service instruction, etc.
+
+ At minimum, a segment instruction must define two elements: the
+ identity of the next-hop to forward the packet to (this could be the
+ same node or a context within the node) and which SR-header
+ management operation to execute.
+
+ Each segment is known in the network through a Segment Identifier
+ (SID). The terms "segment" and "SID" are interchangeable.
+
+4.1. Segment Routing Global Block (SRGB)
+
+ In the SR abstract model, a segment is identified by a Segment
+ Routing Identifier (SID). The SR abstract model doesn't mandate a
+ specific format for the SID (IPv6 address or other formats).
+
+ In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the
+ SRGB is materialized by the global IPv6 address space which
+ represents the set of IPv6 routable addresses in the SR domain. The
+ following rules apply:
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 8]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ o Each node of the SR domain MUST be configured with the Segment
+ Routing Global Block (SRGB).
+
+ o All global segments must be allocated from the SRGB. Any SR
+ capable node MUST be able to process any global segment advertised
+ by any other node within the SR domain.
+
+ o Any segment outside the SRGB has a local significance and is
+ called a "local segment". An SR-capable node MUST be able to
+ process the local segments it originates. An SR-capable node MUST
+ NOT support the instruction associated with a local segment
+ originated by a remote node.
+
+4.2. Traffic Engineering with SR
+
+ An SR Traffic Engineering policy is composed of two elements: a flow
+ classification and a segment-list to prepend on the packets of the
+ flow.
+
+ In SR, this per-flow state only exists at the ingress edge node where
+ the policy is defined and the SR header is pushed.
+
+ It is outside the scope of the document to define the process that
+ leads to the instantiation at a node N of an SR Traffic Engineering
+ policy.
+
+ [I-D.filsfils-spring-segment-routing-use-cases] illustrates various
+ alternatives:
+
+ N is deriving this policy automatically (e.g. FRR).
+
+ N is provisioned explicitly by the operator.
+
+ N is provisioned by a controller or server (e.g.: SDN Controller).
+
+ N is provisioned by the operator with a high-level policy which is
+ mapped into a path thanks to a local CSPF-based computation (e.g.
+ affinity/SRLG exclusion).
+
+ N could also be provisioned by other means.
+
+ [I-D.filsfils-spring-segment-routing-use-cases] explains why the
+ majority of use-cases require very short segment-lists, hence
+ minimizing the performance impact, if any, of inserting and
+ transporting the segment list.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 9]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ A SDN controller, which desires to instantiate at node N an SR
+ Traffic Engineering policy, collects the SR capability of node N such
+ as to ensure that the policy meets its capability.
+
+4.3. Segment Routing Database
+
+ The Segment routing Database (SRDB) is a set of entries where each
+ entry is identified by a SID. The instruction associated with each
+ entry at least defines the identity of the next-hop to which the
+ packet should be forwarded and what operation should be performed on
+ the SR header (PUSH, CONTINUE, NEXT).
+
+ +---------+-----------+---------------------------------+
+ | Segment | Next-Hop | SR Header operation |
+ +---------+-----------+---------------------------------+
+ | Sk | M | CONTINUE |
+ | Sj | N | NEXT |
+ | Sl | NAT Srvc | NEXT |
+ | Sm | FW srvc | NEXT |
+ | Sn | Q | NEXT |
+ | etc. | etc. | etc. |
+ +---------+-----------+---------------------------------+
+
+ Figure 4: SR Database
+
+ Each SR-capable node maintains its local SRDB. SRDB entries can
+ either derive from local policy or from protocol segment
+ advertisement.
+
+5. IPv6 Instantiation of Segment Routing
+
+5.1. Segment Identifiers (SIDs) and SRGB
+
+ Segment Routing, as described in
+ [I-D.filsfils-spring-segment-routing], defines Node-SID and
+ Adjacency-SID. When SR is used over IPv6 data-plane the following
+ applies.
+
+ The SRGB is the global IPv6 address space which represents the set of
+ IPv6 routable addresses in the SR domain.
+
+ Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable
+ addresses). Adjacency-SIDs are IPv6 addresses which may not be part
+ of the global IPv6 address space.
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 10]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+5.1.1. Node-SID
+
+ The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6
+ prefix that the operator configured on the node and that is used as
+ the node identifier. Typically, in case of a router, this is the
+ IPv6 address of the node loopback interface. Therefore, SR-IPv6 does
+ not require any additional SID advertisement for the Node Segment.
+ The Node-SID is in fact the IPv6 address of the node.
+
+5.1.2. Adjacency-SID
+
+ In the SR architecture defined in
+ [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID)
+ identifies a given interface and may be local or global (depending on
+ how it is advertised). A node may advertise one (or more) Adj-SIDs
+ allocated to a given interface so to force the forwarding of the
+ packet (when received with that particular Adj-SID) into the
+ interface regardless the routing entry for the packet destination.
+ The semantic of the Adj-SID is:
+
+ Send out the packet to the interface this prefix is allocated to.
+
+ When SR is applied to IPv6, any SID is in a global IPv6 address and
+ therefore, an Adj-SID has a global significance (i.e.: the IPv6
+ address representing the SID is a global address). In other words, a
+ node that advertises the Adj-SID in the form of a global IPv6 address
+ representing the link/adjacency the packet has to be forwarded to,
+ will apply to the Adj-SID a global significance.
+
+ Advertisement of Adj-SID may be done using multiple mechanisms among
+ which the ones described in ISIS and OSPF protocol extensions:
+ [I-D.ietf-isis-segment-routing-extensions] and
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction
+ between local and global significance of the Adj-SID is given in the
+ encoding of the Adj-SID advertisement.
+
+5.2. Segment Routing Extension Header (SRH)
+
+ A new type of the Routing Header (originally defined in [RFC2460]) is
+ defined: the Segment Routing Header (SRH) which has a new Routing
+ Type, (suggested value 4) to be assigned by IANA.
+
+ As an example, if an explicit path is to be constructed across a core
+ network running ISIS or OSPF, the segment list will contain SIDs
+ representing the nodes across the path (loose or strict) which,
+ usually, are the IPv6 loopback interface address of each node. If
+ the path is across service or application entities, the segment list
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 11]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ contains the IPv6 addresses of these services or application
+ instances.
+
+ The Segment Routing Header (SRH) is defined as follows:
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | First Segment | Flags | HMAC Key ID |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[0] (128 bits ipv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ ...
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[n] (128 bits ipv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[0] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[1] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[2] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ | |
+ | HMAC (256 bits) |
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 12]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ | (optional) |
+ | |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Next Header: 8-bit selector. Identifies the type of header
+ immediately following the SRH.
+
+ o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ header in 8-octet units, not including the first 8 octets.
+
+ o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+
+ o Segments Left. Defined in [RFC2460], it contains the index, in
+ the Segment List, of the next segment to inspect. Segments Left
+ is decremented at each segment and it is used as an index in the
+ segment list.
+
+ o First Segment: offset in the SRH, not including the first 8 octets
+ and expressed in 16-octet units, pointing to the last element of
+ the segment list, which is in fact the first segment of the
+ segment routing path.
+
+ o Flags: 16 bits of flags. Following flags are defined:
+
+ 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |C|P|R|R| Policy Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ C-flag: Clean-up flag. Set when the SRH has to be removed from
+ the packet when packet reaches the last segment.
+
+ P-flag: Protected flag. Set when the packet has been rerouted
+ through FRR mechanism by a SR endpoint node. See Section 6.3
+ for more details.
+
+ R-flags. Reserved and for future use.
+
+ Policy Flags. Define the type of the IPv6 addresses encoded
+ into the Policy List (see below). The following have been
+ defined:
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 13]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Bits 4-6: determine the type of the first element after the
+ segment list.
+
+ Bits 7-9: determine the type of the second element.
+
+ Bits 10-12: determine the type of the third element.
+
+ Bits 13-15: determine the type of the fourth element.
+
+ The following values are used for the type:
+
+ 0x0: Not present. If value is set to 0x0, it means the
+ element represented by these bits is not present.
+
+ 0x1: SR Ingress.
+
+ 0x2: SR Egress.
+
+ 0x3: Original Source Address.
+
+ o HMAC Key ID and HMAC field, and their use are defined in
+ [I-D.vyncke-6man-segment-routing-security].
+
+ o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ segment in the Segment List. The Segment List is encoded starting
+ from the last segment of the path. I.e., the first element of the
+ segment list (Segment List [0]) contains the last segment of the
+ path while the last segment of the Segment List (Segment List[n])
+ contains the first segment of the path. The index contained in
+ "Segments Left" identifies the current active segment.
+
+ o Policy List. Optional addresses representing specific nodes in
+ the SR path such as:
+
+ SR Ingress: a 128 bit generic identifier representing the
+ ingress in the SR domain (i.e.: it needs not to be a valid IPv6
+ address).
+
+ SR Egress: a 128 bit generic identifier representing the egress
+ in the SR domain (i.e.: it needs not to be a valid IPv6
+ address).
+
+ Original Source Address: IPv6 address originally present in the
+ SA field of the packet.
+
+ The segments in the Policy List are encoded after the segment list
+ and they are optional. If none are in the SRH, all bits of the
+ Policy List Flags MUST be set to 0x0.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 14]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+5.2.1. SRH and RFC2460 behavior
+
+ The SRH being a new type of the Routing Header, it also has the same
+ properties:
+
+ SHOULD only appear once in the packet.
+
+ Only the router whose address is in the DA field of the packet
+ header MUST inspect the SRH.
+
+ Therefore, Segment Routing in IPv6 networks implies that the segment
+ identifier (i.e.: the IPv6 address of the segment) is moved into the
+ DA of the packet.
+
+ The DA of the packet changes at each segment termination/completion
+ and therefore the original DA of the packet MUST be encoded as the
+ last segment of the path.
+
+ As illustrated in Section 3.2, nodes that are within the path of a
+ segment will forward packets based on the DA of the packet without
+ inspecting the SRH. This ensures full interoperability between SR-
+ capable and non-SR-capable nodes.
+
+6. SRH Procedures
+
+ In this section we describe the different procedures on the SRH.
+
+6.1. Segment Routing Operations
+
+ When Segment Routing is instantiated over the IPv6 data plane the
+ following applies:
+
+ o The segment list is encoded in the SRH.
+
+ o The active segment is in the destination address of the packet.
+
+ o The Segment Routing CONTINUE operation (as described in
+ [I-D.filsfils-spring-segment-routing]) is implemented as a
+ regular/plain IPv6 operation consisting of DA based forwarding.
+
+ o The NEXT operation is implemented through the update of the DA
+ with the value represented by the Next Segment field in the SRH.
+
+ o The PUSH operation is implemented through the insertion of the SRH
+ or the insertion of additional segments in the SRH segment list.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 15]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+6.2. Segment Routing Node Functions
+
+ SR packets are forwarded to segments endpoints (i.e.: nodes whose
+ address is in the DA field of the packet). The segment endpoint,
+ when receiving a SR packet destined to itself, does:
+
+ o Inspect the SRH.
+
+ o Determine the next active segment.
+
+ o Update the Segments Left field (or, if requested, remove the SRH
+ from the packet).
+
+ o Update the DA.
+
+ o Send the packet to the next segment.
+
+ The procedures applied to the SRH are related to the node function.
+ Following nodes functions are defined:
+
+ Ingress SR Node.
+
+ Transit Non-SR Node.
+
+ Transit SR Intra Segment Node.
+
+ SR Endpoint Node.
+
+6.2.1. Ingress SR Node
+
+ Ingress Node can be a router at the edge of the SR domain or a SR-
+ capable host. The ingress SR node may obtain the segment list by
+ either:
+
+ Local path computation.
+
+ Local configuration.
+
+ Interaction with an SDN controller delivering the path as a
+ complete SRH.
+
+ Any other mechanism (mechanisms through which the path is acquired
+ are outside the scope of this document).
+
+ When creating the SRH (either at ingress node or in the SDN
+ controller) the following is done:
+
+ Next Header and Hdr Ext Len fields are set according to [RFC2460].
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 16]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Routing Type field is set as TBD (SRH).
+
+ The Segment List is built with the FIRST segment of the path
+ encoded in the LAST element of the Segment List. Subsequent
+ segments are encoded on top of the first segment. Finally, the
+ LAST segment of the path is encoded in the FIRST element of the
+ Segment List. In other words, the Segment List is encoded in the
+ reverse order of the path.
+
+ The original DA of the packet is encoded as the last segment of
+ the path (encoded in the first element of the Segment List).
+
+ the DA of the packet is set with the value of the first segment
+ (found in the last element of the segment list).
+
+ the Segments Left field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The packet is sent out towards the first segment (i.e.:
+ represented in the packet DA).
+
+6.2.1.1. Security at Ingress
+
+ The procedures related to the Segment Routing security are detailed
+ in [I-D.vyncke-6man-segment-routing-security].
+
+ In the case where the SR domain boundaries are not under control of
+ the network operator (e.g.: when the SR domain edge is in a home
+ network), it is important to authenticate and validate the content of
+ any SRH being received by the network operator. In such case, the
+ security procedure described in
+ [I-D.vyncke-6man-segment-routing-security] is to be used.
+
+ The ingress node (e.g.: the host in the home network) requests the
+ SRH from a control system (e.g.: an SDN controller) which delivers
+ the SRH with its HMAC signature on it.
+
+ Then, the home network host can send out SR packets (with an SRH on
+ it) that will be validated at the ingress of the network operator
+ infrastructure.
+
+ The ingress node of the network operator infrastructure, is
+ configured in order to validate the incoming SRH HMACs in order to
+ allow only packets having correct SRH according to their SA/DA
+ addresses.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 17]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+6.2.2. Transit Non-SR Capable Node
+
+ SR is interoperable with plain IPv6 forwarding. Any non SR-capable
+ node will forward SR packets solely based on the DA. There's no SRH
+ inspection. This ensures full interoperability between SR and non-SR
+ nodes.
+
+6.2.3. SR Intra Segment Transit Node
+
+ Only the node whose address is in DA inspects and processes the SRH
+ (according to [RFC2460]). An intra segment transit node is not in
+ the DA and its forwarding is based on DA and its SR-IPv6 FIB.
+
+6.2.4. SR Segment Endpoint Node
+
+ The SR segment endpoint node is the node whose address is in the DA.
+ The segment endpoint node inspects the SRH and does:
+
+ 1. IF DA = myself (segment endpoint)
+ 2. IF Segments Left > 0 THEN
+ decrement Segments Left
+ update DA with Segment List[Segments Left]
+ 3. ELSE IF Segments List[Segments Left] <> DA THEN
+ update DA with Segments List[Segments Left]
+ IF Clean-up bit is set THEN remove the SRH
+ 4. ELSE give the packet to next PID (application)
+ End of processing.
+ 5. Forward the packet out
+
+6.3. FRR Flag Settings
+
+ A node supporting SR and doing Fast Reroute (as described in
+ [I-D.filsfils-spring-segment-routing-use-cases], when rerouting
+ packets through FRR mechanisms, SHOULD inspect the rerouted packet
+ header and look for the SRH. If the SRH is present, the rerouting
+ node SHOULD set the Protected bit on all rerouted packets.
+
+7. SR and Tunneling
+
+ Encapsulation can be realized in two different ways with SR-IPv6:
+
+ Outer encapsulation.
+
+ SRH with SA/DA original addresses.
+
+ Outer encapsulation tunneling is the traditional method where an
+ additional IPv6 header is prepended to the packet. The original IPv6
+ header being encapsulated, everything is preserved and the packet is
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 18]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ switched/routed according to the outer header (that could contain a
+ SRH).
+
+ SRH allows encoding both original SA and DA, hence an operator may
+ decide to change the SA/DA at ingress and restore them at egress.
+ This can be achieved without outer encapsulation, by changing SA/DA
+ and encoding the original SA in the Policy List and in the original
+ DA in the Segment List.
+
+8. Example Use Case
+
+ A more detailed description of use cases are available in
+ [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6
+ example is illustrated.
+
+ In the topology described in Figure 6 it is assumed an end-to-end SR
+ deployment. Therefore SR is supported by all nodes from A to J.
+
+ Home Network | Backbone | Datacenter
+ | |
+ | +---+ +---+ +---+ | +---+ |
+ +---|---| C |---| D |---| E |---|---| I |---|
+ | | +---+ +---+ +---+ | +---+ |
+ | | | | | | | | +---+
+ +---+ +---+ | | | | | | |--| X |
+ | A |---| B | | +---+ +---+ +---+ | +---+ | +---+
+ +---+ +---+ | | F |---| G |---| H |---|---| J |---|
+ | +---+ +---+ +---+ | +---+ |
+ | |
+ | +-----------+
+ | SDN |
+ | Orch/Ctlr |
+ +-----------+
+
+ Figure 6: Sample SR topology
+
+ The following workflow applies to packets sent by host A and destined
+ to server X.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 19]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ . Host A sends a request for a path to server X to the SDN
+ controller or orchestration system.
+
+ . The SDN controller/orchestrator builds a SRH with:
+ . Segment List: C, F, J, X
+ . HMAC
+ that satisfies the requirements expressed in the request
+ by host A and based on policies applicable to host A.
+
+ . Host A receives the SRH and insert it into the packet.
+ The packet has now:
+ . SA: A
+ . DA: C
+ . SRH with
+ . SL: X, J, F, C
+ . Segments Left: 3 (i.e.: Segment List size - 1)
+ . PL: C (ingress), J (egress)
+ Note that X is the last segment and C is the
+ first segment (i.e.: the SL is encoded in the reverse
+ path order).
+ . HMAC
+
+ . When packet arrives in C (first segment), C does:
+ . Validate the HMAC of the SRH.
+ . Decrement Segments Left by one: 2
+ . Update the DA with the next segment found in
+ Segment List[2]. DA is set to F.
+ . Forward the packet to F.
+
+ . When packet arrives in F (second segment), F does:
+ . Decrement Segments Left by one: 1
+ . Update the DA with the next segment found in
+ Segment List[1]. DA is set to J.
+ . Forward the packet to J.
+
+ . Packet travels across G and H nodes which do plain
+ IPv6 forwarding based on DA. No inspection of SRH needs
+ to be done in these nodes. However, any SR capable node
+ is allowed to set the Protected bit in case of FRR
+ protection.
+
+ . When packet arrives in J (third segment), J does:
+ . Decrement Segments Left by one: 0
+ . Update the DA with the next segment found in
+ Segment List[0]. DA is set to X.
+ . If the cleanup bit is set, then node J will strip out
+ the SRH from the packet.
+ . Forward the packet to X.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 20]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ The packet arrives in the server that may or may not support SR. The
+ return traffic, from server to host, may be sent using the same
+ procedures.
+
+9. IANA Considerations
+
+ TBD
+
+10. Manageability Considerations
+
+ TBD
+
+11. Security Considerations
+
+ Security mechanisms applied to Segment Routing over IPv6 networks are
+ detailed in [I-D.vyncke-6man-segment-routing-security].
+
+12. Contributors
+
+ The authors would like to thank Dave Barach, John Leddy, John
+ Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian
+ Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun
+ and Fred Baker for their contribution to this document.
+
+13. Acknowledgements
+
+ TBD
+
+14. References
+
+14.1. Normative References
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6
+ (IPv6) Specification", RFC 2460, December 1998.
+
+14.2. Informative References
+
+ [I-D.filsfils-spring-segment-routing]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe,
+ "Segment Routing Architecture", draft-filsfils-spring-
+ segment-routing-04 (work in progress), July 2014.
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 21]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ [I-D.filsfils-spring-segment-routing-mpls]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe,
+ "Segment Routing with MPLS data plane", draft-filsfils-
+ spring-segment-routing-mpls-03 (work in progress), August
+ 2014.
+
+ [I-D.filsfils-spring-segment-routing-use-cases]
+ Filsfils, C., Francois, P., Previdi, S., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E.
+ Crabbe, "Segment Routing Use Cases", draft-filsfils-
+ spring-segment-routing-use-cases-01 (work in progress),
+ October 2014.
+
+ [I-D.ietf-isis-segment-routing-extensions]
+ Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
+ Litkowski, S., Decraene, B., and J. Tantsura, "IS-IS
+ Extensions for Segment Routing", draft-ietf-isis-segment-
+ routing-extensions-03 (work in progress), October 2014.
+
+ [I-D.ietf-spring-ipv6-use-cases]
+ Brzozowski, J., Leddy, J., Leung, I., Previdi, S.,
+ Townsley, W., Martin, C., Filsfils, C., and R. Maglione,
+ "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use-
+ cases-03 (work in progress), November 2014.
+
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension]
+ Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
+ Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
+ Extensions for Segment Routing", draft-psenak-ospf-
+ segment-routing-ospfv3-extension-02 (work in progress),
+ July 2014.
+
+ [I-D.vyncke-6man-segment-routing-security]
+ Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header
+ (SRH) Security Considerations", July 2014.
+
+ [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
+ Zappala, "Source Demand Routing: Packet Format and
+ Forwarding Specification (Version 1)", RFC 1940, May 1996.
+
+Authors' Addresses
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 22]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Stefano Previdi (editor)
+ Cisco Systems, Inc.
+ Via Del Serafico, 200
+ Rome 00142
+ Italy
+
+ Email: sprevidi@cisco.com
+
+
+ Clarence Filsfils
+ Cisco Systems, Inc.
+ Brussels
+ BE
+
+ Email: cfilsfil@cisco.com
+
+
+ Brian Field
+ Comcast
+ 4100 East Dry Creek Road
+ Centennial, CO 80122
+ US
+
+ Email: Brian_Field@cable.comcast.com
+
+
+ Ida Leung
+ Rogers Communications
+ 8200 Dixie Road
+ Brampton, ON L6T 0C1
+ CA
+
+ Email: Ida.Leung@rci.rogers.com
diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c
new file mode 100644
index 00000000000..5cc33dc93e6
--- /dev/null
+++ b/vnet/vnet/sr/sr.c
@@ -0,0 +1,2228 @@
+/*
+ * sr.c: ipv6 segment routing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/sr/sr.h>
+
+#include <openssl/hmac.h>
+
+ip6_sr_main_t sr_main;
+static vlib_node_registration_t sr_local_node;
+
+static void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip,
+ ip6_sr_header_t * sr)
+{
+ u32 key_index;
+ static u8 * keybuf;
+ u8 * copy_target;
+ int first_segment;
+ ip6_address_t *addrp;
+ int i;
+ ip6_sr_hmac_key_t * hmac_key;
+ u32 sig_len;
+
+ key_index = sr->hmac_key;
+
+ /* No signature? Pass... */
+ if (key_index == 0)
+ return;
+
+ /* We don't know about this key? Fail... */
+ if (key_index >= vec_len (sm->hmac_keys))
+ return;
+
+ hmac_key = sm->hmac_keys + key_index;
+
+ vec_reset_length (keybuf);
+
+ /* pkt ip6 src address */
+ vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+ memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t));
+
+ /* first segment */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0] = sr->first_segment;
+
+ /* octet w/ bit 0 = "clean" flag */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0]
+ = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP))
+ ? 0x80 : 0;
+
+ /* hmac key id */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0] = sr->hmac_key;
+
+ first_segment = sr->first_segment;
+
+ addrp = sr->segments;
+
+ /* segments */
+ for (i = 0; i <= first_segment; i++)
+ {
+ vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+ memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t));
+ addrp++;
+ }
+
+ addrp++;
+
+ HMAC_CTX_init(sm->hmac_ctx);
+ if (!HMAC_Init(sm->hmac_ctx, hmac_key->shared_secret,
+ vec_len(hmac_key->shared_secret),sm->md))
+ clib_warning ("barf1");
+ if (!HMAC_Update(sm->hmac_ctx,keybuf,vec_len(keybuf)))
+ clib_warning ("barf2");
+ if (!HMAC_Final(sm->hmac_ctx, (unsigned char *) addrp, &sig_len))
+ clib_warning ("barf3");
+ HMAC_CTX_cleanup(sm->hmac_ctx);
+}
+
+u8 * format_ip6_sr_header_flags (u8 * s, va_list * args)
+{
+ u16 flags = (u16) va_arg (*args, int);
+ u8 pl_flag;
+ int bswap_needed = va_arg (*args, int);
+ int i;
+
+ if (bswap_needed)
+ flags = clib_host_to_net_u16 (flags);
+
+ if (flags & IP6_SR_HEADER_FLAG_CLEANUP)
+ s = format (s, "cleanup ");
+
+ if (flags & IP6_SR_HEADER_FLAG_PROTECTED)
+ s = format (s, "reroute ");
+
+ s = format (s, "pl: ");
+ for (i = 1; i <= 4; i++)
+ {
+ pl_flag = ip6_sr_policy_list_flags (flags, i);
+ s = format (s, "[%d] ", i);
+
+ switch (pl_flag)
+ {
+ case IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT:
+ s = format (s, "NotPr ");
+ break;
+ case IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE:
+ s = format (s, "InPE ");
+ break;
+ case IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE:
+ s = format (s, "EgPE ");
+ break;
+
+ case IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR:
+ s = format (s, "OrgSrc ");
+ break;
+ }
+ }
+ return s;
+}
+
+u8 * format_ip6_sr_header (u8 * s, va_list * args)
+{
+ ip6_sr_header_t * h = va_arg (*args, ip6_sr_header_t *);
+ int print_hmac = va_arg (*args, int);
+ int i, pl_index, max_segs;
+ int flags_host_byte_order = clib_net_to_host_u16(h->flags);
+
+ s = format (s, "next proto %d, len %d, type %d",
+ h->protocol, (h->length<<3)+8, h->type);
+ s = format (s, "\n segs left %d, first_segment %d, hmac key %d",
+ h->segments_left, h->first_segment, h->hmac_key);
+ s = format (s, "\n flags %U", format_ip6_sr_header_flags,
+ flags_host_byte_order, 0 /* bswap needed */ );
+
+ /*
+ * Header length is in 8-byte units (minus one), so
+ * divide by 2 to ascertain the number of ip6 addresses in the
+ * segment list
+ */
+ max_segs = (h->length>>1);
+
+ if (!print_hmac && h->hmac_key)
+ max_segs -= 2;
+
+ s = format (s, "\n Segments (in processing order):");
+
+ for (i = h->first_segment; i >= 0; i--)
+ s = format (s, "\n %U", format_ip6_address, h->segments + i);
+
+ s = format (s, "\n Policy List:");
+
+ pl_index = 1; /* to match the RFC text */
+ for (i = (h->first_segment+1); i < max_segs; i++, pl_index++)
+ {
+ char * tag;
+ char * tags[] = {" ", "InPE: ", "EgPE: ", "OrgSrc: "};
+
+ tag = tags[0];
+ if (pl_index >=1 && pl_index <= 4)
+ {
+ int this_pl_flag = ip6_sr_policy_list_flags
+ (flags_host_byte_order, pl_index);
+ tag = tags[this_pl_flag];
+ }
+
+ s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i);
+ }
+
+ return s;
+}
+
+u8 * format_ip6_sr_header_with_length (u8 * s, va_list * args)
+{
+ ip6_header_t * h = va_arg (*args, ip6_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ uword header_bytes;
+
+ header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "ip6_sr header truncated");
+
+ s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes);
+ s = format (s, "SR: %U\n", format_ip6_sr_header, (ip6_sr_header_t *)(h+1),
+ 0 /* print_hmac */, max_header_bytes);
+ return s;
+}
+
+#define foreach_sr_rewrite_next \
+_(ERROR, "error-drop") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(SR_LOCAL, "sr-local")
+
+typedef enum {
+#define _(s,n) SR_REWRITE_NEXT_##s,
+ foreach_sr_rewrite_next
+#undef _
+ SR_REWRITE_N_NEXT,
+} sr_rewrite_next_t;
+
+typedef struct {
+ ip6_address_t src, dst;
+ u16 length;
+ u32 next_index;
+ u32 tunnel_index;
+ u8 sr[256];
+} sr_rewrite_trace_t;
+
+static char * sr_rewrite_error_strings[] = {
+#define sr_error(n,s) s,
+#include "sr_error.def"
+#undef sr_error
+};
+
+typedef enum {
+#define sr_error(n,s) SR_REWRITE_ERROR_##n,
+#include "sr_error.def"
+#undef sr_error
+ SR_REWRITE_N_ERROR,
+} sr_rewrite_error_t;
+
+
+u8 * format_sr_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_rewrite_trace_t * t = va_arg (*args, sr_rewrite_trace_t *);
+ ip6_main_t * im = &ip6_main;
+ ip6_sr_main_t * sm = &sr_main;
+ ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
+ ip6_fib_t * rx_fib, * tx_fib;
+
+ rx_fib = find_ip6_fib_by_table_index_or_id (im, tun->rx_fib_index,
+ IP6_ROUTE_FLAG_FIB_INDEX);
+
+ tx_fib = find_ip6_fib_by_table_index_or_id (im, tun->tx_fib_index,
+ IP6_ROUTE_FLAG_FIB_INDEX);
+
+ s = format
+ (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n"
+ " rx-fib-id %d tx-fib-id %d\n%U",
+ (t->next_index == SR_REWRITE_NEXT_SR_LOCAL)
+ ? "sr-local" : "ip6-lookup",
+ format_ip6_address, &t->src,
+ format_ip6_address, &t->dst, t->length,
+ rx_fib->table_id, tx_fib->table_id,
+ format_ip6_sr_header, t->sr, 0 /* print_hmac */);
+ return s;
+}
+
+static uword
+sr_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip6_sr_main_t * sm = &sr_main;
+ u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *,
+ vlib_buffer_t *, ip6_header_t *,
+ ip6_sr_header_t *);
+ sr_local_cb = sm->sr_local_cb;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ ip6_header_t * ip0, * ip1;
+ ip_adjacency_t * adj0, * adj1;
+ ip6_sr_header_t * sr0, * sr1;
+ ip6_sr_tunnel_t * t0, *t1;
+ u64 * copy_src0, * copy_dst0;
+ u64 * copy_src1, * copy_dst1;
+ u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /*
+ * $$$ parse through header(s) to pick the point
+ * where we punch in the SR extention header
+ */
+
+ adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]);
+ adj1 = ip_get_adjacency (lm, vnet_buffer(b1)->ip.adj_index[VLIB_TX]);
+ t0 = pool_elt_at_index (sm->tunnels,
+ adj0->rewrite_header.sw_if_index);
+ t1 = pool_elt_at_index (sm->tunnels,
+ adj1->rewrite_header.sw_if_index);
+
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t0->rewrite)) + b0->current_data);
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t1->rewrite)) + b1->current_data);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
+ vnet_buffer(b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /*
+ * SR-unaware service chaining case: pkt coming back from
+ * service has the original dst address, and will already
+ * have an SR header. If so, send it to sr-local
+ */
+ if (PREDICT_FALSE(ip0->protocol == 43))
+ {
+ vlib_buffer_advance (b0, sizeof(ip0));
+ sr0 = (ip6_sr_header_t *) (ip0+1);
+ new_l0 = clib_net_to_host_u16(ip0->payload_length);
+ next0 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ copy_dst0 = (u64 *)(((u8 *)ip0) - vec_len (t0->rewrite));
+ copy_src0 = (u64 *) ip0;
+
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ copy_dst0 [0] = copy_src0 [0];
+ copy_dst0 [1] = copy_src0 [1];
+ copy_dst0 [2] = copy_src0 [2];
+ copy_dst0 [3] = copy_src0 [3];
+ copy_dst0 [4] = copy_src0 [4];
+ vlib_buffer_advance (b0, - (word) vec_len(t0->rewrite));
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *) (ip0+1);
+ /* $$$ tune */
+ memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
+ /* Fix the next header chain */
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = 43; /* routing extension header */
+ new_l0 = clib_net_to_host_u16(ip0->payload_length) +
+ vec_len (t0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16(new_l0);
+ /* Rewrite the ip6 dst address */
+ ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
+ ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
+
+ sr_fix_hmac (sm, ip0, sr0);
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next0 == SR_REWRITE_NEXT_ERROR))
+ b0->error =
+ node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+
+ if (PREDICT_FALSE(ip1->protocol == 43))
+ {
+ vlib_buffer_advance (b1, sizeof(ip1));
+ sr1 = (ip6_sr_header_t *) (ip1+1);
+ new_l1 = clib_net_to_host_u16(ip1->payload_length);
+ next1 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ copy_dst1 = (u64 *)(((u8 *)ip1) - vec_len (t1->rewrite));
+ copy_src1 = (u64 *) ip1;
+
+ copy_dst1 [0] = copy_src1 [0];
+ copy_dst1 [1] = copy_src1 [1];
+ copy_dst1 [2] = copy_src1 [2];
+ copy_dst1 [3] = copy_src1 [3];
+ copy_dst1 [4] = copy_src1 [4];
+ vlib_buffer_advance (b1, - (word) vec_len(t1->rewrite));
+ ip1 = vlib_buffer_get_current (b1);
+ sr1 = (ip6_sr_header_t *) (ip1+1);
+ memcpy (sr1, t1->rewrite, vec_len (t1->rewrite));
+ sr1->protocol = ip1->protocol;
+ ip1->protocol = 43;
+ new_l1 = clib_net_to_host_u16(ip1->payload_length) +
+ vec_len (t1->rewrite);
+ ip1->payload_length = clib_host_to_net_u16(new_l1);
+ ip1->dst_address.as_u64[0] = t1->first_hop.as_u64[0];
+ ip1->dst_address.as_u64[1] = t1->first_hop.as_u64[1];
+
+ sr_fix_hmac (sm, ip1, sr1);
+
+ next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) :
+ next1;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next1 & 0x80000000))
+ {
+ next1 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next1 == SR_REWRITE_NEXT_ERROR))
+ b1->error =
+ node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_index = t0 - sm->tunnels;
+ memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = new_l0;
+ tr->next_index = next0;
+ memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->tunnel_index = t1 - sm->tunnels;
+ memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = new_l1;
+ tr->next_index = next1;
+ memcpy (tr->sr, sr1, sizeof (tr->sr));
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ip6_header_t * ip0;
+ ip_adjacency_t * adj0;
+ ip6_sr_header_t * sr0;
+ ip6_sr_tunnel_t * t0;
+ u64 * copy_src0, * copy_dst0;
+ u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /*
+ * $$$ parse through header(s) to pick the point
+ * where we punch in the SR extention header
+ */
+
+ adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]);
+ t0 = pool_elt_at_index (sm->tunnels,
+ adj0->rewrite_header.sw_if_index);
+
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t0->rewrite)) + b0->current_data);
+
+ vnet_buffer(b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /*
+ * SR-unaware service chaining case: pkt coming back from
+ * service has the original dst address, and will already
+ * have an SR header. If so, send it to sr-local
+ */
+ if (PREDICT_FALSE(ip0->protocol == 43))
+ {
+ vlib_buffer_advance (b0, sizeof(ip0));
+ sr0 = (ip6_sr_header_t *) (ip0+1);
+ new_l0 = clib_net_to_host_u16(ip0->payload_length);
+ next0 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ copy_dst0 = (u64 *)(((u8 *)ip0) - vec_len (t0->rewrite));
+ copy_src0 = (u64 *) ip0;
+
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ copy_dst0 [0] = copy_src0 [0];
+ copy_dst0 [1] = copy_src0 [1];
+ copy_dst0 [2] = copy_src0 [2];
+ copy_dst0 [3] = copy_src0 [3];
+ copy_dst0 [4] = copy_src0 [4];
+ vlib_buffer_advance (b0, - (word) vec_len(t0->rewrite));
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *) (ip0+1);
+ /* $$$ tune */
+ memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
+ /* Fix the next header chain */
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = 43; /* routing extension header */
+ new_l0 = clib_net_to_host_u16(ip0->payload_length) +
+ vec_len (t0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16(new_l0);
+ /* Rewrite the ip6 dst address */
+ ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
+ ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
+
+ sr_fix_hmac (sm, ip0, sr0);
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next0 == SR_REWRITE_NEXT_ERROR))
+ b0->error =
+ node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_index = t0 - sm->tunnels;
+ memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = new_l0;
+ tr->next_index = next0;
+ memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (sr_rewrite_node) = {
+ .function = sr_rewrite,
+ .name = "sr-rewrite",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_rewrite_trace,
+ .format_buffer = format_ip6_sr_header_with_length,
+
+ .n_errors = SR_REWRITE_N_ERROR,
+ .error_strings = sr_rewrite_error_strings,
+
+ .runtime_data_bytes = 0,
+
+ .n_next_nodes = SR_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_REWRITE_NEXT_##s] = n,
+ foreach_sr_rewrite_next
+#undef _
+ },
+};
+
+static int ip6_delete_route_no_next_hop (ip6_address_t *dst_address_arg,
+ u32 dst_address_length,
+ u32 rx_table_id)
+{
+ ip6_add_del_route_args_t a;
+ ip6_address_t dst_address;
+ ip6_fib_t * fib;
+ ip6_main_t * im6 = &ip6_main;
+ BVT(clib_bihash_kv) kv, value;
+
+ fib = find_ip6_fib_by_table_index_or_id (im6, rx_table_id,
+ IP6_ROUTE_FLAG_TABLE_ID);
+ memset (&a, 0, sizeof (a));
+ a.flags |= IP4_ROUTE_FLAG_DEL;
+ a.dst_address_length = dst_address_length;
+
+ dst_address = *dst_address_arg;
+
+ ip6_address_mask (&dst_address,
+ &im6->fib_masks[dst_address_length]);
+
+ kv.key[0] = dst_address.as_u64[0];
+ kv.key[1] = dst_address.as_u64[1];
+ kv.key[2] = ((u64)((fib - im6->fibs))<<32) | dst_address_length;
+
+ if (BV(clib_bihash_search)(&im6->ip6_lookup_table, &kv, &value) < 0)
+ {
+ clib_warning ("%U/%d not in FIB",
+ format_ip6_address, &a.dst_address,
+ a.dst_address_length);
+ return -10;
+ }
+
+ a.adj_index = value.value;
+ a.dst_address = dst_address;
+
+ ip6_add_del_route (im6, &a);
+ ip6_maybe_remap_adjacencies (im6, rx_table_id, IP6_ROUTE_FLAG_TABLE_ID);
+ return 0;
+}
+
+static ip6_sr_hmac_key_t *
+find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp)
+{
+ uword * p;
+ ip6_sr_hmac_key_t * key = 0;
+ int i;
+
+ p = hash_get_mem (sm->hmac_key_by_shared_secret, secret);
+
+ if (p)
+ {
+ key = vec_elt_at_index (sm->hmac_keys, p[0]);
+ if (indexp)
+ *indexp = p[0];
+ return (key);
+ }
+
+ /* Specific key ID? */
+ if (indexp && *indexp)
+ {
+ vec_validate (sm->hmac_keys, *indexp);
+ key = sm->hmac_keys + *indexp;
+ }
+ else
+ {
+ for (i = 0; i < vec_len (sm->hmac_keys); i++)
+ {
+ if (sm->hmac_keys[i].shared_secret == 0)
+ key = sm->hmac_keys + i;
+ goto found;
+ }
+ vec_validate (sm->hmac_keys, i);
+ key = sm->hmac_keys + i;
+ found:
+ ;
+ }
+
+ key->shared_secret = vec_dup (secret);
+
+ hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret,
+ key - sm->hmac_keys);
+
+ if (indexp)
+ *indexp = key - sm->hmac_keys;
+ return (key);
+}
+
+int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
+{
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ ip6_sr_tunnel_key_t key;
+ ip6_sr_tunnel_t * t;
+ uword * p;
+ ip6_sr_header_t * h = 0;
+ u32 header_length;
+ ip6_address_t * addrp, *this_address;
+ ip_adjacency_t adj, * ap, * add_adj = 0;
+ u32 adj_index;
+ ip6_sr_main_t * sm = &sr_main;
+ u8 * key_copy;
+ u32 rx_fib_index, tx_fib_index;
+ ip6_add_del_route_args_t aa;
+ u32 hmac_key_index_u32;
+ u8 hmac_key_index = 0;
+
+ /* Make sure that the rx FIB exists */
+ p = hash_get (im->fib_index_by_table_id, a->rx_table_id);
+
+ if (p == 0)
+ return -3;
+
+ /* remember the FIB index */
+ rx_fib_index = p[0];
+
+ /* Make sure that the supplied FIB exists */
+ p = hash_get (im->fib_index_by_table_id, a->tx_table_id);
+
+ if (p == 0)
+ return -4;
+
+ /* remember the FIB index */
+ tx_fib_index = p[0];
+
+ memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src));
+ memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst));
+
+ p = hash_get_mem (sm->tunnel_index_by_key, &key);
+
+ if (p)
+ {
+ if (a->is_del)
+ {
+ hash_pair_t *hp;
+
+ /* Delete existing tunnel */
+ t = pool_elt_at_index (sm->tunnels, p[0]);
+
+ ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width,
+ a->rx_table_id);
+ vec_free (t->rewrite);
+ pool_put (sm->tunnels, t);
+ hp = hash_get_pair (sm->tunnel_index_by_key, &key);
+ key_copy = (void *)(hp->key);
+ hash_unset_mem (sm->tunnel_index_by_key, &key);
+ vec_free (key_copy);
+ return 0;
+ }
+ else /* create; tunnel already exists; complain */
+ return -1;
+ }
+ else
+ {
+ /* delete; tunnel does not exist; complain */
+ if (a->is_del)
+ return -2;
+ }
+
+ /* create a new tunnel */
+ pool_get (sm->tunnels, t);
+ memset (t, 0, sizeof (*t));
+
+ memcpy (&t->key, &key, sizeof (t->key));
+ t->dst_mask_width = a->dst_mask_width;
+ t->rx_fib_index = rx_fib_index;
+ t->tx_fib_index = tx_fib_index;
+
+ /* The first specified hop goes right into the dst address */
+ if (vec_len(a->segments))
+ {
+ t->first_hop = a->segments[0];
+ /* It won't feel nice if we do it twice */
+ vec_delete (a->segments, 1, 0);
+ }
+ else /* there must be at least one segment... */
+ return -4;
+
+ /*
+ * Create the sr header rewrite string
+ * We append the dst address to the set of next hops
+ * so the ultimate recipient can tell where the
+ * packet entered the SR domain
+ */
+ header_length = sizeof (*h) +
+ sizeof (ip6_address_t) * (vec_len (a->segments) + vec_len (a->tags));
+
+ if (a->shared_secret)
+ {
+ /* Allocate a new key slot if we don't find the secret key */
+ hmac_key_index_u32 = 0;
+ (void) find_or_add_shared_secret (sm, a->shared_secret,
+ &hmac_key_index_u32);
+
+ /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! */
+ if (hmac_key_index_u32 >= 256)
+ return -5;
+ hmac_key_index = hmac_key_index_u32;
+ header_length += SHA256_DIGEST_LENGTH;
+ }
+
+ vec_validate (t->rewrite, header_length-1);
+
+ h = (ip6_sr_header_t *) t->rewrite;
+
+ h->protocol = 0xFF; /* we don't know yet */
+
+ h->length = (header_length/8) - 1;
+ h->type = ROUTING_HEADER_TYPE_SR;
+ h->segments_left = vec_len (a->segments);
+ h->first_segment = vec_len(a->segments) -1;
+ if (a->shared_secret)
+ h->hmac_key = hmac_key_index & 0xFF;
+
+ h->flags = a->flags_net_byte_order;
+
+ /* Paint on the segment list, in reverse */
+ addrp = h->segments + (vec_len (a->segments) - 1);
+
+ vec_foreach (this_address, a->segments)
+ {
+ memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+
+ /* Paint on the tag list, not reversed */
+ addrp = h->segments + vec_len(a->segments);
+
+ vec_foreach (this_address, a->tags)
+ {
+ memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp++;
+ }
+
+ key_copy = vec_new (ip6_sr_tunnel_key_t, 1);
+ memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t));
+ hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels);
+
+ memset(&adj, 0, sizeof (adj));
+
+ /* Create an adjacency and add to v6 fib */
+ adj.lookup_next_index = IP_LOOKUP_NEXT_REWRITE;
+ adj.lookup_next_index = sm->ip6_lookup_sr_next_index;
+ adj.explicit_fib_index = ~0;
+
+ ap = ip_add_adjacency (lm, &adj, 1 /* one adj */,
+ &adj_index);
+
+ /*
+ * Stick the tunnel index into the rewrite header.
+ *
+ * Unfortunately, inserting an SR header according to the various
+ * RFC's requires parsing through the ip6 header, perhaps consing a
+ * buffer onto the head of the vlib_buffer_t, etc. We don't use the
+ * normal reverse bcopy rewrite code.
+ *
+ * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
+ * at some point...
+ */
+ ap->rewrite_header.sw_if_index = t - sm->tunnels;
+
+ vec_add1 (add_adj, ap[0]);
+
+ memcpy (aa.dst_address.as_u8, a->dst_address, sizeof (aa.dst_address.as_u8));
+ aa.dst_address_length = a->dst_mask_width;
+
+ aa.flags = (a->is_del ? IP6_ROUTE_FLAG_DEL : IP6_ROUTE_FLAG_ADD);
+ aa.flags |= IP6_ROUTE_FLAG_FIB_INDEX;
+ aa.table_index_or_table_id = rx_fib_index;
+ aa.add_adj = add_adj;
+ aa.adj_index = adj_index;
+ aa.n_add_adj = 1;
+ ip6_add_del_route (im, &aa);
+ vec_free (add_adj);
+
+ return 0;
+}
+
+static clib_error_t *
+sr_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+ ip6_address_t src_address;
+ int src_address_set = 0;
+ ip6_address_t dst_address;
+ u32 dst_mask_width;
+ int dst_address_set = 0;
+ u16 flags = 0;
+ u8 *shared_secret = 0;
+ u32 rx_table_id = 0;
+ u32 tx_table_id = 0;
+ ip6_address_t * segments = 0;
+ ip6_address_t * this_seg;
+ ip6_address_t * tags = 0;
+ ip6_address_t * this_tag;
+ ip6_sr_add_del_tunnel_args_t _a, *a=&_a;
+ ip6_address_t next_address, tag;
+ int pl_index;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "rx-fib-id %d", &rx_table_id))
+ ;
+ else if (unformat (input, "tx-fib-id %d", &tx_table_id))
+ ;
+ else if (unformat (input, "src %U", unformat_ip6_address, &src_address))
+ src_address_set = 1;
+ else if (unformat (input, "dst %U/%d",
+ unformat_ip6_address, &dst_address,
+ &dst_mask_width))
+ dst_address_set = 1;
+ else if (unformat (input, "next %U", unformat_ip6_address,
+ &next_address))
+ {
+ vec_add2 (segments, this_seg, 1);
+ memcpy (this_seg->as_u8, next_address.as_u8, sizeof (*this_seg));
+ }
+ else if (unformat (input, "tag %U", unformat_ip6_address,
+ &tag))
+ {
+ vec_add2 (tags, this_tag, 1);
+ memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag));
+ }
+ else if (unformat (input, "clean"))
+ flags |= IP6_SR_HEADER_FLAG_CLEANUP;
+ else if (unformat (input, "protected"))
+ flags |= IP6_SR_HEADER_FLAG_PROTECTED;
+ else if (unformat (input, "key %s", &shared_secret))
+ /* Do not include the trailing NULL byte. Guaranteed interop issue */
+ _vec_len (shared_secret) -= 1;
+ else if (unformat (input, "InPE %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ {
+ pl_index_range_error:
+ return clib_error_return
+ (0, "Policy List Element Index %d out of range (1-4)", pl_index);
+
+ }
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else if (unformat (input, "EgPE %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ goto pl_index_range_error;
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else if (unformat (input, "OrgSrc %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ goto pl_index_range_error;
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else
+ break;
+ }
+
+ if (!src_address_set)
+ return clib_error_return (0, "src address required");
+
+ if (!dst_address_set)
+ return clib_error_return (0, "dst address required");
+
+ if (!segments)
+ return clib_error_return (0, "at least one sr segment required");
+
+ memset (a, 0, sizeof (*a));
+ a->src_address = &src_address;
+ a->dst_address = &dst_address;
+ a->dst_mask_width = dst_mask_width;
+ a->segments = segments;
+ a->tags = tags;
+ a->flags_net_byte_order = clib_host_to_net_u16(flags);
+ a->is_del = is_del;
+ a->rx_table_id = rx_table_id;
+ a->tx_table_id = tx_table_id;
+ a->shared_secret = shared_secret;
+
+ rv = ip6_sr_add_del_tunnel (a);
+
+ vec_free (segments);
+ vec_free (tags);
+ vec_free (shared_secret);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case -1:
+ return clib_error_return (0, "SR tunnel src %U dst %U already exists",
+ format_ip6_address, &src_address,
+ format_ip6_address, &dst_address);
+
+ case -2:
+ return clib_error_return (0, "SR tunnel src %U dst %U does not exist",
+ format_ip6_address, &src_address,
+ format_ip6_address, &dst_address);
+
+ case -3:
+ return clib_error_return (0, "FIB table %d does not exist", rx_table_id);
+
+ case -4:
+ return clib_error_return (0, "At least one segment is required");
+
+ default:
+ return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (sr_tunnel_command, static) = {
+ .path = "sr tunnel",
+ .short_help =
+ "sr tunnel [del] <src> <dst> [next <addr>] [cleanup] [reroute] [key %s]",
+ .function = sr_add_del_tunnel_command_fn,
+};
+
+
+static clib_error_t *
+show_sr_tunnel_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ static ip6_sr_tunnel_t ** tunnels;
+ ip6_sr_tunnel_t * t;
+ ip6_sr_main_t * sm = &sr_main;
+ ip6_main_t * im = &ip6_main;
+ ip6_fib_t * rx_fib, * tx_fib;
+ int i;
+
+ vec_reset_length (tunnels);
+
+ pool_foreach (t, sm->tunnels,
+ ({
+ vec_add1 (tunnels, t);
+ }));
+
+ if (vec_len (tunnels) == 0)
+ vlib_cli_output (vm, "No SR tunnels configured");
+
+ for (i = 0; i < vec_len (tunnels); i++)
+ {
+ t = tunnels [i];
+
+ rx_fib = find_ip6_fib_by_table_index_or_id (im, t->rx_fib_index,
+ IP6_ROUTE_FLAG_FIB_INDEX);
+
+ tx_fib = find_ip6_fib_by_table_index_or_id (im, t->tx_fib_index,
+ IP6_ROUTE_FLAG_FIB_INDEX);
+
+ vlib_cli_output (vm, "src %U dst %U first hop %U",
+ format_ip6_address, &t->key.src,
+ format_ip6_address, &t->key.dst,
+ format_ip6_address, &t->first_hop);
+ vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d",
+ rx_fib->table_id, tx_fib->table_id);
+ vlib_cli_output (vm, " sr: %U", format_ip6_sr_header, t->rewrite,
+ 0 /* print_hmac */);
+ vlib_cli_output (vm, "-------");
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = {
+ .path = "show sr tunnel",
+ .short_help = "show sr tunnel",
+ .function = show_sr_tunnel_fn,
+};
+
+#define foreach_sr_fix_dst_addr_next \
+_(DROP, "error-drop")
+
+typedef enum {
+#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s,
+foreach_sr_fix_dst_addr_next
+#undef _
+ SR_FIX_DST_ADDR_N_NEXT,
+} sr_fix_dst_addr_next_t;
+
+static char * sr_fix_dst_error_strings[] = {
+#define sr_fix_dst_error(n,s) s,
+#include "sr_fix_dst_error.def"
+#undef sr_fix_dst_error
+};
+
+typedef enum {
+#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n,
+#include "sr_fix_dst_error.def"
+#undef sr_fix_dst_error
+ SR_FIX_DST_N_ERROR,
+} sr_fix_dst_error_t;
+
+typedef struct {
+ ip6_address_t src, dst;
+ u32 next_index;
+ u32 adj_index;
+ u8 sr[256];
+} sr_fix_addr_trace_t;
+
+u8 * format_sr_fix_addr_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_fix_addr_trace_t * t = va_arg (*args, sr_fix_addr_trace_t *);
+ vnet_hw_interface_t * hi = 0;
+ ip_adjacency_t * adj;
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ vnet_main_t * vnm = vnet_get_main();
+
+ if (t->adj_index != ~0)
+ {
+ adj = ip_get_adjacency (lm, t->adj_index);
+ hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+ }
+
+ s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n",
+ (t->next_index == SR_FIX_DST_ADDR_NEXT_DROP)
+ ? "drop" : "output",
+ format_ip6_address, &t->src,
+ format_ip6_address, &t->dst);
+ if (t->next_index != SR_FIX_DST_ADDR_NEXT_DROP)
+ {
+ s = format (s, "%U\n", format_ip6_sr_header, t->sr, 1 /* print_hmac */);
+ s = format (s, " output via %s", hi ? (char *)(hi->name)
+ : "Invalid adj");
+ }
+ return s;
+}
+
+static uword
+sr_fix_dst_addr (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+#if 0
+ while (0 && n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ __attribute__((unused)) vlib_buffer_t * b0, * b1;
+ u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP;
+ u32 next1 = SR_FIX_DST_ADDR_NEXT_DROP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ip6_header_t * ip0;
+ ip_adjacency_t * adj0;
+ ip6_sr_header_t * sr0;
+ u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP;
+ ip6_address_t *new_dst0;
+ ethernet_header_t * eh0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ adj0 = ip_get_adjacency (lm, vnet_buffer(b0)->ip.adj_index[VLIB_TX]);
+ next0 = adj0->mcast_group_index;
+
+ /* We should be pointing at an Ethernet header... */
+ eh0 = vlib_buffer_get_current (b0);
+ ip0 = (ip6_header_t *)(eh0+1);
+ sr0 = (ip6_sr_header_t *) (ip0+1);
+
+ /* We'd better find an SR header... */
+ if (PREDICT_FALSE(ip0->protocol != 43))
+ {
+ b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER];
+ goto do_trace0;
+ }
+ else
+ {
+ /*
+ * We get here from sr_rewrite or sr_local, with
+ * sr->segments_left pointing at the (copy of the original) dst
+ * address. Use it, then increment sr0->segments_left.
+ */
+
+ /* Out of segments? Turf the packet */
+ if (PREDICT_FALSE (sr0->segments_left == 0))
+ {
+ b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS];
+ goto do_trace0;
+ }
+
+ /*
+ * Rewrite the packet with the original dst address
+ * We assume that the last segment (in processing order) contains
+ * the original dst address. The list is reversed, so sr0->segments
+ * contains the original dst address.
+ */
+ new_dst0 = sr0->segments;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+ }
+
+ do_trace0:
+
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_fix_addr_trace_t *t = vlib_add_trace (vm, node,
+ b0, sizeof (*t));
+ t->next_index = next0;
+ t->adj_index = ~0;
+
+ if (next0 != SR_FIX_DST_ADDR_NEXT_DROP)
+ {
+ t->adj_index = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+ memcpy (t->src.as_u8, ip0->src_address.as_u8,
+ sizeof (t->src.as_u8));
+ memcpy (t->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (t->dst.as_u8));
+ memcpy (t->sr, sr0, sizeof (t->sr));
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+
+VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = {
+ .function = sr_fix_dst_addr,
+ .name = "sr-fix-dst-addr",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_fix_addr_trace,
+ .format_buffer = format_ip6_sr_header_with_length,
+
+ .runtime_data_bytes = 0,
+
+ .n_errors = SR_FIX_DST_N_ERROR,
+ .error_strings = sr_fix_dst_error_strings,
+
+ .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n,
+ foreach_sr_fix_dst_addr_next
+#undef _
+ },
+};
+
+static clib_error_t * sr_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t * sm = &sr_main;
+ clib_error_t * error = 0;
+ vlib_node_t * ip6_lookup_node, * ip6_rewrite_node;
+ vlib_node_t * ip6_rewrite_local_node;
+ u32 verify_next_index;
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main();
+
+ vec_validate (sm->hmac_keys, 0);
+ sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef;
+
+ sm->tunnel_index_by_key =
+ hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword));
+
+ sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof(uword));
+
+ ip6_register_protocol (43, sr_local_node.index);
+
+ ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *)"ip6-lookup");
+ ASSERT(ip6_lookup_node);
+
+ ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *)"ip6-rewrite");
+ ASSERT(ip6_rewrite_node);
+
+ ip6_rewrite_local_node = vlib_get_node_by_name (vm,
+ (u8 *)"ip6-rewrite-local");
+ ASSERT(ip6_rewrite_local_node);
+
+ /* Add a disposition to ip6_lookup for the sr rewrite node */
+ sm->ip6_lookup_sr_next_index =
+ vlib_node_add_next (vm, ip6_lookup_node->index, sr_rewrite_node.index);
+
+ /* Add a disposition to ip6_rewrite for the sr dst address hack node */
+ sm->ip6_rewrite_sr_next_index =
+ vlib_node_add_next (vm, ip6_rewrite_node->index,
+ sr_fix_dst_addr_node.index);
+ /*
+ * Fix ip6-rewrite-local, sibling of the above. The sibling bitmap
+ * isn't set up at this point, so we have to do it manually
+ */
+ verify_next_index = vlib_node_add_next
+ (vm, ip6_rewrite_local_node->index,
+ sr_fix_dst_addr_node.index);
+
+ ASSERT(sm->ip6_rewrite_sr_next_index == verify_next_index);
+
+ OpenSSL_add_all_digests();
+
+ sm->md = (void *) EVP_get_digestbyname ("sha1");
+ sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX));
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (sr_init);
+
+#define foreach_sr_local_next \
+ _ (ERROR, "error-drop") \
+ _ (IP6_LOOKUP, "ip6-lookup")
+
+typedef enum {
+#define _(s,n) SR_LOCAL_NEXT_##s,
+ foreach_sr_local_next
+#undef _
+ SR_LOCAL_N_NEXT,
+} sr_local_next_t;
+
+typedef struct {
+ u8 next_index;
+ u8 sr_valid;
+ ip6_address_t src, dst;
+ u16 length;
+ u8 sr[256];
+} sr_local_trace_t;
+
+static char * sr_local_error_strings[] = {
+#define sr_error(n,s) s,
+#include "sr_error.def"
+#undef sr_error
+};
+
+typedef enum {
+#define sr_error(n,s) SR_LOCAL_ERROR_##n,
+#include "sr_error.def"
+#undef sr_error
+ SR_LOCAL_N_ERROR,
+} sr_local_error_t;
+
+u8 * format_sr_local_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_local_trace_t * t = va_arg (*args, sr_local_trace_t *);
+
+ s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d",
+ format_ip6_address, &t->src,
+ format_ip6_address, &t->dst, t->length, t->next_index);
+ if (t->sr_valid)
+ s = format (s, "\n %U", format_ip6_sr_header, t->sr, 1 /* print_hmac */);
+ else
+ s = format (s, "\n popped SR header");
+
+ return s;
+}
+
+
+/* $$$$ fixme: smp, don't copy data, cache input, output (maybe) */
+
+static int sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip,
+ ip6_sr_header_t * sr)
+{
+ u32 key_index;
+ static u8 * keybuf;
+ u8 * copy_target;
+ int first_segment;
+ ip6_address_t *addrp;
+ int i;
+ ip6_sr_hmac_key_t * hmac_key;
+ static u8 * signature;
+ u32 sig_len;
+
+ key_index = sr->hmac_key;
+
+ /* No signature? Pass... */
+ if (key_index == 0)
+ return 0;
+
+ /* We don't know about this key? Fail... */
+ if (key_index >= vec_len (sm->hmac_keys))
+ return 1;
+
+ vec_validate (signature, SHA256_DIGEST_LENGTH-1);
+
+ hmac_key = sm->hmac_keys + key_index;
+
+ vec_reset_length (keybuf);
+
+ /* pkt ip6 src address */
+ vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+ memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t));
+
+ /* last segment */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0] = sr->first_segment;
+
+ /* octet w/ bit 0 = "clean" flag */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0]
+ = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP))
+ ? 0x80 : 0;
+
+ /* hmac key id */
+ vec_add2 (keybuf, copy_target, 1);
+ copy_target[0] = sr->hmac_key;
+
+ first_segment = sr->first_segment;
+
+ addrp = sr->segments;
+
+ /* segments */
+ for (i = 0; i <= first_segment; i++)
+ {
+ vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+ memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t));
+ addrp++;
+ }
+
+ if (sm->is_debug)
+ clib_warning ("verify key index %d keybuf: %U", key_index,
+ format_hex_bytes, keybuf, vec_len(keybuf));
+
+ /* shared secret */
+
+ /* SHA1 is shorter than SHA-256 */
+ memset (signature, 0, vec_len(signature));
+
+ HMAC_CTX_init(sm->hmac_ctx);
+ if (!HMAC_Init(sm->hmac_ctx, hmac_key->shared_secret,
+ vec_len(hmac_key->shared_secret),sm->md))
+ clib_warning ("barf1");
+ if (!HMAC_Update(sm->hmac_ctx,keybuf,vec_len(keybuf)))
+ clib_warning ("barf2");
+ if (!HMAC_Final(sm->hmac_ctx,signature,&sig_len))
+ clib_warning ("barf3");
+ HMAC_CTX_cleanup(sm->hmac_ctx);
+
+ if (sm->is_debug)
+ clib_warning ("computed signature len %d, value %U", sig_len,
+ format_hex_bytes, signature, vec_len(signature));
+
+ /* Point at the SHA signature in the packet */
+ addrp++;
+ if (sm->is_debug)
+ clib_warning ("read signature %U", format_hex_bytes, addrp,
+ SHA256_DIGEST_LENGTH);
+
+ return memcmp (signature, addrp, SHA256_DIGEST_LENGTH);
+}
+
+static uword
+sr_local (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, * from, * to_next;
+ ip6_sr_main_t * sm = &sr_main;
+ u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *,
+ vlib_buffer_t *, ip6_header_t *,
+ ip6_sr_header_t *);
+ sr_local_cb = sm->sr_local_cb;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ ip6_header_t * ip0, *ip1;
+ ip6_sr_header_t * sr0, *sr1;
+ ip6_address_t * new_dst0, * new_dst1;
+ u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP;
+ u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *)(ip0+1);
+
+ if (PREDICT_FALSE(sr0->type != ROUTING_HEADER_TYPE_SR))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE];
+ goto do_trace0;
+ }
+
+ /* Out of segments? Turf the packet */
+ if (PREDICT_FALSE (sr0->segments_left == 0))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS];
+ goto do_trace0;
+ }
+
+ if (PREDICT_FALSE(sm->validate_hmac))
+ {
+ if (sr_validate_hmac (sm, ip0, sr0))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID];
+ goto do_trace0;
+ }
+ }
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next0 == SR_LOCAL_NEXT_ERROR))
+ b0->error =
+ node->errors[SR_LOCAL_ERROR_APP_CALLBACK];
+ }
+ else
+ {
+ u32 segment_index0;
+
+ segment_index0 = sr0->segments_left - 1;
+
+ /* Rewrite the packet */
+ new_dst0 = (ip6_address_t *)(sr0->segments + segment_index0);
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+ if (PREDICT_TRUE (sr0->segments_left > 0))
+ {
+ sr0->segments_left -= 1;
+ goto do_trace0;
+ }
+ }
+
+ /* End of the path. Clean up the SR header, or not */
+ if (sr0->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP))
+ {
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+ /*
+ * Copy the ip6 header right by the (real) length of the
+ * sr header. Here's another place which assumes that
+ * the sr header is the only extention header.
+ */
+
+ ip0->protocol = sr0->protocol;
+ vlib_buffer_advance (b0, (sr0->length+1)*8);
+
+ new_l0 = clib_net_to_host_u16(ip0->payload_length) -
+ (sr0->length+1)*8;
+ ip0->payload_length = clib_host_to_net_u16(new_l0);
+
+ copy_src0 = (u64 *)ip0;
+ copy_dst0 = copy_src0 + (sr0->length + 1);
+
+ copy_dst0 [4] = copy_src0[4];
+ copy_dst0 [3] = copy_src0[3];
+ copy_dst0 [2] = copy_src0[2];
+ copy_dst0 [1] = copy_src0[1];
+ copy_dst0 [0] = copy_src0[0];
+
+ sr0 = 0;
+ }
+
+ do_trace0:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_local_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = vlib_buffer_length_in_chain (vm, b0);
+ tr->next_index = next0;
+ tr->sr_valid = sr0 != 0;
+ if (tr->sr_valid)
+ memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+
+ b1 = vlib_get_buffer (vm, bi1);
+ ip1 = vlib_buffer_get_current (b1);
+ sr1 = (ip6_sr_header_t *)(ip1+1);
+
+ if (PREDICT_FALSE(sr1->type != ROUTING_HEADER_TYPE_SR))
+ {
+ next1 = SR_LOCAL_NEXT_ERROR;
+ b1->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE];
+ goto do_trace1;
+ }
+
+ /* Out of segments? Turf the packet */
+ if (PREDICT_FALSE (sr1->segments_left == 0))
+ {
+ next1 = SR_LOCAL_NEXT_ERROR;
+ b1->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS];
+ goto do_trace1;
+ }
+
+ if (PREDICT_FALSE(sm->validate_hmac))
+ {
+ if (sr_validate_hmac (sm, ip1, sr1))
+ {
+ next1 = SR_LOCAL_NEXT_ERROR;
+ b1->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID];
+ goto do_trace1;
+ }
+ }
+
+ next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) :
+ next1;
+
+ /*
+ * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx
+ */
+ if (PREDICT_FALSE (next1 & 0x80000000))
+ {
+ next1 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next1 == SR_LOCAL_NEXT_ERROR))
+ b1->error =
+ node->errors[SR_LOCAL_ERROR_APP_CALLBACK];
+ }
+ else
+ {
+ u32 segment_index1;
+
+ segment_index1 = sr1->segments_left - 1;
+
+ /* Rewrite the packet */
+ new_dst1 = (ip6_address_t *)(sr1->segments + segment_index1);
+ ip1->dst_address.as_u64[0] = new_dst1->as_u64[0];
+ ip1->dst_address.as_u64[1] = new_dst1->as_u64[1];
+
+ if (PREDICT_TRUE (sr1->segments_left > 0))
+ {
+ sr1->segments_left -= 1;
+ goto do_trace1;
+ }
+ }
+
+ /* End of the path. Clean up the SR header, or not */
+ if (sr1->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP))
+ {
+ u64 *copy_dst1, *copy_src1;
+ u16 new_l1;
+ /*
+ * Copy the ip6 header right by the (real) length of the
+ * sr header. Here's another place which assumes that
+ * the sr header is the only extention header.
+ */
+
+ ip1->protocol = sr1->protocol;
+ vlib_buffer_advance (b1, (sr1->length+1)*8);
+
+ new_l1 = clib_net_to_host_u16(ip1->payload_length) -
+ (sr1->length+1)*8;
+ ip1->payload_length = clib_host_to_net_u16(new_l1);
+
+ copy_src1 = (u64 *)ip1;
+ copy_dst1 = copy_src1 + (sr1->length + 1);
+
+ copy_dst1 [4] = copy_src1[4];
+ copy_dst1 [3] = copy_src1[3];
+ copy_dst1 [2] = copy_src1[2];
+ copy_dst1 [1] = copy_src1[1];
+ copy_dst1 [0] = copy_src1[0];
+
+ sr1 = 0;
+ }
+
+ do_trace1:
+ if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_local_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = vlib_buffer_length_in_chain (vm, b1);
+ tr->next_index = next1;
+ tr->sr_valid = sr1 != 0;
+ if (tr->sr_valid)
+ memcpy (tr->sr, sr1, sizeof (tr->sr));
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ ip6_header_t * ip0;
+ ip6_sr_header_t * sr0;
+ ip6_address_t * new_dst0;
+ u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *)(ip0+1);
+
+ if (PREDICT_FALSE(sr0->type != ROUTING_HEADER_TYPE_SR))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE];
+ goto do_trace;
+ }
+
+ /* Out of segments? Turf the packet */
+ if (PREDICT_FALSE (sr0->segments_left == 0))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS];
+ goto do_trace;
+ }
+
+ if (PREDICT_FALSE(sm->validate_hmac))
+ {
+ if (sr_validate_hmac (sm, ip0, sr0))
+ {
+ next0 = SR_LOCAL_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID];
+ goto do_trace;
+ }
+ }
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE(next0 == SR_LOCAL_NEXT_ERROR))
+ b0->error =
+ node->errors[SR_LOCAL_ERROR_APP_CALLBACK];
+ }
+ else
+ {
+ u32 segment_index0;
+
+ segment_index0 = sr0->segments_left - 1;
+
+ /* Rewrite the packet */
+ new_dst0 = (ip6_address_t *)(sr0->segments + segment_index0);
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+ if (PREDICT_TRUE (sr0->segments_left > 0))
+ {
+ sr0->segments_left -= 1;
+ goto do_trace;
+ }
+ }
+
+ /* End of the path. Clean up the SR header, or not */
+ if (sr0->flags & clib_host_to_net_u16(IP6_SR_HEADER_FLAG_CLEANUP))
+ {
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+ /*
+ * Copy the ip6 header right by the (real) length of the
+ * sr header. Here's another place which assumes that
+ * the sr header is the only extention header.
+ */
+
+ ip0->protocol = sr0->protocol;
+ vlib_buffer_advance (b0, (sr0->length+1)*8);
+
+ new_l0 = clib_net_to_host_u16(ip0->payload_length) -
+ (sr0->length+1)*8;
+ ip0->payload_length = clib_host_to_net_u16(new_l0);
+
+ copy_src0 = (u64 *)ip0;
+ copy_dst0 = copy_src0 + (sr0->length + 1);
+
+ copy_dst0 [4] = copy_src0[4];
+ copy_dst0 [3] = copy_src0[3];
+ copy_dst0 [2] = copy_src0[2];
+ copy_dst0 [1] = copy_src0[1];
+ copy_dst0 [0] = copy_src0[0];
+
+ sr0 = 0;
+ }
+
+ do_trace:
+ if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_local_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = vlib_buffer_length_in_chain (vm, b0);
+ tr->next_index = next0;
+ tr->sr_valid = sr0 != 0;
+ if (tr->sr_valid)
+ memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter (vm, sr_local_node.index,
+ SR_LOCAL_ERROR_PKTS_PROCESSED,
+ from_frame->n_vectors);
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (sr_local_node, static) = {
+ .function = sr_local,
+ .name = "sr-local",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_local_trace,
+
+ .runtime_data_bytes = 0,
+
+ .n_errors = SR_LOCAL_N_ERROR,
+ .error_strings = sr_local_error_strings,
+
+ .n_next_nodes = SR_LOCAL_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCAL_NEXT_##s] = n,
+ foreach_sr_local_next
+#undef _
+ },
+};
+
+ip6_sr_main_t * sr_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, sr_init);
+ ASSERT(sr_local_node.index);
+ return &sr_main;
+}
+
+
+static clib_error_t *
+set_ip6_sr_rewrite_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_address_t a;
+ ip6_main_t * im = &ip6_main;
+ ip_lookup_main_t * lm = &im->lookup_main;
+ u32 fib_index = 0;
+ u32 fib_id = 0;
+ u32 adj_index;
+ uword * p;
+ ip_adjacency_t * adj;
+ vnet_hw_interface_t * hi;
+ u32 sw_if_index;
+ ip6_sr_main_t * sm = &sr_main;
+ vnet_main_t * vnm = vnet_get_main();
+
+ if (!unformat (input, "%U", unformat_ip6_address, &a))
+ return clib_error_return (0, "ip6 address missing in '%U'",
+ format_unformat_error, input);
+
+ if (unformat (input, "rx-table-id %d", &fib_id))
+ {
+ p = hash_get (im->fib_index_by_table_id, fib_id);
+ if (p == 0)
+ return clib_error_return (0, "fib-id %d not found");
+ fib_index = p[0];
+ }
+
+ adj_index = ip6_fib_lookup_with_table (im, fib_index, &a);
+
+ if (adj_index == lm->miss_adj_index)
+ return clib_error_return (0, "no match for %U",
+ format_ip6_address, &a);
+
+ adj = ip_get_adjacency (lm, adj_index);
+
+ if (adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
+ return clib_error_return (0, "%U unresolved (not a rewrite adj)",
+ format_ip6_address, &a);
+
+ adj->rewrite_header.next_index = sm->ip6_rewrite_sr_next_index;
+
+ sw_if_index = adj->rewrite_header.sw_if_index;
+ hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ adj->rewrite_header.node_index = sr_fix_dst_addr_node.index;
+
+ /* $$$$$ hack... steal the mcast group index */
+ adj->mcast_group_index =
+ vlib_node_add_next (vm, sr_fix_dst_addr_node.index, hi->output_node_index);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = {
+ .path = "set ip6 sr rewrite",
+ .short_help = "set ip6 sr rewrite <ip6-address> [fib-id <id>]",
+ .function = set_ip6_sr_rewrite_fn,
+};
+
+void vnet_register_sr_app_callback (void *cb)
+{
+ ip6_sr_main_t * sm = &sr_main;
+
+ sm->sr_local_cb = cb;
+}
+
+static clib_error_t *
+test_sr_hmac_validate_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t * sm = &sr_main;
+
+ if (unformat (input, "validate on"))
+ sm->validate_hmac = 1;
+ else if (unformat (input, "chunk-offset off"))
+ sm->validate_hmac = 0;
+ else
+ return clib_error_return (0, "expected validate on|off in '%U'",
+ format_unformat_error, input);
+
+ vlib_cli_output (vm, "hmac signature validation %s",
+ sm->validate_hmac ?
+ "on" : "off");
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = {
+ .path = "test sr hmac",
+ .short_help = "test sr hmac validate [on|off]",
+ .function = test_sr_hmac_validate_fn,
+};
+
+i32 sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret,
+ u8 is_del)
+{
+ u32 index;
+ ip6_sr_hmac_key_t * key;
+
+ if (is_del == 0)
+ {
+ /* Specific key in use? Fail. */
+ if (key_id && vec_len (sm->hmac_keys) > key_id
+ && sm->hmac_keys[key_id].shared_secret)
+ return -2;
+
+ index = key_id;
+ key = find_or_add_shared_secret (sm, shared_secret, &index);
+ ASSERT(index == key_id);
+ return 0;
+ }
+
+ /* delete */
+
+ if (key_id) /* delete by key ID */
+ {
+ if (vec_len (sm->hmac_keys) <= key_id)
+ return -3;
+
+ key = sm->hmac_keys + key_id;
+
+ hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret);
+ vec_free (key->shared_secret);
+ return 0;
+ }
+
+ index = 0;
+ key = find_or_add_shared_secret (sm, shared_secret, &index);
+ hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret);
+ vec_free (key->shared_secret);
+ return 0;
+}
+
+
+static clib_error_t *
+sr_hmac_add_del_key_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t * sm = &sr_main;
+ u8 is_del = 0;
+ u32 key_id = 0;
+ u8 key_id_set = 0;
+ u8 * shared_secret = 0;
+ i32 rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "id %d", &key_id))
+ key_id_set = 1;
+ else if (unformat (input, "key %s", &shared_secret))
+ {
+ /* Do not include the trailing NULL byte. Guaranteed interop issue */
+ _vec_len (shared_secret) -= 1;
+ }
+ else
+ break;
+ }
+
+ if (is_del == 0 && shared_secret == 0)
+ return clib_error_return (0, "shared secret must be set to add a key");
+
+ if (shared_secret == 0 && key_id_set == 0)
+ return clib_error_return (0, "shared secret and key id both unset");
+
+ rv = sr_hmac_add_del_key (sm, key_id, shared_secret, is_del);
+
+ vec_free (shared_secret);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ default:
+ return clib_error_return (0, "sr_hmac_add_del_key returned %d",
+ rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (sr_hmac, static) = {
+ .path = "sr hmac",
+ .short_help = "sr hmac [del] id <nn> key <str>",
+ .function = sr_hmac_add_del_key_fn,
+};
+
+
+static clib_error_t *
+show_sr_hmac_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t * sm = &sr_main;
+ int i;
+
+ for (i = 1; i < vec_len (sm->hmac_keys); i++)
+ {
+ if (sm->hmac_keys[i].shared_secret)
+ vlib_cli_output (vm, "[%d]: %v", i, sm->hmac_keys[i].shared_secret);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_sr_hmac, static) = {
+ .path = "show sr hmac",
+ .short_help = "show sr hmac",
+ .function = show_sr_hmac_fn,
+};
+
+static clib_error_t *
+test_sr_debug_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t * sm = &sr_main;
+
+ if (unformat (input, "on"))
+ sm->is_debug = 1;
+ else if (unformat (input, "off"))
+ sm->is_debug = 0;
+ else
+ return clib_error_return (0, "expected on|off in '%U'",
+ format_unformat_error, input);
+
+ vlib_cli_output (vm, "debug trace now %s", sm->is_debug ? "on" : "off");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_sr_debug, static) = {
+ .path = "test sr debug",
+ .short_help = "test sr debug on|off",
+ .function = test_sr_debug_fn,
+};
diff --git a/vnet/vnet/sr/sr.h b/vnet/vnet/sr/sr.h
new file mode 100644
index 00000000000..3c6d981d17e
--- /dev/null
+++ b/vnet/vnet/sr/sr.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_sr_h
+#define included_vnet_sr_h
+
+#include <vnet/vnet.h>
+#include <vnet/sr/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <openssl/opensslconf.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+#include <openssl/hmac.h>
+
+typedef struct {
+ ip6_address_t src;
+ ip6_address_t dst;
+} ip6_sr_tunnel_key_t;
+
+typedef struct {
+ /* src, dst address */
+ ip6_sr_tunnel_key_t key;
+
+ /* mask width for FIB entry */
+ u32 dst_mask_width;
+
+ /* first hop, to save 1 elt in the segment list */
+ ip6_address_t first_hop;
+
+ /* Fib indices */
+ u32 rx_fib_index;
+ u32 tx_fib_index;
+
+ /* The actual ip6 sr header */
+ u8 * rewrite;
+} ip6_sr_tunnel_t;
+
+typedef struct {
+ u8 * shared_secret;
+} ip6_sr_hmac_key_t;
+
+typedef struct {
+ /* Key (header imposition case) */
+ ip6_address_t *src_address;
+ ip6_address_t *dst_address;
+ u32 dst_mask_width;
+ u32 rx_table_id;
+ u32 tx_table_id;
+
+ /* segment list, when inserting an ip6 SR header*/
+ ip6_address_t *segments;
+
+ /*
+ * "Tag" list, aka segments inserted at the end of the list,
+ * past last_seg
+ */
+ ip6_address_t *tags;
+
+ /* Shared secret => generate SHA-256 HMAC security fields */
+ u8 * shared_secret;
+
+ /* Flags, e.g. cleanup, policy-list flags */
+ u16 flags_net_byte_order;
+
+ /* Delete the tunnnel? */
+ u8 is_del;
+} ip6_sr_add_del_tunnel_args_t;
+
+typedef struct {
+ /* pool of tunnel instances, sr entry only */
+ ip6_sr_tunnel_t *tunnels;
+
+ /* find an sr "tunnel" by its outer-IP src/dst */
+ uword * tunnel_index_by_key;
+
+ /* ip6-lookup next index for imposition FIB entries */
+ u32 ip6_lookup_sr_next_index;
+
+ /* hmac key id by shared secret */
+ uword * hmac_key_by_shared_secret;
+
+ /* ip6-rewrite next index for reinstalling the original dst address */
+ u32 ip6_rewrite_sr_next_index;
+
+ /* application API callback */
+ void *sr_local_cb;
+
+ /* validate hmac keys */
+ u8 validate_hmac;
+
+ /* pool of hmac keys */
+ ip6_sr_hmac_key_t * hmac_keys;
+
+ /* Openssl vbls */
+ EVP_MD * md;
+ HMAC_CTX * hmac_ctx;
+
+ /* enable debug spew */
+ u8 is_debug;
+
+ /* convenience */
+ vlib_main_t * vlib_main;
+ vnet_main_t * vnet_main;
+} ip6_sr_main_t;
+
+ip6_sr_main_t sr_main;
+
+format_function_t format_ip6_sr_header;
+format_function_t format_ip6_sr_header_with_length;
+
+vlib_node_registration_t ip6_sr_input_node;
+
+int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a);
+void vnet_register_sr_app_callback (void *cb);
+
+#endif /* included_vnet_sr_h */
diff --git a/vnet/vnet/sr/sr_error.def b/vnet/vnet/sr/sr_error.def
new file mode 100644
index 00000000000..62d021fd47b
--- /dev/null
+++ b/vnet/vnet/sr/sr_error.def
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sr_error (NONE, "no error")
+sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)")
+sr_error (NO_MORE_SEGMENTS, "out of SR segment drops")
+sr_error (PKTS_PROCESSED, "SR packets processed")
+sr_error (APP_CALLBACK, "SR application callback errors")
+sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures")
diff --git a/vnet/vnet/sr/sr_fix_dst_error.def b/vnet/vnet/sr/sr_fix_dst_error.def
new file mode 100644
index 00000000000..48fe7af6c98
--- /dev/null
+++ b/vnet/vnet/sr/sr_fix_dst_error.def
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sr_fix_dst_error (NONE, "no error")
+sr_fix_dst_error (NO_SR_HEADER, "no SR header present")
+sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments")
diff --git a/vnet/vnet/sr/sr_packet.h b/vnet/vnet/sr/sr_packet.h
new file mode 100644
index 00000000000..5604a8dadb7
--- /dev/null
+++ b/vnet/vnet/sr/sr_packet.h
@@ -0,0 +1,227 @@
+#ifndef included_vnet_sr_packet_h
+#define included_vnet_sr_packet_h
+
+#include <vnet/ip/ip.h>
+
+/*
+ * ipv6 segment-routing header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The Segment Routing Header (SRH) is defined as follows:
+ *
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | First Segment | Flags | HMAC Key ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[0] (128 bits ipv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | |
+ * ...
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[n] (128 bits ipv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Policy List[0] (optional) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Policy List[1] (optional) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Policy List[2] (optional) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | |
+ * | |
+ * | HMAC (256 bits) |
+ * | (optional) |
+ * | |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * where:
+ *
+ * o Next Header: 8-bit selector. Identifies the type of header
+ * immediately following the SRH.
+ *
+ * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ * header in 8-octet units, not including the first 8 octets.
+ *
+ * o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+ *
+ * o Segments Left. Defined in [RFC2460], it contains the index, in
+ * the Segment List, of the next segment to inspect. Segments Left
+ * is decremented at each segment and it is used as an index in the
+ * segment list.
+ *
+ * o First Segment: offset in the SRH, not including the first 8 octets
+ * and expressed in 16-octet units, pointing to the last element of
+ * the segment list, which is in fact the first segment of the
+ * segment routing path.
+ *
+ * o Flags: 16 bits of flags. Following flags are defined:
+ *
+ * 1
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |C|P|R|R| Policy Flags |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * C-flag: Clean-up flag. Set when the SRH has to be removed from
+ * the packet when packet reaches the last segment.
+ *
+ * P-flag: Protected flag. Set when the packet has been rerouted
+ * through FRR mechanism by a SR endpoint node. See Section 6.3
+ * for more details.
+ *
+ * R-flags. Reserved and for future use.
+ *
+ * Policy Flags. Define the type of the IPv6 addresses encoded
+ * into the Policy List (see below). The following have been
+ * defined:
+ *
+ * Bits 4-6: determine the type of the first element after the
+ * segment list.
+ *
+ * Bits 7-9: determine the type of the second element.
+ *
+ * Bits 10-12: determine the type of the third element.
+ *
+ * Bits 13-15: determine the type of the fourth element.
+ *
+ * The following values are used for the type:
+ *
+ * 0x0: Not present. If value is set to 0x0, it means the
+ * element represented by these bits is not present.
+ *
+ * 0x1: SR Ingress.
+ *
+ * 0x2: SR Egress.
+ *
+ * 0x3: Original Source Address.
+ *
+ * o HMAC Key ID and HMAC field, and their use are defined in
+ * [I-D.vyncke-6man-segment-routing-security].
+ *
+ * o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ * segment in the Segment List. The Segment List is encoded starting
+ * from the last segment of the path. I.e., the first element of the
+ * segment list (Segment List [0]) contains the last segment of the
+ * path while the last segment of the Segment List (Segment List[n])
+ * contains the first segment of the path. The index contained in
+ * "Segments Left" identifies the current active segment.
+ *
+ * o Policy List. Optional addresses representing specific nodes in
+ * the SR path such as:
+ *
+ * SR Ingress: a 128 bit generic identifier representing the
+ * ingress in the SR domain (i.e.: it needs not to be a valid IPv6
+ * address).
+ *
+ * SR Egress: a 128 bit generic identifier representing the egress
+ * in the SR domain (i.e.: it needs not to be a valid IPv6
+ * address).
+ *
+ * Original Source Address: IPv6 address originally present in the
+ * SA field of the packet.
+ *
+ * The segments in the Policy List are encoded after the segment list
+ * and they are optional. If none are in the SRH, all bits of the
+ * Policy List Flags MUST be set to 0x0.
+ */
+
+#define ROUTING_HEADER_TYPE_SR 4
+
+typedef struct {
+ /* Protocol for next header. */
+ u8 protocol;
+ /*
+ * Length of routing header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+
+ /* Type of routing header; type 4 = segement routing */
+ u8 type;
+
+ /* Next segment in the segment list */
+ u8 segments_left;
+
+ /*
+ * Policy list pointer: offset in the SRH of the policy
+ * list - in 16-octet units - not including the first 8 octets.
+ */
+ u8 first_segment;
+
+ /* Flag bits */
+#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000)
+#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000)
+#define IP6_SR_HEADER_FLAG_RESERVED (0x3000)
+
+#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0)
+#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1)
+#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2)
+#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3)
+ /* values 0x4 - 0x7 are reserved */
+ u16 flags;
+ u8 hmac_key;
+
+ /* The segment + policy list elts */
+ ip6_address_t segments[0];
+} __attribute__((packed)) ip6_sr_header_t;
+
+static inline int
+ip6_sr_policy_list_shift_from_index (int pl_index)
+{
+ return (-3 * pl_index) + 12;
+}
+
+/* pl_index is one-origined, to match the text above */
+static inline int
+ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index)
+{
+ int shift;
+
+ if (pl_index <= 0 || pl_index > 4)
+ return 0;
+
+ shift = (-3 * pl_index) + 12;
+ flags_host_byte_order >>= shift;
+
+ return (flags_host_byte_order & 7);
+}
+
+#endif /* included_vnet_sr_packet_h */