aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/sr
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2016-12-19 23:05:39 +0100
committer Damjan Marion <damarion@cisco.com> 2016-12-28 12:25:14 +0100
commit 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 (patch)
tree 5de62f8dbd3a752f5a676ca600e43d2652d1ff1a /src/vnet/sr
parent 696f1adec0df3b8f161862566dd9c86174302658 (diff)
Reorganize source tree to use single autotools instance
Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vnet/sr')
-rw-r--r-- src/vnet/sr/dir.dox 25
-rw-r--r-- src/vnet/sr/examples/sr_multicastmap.script 4
-rw-r--r-- src/vnet/sr/rfc_draft_05.txt 1265
-rw-r--r-- src/vnet/sr/sr.c 3333
-rw-r--r-- src/vnet/sr/sr.h 262
-rw-r--r-- src/vnet/sr/sr_error.def 20
-rw-r--r-- src/vnet/sr/sr_fix_dst_error.def 17
-rw-r--r-- src/vnet/sr/sr_packet.h 251
-rw-r--r-- src/vnet/sr/sr_replicate.c 490
9 files changed, 5667 insertions, 0 deletions
diff --git a/src/vnet/sr/dir.dox b/src/vnet/sr/dir.dox
new file mode 100644
index 00000000000..a98b202c93e
--- /dev/null
+++ b/src/vnet/sr/dir.dox
@@ -0,0 +1,25 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing code
+
+ An implementation of Segment Routing as per:
+ draft-previdi-6man-segment-routing-header-05
+
+ See file: rfc_draft_05.txt
+
+*/
\ No newline at end of file
diff --git a/src/vnet/sr/examples/sr_multicastmap.script b/src/vnet/sr/examples/sr_multicastmap.script
new file mode 100644
index 00000000000..20bf7dc0eb7
--- /dev/null
+++ b/src/vnet/sr/examples/sr_multicastmap.script
@@ -0,0 +1,4 @@
+sr_tunnel_add_del name sr2 src ::a:1:1:0:6 dst ff15::2/128 next ::a:1:1:0:f next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean
+sr_tunnel_add_del name sr3 src ::b:1:1:0:6 dst ff16::2/128 next ::a:1:1:0:13 next ::a:1:1:0:1a next ff15::1 tag ::a:1:1:0:7 clean
+sr_policy_add_del name pol1 tunnel sr2 tunnel sr3
+sr_multicast_map_add_del address ff15::1 sr-policy pol1
diff --git a/src/vnet/sr/rfc_draft_05.txt b/src/vnet/sr/rfc_draft_05.txt
new file mode 100644
index 00000000000..bc41c181ea4
--- /dev/null
+++ b/src/vnet/sr/rfc_draft_05.txt
@@ -0,0 +1,1265 @@
+Network Working Group S. Previdi, Ed.
+Internet-Draft C. Filsfils
+Intended status: Standards Track Cisco Systems, Inc.
+Expires: June 12, 2015 B. Field
+ Comcast
+ I. Leung
+ Rogers Communications
+ December 9, 2014
+
+
+ IPv6 Segment Routing Header (SRH)
+ draft-previdi-6man-segment-routing-header-05
+
+Abstract
+
+ Segment Routing (SR) allows a node to steer a packet through a
+ controlled set of instructions, called segments, by prepending a SR
+ header to the packet. A segment can represent any instruction,
+ topological or service-based. SR allows enforcing a flow through
+ any path (topological, or application/service based) while
+ maintaining per-flow state only at the ingress node to the SR domain.
+
+ Segment Routing can be applied to the IPv6 data plane with the
+ addition of a new type of Routing Extension Header. This draft
+ describes the Segment Routing Extension Header Type and how it is
+ used by SR capable nodes.
+
+Requirements Language
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [RFC2119].
+
+Status of This Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 1]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ This Internet-Draft will expire on June 12, 2015.
+
+Copyright Notice
+
+ Copyright (c) 2014 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+Table of Contents
+
+ 1. Structure of this document . . . . . . . . . . . . . . . . . 3
+ 2. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3
+ 3. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3
+ 3.1. Data Planes supporting Segment Routing . . . . . . . . . 4
+ 3.2. Illustration . . . . . . . . . . . . . . . . . . . . . . 4
+ 4. Abstract Routing Model . . . . . . . . . . . . . . . . . . . 7
+ 4.1. Segment Routing Global Block (SRGB) . . . . . . . . . . . 8
+ 4.2. Traffic Engineering with SR . . . . . . . . . . . . . . . 9
+ 4.3. Segment Routing Database . . . . . . . . . . . . . . . . 10
+ 5. IPv6 Instantiation of Segment Routing . . . . . . . . . . . . 10
+ 5.1. Segment Identifiers (SIDs) and SRGB . . . . . . . . . . . 10
+ 5.1.1. Node-SID . . . . . . . . . . . . . . . . . . . . . . 11
+ 5.1.2. Adjacency-SID . . . . . . . . . . . . . . . . . . . . 11
+ 5.2. Segment Routing Extension Header (SRH) . . . . . . . . . 11
+ 5.2.1. SRH and RFC2460 behavior . . . . . . . . . . . . . . 15
+ 6. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 15
+ 6.1. Segment Routing Operations . . . . . . . . . . . . . . . 15
+ 6.2. Segment Routing Node Functions . . . . . . . . . . . . . 16
+ 6.2.1. Ingress SR Node . . . . . . . . . . . . . . . . . . . 16
+ 6.2.2. Transit Non-SR Capable Node . . . . . . . . . . . . . 18
+ 6.2.3. SR Intra Segment Transit Node . . . . . . . . . . . . 18
+ 6.2.4. SR Segment Endpoint Node . . . . . . . . . . . . . . 18
+ 6.3. FRR Flag Settings . . . . . . . . . . . . . . . . . . . . 18
+ 7. SR and Tunneling . . . . . . . . . . . . . . . . . . . . . . 18
+ 8. Example Use Case . . . . . . . . . . . . . . . . . . . . . . 19
+ 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 21
+ 10. Manageability Considerations . . . . . . . . . . . . . . . . 21
+ 11. Security Considerations . . . . . . . . . . . . . . . . . . . 21
+ 12. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 21
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 2]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ 13. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 21
+ 14. References . . . . . . . . . . . . . . . . . . . . . . . . . 21
+ 14.1. Normative References . . . . . . . . . . . . . . . . . . 21
+ 14.2. Informative References . . . . . . . . . . . . . . . . . 21
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 22
+
+1. Structure of this document
+
+ Section 3 gives an introduction on SR for IPv6 networks.
+
+ Section 4 describes the Segment Routing abstract model.
+
+ Section 5 defines the Segment Routing Header (SRH) allowing
+ instantiation of SR over IPv6 dataplane.
+
+ Section 6 details the procedures of the Segment Routing Header.
+
+2. Segment Routing Documents
+
+ Segment Routing terminology is defined in
+ [I-D.filsfils-spring-segment-routing].
+
+ Segment Routing use cases are described in
+ [I-D.filsfils-spring-segment-routing-use-cases].
+
+ Segment Routing IPv6 use cases are described in
+ [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing protocol extensions are defined in
+ [I-D.ietf-isis-segment-routing-extensions], and
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension].
+
+ The security mechanisms of the Segment Routing Header (SRH) are
+ described in [I-D.vyncke-6man-segment-routing-security].
+
+3. Introduction
+
+ Segment Routing (SR), defined in
+ [I-D.filsfils-spring-segment-routing], allows a node to steer a
+ packet through a controlled set of instructions, called segments, by
+ prepending a SR header to the packet. A segment can represent any
+ instruction, topological or service-based. SR allows enforcing a
+ flow through any path (topological or service/application based)
+ while maintaining per-flow state only at the ingress node to the SR
+ domain. Segments can be derived from different components: IGP, BGP,
+ Services, Contexts, Locators, etc. The list of segments forming the
+ path is called the Segment List and is encoded in the packet header.
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 3]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ SR allows the use of strict and loose source based routing paradigms
+ without requiring any additional signaling protocols in the
+ infrastructure hence delivering an excellent scalability property.
+
+ The source based routing model described in
+ [I-D.filsfils-spring-segment-routing] is inherited from the ones
+ proposed by [RFC1940] and [RFC2460]. The source based routing model
+ offers the support for explicit routing capability.
+
+3.1. Data Planes supporting Segment Routing
+
+ Segment Routing (SR), can be instantiated over MPLS
+ ([I-D.filsfils-spring-segment-routing-mpls]) and IPv6. This document
+ defines its instantiation over the IPv6 data-plane based on the use-
+ cases defined in [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing for IPv6 (SR-IPv6) is required in networks where MPLS
+ data-plane is not used or, when combined with SR-MPLS, in networks
+ where MPLS is used in the core and IPv6 is used at the edge (home
+ networks, datacenters).
+
+ This document defines a new type of Routing Header (originally
+ defined in [RFC2460]) called the Segment Routing Header (SRH) in
+ order to convey the Segment List in the packet header as defined in
+ [I-D.filsfils-spring-segment-routing]. Mechanisms through which
+ segments are known and advertised are outside the scope of this
+ document.
+
+3.2. Illustration
+
+ In the context of Figure 1 where all the links have the same IGP
+ cost, let us assume that a packet P enters the SR domain at an
+ ingress edge router I and that the operator requests the following
+ requirements for packet P:
+
+ The local service S offered by node B must be applied to packet P.
+
+ The links AB and CE cannot be used to transport the packet P.
+
+ Any node N along the journey of the packet should be able to
+ determine where the packet P entered the SR domain and where it
+ will exit. The intermediate node should be able to determine the
+ paths from the ingress edge router to itself, and from itself to
+ the egress edge router.
+
+ Per-flow State for packet P should only be created at the ingress
+ edge router.
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 4]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ The operator can forbid, for security reasons, anyone outside the
+ operator domain to exploit its intra-domain SR capabilities.
+
+ I---A---B---C---E
+ \ | / \ /
+ \ | / F
+ \|/
+ D
+
+ Figure 1: An illustration of SR properties
+
+ All these properties may be realized by instructing the ingress SR
+ edge router I to push the following abstract SR header on the packet
+ P.
+
+ +---------------------------------------------------------------+
+ | | |
+ | Abstract SR Header | |
+ | | |
+ | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported |
+ | ^ | | Packet |
+ | | | | P |
+ | +---------------------+ | |
+ | | |
+ +---------------------------------------------------------------+
+
+ Figure 2: Packet P at node I
+
+ The abstract SR header contains a source route encoded as a list of
+ segments {SD, SB, SS, SF, SE}, a pointer (Ptr) and the identification
+ of the ingress and egress SR edge routers (segments SI and SE).
+
+ A segment identifies a topological instruction or a service
+ instruction. A segment can either be global or local. The
+ instruction associated with a global segment is recognized and
+ executed by any SR-capable node in the domain. The instruction
+ associated with a local segment is only supported by the specific
+ node that originates it.
+
+ Let us assume some IGP (e.g., ISIS and OSPF) extensions to define a
+ "Node Segment" as a global instruction within the IGP domain to
+ forward a packet along the shortest path to the specified node. Let
+ us further assume that within the SR domain illustrated in Figure 1,
+ segments SI, SD, SB, SE and SF respectively identify IGP node
+ segments to I, D, B, E and F.
+
+ Let us assume that node B identifies its local service S with local
+ segment SS.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 5]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ With all of this in mind, let us describe the journey of the packet
+ P.
+
+ The packet P reaches the ingress SR edge router. I pushes the SR
+ header illustrated in Figure 2 and sets the pointer to the first
+ segment of the list (SD).
+
+ SD is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to D.
+
+ Once at D, the pointer is incremented and the next segment is
+ executed (SB).
+
+ SB is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to B.
+
+ Once at B, the pointer is incremented and the next segment is
+ executed (SS).
+
+ SS is an instruction only recognized by node B which causes the
+ packet to receive service S.
+
+ Once the service is applied, the next segment is executed (SF) which
+ causes the packet to be forwarded along the shortest path to F.
+
+ Once at F, the pointer is incremented and the next segment is
+ executed (SE).
+
+ SE is an instruction recognized by all the nodes in the SR domain
+ which causes the packet to be forwarded along the shortest path to E.
+
+ E then removes the SR header and the packet continues its journey
+ outside the SR domain.
+
+ All of the requirements are met.
+
+ First, the packet P has not used links AB and CE: the shortest-path
+ from I to D is I-A-D, the shortest-path from D to B is D-B, the
+ shortest-path from B to F is B-C-F and the shortest-path from F to E
+ is F-E, hence the packet path through the SR domain is I-A-D-B-C-F-E
+ and the links AB and CE have been avoided.
+
+ Second, the service S supported by B has been applied on packet P.
+
+ Third, any node along the packet path is able to identify the service
+ and topological journey of the packet within the SR domain. For
+ example, node C receives the packet illustrated in Figure 3 and hence
+ is able to infer where the packet entered the SR domain (SI), how it
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 6]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ got up to itself {SD, SB, SS}, where it will exit the SR domain
+ (SE) and how it will do so {SF, SE}.
+
+ +---------------------------------------------------------------+
+ | | |
+ | SR Header | |
+ | | |
+ | {SD, SB, SS, SF, SE}, Ptr, SI, SE | Transported |
+ | ^ | | Packet |
+ | | | | P |
+ | +--------+ | |
+ | | |
+ +---------------------------------------------------------------+
+
+ Figure 3: Packet P at node C
+
+ Fourth, only node I maintains per-flow state for packet P. The
+ entire program of topological and service instructions to be executed
+ by the SR domain on packet P is encoded by the ingress edge router I
+ in the SR header in the form of a list of segments where each segment
+ identifies a specific instruction. No further per-flow state is
+ required along the packet path. The per-flow state is in the SR
+ header and travels with the packet. Intermediate nodes only hold
+ states related to the IGP global node segments and the local IGP
+ adjacency segments. These segments are not per-flow specific and
+ hence scale very well. Typically, an intermediate node would
+ maintain in the order of 100's to 1000's global node segments and in
+ the order of 10's to 100's of local adjacency segments. Typically the
+ SR IGP forwarding table is expected to be much less than 10000
+ entries.
+
+ Fifth, the SR header is inserted at the entrance to the domain and
+ removed at the exit of the operator domain. For security reasons,
+ the operator can forbid anyone outside its domain to use its intra-
+ domain SR capability.
+
+4. Abstract Routing Model
+
+ At the entrance of the SR domain, the ingress SR edge router pushes
+ the SR header on top of the packet. At the exit of the SR domain,
+ the egress SR edge router removes the SR header.
+
+ The abstract SR header contains an ordered list of segments, a
+ pointer identifying the next segment to process and the
+ identifications of the ingress and egress SR edge routers on the path
+ of this packet. The pointer identifies the segment that MUST be used
+ by the receiving router to process the packet. This segment is
+ called the active segment.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 7]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ A property of SR is that the entire source route of the packet,
+ including the identity of the ingress and egress edge routers is
+ always available with the packet. This allows for interesting
+ accounting and service applications.
+
+ We define three SR-header operations:
+
+ "PUSH": an SR header is pushed on an IP packet, or additional
+ segments are added at the head of the segment list. The pointer
+ is moved to the first entry of the added segments.
+
+ "NEXT": the active segment is completed, the pointer is moved to
+ the next segment in the list.
+
+ "CONTINUE": the active segment is not completed, the pointer is
+ left unchanged.
+
+ In the future, other SR-header management operations may be defined.
+
+ As the packet travels through the SR domain, the pointer is
+ incremented through the ordered list of segments and the source route
+ encoded by the SR ingress edge node is executed.
+
+ A node processes an incoming packet according to the instruction
+ associated with the active segment.
+
+ Any instruction might be associated with a segment: for example, an
+ intra-domain topological strict or loose forwarding instruction, a
+ service instruction, etc.
+
+ At minimum, a segment instruction must define two elements: the
+ identity of the next-hop to forward the packet to (this could be the
+ same node or a context within the node) and which SR-header
+ management operation to execute.
+
+ Each segment is known in the network through a Segment Identifier
+ (SID). The terms "segment" and "SID" are interchangeable.
+
+4.1. Segment Routing Global Block (SRGB)
+
+ In the SR abstract model, a segment is identified by a Segment
+ Routing Identifier (SID). The SR abstract model doesn't mandate a
+ specific format for the SID (IPv6 address or other formats).
+
+ In Segment Routing IPv6 the SID is an IPv6 address. Therefore, the
+ SRGB is materialized by the global IPv6 address space which
+ represents the set of IPv6 routable addresses in the SR domain. The
+ following rules apply:
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 8]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ o Each node of the SR domain MUST be configured with the Segment
+ Routing Global Block (SRGB).
+
+ o All global segments must be allocated from the SRGB. Any SR
+ capable node MUST be able to process any global segment advertised
+ by any other node within the SR domain.
+
+ o Any segment outside the SRGB has a local significance and is
+ called a "local segment". An SR-capable node MUST be able to
+ process the local segments it originates. An SR-capable node MUST
+ NOT support the instruction associated with a local segment
+ originated by a remote node.
+
+4.2. Traffic Engineering with SR
+
+ An SR Traffic Engineering policy is composed of two elements: a flow
+ classification and a segment-list to prepend on the packets of the
+ flow.
+
+ In SR, this per-flow state only exists at the ingress edge node where
+ the policy is defined and the SR header is pushed.
+
+ It is outside the scope of the document to define the process that
+ leads to the instantiation at a node N of an SR Traffic Engineering
+ policy.
+
+ [I-D.filsfils-spring-segment-routing-use-cases] illustrates various
+ alternatives:
+
+ N is deriving this policy automatically (e.g. FRR).
+
+ N is provisioned explicitly by the operator.
+
+ N is provisioned by a controller or server (e.g.: SDN Controller).
+
+ N is provisioned by the operator with a high-level policy which is
+ mapped into a path thanks to a local CSPF-based computation (e.g.
+ affinity/SRLG exclusion).
+
+ N could also be provisioned by other means.
+
+ [I-D.filsfils-spring-segment-routing-use-cases] explains why the
+ majority of use-cases require very short segment-lists, hence
+ minimizing the performance impact, if any, of inserting and
+ transporting the segment list.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 9]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ An SDN controller, which desires to instantiate at node N an SR
+ Traffic Engineering policy, collects the SR capability of node N so
+ as to ensure that the policy meets its capability.
+
+4.3. Segment Routing Database
+
+ The Segment Routing Database (SRDB) is a set of entries where each
+ entry is identified by a SID. The instruction associated with each
+ entry at least defines the identity of the next-hop to which the
+ packet should be forwarded and what operation should be performed on
+ the SR header (PUSH, CONTINUE, NEXT).
+
+ +---------+-----------+---------------------------------+
+ | Segment | Next-Hop | SR Header operation |
+ +---------+-----------+---------------------------------+
+ | Sk | M | CONTINUE |
+ | Sj | N | NEXT |
+ | Sl | NAT Srvc | NEXT |
+ | Sm | FW srvc | NEXT |
+ | Sn | Q | NEXT |
+ | etc. | etc. | etc. |
+ +---------+-----------+---------------------------------+
+
+ Figure 4: SR Database
+
+ Each SR-capable node maintains its local SRDB. SRDB entries can
+ either derive from local policy or from protocol segment
+ advertisement.
+
+5. IPv6 Instantiation of Segment Routing
+
+5.1. Segment Identifiers (SIDs) and SRGB
+
+ Segment Routing, as described in
+ [I-D.filsfils-spring-segment-routing], defines Node-SID and
+ Adjacency-SID. When SR is used over IPv6 data-plane the following
+ applies.
+
+ The SRGB is the global IPv6 address space which represents the set of
+ IPv6 routable addresses in the SR domain.
+
+ Node SIDs are IPv6 addresses part of the SRGB (i.e.: routable
+ addresses). Adjacency-SIDs are IPv6 addresses which may not be part
+ of the global IPv6 address space.
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 10]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+5.1.1. Node-SID
+
+ The Node-SID identifies a node. With SR-IPv6 the Node-SID is an IPv6
+ prefix that the operator configured on the node and that is used as
+ the node identifier. Typically, in case of a router, this is the
+ IPv6 address of the node loopback interface. Therefore, SR-IPv6 does
+ not require any additional SID advertisement for the Node Segment.
+ The Node-SID is in fact the IPv6 address of the node.
+
+5.1.2. Adjacency-SID
+
+ In the SR architecture defined in
+ [I-D.filsfils-spring-segment-routing] the Adjacency-SID (or Adj-SID)
+ identifies a given interface and may be local or global (depending on
+ how it is advertised). A node may advertise one (or more) Adj-SIDs
+ allocated to a given interface so as to force the forwarding of the
+ packet (when received with that particular Adj-SID) into the
+ interface regardless of the routing entry for the packet destination.
+ The semantic of the Adj-SID is:
+
+ Send out the packet to the interface this prefix is allocated to.
+
+ When SR is applied to IPv6, any SID is a global IPv6 address and
+ therefore, an Adj-SID has a global significance (i.e.: the IPv6
+ address representing the SID is a global address). In other words, a
+ node that advertises the Adj-SID in the form of a global IPv6 address
+ representing the link/adjacency the packet has to be forwarded to,
+ will apply to the Adj-SID a global significance.
+
+ Advertisement of Adj-SID may be done using multiple mechanisms among
+ which the ones described in ISIS and OSPF protocol extensions:
+ [I-D.ietf-isis-segment-routing-extensions] and
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension]. The distinction
+ between local and global significance of the Adj-SID is given in the
+ encoding of the Adj-SID advertisement.
+
+5.2. Segment Routing Extension Header (SRH)
+
+ A new type of the Routing Header (originally defined in [RFC2460]) is
+ defined: the Segment Routing Header (SRH) which has a new Routing
+ Type, (suggested value 4) to be assigned by IANA.
+
+ As an example, if an explicit path is to be constructed across a core
+ network running ISIS or OSPF, the segment list will contain SIDs
+ representing the nodes across the path (loose or strict) which,
+ usually, are the IPv6 loopback interface address of each node. If
+ the path is across service or application entities, the segment list
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 11]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ contains the IPv6 addresses of these services or application
+ instances.
+
+ The Segment Routing Header (SRH) is defined as follows:
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | First Segment | Flags | HMAC Key ID |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[0] (128 bits ipv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ ...
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[n] (128 bits ipv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[0] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[1] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Policy List[2] (optional) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ | |
+ | HMAC (256 bits) |
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 12]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ | (optional) |
+ | |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Next Header: 8-bit selector. Identifies the type of header
+ immediately following the SRH.
+
+ o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ header in 8-octet units, not including the first 8 octets.
+
+ o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+
+ o Segments Left. Defined in [RFC2460], it contains the index, in
+ the Segment List, of the next segment to inspect. Segments Left
+ is decremented at each segment and it is used as an index in the
+ segment list.
+
+ o First Segment: offset in the SRH, not including the first 8 octets
+ and expressed in 16-octet units, pointing to the last element of
+ the segment list, which is in fact the first segment of the
+ segment routing path.
+
+ o Flags: 16 bits of flags. The following flags are defined:
+
+ 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |C|P|R|R| Policy Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ C-flag: Clean-up flag. Set when the SRH has to be removed from
+ the packet when packet reaches the last segment.
+
+ P-flag: Protected flag. Set when the packet has been rerouted
+ through FRR mechanism by a SR endpoint node. See Section 6.3
+ for more details.
+
+ R-flags. Reserved and for future use.
+
+ Policy Flags. Define the type of the IPv6 addresses encoded
+ into the Policy List (see below). The following have been
+ defined:
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 13]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Bits 4-6: determine the type of the first element after the
+ segment list.
+
+ Bits 7-9: determine the type of the second element.
+
+ Bits 10-12: determine the type of the third element.
+
+ Bits 13-15: determine the type of the fourth element.
+
+ The following values are used for the type:
+
+ 0x0: Not present. If value is set to 0x0, it means the
+ element represented by these bits is not present.
+
+ 0x1: SR Ingress.
+
+ 0x2: SR Egress.
+
+ 0x3: Original Source Address.
+
+ o HMAC Key ID and HMAC field, and their use are defined in
+ [I-D.vyncke-6man-segment-routing-security].
+
+ o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ segment in the Segment List. The Segment List is encoded starting
+ from the last segment of the path. I.e., the first element of the
+ segment list (Segment List [0]) contains the last segment of the
+ path while the last segment of the Segment List (Segment List[n])
+ contains the first segment of the path. The index contained in
+ "Segments Left" identifies the current active segment.
+
+ o Policy List. Optional addresses representing specific nodes in
+ the SR path such as:
+
+ SR Ingress: a 128 bit generic identifier representing the
+ ingress in the SR domain (i.e., it need not be a valid IPv6
+ address).
+
+ SR Egress: a 128 bit generic identifier representing the egress
+ in the SR domain (i.e., it need not be a valid IPv6
+ address).
+
+ Original Source Address: IPv6 address originally present in the
+ SA field of the packet.
+
+ The segments in the Policy List are encoded after the segment list
+ and they are optional. If none are in the SRH, all bits of the
+ Policy Flags MUST be set to 0x0.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 14]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+5.2.1. SRH and RFC2460 behavior
+
+ The SRH being a new type of the Routing Header, it also has the same
+ properties:
+
+ SHOULD only appear once in the packet.
+
+ Only the router whose address is in the DA field of the packet
+ header MUST inspect the SRH.
+
+ Therefore, Segment Routing in IPv6 networks implies that the segment
+ identifier (i.e.: the IPv6 address of the segment) is moved into the
+ DA of the packet.
+
+ The DA of the packet changes at each segment termination/completion
+ and therefore the original DA of the packet MUST be encoded as the
+ last segment of the path.
+
+ As illustrated in Section 3.2, nodes that are within the path of a
+ segment will forward packets based on the DA of the packet without
+ inspecting the SRH. This ensures full interoperability between SR-
+ capable and non-SR-capable nodes.
+
+6. SRH Procedures
+
+ In this section we describe the different procedures on the SRH.
+
+6.1. Segment Routing Operations
+
+ When Segment Routing is instantiated over the IPv6 data plane the
+ following applies:
+
+ o The segment list is encoded in the SRH.
+
+ o The active segment is in the destination address of the packet.
+
+ o The Segment Routing CONTINUE operation (as described in
+ [I-D.filsfils-spring-segment-routing]) is implemented as a
+ regular/plain IPv6 operation consisting of DA based forwarding.
+
+ o The NEXT operation is implemented through the update of the DA
+ with the value represented by the Next Segment field in the SRH.
+
+ o The PUSH operation is implemented through the insertion of the SRH
+ or the insertion of additional segments in the SRH segment list.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 15]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+6.2. Segment Routing Node Functions
+
+ SR packets are forwarded to segment endpoints (i.e.: nodes whose
+ address is in the DA field of the packet). The segment endpoint,
+ when receiving a SR packet destined to itself, does:
+
+ o Inspect the SRH.
+
+ o Determine the next active segment.
+
+ o Update the Segments Left field (or, if requested, remove the SRH
+ from the packet).
+
+ o Update the DA.
+
+ o Send the packet to the next segment.
+
+ The procedures applied to the SRH are related to the node function.
+ The following node functions are defined:
+
+ Ingress SR Node.
+
+ Transit Non-SR Node.
+
+ Transit SR Intra Segment Node.
+
+ SR Endpoint Node.
+
+6.2.1. Ingress SR Node
+
+ Ingress Node can be a router at the edge of the SR domain or a SR-
+ capable host. The ingress SR node may obtain the segment list by
+ either:
+
+ Local path computation.
+
+ Local configuration.
+
+ Interaction with an SDN controller delivering the path as a
+ complete SRH.
+
+ Any other mechanism (mechanisms through which the path is acquired
+ are outside the scope of this document).
+
+ When creating the SRH (either at ingress node or in the SDN
+ controller) the following is done:
+
+ Next Header and Hdr Ext Len fields are set according to [RFC2460].
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 16]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Routing Type field is set as TBD (SRH).
+
+ The Segment List is built with the FIRST segment of the path
+ encoded in the LAST element of the Segment List. Subsequent
+ segments are encoded on top of the first segment. Finally, the
+ LAST segment of the path is encoded in the FIRST element of the
+ Segment List. In other words, the Segment List is encoded in the
+ reverse order of the path.
+
+ The original DA of the packet is encoded as the last segment of
+ the path (encoded in the first element of the Segment List).
+
+ The DA of the packet is set with the value of the first segment
+ (found in the last element of the Segment List).
+
+ The Segments Left field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The packet is sent out towards the first segment (i.e.:
+ represented in the packet DA).
+
+6.2.1.1. Security at Ingress
+
+ The procedures related to the Segment Routing security are detailed
+ in [I-D.vyncke-6man-segment-routing-security].
+
+ In the case where the SR domain boundaries are not under control of
+ the network operator (e.g.: when the SR domain edge is in a home
+ network), it is important to authenticate and validate the content of
+ any SRH being received by the network operator. In such case, the
+ security procedure described in
+ [I-D.vyncke-6man-segment-routing-security] is to be used.
+
+ The ingress node (e.g.: the host in the home network) requests the
+ SRH from a control system (e.g.: an SDN controller) which delivers
+ the SRH with its HMAC signature on it.
+
+ Then, the home network host can send out SR packets (with an SRH on
+ it) that will be validated at the ingress of the network operator
+ infrastructure.
+
+ The ingress node of the network operator infrastructure is
+ configured to validate the incoming SRH HMACs in order to
+ allow only packets having a correct SRH according to their SA/DA
+ addresses.
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 17]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+6.2.2. Transit Non-SR Capable Node
+
+ SR is interoperable with plain IPv6 forwarding. Any non SR-capable
+ node will forward SR packets solely based on the DA. There's no SRH
+ inspection. This ensures full interoperability between SR and non-SR
+ nodes.
+
+6.2.3. SR Intra Segment Transit Node
+
+ Only the node whose address is in DA inspects and processes the SRH
+ (according to [RFC2460]). An intra segment transit node is not in
+ the DA and its forwarding is based on DA and its SR-IPv6 FIB.
+
+6.2.4. SR Segment Endpoint Node
+
+ The SR segment endpoint node is the node whose address is in the DA.
+ The segment endpoint node inspects the SRH and does:
+
+ 1. IF DA = myself (segment endpoint)
+ 2. IF Segments Left > 0 THEN
+ decrement Segments Left
+ update DA with Segment List[Segments Left]
+ 3. ELSE IF Segment List[Segments Left] <> DA THEN
+ update DA with Segment List[Segments Left]
+ IF Clean-up bit is set THEN remove the SRH
+ 4. ELSE give the packet to next PID (application)
+ End of processing.
+ 5. Forward the packet out
+
+6.3. FRR Flag Settings
+
+ A node supporting SR and doing Fast Reroute (as described in
+ [I-D.filsfils-spring-segment-routing-use-cases]), when rerouting
+ packets through FRR mechanisms, SHOULD inspect the rerouted packet
+ header and look for the SRH. If the SRH is present, the rerouting
+ node SHOULD set the Protected bit on all rerouted packets.
+
+7. SR and Tunneling
+
+ Encapsulation can be realized in two different ways with SR-IPv6:
+
+ Outer encapsulation.
+
+ SRH with SA/DA original addresses.
+
+ Outer encapsulation tunneling is the traditional method where an
+ additional IPv6 header is prepended to the packet. The original IPv6
+ header being encapsulated, everything is preserved and the packet is
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 18]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ switched/routed according to the outer header (that could contain a
+ SRH).
+
+ SRH allows encoding both original SA and DA, hence an operator may
+ decide to change the SA/DA at ingress and restore them at egress.
+ This can be achieved without outer encapsulation, by changing SA/DA
+ and encoding the original SA in the Policy List and the original
+ DA in the Segment List.
+
+8. Example Use Case
+
+ A more detailed description of use cases is available in
+ [I-D.ietf-spring-ipv6-use-cases]. In this section, a simple SR-IPv6
+ example is illustrated.
+
+ In the topology described in Figure 6, an end-to-end SR deployment
+ is assumed. Therefore SR is supported by all nodes from A to J.
+
+ Home Network | Backbone | Datacenter
+ | |
+ | +---+ +---+ +---+ | +---+ |
+ +---|---| C |---| D |---| E |---|---| I |---|
+ | | +---+ +---+ +---+ | +---+ |
+ | | | | | | | | +---+
+ +---+ +---+ | | | | | | |--| X |
+ | A |---| B | | +---+ +---+ +---+ | +---+ | +---+
+ +---+ +---+ | | F |---| G |---| H |---|---| J |---|
+ | +---+ +---+ +---+ | +---+ |
+ | |
+ | +-----------+
+ | SDN |
+ | Orch/Ctlr |
+ +-----------+
+
+ Figure 6: Sample SR topology
+
+ The following workflow applies to packets sent by host A and destined
+ to server X.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 19]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ . Host A sends a request for a path to server X to the SDN
+ controller or orchestration system.
+
+ . The SDN controller/orchestrator builds a SRH with:
+ . Segment List: C, F, J, X
+ . HMAC
+ that satisfies the requirements expressed in the request
+ by host A and based on policies applicable to host A.
+
+ . Host A receives the SRH and inserts it into the packet.
+ The packet has now:
+ . SA: A
+ . DA: C
+ . SRH with
+ . SL: X, J, F, C
+ . Segments Left: 3 (i.e.: Segment List size - 1)
+ . PL: C (ingress), J (egress)
+ Note that X is the last segment and C is the
+ first segment (i.e.: the SL is encoded in the reverse
+ path order).
+ . HMAC
+
+ . When packet arrives in C (first segment), C does:
+ . Validate the HMAC of the SRH.
+ . Decrement Segments Left by one: 2
+ . Update the DA with the next segment found in
+ Segment List[2]. DA is set to F.
+ . Forward the packet to F.
+
+ . When packet arrives in F (second segment), F does:
+ . Decrement Segments Left by one: 1
+ . Update the DA with the next segment found in
+ Segment List[1]. DA is set to J.
+ . Forward the packet to J.
+
+ . Packet travels across G and H nodes which do plain
+ IPv6 forwarding based on DA. No inspection of SRH needs
+ to be done in these nodes. However, any SR capable node
+ is allowed to set the Protected bit in case of FRR
+ protection.
+
+ . When packet arrives in J (third segment), J does:
+ . Decrement Segments Left by one: 0
+ . Update the DA with the next segment found in
+ Segment List[0]. DA is set to X.
+ . If the cleanup bit is set, then node J will strip out
+ the SRH from the packet.
+ . Forward the packet to X.
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 20]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ The packet arrives in the server that may or may not support SR. The
+ return traffic, from server to host, may be sent using the same
+ procedures.
+
+9. IANA Considerations
+
+ TBD
+
+10. Manageability Considerations
+
+ TBD
+
+11. Security Considerations
+
+ Security mechanisms applied to Segment Routing over IPv6 networks are
+ detailed in [I-D.vyncke-6man-segment-routing-security].
+
+12. Contributors
+
+ The authors would like to thank Dave Barach, John Leddy, John
+ Brzozowski, Pierre Francois, Nagendra Kumar, Mark Townsley, Christian
+ Martin, Roberta Maglione, Eric Vyncke, James Connolly, David Lebrun
+ and Fred Baker for their contribution to this document.
+
+13. Acknowledgements
+
+ TBD
+
+14. References
+
+14.1. Normative References
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119, March 1997.
+
+ [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6
+ (IPv6) Specification", RFC 2460, December 1998.
+
+14.2. Informative References
+
+ [I-D.filsfils-spring-segment-routing]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe,
+ "Segment Routing Architecture", draft-filsfils-spring-
+ segment-routing-04 (work in progress), July 2014.
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 21]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ [I-D.filsfils-spring-segment-routing-mpls]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., and E. Crabbe,
+ "Segment Routing with MPLS data plane", draft-filsfils-
+ spring-segment-routing-mpls-03 (work in progress), August
+ 2014.
+
+ [I-D.filsfils-spring-segment-routing-use-cases]
+ Filsfils, C., Francois, P., Previdi, S., Decraene, B.,
+ Litkowski, S., Horneffer, M., Milojevic, I., Shakir, R.,
+ Ytti, S., Henderickx, W., Tantsura, J., Kini, S., and E.
+ Crabbe, "Segment Routing Use Cases", draft-filsfils-
+ spring-segment-routing-use-cases-01 (work in progress),
+ October 2014.
+
+ [I-D.ietf-isis-segment-routing-extensions]
+ Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
+ Litkowski, S., Decraene, B., and J. Tantsura, "IS-IS
+ Extensions for Segment Routing", draft-ietf-isis-segment-
+ routing-extensions-03 (work in progress), October 2014.
+
+ [I-D.ietf-spring-ipv6-use-cases]
+ Brzozowski, J., Leddy, J., Leung, I., Previdi, S.,
+ Townsley, W., Martin, C., Filsfils, C., and R. Maglione,
+ "IPv6 SPRING Use Cases", draft-ietf-spring-ipv6-use-
+ cases-03 (work in progress), November 2014.
+
+ [I-D.psenak-ospf-segment-routing-ospfv3-extension]
+ Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
+ Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
+ Extensions for Segment Routing", draft-psenak-ospf-
+ segment-routing-ospfv3-extension-02 (work in progress),
+ July 2014.
+
+ [I-D.vyncke-6man-segment-routing-security]
+ Vyncke, E. and S. Previdi, "IPv6 Segment Routing Header
+ (SRH) Security Considerations", July 2014.
+
+ [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
+ Zappala, "Source Demand Routing: Packet Format and
+ Forwarding Specification (Version 1)", RFC 1940, May 1996.
+
+Authors' Addresses
+
+
+
+
+
+
+
+Previdi, et al. Expires June 12, 2015 [Page 22]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) December 2014
+
+
+ Stefano Previdi (editor)
+ Cisco Systems, Inc.
+ Via Del Serafico, 200
+ Rome 00142
+ Italy
+
+ Email: sprevidi@cisco.com
+
+
+ Clarence Filsfils
+ Cisco Systems, Inc.
+ Brussels
+ BE
+
+ Email: cfilsfil@cisco.com
+
+
+ Brian Field
+ Comcast
+ 4100 East Dry Creek Road
+ Centennial, CO 80122
+ US
+
+ Email: Brian_Field@cable.comcast.com
+
+
+ Ida Leung
+ Rogers Communications
+ 8200 Dixie Road
+ Brampton, ON L6T 0C1
+ CA
+
+ Email: Ida.Leung@rci.rogers.com
diff --git a/src/vnet/sr/sr.c b/src/vnet/sr/sr.c
new file mode 100644
index 00000000000..5d0275d992a
--- /dev/null
+++ b/src/vnet/sr/sr.c
@@ -0,0 +1,3333 @@
+/*
+ * sr.c: ipv6 segment routing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing main functions
+ *
+ */
+#include <vnet/vnet.h>
+#include <vnet/sr/sr.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+
+#include <openssl/hmac.h>
+
+/**
+ * @brief Global Segment Routing state, referenced in this file as &sr_main
+ */
+ip6_sr_main_t sr_main;
+/**
+ * @brief File-local graph node registration object
+ *
+ * NOTE(review): presumably filled in by the "sr-local" node registration
+ * further down in this file - confirm.
+ */
+static vlib_node_registration_t sr_local_node;
+
+/**
+ * @brief Dynamically added SR DPO type
+ */
+static dpo_type_t sr_dpo_type;
+
+/**
+ * @brief Sign an SR header in place using the HMAC key it names
+ *
+ * Returns without touching the packet when sr->hmac_key is 0 or is not
+ * a valid index into sm->hmac_keys. Otherwise the text to sign is
+ * assembled in a scratch vector in this order: ip6 source address,
+ * first_segment octet, an octet that is 0x80 when the cleanup flag is
+ * set and 0 otherwise, hmac key id octet, then segments
+ * [0 .. first_segment]. The digest is stored starting at
+ * sr->segments[first_segment + 2].
+ *
+ * @param sm ip6_sr_main_t * - supplies hmac_keys, hmac_ctx and md
+ * @param ip ip6_header_t * - its source address is part of the signed text
+ * @param sr ip6_sr_header_t * - header to sign; receives the digest
+ */
+void
+sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr)
+{
+  /* Scratch vector; static, so its storage is reused from call to call */
+  static u8 *keybuf;
+  u32 key_id = sr->hmac_key;
+  ip6_sr_hmac_key_t *key;
+  ip6_address_t *seg;
+  u8 *dst;
+  u32 digest_len;
+  int remaining;
+
+  /* Key id 0: no signature. Key id we have no entry for: give up */
+  if (key_id == 0 || key_id >= vec_len (sm->hmac_keys))
+    return;
+
+  key = &sm->hmac_keys[key_id];
+
+  vec_reset_length (keybuf);
+
+  /* pkt ip6 src address */
+  vec_add2 (keybuf, dst, sizeof (ip6_address_t));
+  clib_memcpy (dst, ip->src_address.as_u8, sizeof (ip6_address_t));
+
+  /*
+   * Three single octets: first segment, octet w/ bit 0 = "clean" flag,
+   * hmac key id
+   */
+  vec_add2 (keybuf, dst, 3);
+  dst[0] = sr->first_segment;
+  dst[1] = 0;
+  if (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP))
+    dst[1] = 0x80;
+  dst[2] = sr->hmac_key;
+
+  /* segments [0 .. first_segment] */
+  seg = sr->segments;
+  for (remaining = sr->first_segment + 1; remaining > 0; remaining--)
+    {
+      vec_add2 (keybuf, dst, sizeof (ip6_address_t));
+      clib_memcpy (dst, seg->as_u8, sizeof (ip6_address_t));
+      seg++;
+    }
+
+  /* Step over one more address slot; the digest is written after it */
+  seg++;
+
+  HMAC_CTX_init (sm->hmac_ctx);
+  if (!HMAC_Init (sm->hmac_ctx, key->shared_secret,
+                  vec_len (key->shared_secret), sm->md))
+    clib_warning ("barf1");
+  if (!HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf)))
+    clib_warning ("barf2");
+  if (!HMAC_Final (sm->hmac_ctx, (unsigned char *) seg, &digest_len))
+    clib_warning ("barf3");
+  HMAC_CTX_cleanup (sm->hmac_ctx);
+}
+
+/**
+ * @brief Render the SR header flags word as text
+ *
+ * Emits "cleanup " and/or "reroute " when the corresponding flag bits
+ * are set, then "pl: " followed by one "[n] <type> " entry for each of
+ * the policy list slots 1 through 4.
+ *
+ * @param s u8 * - formatted string
+ * @param args va_list * - u16 flags, then an int which, when nonzero,
+ *                         requests a byte swap of the flags before decoding
+ *
+ * @return formatted output string u8 *
+ */
+u8 *
+format_ip6_sr_header_flags (u8 * s, va_list * args)
+{
+  u16 flags = (u16) va_arg (*args, int);
+  int swap = va_arg (*args, int);
+  int slot = 1;
+
+  if (swap)
+    flags = clib_host_to_net_u16 (flags);
+
+  if (flags & IP6_SR_HEADER_FLAG_CLEANUP)
+    s = format (s, "cleanup ");
+
+  if (flags & IP6_SR_HEADER_FLAG_PROTECTED)
+    s = format (s, "reroute ");
+
+  s = format (s, "pl: ");
+  while (slot <= 4)
+    {
+      u8 elt = ip6_sr_policy_list_flags (flags, slot);
+
+      s = format (s, "[%d] ", slot);
+
+      /* Any other value prints nothing after the slot index */
+      if (elt == IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT)
+        s = format (s, "NotPr ");
+      else if (elt == IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE)
+        s = format (s, "InPE ");
+      else if (elt == IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE)
+        s = format (s, "EgPE ");
+      else if (elt == IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR)
+        s = format (s, "OrgSrc ");
+
+      slot++;
+    }
+  return s;
+}
+
+/**
+ * @brief Format function for decoding ip6_sr_header_t
+ *
+ * Prints the fixed header fields, the decoded flags, the segment list in
+ * processing order (highest index first, segment 0 last) and then the
+ * addresses that follow the segment list as the policy list.
+ *
+ * @param s u8 * - formatted string
+ * @param args va_list * - ip6_sr_header_t *, then int print_hmac; when
+ *                         print_hmac is 0 and the header names an hmac
+ *                         key, the last two address slots are not printed
+ *
+ * @return formatted output string u8 *
+ */
+u8 *
+format_ip6_sr_header (u8 * s, va_list * args)
+{
+  /* Labels for policy list entries, indexed by the per-entry flag value */
+  static char *tags[] = { " ", "InPE: ", "EgPE: ", "OrgSrc: " };
+  ip6_sr_header_t *h = va_arg (*args, ip6_sr_header_t *);
+  ip6_address_t placeholder_addr =
+    { {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+       254, 254}
+  };
+  int print_hmac = va_arg (*args, int);
+  int i, pl_index, max_segs;
+  int flags_host_byte_order = clib_net_to_host_u16 (h->flags);
+
+  s = format (s, "next proto %d, len %d, type %d",
+              h->protocol, (h->length << 3) + 8, h->type);
+  s = format (s, "\n segs left %d, first_segment %d, hmac key %d",
+              h->segments_left, h->first_segment, h->hmac_key);
+  s = format (s, "\n flags %U", format_ip6_sr_header_flags,
+              flags_host_byte_order, 0 /* bswap needed */ );
+
+  /*
+   * Header length is in 8-byte units (minus one), so
+   * divide by 2 to ascertain the number of ip6 addresses in the
+   * segment list
+   */
+  max_segs = (h->length >> 1);
+
+  if (!print_hmac && h->hmac_key)
+    max_segs -= 2;
+
+  s = format (s, "\n Segments (in processing order):");
+
+  for (i = h->first_segment; i >= 1; i--)
+    s = format (s, "\n %U", format_ip6_address, h->segments + i);
+  if (ip6_address_is_equal (&placeholder_addr, h->segments))
+    s = format (s, "\n (empty placeholder)");
+  else
+    s = format (s, "\n %U", format_ip6_address, h->segments);
+
+  s = format (s, "\n Policy List:");
+
+  pl_index = 1;                 /* to match the RFC text */
+  for (i = (h->first_segment + 1); i < max_segs; i++, pl_index++)
+    {
+      char *tag = tags[0];
+
+      if (pl_index >= 1 && pl_index <= 4)
+        {
+          int this_pl_flag = ip6_sr_policy_list_flags
+            (flags_host_byte_order, pl_index);
+
+          /*
+           * The flag value is decoded from packet contents. Only use it
+           * as an index when it names one of the known labels; anything
+           * else keeps the blank label instead of reading past tags[].
+           */
+          if (this_pl_flag >= 0
+              && this_pl_flag < (int) (sizeof (tags) / sizeof (tags[0])))
+            tag = tags[this_pl_flag];
+        }
+
+      s = format (s, "\n %s%U", tag, format_ip6_address, h->segments + i);
+    }
+
+  return s;
+}
+
+/**
+ * @brief Format function for decoding ip6_sr_header_t with length
+ *
+ * Prints the ip6 header followed by the SR header located immediately
+ * after it. When a nonzero byte limit is given and it is smaller than
+ * sizeof (ip6_header_t) + sizeof (ip6_sr_header_t), only a "truncated"
+ * message is produced.
+ *
+ * @param s u8 * - formatted string
+ * @param args va_list * - ip6_header_t *, then u32 max_header_bytes
+ *                         (0 means no limit is checked)
+ *
+ * @return formatted output string u8 *
+ */
+u8 *
+format_ip6_sr_header_with_length (u8 * s, va_list * args)
+{
+  ip6_header_t *h = va_arg (*args, ip6_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  uword header_bytes;
+
+  header_bytes = sizeof (h[0]) + sizeof (ip6_sr_header_t);
+  if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+    return format (s, "ip6_sr header truncated");
+
+  s = format (s, "IP6: %U\n", format_ip6_header, h, max_header_bytes);
+  /*
+   * format_ip6_sr_header consumes exactly two arguments: the header
+   * pointer and the print_hmac flag.
+   */
+  s = format (s, "SR: %U\n", format_ip6_sr_header,
+              (ip6_sr_header_t *) (h + 1), 0 /* print_hmac */ );
+  return s;
+}
+
+/**
+ * @brief X-macro list of the next nodes reachable from sr-rewrite
+ *
+ * Each entry is _(SYMBOL, "graph-node-name"). Expanding the list with a
+ * suitable definition of _ generates the SR_REWRITE_NEXT_* enumerators.
+ *
+ * @note Cannot call replicate yet without DPDK: the sr-replicate entry
+ *       is only present when DPDK > 0
+*/
+#if DPDK > 0
+#define foreach_sr_rewrite_next \
+_(ERROR, "error-drop") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(SR_LOCAL, "sr-local") \
+_(SR_REPLICATE,"sr-replicate")
+#else
+#define foreach_sr_rewrite_next \
+_(ERROR, "error-drop") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(SR_LOCAL, "sr-local")
+#endif /* DPDK */
+
+/**
+ * @brief Next-node indices for sr-rewrite
+ *
+ * One SR_REWRITE_NEXT_<SYMBOL> enumerator per foreach_sr_rewrite_next
+ * entry, in list order; SR_REWRITE_N_NEXT is the number of entries.
+*/
+typedef enum
+{
+#define _(s,n) SR_REWRITE_NEXT_##s,
+ foreach_sr_rewrite_next
+#undef _
+ SR_REWRITE_N_NEXT,
+} sr_rewrite_next_t;
+
+/**
+ * @brief Struct for data for SR rewrite packet trace
+ *
+ * Filled in by sr-rewrite for traced buffers and decoded by
+ * format_sr_rewrite_trace.
+ */
+typedef struct
+{
+ /* ip6 source and destination address copied from the ip6 header */
+ ip6_address_t src, dst;
+ /* ip6 payload length in host byte order, as recorded by sr-rewrite */
+ u16 length;
+ /* next node index chosen for the packet */
+ u32 next_index;
+ /* index of the tunnel within sr_main.tunnels */
+ u32 tunnel_index;
+ /* raw copy of sizeof (sr) bytes starting at the SR header */
+ u8 sr[256];
+} sr_rewrite_trace_t;
+
+/**
+ * @brief Error strings for SR rewrite
+ *
+ * One string per sr_error(n,s) entry in sr_error.def, in file order.
+ */
+static char *sr_rewrite_error_strings[] = {
+#define sr_error(n,s) s,
+#include "sr_error.def"
+#undef sr_error
+};
+
+/**
+ * @brief Error codes for SR rewrite
+ *
+ * One SR_REWRITE_ERROR_<n> enumerator per sr_error(n,s) entry in
+ * sr_error.def, in the same order as sr_rewrite_error_strings;
+ * SR_REWRITE_N_ERROR is the number of entries.
+ */
+typedef enum
+{
+#define sr_error(n,s) SR_REWRITE_ERROR_##n,
+#include "sr_error.def"
+#undef sr_error
+ SR_REWRITE_N_ERROR,
+} sr_rewrite_error_t;
+
+
+/**
+ * @brief Format function for SR rewrite trace.
+ */
+u8 *
+format_sr_rewrite_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_rewrite_trace_t *t = va_arg (*args, sr_rewrite_trace_t *);
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_tunnel_t *tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
+ ip6_fib_t *rx_fib, *tx_fib;
+
+ rx_fib = ip6_fib_get (tun->rx_fib_index);
+ tx_fib = ip6_fib_get (tun->tx_fib_index);
+
+ s = format
+ (s, "SR-REWRITE: next %s ip6 src %U dst %U len %u\n"
+ " rx-fib-id %d tx-fib-id %d\n%U",
+ (t->next_index == SR_REWRITE_NEXT_SR_LOCAL)
+ ? "sr-local" : "ip6-lookup",
+ format_ip6_address, &t->src,
+ format_ip6_address, &t->dst, t->length,
+ rx_fib->table_id, tx_fib->table_id,
+ format_ip6_sr_header, t->sr, 0 /* print_hmac */ );
+ return s;
+}
+
+/**
+ * @brief Main processing dual-loop for Segment Routing Rewrite
+ * @node sr-rewrite
+ *
+ * @param vm vlib_main_t *
+ * @param node vlib_node_runtime_t *
+ * @param from_frame vlib_frame_t *
+ *
+ * @return from_frame->n_vectors uword
+ */
+static uword
+sr_rewrite (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ ip6_sr_main_t *sm = &sr_main;
+ u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *,
+ vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *);
+ sr_local_cb = sm->sr_local_cb;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Note 2x loop disabled */
+ while (0 && n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ ip6_header_t *ip0, *ip1;
+ ip6_sr_header_t *sr0, *sr1;
+ ip6_sr_tunnel_t *t0, *t1;
+ u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u32 next1 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+ u16 new_l1 = 0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+ }
+
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /*
+ * $$$ parse through header(s) to pick the point
+ * where we punch in the SR extention header
+ */
+ t0 =
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ t1 =
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t0->rewrite)) + b0->current_data);
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t1->rewrite)) + b1->current_data);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
+ vnet_buffer (b1)->sw_if_index[VLIB_TX] = t1->tx_fib_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+#if DPDK > 0 /* Cannot call replication node yet without DPDK */
+ /* add a replication node */
+ if (PREDICT_FALSE (t0->policy_index != ~0))
+ {
+ vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
+ next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr0 = (ip6_sr_header_t *) (t0->rewrite);
+ goto processnext;
+ }
+#endif /* DPDK */
+
+ /*
+ * SR-unaware service chaining case: pkt coming back from
+ * service has the original dst address, and will already
+ * have an SR header. If so, send it to sr-local
+ */
+ if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE))
+ {
+ vlib_buffer_advance (b0, sizeof (ip0));
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length);
+ next0 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip0->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ }
+ else
+ {
+ ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ }
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
+ ip0, len_bytes);
+ vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
+ /* $$$ tune */
+ clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
+
+ /* Fix the next header chain */
+ sr0->protocol = next_hdr;
+
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (t0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ /* Copy dst address into the DA slot in the segment list */
+ clib_memcpy (sr0->segments, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ /* Rewrite the ip6 dst address with the first hop */
+ clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64,
+ sizeof (ip6_address_t));
+
+ sr_fix_hmac (sm, ip0, sr0);
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR))
+ b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+#if DPDK > 0 /* Cannot call replication node yet without DPDK */
+ processnext:
+ /* add a replication node */
+ if (PREDICT_FALSE (t1->policy_index != ~0))
+ {
+ vnet_buffer (b1)->ip.save_protocol = t1->policy_index;
+ next1 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr1 = (ip6_sr_header_t *) (t1->rewrite);
+ goto trace00;
+ }
+#endif /* DPDK */
+ if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE))
+ {
+ vlib_buffer_advance (b1, sizeof (ip1));
+ sr1 = (ip6_sr_header_t *) (ip1 + 1);
+ new_l1 = clib_net_to_host_u16 (ip1->payload_length);
+ next1 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip1->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ }
+ else
+ {
+ ip1->protocol = IPPROTO_IPV6_ROUTE;
+ }
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite),
+ ip1, len_bytes);
+ vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite));
+ ip1 = vlib_buffer_get_current (b1);
+ sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes);
+ clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite));
+
+ sr1->protocol = next_hdr;
+ new_l1 = clib_net_to_host_u16 (ip1->payload_length) +
+ vec_len (t1->rewrite);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ /* Copy dst address into the DA slot in the segment list */
+ clib_memcpy (sr1->segments, ip1->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ /* Rewrite the ip6 dst address with the first hop */
+ clib_memcpy (ip1->dst_address.as_u64, t1->first_hop.as_u64,
+ sizeof (ip6_address_t));
+
+ sr_fix_hmac (sm, ip1, sr1);
+
+ next1 = sr_local_cb ? sr_local_cb (vm, node, b1, ip1, sr1) :
+ next1;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next1 & 0x80000000))
+ {
+ next1 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE (next1 == SR_REWRITE_NEXT_ERROR))
+ b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */
+ trace00:
+#endif /* DPDK */
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_index = t0 - sm->tunnels;
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = new_l0;
+ tr->next_index = next0;
+ if (sr0)
+ clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b1, sizeof (*tr));
+ tr->tunnel_index = t1 - sm->tunnels;
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ tr->length = new_l1;
+ tr->next_index = next1;
+ if (sr1)
+ clib_memcpy (tr->sr, sr1, sizeof (tr->sr));
+ }
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0 = 0;
+ ip6_sr_tunnel_t *t0;
+ u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+
+ /*
+ * $$$ parse through header(s) to pick the point
+ * where we punch in the SR extention header
+ */
+ t0 =
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+#if DPDK > 0 /* Cannot call replication node yet without DPDK */
+ /* add a replication node */
+ if (PREDICT_FALSE (t0->policy_index != ~0))
+ {
+ vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
+ next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr0 = (ip6_sr_header_t *) (t0->rewrite);
+ goto trace0;
+ }
+#endif /* DPDK */
+
+ ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
+ >= ((word) vec_len (t0->rewrite)) + b0->current_data);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /*
+ * SR-unaware service chaining case: pkt coming back from
+ * service has the original dst address, and will already
+ * have an SR header. If so, send it to sr-local
+ */
+ if (PREDICT_FALSE (ip0->protocol == IPPROTO_IPV6_ROUTE))
+ {
+ vlib_buffer_advance (b0, sizeof (ip0));
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length);
+ next0 = SR_REWRITE_NEXT_SR_LOCAL;
+ }
+ else
+ {
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip0->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ }
+ else
+ {
+ ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ }
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
+ ip0, len_bytes);
+ vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
+ ip0 = vlib_buffer_get_current (b0);
+ sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
+ /* $$$ tune */
+ clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
+
+ /* Fix the next header chain */
+ sr0->protocol = next_hdr;
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (t0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ /* Copy dst address into the DA slot in the segment list */
+ clib_memcpy (sr0->segments, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
+ /* Rewrite the ip6 dst address with the first hop */
+ clib_memcpy (ip0->dst_address.as_u64, t0->first_hop.as_u64,
+ sizeof (ip6_address_t));
+
+ sr_fix_hmac (sm, ip0, sr0);
+
+ next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) :
+ next0;
+
+ /*
+ * Ignore "do not rewrite" shtik in this path
+ */
+ if (PREDICT_FALSE (next0 & 0x80000000))
+ {
+ next0 ^= 0xFFFFFFFF;
+ if (PREDICT_FALSE (next0 == SR_REWRITE_NEXT_ERROR))
+ b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
+ }
+ }
+#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */
+ trace0:
+#endif /* DPDK */
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
+ b0, sizeof (*tr));
+ tr->tunnel_index = t0 - sm->tunnels;
+ if (ip0)
+ {
+ memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ tr->length = new_l0;
+ tr->next_index = next0;
+ if (sr0)
+ clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (sr_rewrite_node) = { /* registration record for the "sr-rewrite" graph node */
+ .function = sr_rewrite, /* node dispatch function */
+ .name = "sr-rewrite",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_rewrite_trace,
+ .format_buffer = format_ip6_sr_header_with_length,
+
+ .n_errors = SR_REWRITE_N_ERROR,
+ .error_strings = sr_rewrite_error_strings,
+
+ .runtime_data_bytes = 0,
+
+ .n_next_nodes = SR_REWRITE_N_NEXT,
+ .next_nodes = { /* one slot per foreach_sr_rewrite_next entry, indexed by SR_REWRITE_NEXT_<s> */
+ #define _(s,n) [SR_REWRITE_NEXT_##s] = n,
+ foreach_sr_rewrite_next
+ #undef _
+ },
+ };
+
+ VLIB_NODE_FUNCTION_MULTIARCH (sr_rewrite_node, sr_rewrite)
+ /* *INDENT-ON* */
+
+ /**
+  * @brief Remove the FIB_SOURCE_SR entry for an IPv6 prefix.
+  *
+  * @param dst_address_arg ip6_address_t * prefix address
+  * @param dst_address_length u32 prefix length
+  * @param rx_table_id u32 table id, translated to a FIB index before deletion
+  *
+  * @return always 0
+  */
+ static int
+ ip6_delete_route_no_next_hop (ip6_address_t * dst_address_arg,
+                               u32 dst_address_length, u32 rx_table_id)
+ {
+   fib_prefix_t prefix;
+   u32 fib_index;
+
+   memset (&prefix, 0, sizeof (prefix));
+   prefix.fp_len = dst_address_length;
+   prefix.fp_proto = FIB_PROTOCOL_IP6;
+   prefix.fp_addr.ip6 = *dst_address_arg;
+
+   fib_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, rx_table_id);
+   fib_table_entry_delete (fib_index, &prefix, FIB_SOURCE_SR);
+
+   return 0;
+ }
+
+/**
+ * @brief Find or add if not found - HMAC shared secret
+ *
+ * @param sm ip6_sr_main_t *
+ * @param secret u8 *
+ * @param indexp u32 *
+ *
+ * @return ip6_sr_hmac_key_t *
+ */
+static ip6_sr_hmac_key_t *
+find_or_add_shared_secret (ip6_sr_main_t * sm, u8 * secret, u32 * indexp)
+{
+ uword *p;
+ ip6_sr_hmac_key_t *key = 0;
+ int i;
+
+ p = hash_get_mem (sm->hmac_key_by_shared_secret, secret);
+
+ if (p)
+ {
+ key = vec_elt_at_index (sm->hmac_keys, p[0]);
+ if (indexp)
+ *indexp = p[0];
+ return (key);
+ }
+
+ /* Specific key ID? */
+ if (indexp && *indexp)
+ {
+ vec_validate (sm->hmac_keys, *indexp);
+ key = sm->hmac_keys + *indexp;
+ }
+ else
+ {
+ for (i = 0; i < vec_len (sm->hmac_keys); i++)
+ {
+ if (sm->hmac_keys[i].shared_secret == 0)
+ {
+ key = sm->hmac_keys + i;
+ goto found;
+ }
+ }
+ vec_validate (sm->hmac_keys, i);
+ key = sm->hmac_keys + i;
+ found:
+ ;
+ }
+
+ key->shared_secret = vec_dup (secret);
+
+ hash_set_mem (sm->hmac_key_by_shared_secret, key->shared_secret,
+ key - sm->hmac_keys);
+
+ if (indexp)
+ *indexp = key - sm->hmac_keys;
+ return (key);
+}
+
+/**
+ * @brief Add or Delete a Segment Routing tunnel.
+ *
+ * @param a ip6_sr_add_del_tunnel_args_t *
+ *
+ * @return retval int
+ */
+int
+ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a)
+{
+ ip6_main_t *im = &ip6_main;
+ ip6_sr_tunnel_key_t key;
+ ip6_sr_tunnel_t *t;
+ uword *p, *n;
+ ip6_sr_header_t *h = 0;
+ u32 header_length;
+ ip6_address_t *addrp, *this_address;
+ ip6_sr_main_t *sm = &sr_main;
+ u8 *key_copy;
+ u32 rx_fib_index, tx_fib_index;
+ u32 hmac_key_index_u32;
+ u8 hmac_key_index = 0;
+ ip6_sr_policy_t *pt;
+ int i;
+ dpo_id_t dpo = DPO_INVALID;
+
+ /* Make sure that the rx FIB exists */
+ p = hash_get (im->fib_index_by_table_id, a->rx_table_id);
+
+ if (p == 0)
+ return -3;
+
+ /* remember the FIB index */
+ rx_fib_index = p[0];
+
+ /* Make sure that the supplied FIB exists */
+ p = hash_get (im->fib_index_by_table_id, a->tx_table_id);
+
+ if (p == 0)
+ return -4;
+
+ /* remember the FIB index */
+ tx_fib_index = p[0];
+
+ clib_memcpy (key.src.as_u8, a->src_address->as_u8, sizeof (key.src));
+ clib_memcpy (key.dst.as_u8, a->dst_address->as_u8, sizeof (key.dst));
+
+ /* When adding a tunnel:
+ * - If a "name" is given, it must not exist.
+ * - The "key" is always checked, and must not exist.
+ * When deleting a tunnel:
+ * - If the "name" is given, and it exists, then use it.
+ * - If the "name" is not given, use the "key".
+ * - If the "name" and the "key" are given, then both must point to the same
+ * thing.
+ */
+
+ /* Lookup the key */
+ p = hash_get_mem (sm->tunnel_index_by_key, &key);
+
+ /* If the name is given, look it up */
+ if (a->name)
+ n = hash_get_mem (sm->tunnel_index_by_name, a->name);
+ else
+ n = 0;
+
+ /* validate key/name parameters */
+ if (!a->is_del) /* adding a tunnel */
+ {
+ if (a->name && n) /* name given & exists already */
+ return -1;
+ if (p) /* key exists already */
+ return -1;
+ }
+ else /* deleting a tunnel */
+ {
+ if (!p) /* key doesn't exist */
+ return -2;
+ if (a->name && !n) /* name given & it doesn't exist */
+ return -2;
+
+ if (n) /* name given & found */
+ {
+ if (n[0] != p[0]) /* name and key do not point to the same thing */
+ return -2;
+ }
+ }
+
+
+ if (a->is_del) /* delete the tunnel */
+ {
+ hash_pair_t *hp;
+
+ /* Delete existing tunnel */
+ t = pool_elt_at_index (sm->tunnels, p[0]);
+
+ ip6_delete_route_no_next_hop (&t->key.dst, t->dst_mask_width,
+ a->rx_table_id);
+ vec_free (t->rewrite);
+ /* Remove tunnel from any policy if associated */
+ if (t->policy_index != ~0)
+ {
+ pt = pool_elt_at_index (sm->policies, t->policy_index);
+ for (i = 0; i < vec_len (pt->tunnel_indices); i++)
+ {
+ if (pt->tunnel_indices[i] == t - sm->tunnels)
+ {
+ vec_delete (pt->tunnel_indices, 1, i);
+ goto found;
+ }
+ }
+ clib_warning ("Tunnel index %d not found in policy_index %d",
+ t - sm->tunnels, pt - sm->policies);
+ found:
+ /* If this is last tunnel in the policy, clean up the policy too */
+ if (vec_len (pt->tunnel_indices) == 0)
+ {
+ hash_unset_mem (sm->policy_index_by_policy_name, pt->name);
+ vec_free (pt->name);
+ pool_put (sm->policies, pt);
+ }
+ }
+
+ /* Clean up the tunnel by name */
+ if (t->name)
+ {
+ hash_unset_mem (sm->tunnel_index_by_name, t->name);
+ vec_free (t->name);
+ }
+ pool_put (sm->tunnels, t);
+ hp = hash_get_pair (sm->tunnel_index_by_key, &key);
+ key_copy = (void *) (hp->key);
+ hash_unset_mem (sm->tunnel_index_by_key, &key);
+ vec_free (key_copy);
+ return 0;
+ }
+
+ /* create a new tunnel */
+ pool_get (sm->tunnels, t);
+ memset (t, 0, sizeof (*t));
+ t->policy_index = ~0;
+
+ clib_memcpy (&t->key, &key, sizeof (t->key));
+ t->dst_mask_width = a->dst_mask_width;
+ t->rx_fib_index = rx_fib_index;
+ t->tx_fib_index = tx_fib_index;
+
+ if (!vec_len (a->segments))
+ /* there must be at least one segment... */
+ return -4;
+
+ /* The first specified hop goes right into the dst address */
+ clib_memcpy (&t->first_hop, &a->segments[0], sizeof (ip6_address_t));
+
+ /*
+ * Create the sr header rewrite string
+ * The list of segments needs an extra slot for the ultimate destination
+ * which is taken from the packet we add the SRH to.
+ */
+ header_length = sizeof (*h) +
+ sizeof (ip6_address_t) * (vec_len (a->segments) + 1 + vec_len (a->tags));
+
+ if (a->shared_secret)
+ {
+ /* Allocate a new key slot if we don't find the secret key */
+ hmac_key_index_u32 = 0;
+ (void) find_or_add_shared_secret (sm, a->shared_secret,
+ &hmac_key_index_u32);
+
+ /* Hey Vinz Clortho: Gozzer is pissed.. you're out of keys! */
+ if (hmac_key_index_u32 >= 256)
+ return -5;
+ hmac_key_index = hmac_key_index_u32;
+ header_length += SHA256_DIGEST_LENGTH;
+ }
+
+ vec_validate (t->rewrite, header_length - 1);
+
+ h = (ip6_sr_header_t *) t->rewrite;
+
+ h->protocol = 0xFF; /* we don't know yet */
+
+ h->length = (header_length / 8) - 1;
+ h->type = ROUTING_HEADER_TYPE_SR;
+
+ /* first_segment and segments_left need to have the index of the last
+ * element in the list; a->segments has one element less than ends up
+ * in the header (it does not have the DA in it), so vec_len(a->segments)
+ * is the value we want.
+ */
+ h->first_segment = h->segments_left = vec_len (a->segments);
+
+ if (a->shared_secret)
+ h->hmac_key = hmac_key_index & 0xFF;
+
+ h->flags = a->flags_net_byte_order;
+
+ /* Paint on the segment list, in reverse.
+ * This is offset by one to leave room at the start for the ultimate
+ * destination.
+ */
+ addrp = h->segments + vec_len (a->segments);
+
+ vec_foreach (this_address, a->segments)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+
+ /*
+ * Since the ultimate destination address is not yet known, set that slot
+ * to a value we will instantly recognize as bogus.
+ */
+ memset (h->segments, 0xfe, sizeof (ip6_address_t));
+
+ /* Paint on the tag list, not reversed */
+ addrp = h->segments + vec_len (a->segments);
+
+ vec_foreach (this_address, a->tags)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp++;
+ }
+
+ key_copy = vec_new (ip6_sr_tunnel_key_t, 1);
+ clib_memcpy (key_copy, &key, sizeof (ip6_sr_tunnel_key_t));
+ hash_set_mem (sm->tunnel_index_by_key, key_copy, t - sm->tunnels);
+
+ /*
+ * Stick the tunnel index into the rewrite header.
+ *
+ * Unfortunately, inserting an SR header according to the various
+ * RFC's requires parsing through the ip6 header, perhaps consing a
+ * buffer onto the head of the vlib_buffer_t, etc. We don't use the
+ * normal reverse bcopy rewrite code.
+ *
+ * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
+ * at some point...
+ */
+ dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = a->dst_mask_width,
+ .fp_addr = {
+ .ip6 = *a->dst_address,
+ }
+ };
+ fib_table_entry_special_dpo_add (rx_fib_index,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ if (a->policy_name)
+ {
+ p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name);
+ if (p)
+ {
+ pt = pool_elt_at_index (sm->policies, p[0]);
+ }
+ else /* no policy, lets create one */
+ {
+ pool_get (sm->policies, pt);
+ memset (pt, 0, sizeof (*pt));
+ pt->name = format (0, "%s%c", a->policy_name, 0);
+ hash_set_mem (sm->policy_index_by_policy_name, pt->name,
+ pt - sm->policies);
+ p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name);
+ }
+ vec_add1 (pt->tunnel_indices, t - sm->tunnels);
+ if (p == 0)
+ clib_warning ("p is NULL!");
+ t->policy_index = p ? p[0] : ~0; /* equiv. to (pt - sm->policies) */
+ }
+
+ if (a->name)
+ {
+ t->name = format (0, "%s%c", a->name, 0);
+ hash_set_mem (sm->tunnel_index_by_name, t->name, t - sm->tunnels);
+ }
+
+ return 0;
+}
+
+/**
+ * @brief no-op lock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+static void
+sr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/**
+ * @brief no-op unlock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+static void
+sr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+ /**
+  * @brief Format function for an SR DPO: prints the tunnel index.
+  *
+  * @param s u8 * vector to append to
+  * @param args va_list * carrying an index_t (tunnel index) followed by a
+  *        u32 (indent), which is consumed but not used
+  *
+  * @return u8 * the extended vector
+  */
+ u8 *
+ format_sr_dpo (u8 * s, va_list * args)
+ {
+   index_t tunnel_index;
+
+   tunnel_index = va_arg (*args, index_t);
+   /* The indent argument still has to be pulled off the list */
+   (void) va_arg (*args, u32);
+
+   s = format (s, "SR: tunnel:[%d]", tunnel_index);
+   return s;
+ }
+
+ const static dpo_vft_t sr_vft = { /* DPO callback table for SR: no-op lock/unlock plus a formatter */
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_dpo,
+ };
+
+ const static char *const sr_ip6_nodes[] = { /* NULL-terminated list of graph node names for IPv6 */
+ "sr-rewrite",
+ NULL,
+ };
+
+ const static char *const *const sr_nodes[DPO_PROTO_NUM] = { /* node lists per DPO protocol; only the IP6 slot is set */
+ [DPO_PROTO_IP6] = sr_ip6_nodes,
+ };
+
+/**
+ * @brief CLI parser for Add or Delete a Segment Routing tunnel.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+sr_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+ ip6_address_t src_address;
+ int src_address_set = 0;
+ ip6_address_t dst_address;
+ u32 dst_mask_width;
+ int dst_address_set = 0;
+ u16 flags = 0;
+ u8 *shared_secret = 0;
+ u8 *name = 0;
+ u8 *policy_name = 0;
+ u32 rx_table_id = 0;
+ u32 tx_table_id = 0;
+ ip6_address_t *segments = 0;
+ ip6_address_t *this_seg;
+ ip6_address_t *tags = 0;
+ ip6_address_t *this_tag;
+ ip6_sr_add_del_tunnel_args_t _a, *a = &_a;
+ ip6_address_t next_address, tag;
+ int pl_index;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "rx-fib-id %d", &rx_table_id))
+ ;
+ else if (unformat (input, "tx-fib-id %d", &tx_table_id))
+ ;
+ else if (unformat (input, "src %U", unformat_ip6_address, &src_address))
+ src_address_set = 1;
+ else if (unformat (input, "name %s", &name))
+ ;
+ else if (unformat (input, "policy %s", &policy_name))
+ ;
+ else if (unformat (input, "dst %U/%d",
+ unformat_ip6_address, &dst_address, &dst_mask_width))
+ dst_address_set = 1;
+ else if (unformat (input, "next %U", unformat_ip6_address,
+ &next_address))
+ {
+ vec_add2 (segments, this_seg, 1);
+ clib_memcpy (this_seg->as_u8, next_address.as_u8,
+ sizeof (*this_seg));
+ }
+ else if (unformat (input, "tag %U", unformat_ip6_address, &tag))
+ {
+ vec_add2 (tags, this_tag, 1);
+ clib_memcpy (this_tag->as_u8, tag.as_u8, sizeof (*this_tag));
+ }
+ else if (unformat (input, "clean"))
+ flags |= IP6_SR_HEADER_FLAG_CLEANUP;
+ else if (unformat (input, "protected"))
+ flags |= IP6_SR_HEADER_FLAG_PROTECTED;
+ else if (unformat (input, "key %s", &shared_secret))
+ /* Do not include the trailing NULL byte. Guaranteed interop issue */
+ _vec_len (shared_secret) -= 1;
+ else if (unformat (input, "InPE %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ {
+ pl_index_range_error:
+ return clib_error_return
+ (0, "Policy List Element Index %d out of range (1-4)",
+ pl_index);
+
+ }
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else if (unformat (input, "EgPE %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ goto pl_index_range_error;
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else if (unformat (input, "OrgSrc %d", &pl_index))
+ {
+ if (pl_index <= 0 || pl_index > 4)
+ goto pl_index_range_error;
+ flags |= IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR
+ << ip6_sr_policy_list_shift_from_index (pl_index);
+ }
+ else
+ break;
+ }
+
+ if (!src_address_set)
+ return clib_error_return (0, "src address required");
+
+ if (!dst_address_set)
+ return clib_error_return (0, "dst address required");
+
+ if (!segments)
+ return clib_error_return (0, "at least one sr segment required");
+
+ memset (a, 0, sizeof (*a));
+ a->src_address = &src_address;
+ a->dst_address = &dst_address;
+ a->dst_mask_width = dst_mask_width;
+ a->segments = segments;
+ a->tags = tags;
+ a->flags_net_byte_order = clib_host_to_net_u16 (flags);
+ a->is_del = is_del;
+ a->rx_table_id = rx_table_id;
+ a->tx_table_id = tx_table_id;
+ a->shared_secret = shared_secret;
+
+ if (vec_len (name))
+ a->name = name;
+ else
+ a->name = 0;
+
+ if (vec_len (policy_name))
+ a->policy_name = policy_name;
+ else
+ a->policy_name = 0;
+
+ rv = ip6_sr_add_del_tunnel (a);
+
+ vec_free (segments);
+ vec_free (tags);
+ vec_free (shared_secret);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case -1:
+ return clib_error_return (0, "SR tunnel src %U dst %U already exists",
+ format_ip6_address, &src_address,
+ format_ip6_address, &dst_address);
+
+ case -2:
+ return clib_error_return (0, "SR tunnel src %U dst %U does not exist",
+ format_ip6_address, &src_address,
+ format_ip6_address, &dst_address);
+
+ case -3:
+ return clib_error_return (0, "FIB table %d does not exist",
+ rx_table_id);
+
+ case -4:
+ return clib_error_return (0, "At least one segment is required");
+
+ default:
+ return clib_error_return (0, "BUG: ip6_sr_add_del_tunnel returns %d",
+ rv);
+ }
+
+ return 0;
+}
+
+ /* *INDENT-OFF* */
+ /* "sr tunnel" CLI; the help text lists the keywords sr_add_del_tunnel_command_fn parses */
+ VLIB_CLI_COMMAND (sr_tunnel_command, static) = {
+   .path = "sr tunnel",
+   .short_help =
+     "sr tunnel [del] [name <name>] src <addr> dst <addr>/<prefix-len> "
+     "next <addr> [next <addr>]* [tag <addr>]* [clean] [protected] "
+     "[key <secret>] [policy <policy_name>] "
+     "[InPE <1-4>] [EgPE <1-4>] [OrgSrc <1-4>] "
+     "[rx-fib-id <fib_id>] [tx-fib-id <fib_id>]",
+   .function = sr_add_del_tunnel_command_fn,
+ };
+ /* *INDENT-ON* */
+
+/**
+ * @brief Display Segment Routing tunnel
+ *
+ * @param vm vlib_main_t *
+ * @param t ip6_sr_tunnel_t *
+ *
+ */
+void
+ip6_sr_tunnel_display (vlib_main_t * vm, ip6_sr_tunnel_t * t)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_fib_t *rx_fib, *tx_fib;
+ ip6_sr_policy_t *pt;
+
+ rx_fib = ip6_fib_get (t->rx_fib_index);
+ tx_fib = ip6_fib_get (t->tx_fib_index);
+
+ if (t->name)
+ vlib_cli_output (vm, "sr tunnel name: %s", (char *) t->name);
+
+ vlib_cli_output (vm, "src %U dst %U first hop %U",
+ format_ip6_address, &t->key.src,
+ format_ip6_address, &t->key.dst,
+ format_ip6_address, &t->first_hop);
+ vlib_cli_output (vm, " rx-fib-id %d tx-fib-id %d",
+ rx_fib->table_id, tx_fib->table_id);
+ vlib_cli_output (vm, " sr: %U", format_ip6_sr_header, t->rewrite,
+ 0 /* print_hmac */ );
+
+ if (t->policy_index != ~0)
+ {
+ pt = pool_elt_at_index (sm->policies, t->policy_index);
+ vlib_cli_output (vm, "sr policy: %s", (char *) pt->name);
+ }
+ vlib_cli_output (vm, "-------");
+
+ return;
+}
+
+/**
+ * @brief CLI Parser for Display Segment Routing tunnel
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+show_sr_tunnel_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ static ip6_sr_tunnel_t **tunnels;
+ ip6_sr_tunnel_t *t;
+ ip6_sr_main_t *sm = &sr_main;
+ int i;
+ uword *p = 0;
+ u8 *name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "name %s", &name))
+ {
+ p = hash_get_mem (sm->tunnel_index_by_name, name);
+ if (!p)
+ vlib_cli_output (vm, "No SR tunnel with name: %s. Showing all.",
+ name);
+ }
+ else
+ break;
+ }
+
+ vec_reset_length (tunnels);
+
+ if (!p) /* Either name parm not passed or no tunnel with that name found, show all */
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (t, sm->tunnels,
+ ({
+ vec_add1 (tunnels, t);
+ }));
+ /* *INDENT-ON* */
+ }
+ else /* Just show the one tunnel by name */
+ vec_add1 (tunnels, &sm->tunnels[p[0]]);
+
+ if (vec_len (tunnels) == 0)
+ vlib_cli_output (vm, "No SR tunnels configured");
+
+ for (i = 0; i < vec_len (tunnels); i++)
+ {
+ t = tunnels[i];
+ ip6_sr_tunnel_display (vm, t);
+ }
+
+ return 0;
+}
+
+ /* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (show_sr_tunnel_command, static) = { /* binds "show sr tunnel" to show_sr_tunnel_fn */
+ .path = "show sr tunnel",
+ .short_help = "show sr tunnel [name <sr-tunnel-name>]",
+ .function = show_sr_tunnel_fn,
+ };
+ /* *INDENT-ON* */
+
+/**
+ * @brief Add or Delete a Segment Routing policy
+ *
+ * @param a ip6_sr_add_del_policy_args_t *
+ *
+ * @return retval int
+ */
+int
+ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ uword *p;
+ ip6_sr_tunnel_t *t = 0;
+ ip6_sr_policy_t *policy;
+ u32 *tunnel_indices = 0;
+ int i;
+
+
+
+ if (a->is_del)
+ {
+ p = hash_get_mem (sm->policy_index_by_policy_name, a->name);
+ if (!p)
+ return -6; /* policy name not found */
+
+ policy = pool_elt_at_index (sm->policies, p[0]);
+
+ vec_foreach_index (i, policy->tunnel_indices)
+ {
+ t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]);
+ t->policy_index = ~0;
+ }
+ hash_unset_mem (sm->policy_index_by_policy_name, a->name);
+ pool_put (sm->policies, policy);
+ return 0;
+ }
+
+
+ if (!vec_len (a->tunnel_names))
+ return -3; /*tunnel name is required case */
+
+ vec_reset_length (tunnel_indices);
+ /* Check tunnel names, add tunnel_index to policy */
+ for (i = 0; i < vec_len (a->tunnel_names); i++)
+ {
+ p = hash_get_mem (sm->tunnel_index_by_name, a->tunnel_names[i]);
+ if (!p)
+ return -4; /* tunnel name not found case */
+
+ t = pool_elt_at_index (sm->tunnels, p[0]);
+ /*
+ No need to check t==0. -3 condition above ensures name
+ */
+ if (t->policy_index != ~0)
+ return -5; /* tunnel name already associated with a policy */
+
+ /* Add to tunnel indicies */
+ vec_add1 (tunnel_indices, p[0]);
+ }
+
+ /* Add policy to ip6_sr_main_t */
+ pool_get (sm->policies, policy);
+ policy->name = a->name;
+ policy->tunnel_indices = tunnel_indices;
+ hash_set_mem (sm->policy_index_by_policy_name, policy->name,
+ policy - sm->policies);
+
+ /* Yes, this could be construed as overkill but the last thing you should do is set
+ the policy_index on the tunnel after everything is set in ip6_sr_main_t.
+ If this is deemed overly cautious, could set this in the vec_len(tunnel_names) loop.
+ */
+ for (i = 0; i < vec_len (policy->tunnel_indices); i++)
+ {
+ t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[i]);
+ t->policy_index = policy - sm->policies;
+ }
+
+ return 0;
+}
+
+/**
+ * @brief CLI Parser for Add or Delete a Segment Routing policy
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+sr_add_del_policy_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+ u8 **tunnel_names = 0;
+ u8 *tunnel_name = 0;
+ u8 *name = 0;
+ ip6_sr_add_del_policy_args_t _a, *a = &_a;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (unformat (input, "name %s", &name))
+ ;
+ else if (unformat (input, "tunnel %s", &tunnel_name))
+ {
+ if (tunnel_name)
+ {
+ vec_add1 (tunnel_names, tunnel_name);
+ tunnel_name = 0;
+ }
+ }
+ else
+ break;
+ }
+
+ if (!name)
+ return clib_error_return (0, "name of SR policy required");
+
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_del = is_del;
+ a->name = name;
+ a->tunnel_names = tunnel_names;
+
+ rv = ip6_sr_add_del_policy (a);
+
+ vec_free (tunnel_names);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case -3:
+ return clib_error_return (0,
+ "tunnel name to associate to SR policy is required");
+
+ case -4:
+ return clib_error_return (0, "tunnel name not found");
+
+ case -5:
+ return clib_error_return (0, "tunnel already associated with policy");
+
+ case -6:
+ return clib_error_return (0, "policy name %s not found", name);
+
+ case -7:
+ return clib_error_return (0, "TODO: deleting policy name %s", name);
+
+ default:
+ return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d",
+ rv);
+
+ }
+ return 0;
+}
+
+ /* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (sr_policy_command, static) = { /* binds "sr policy" to sr_add_del_policy_command_fn */
+ .path = "sr policy",
+ .short_help =
+ "sr policy [del] name <policy-name> tunnel <sr-tunnel-name> [tunnel <sr-tunnel-name>]*",
+ .function = sr_add_del_policy_command_fn,
+ };
+ /* *INDENT-ON* */
+
+/**
+ * @brief CLI Parser for Displaying Segment Routing policy
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+show_sr_policy_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ static ip6_sr_policy_t **policies;
+ ip6_sr_policy_t *policy;
+ ip6_sr_tunnel_t *t;
+ ip6_sr_main_t *sm = &sr_main;
+ int i, j;
+ uword *p = 0;
+ u8 *name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "name %s", &name))
+ {
+ p = hash_get_mem (sm->policy_index_by_policy_name, name);
+ if (!p)
+ vlib_cli_output (vm,
+ "policy with name %s not found. Showing all.",
+ name);
+ }
+ else
+ break;
+ }
+
+ vec_reset_length (policies);
+
+ if (!p) /* Either name parm not passed or no policy with that name found, show all */
+ {
+ /* *INDENT-OFF* */
+ pool_foreach (policy, sm->policies,
+ ({
+ vec_add1 (policies, policy);
+ }));
+ /* *INDENT-ON* */
+ }
+ else /* Just show the one policy by name and a summary of tunnel names */
+ {
+ policy = pool_elt_at_index (sm->policies, p[0]);
+ vec_add1 (policies, policy);
+ }
+
+ if (vec_len (policies) == 0)
+ vlib_cli_output (vm, "No SR policies configured");
+
+ for (i = 0; i < vec_len (policies); i++)
+ {
+ policy = policies[i];
+
+ if (policy->name)
+ vlib_cli_output (vm, "SR policy name: %s", (char *) policy->name);
+ for (j = 0; j < vec_len (policy->tunnel_indices); j++)
+ {
+ t = pool_elt_at_index (sm->tunnels, policy->tunnel_indices[j]);
+ ip6_sr_tunnel_display (vm, t);
+ }
+ }
+
+ return 0;
+
+}
+
+ /* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (show_sr_policy_command, static) = { /* binds "show sr policy" to show_sr_policy_fn */
+ .path = "show sr policy",
+ .short_help = "show sr policy [name <sr-policy-name>]",
+ .function = show_sr_policy_fn,
+ };
+ /* *INDENT-ON* */
+
+/**
+ * @brief Add or Delete a mapping of IP6 multicast address
+ * to Segment Routing policy.
+ *
+ * @param a ip6_sr_add_del_multicastmap_args_t *
+ *
+ * @return retval int
+ */
+int
+ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a)
+{
+ uword *p;
+ ip6_sr_tunnel_t *t;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *pt;
+
+ if (a->is_del)
+ {
+ /* clean up the adjacency */
+ p =
+ hash_get_mem (sm->policy_index_by_multicast_address,
+ a->multicast_address);
+ }
+ else
+ {
+ /* Get our policy by policy_name */
+ p = hash_get_mem (sm->policy_index_by_policy_name, a->policy_name);
+
+ }
+ if (!p)
+ return -1;
+
+ pt = pool_elt_at_index (sm->policies, p[0]);
+
+ /*
+ Get the first tunnel associated with policy populate the fib adjacency.
+ From there, since this tunnel will have it's policy_index != ~0 it will
+ be the trigger in the dual_loop to pull up the policy and make a copy-rewrite
+ for each tunnel in the policy
+ */
+
+ t = pool_elt_at_index (sm->tunnels, pt->tunnel_indices[0]);
+
+ /*
+ * Stick the tunnel index into the rewrite header.
+ *
+ * Unfortunately, inserting an SR header according to the various
+ * RFC's requires parsing through the ip6 header, perhaps consing a
+ * buffer onto the head of the vlib_buffer_t, etc. We don't use the
+ * normal reverse bcopy rewrite code.
+ *
+ * We don't handle ugly RFC-related cases yet, but I'm sure PL will complain
+ * at some point...
+ */
+ dpo_id_t dpo = DPO_INVALID;
+
+ dpo_set (&dpo, sr_dpo_type, DPO_PROTO_IP6, t - sm->tunnels);
+
+ /* Construct a FIB entry for multicast using the rx/tx fib from the first tunnel */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *a->multicast_address,
+ }
+ };
+ fib_table_entry_special_dpo_add (t->rx_fib_index,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ u8 *mcast_copy = 0;
+ mcast_copy = vec_new (ip6_address_t, 1);
+ memcpy (mcast_copy, a->multicast_address, sizeof (ip6_address_t));
+
+ if (a->is_del)
+ {
+ hash_unset_mem (sm->policy_index_by_multicast_address, mcast_copy);
+ vec_free (mcast_copy);
+ return 0;
+ }
+ /* else */
+
+ hash_set_mem (sm->policy_index_by_multicast_address, mcast_copy,
+ pt - sm->policies);
+
+
+ return 0;
+}
+
+/**
+ * @brief CLI Parser for Adding or Delete a mapping of IP6 multicast address
+ * to Segment Routing policy.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+sr_add_del_multicast_map_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+ ip6_address_t multicast_address;
+ u8 *policy_name = 0;
+ int multicast_address_set = 0;
+ ip6_sr_add_del_multicastmap_args_t _a, *a = &_a;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else
+ if (unformat
+ (input, "address %U", unformat_ip6_address, &multicast_address))
+ multicast_address_set = 1;
+ else if (unformat (input, "sr-policy %s", &policy_name))
+ ;
+ else
+ break;
+ }
+
+ if (!is_del && !policy_name)
+ return clib_error_return (0, "name of sr policy required");
+
+ if (!multicast_address_set)
+ return clib_error_return (0, "multicast address required");
+
+ memset (a, 0, sizeof (*a));
+
+ a->is_del = is_del;
+ a->multicast_address = &multicast_address;
+ a->policy_name = policy_name;
+
+#if DPDK > 0 /*Cannot call replicate or configure multicast map yet without DPDK */
+ rv = ip6_sr_add_del_multicastmap (a);
+#else
+ return clib_error_return (0,
+ "cannot use multicast replicate spray case without DPDK installed");
+#endif /* DPDK */
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case -1:
+ return clib_error_return (0, "no policy with name: %s", policy_name);
+
+ case -2:
+ return clib_error_return (0, "multicast map someting ");
+
+ case -3:
+ return clib_error_return (0,
+ "tunnel name to associate to SR policy is required");
+
+ case -7:
+ return clib_error_return (0, "TODO: deleting policy name %s",
+ policy_name);
+
+ default:
+ return clib_error_return (0, "BUG: ip6_sr_add_del_policy returns %d",
+ rv);
+
+ }
+ return 0;
+
+}
+
+
+ /* *INDENT-OFF* */
+ VLIB_CLI_COMMAND (sr_multicast_map_command, static) = { /* binds "sr multicast-map" to sr_add_del_multicast_map_command_fn */
+ .path = "sr multicast-map",
+ .short_help =
+ "sr multicast-map address <multicast-ip6-address> sr-policy <sr-policy-name> [del]",
+ .function = sr_add_del_multicast_map_command_fn,
+ };
+ /* *INDENT-ON* */
+
+/**
+ * @brief CLI Parser for Displaying a mapping of IP6 multicast address
+ * to Segment Routing policy.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+show_sr_multicast_map_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u8 *key = 0;
+ u32 value;
+ ip6_address_t multicast_address;
+ ip6_sr_policy_t *pt;
+
+ /* pull all entries from the hash table into vector for display */
+
+ /* *INDENT-OFF* */
+ hash_foreach_mem (key, value, sm->policy_index_by_multicast_address,
+ ({
+ if (!key)
+ vlib_cli_output (vm, "no multicast maps configured");
+ else
+ {
+ multicast_address = *((ip6_address_t *)key);
+ pt = pool_elt_at_index (sm->policies, value);
+ if (pt)
+ {
+ vlib_cli_output (vm, "address: %U policy: %s",
+ format_ip6_address, &multicast_address,
+ pt->name);
+ }
+ else
+ vlib_cli_output (vm, "BUG: policy not found for address: %U with policy index %d",
+ format_ip6_address, &multicast_address,
+ value);
+
+ }
+
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "show sr multicast-map" is handled by
+ * show_sr_multicast_map_fn. */
+VLIB_CLI_COMMAND (show_sr_multicast_map_command, static) = {
+ .path = "show sr multicast-map",
+ .short_help = "show sr multicast-map",
+ .function = show_sr_multicast_map_fn,
+};
+/* *INDENT-ON* */
+
+
+/* X-macro list of statically registered next nodes for sr-fix-dst-addr:
+ * _(enum suffix, graph node name). */
+#define foreach_sr_fix_dst_addr_next \
+_(DROP, "error-drop")
+
+/**
+ * @brief Enum of the statically registered next-nodes for the SR fix
+ * destination address node, generated from foreach_sr_fix_dst_addr_next.
+ */
+typedef enum
+{
+#define _(s,n) SR_FIX_DST_ADDR_NEXT_##s,
+ foreach_sr_fix_dst_addr_next
+#undef _
+ SR_FIX_DST_ADDR_N_NEXT,
+} sr_fix_dst_addr_next_t;
+
+/**
+ * @brief Error strings for SR Fix Destination rewrite
+ *
+ * Built from sr_fix_dst_error.def; entry order matches sr_fix_dst_error_t
+ * because both are generated from the same file.
+ */
+static char *sr_fix_dst_error_strings[] = {
+#define sr_fix_dst_error(n,s) s,
+#include "sr_fix_dst_error.def"
+#undef sr_fix_dst_error
+};
+
+/**
+ * @brief Enum of error codes for SR Fix Destination rewrite
+ *
+ * Built from sr_fix_dst_error.def; SR_FIX_DST_N_ERROR is the number of codes.
+ */
+typedef enum
+{
+#define sr_fix_dst_error(n,s) SR_FIX_DST_ERROR_##n,
+#include "sr_fix_dst_error.def"
+#undef sr_fix_dst_error
+ SR_FIX_DST_N_ERROR,
+} sr_fix_dst_error_t;
+
+/**
+ * @brief Information for fix address trace
+ *
+ * One record is filled in by sr_fix_dst_addr for each traced buffer and
+ * rendered by format_sr_fix_addr_trace.
+ */
+typedef struct
+{
+ /* ip6 source and destination addresses copied from the packet */
+ ip6_address_t src, dst;
+ /* next index chosen for the packet */
+ u32 next_index;
+ /* TX adjacency index of the buffer, or ~0 when none was recorded */
+ u32 adj_index;
+ /* raw bytes copied from the start of the SR header */
+ u8 sr[256];
+} sr_fix_addr_trace_t;
+
+/**
+ * @brief Render one sr-fix-dst-addr trace record
+ *
+ * @param s u8 * vector being appended to
+ * @param args va_list * carrying (vlib_main_t *, vlib_node_t *,
+ *             sr_fix_addr_trace_t *)
+ *
+ * @return s u8 * with the formatted record appended
+ */
+u8 *
+format_sr_fix_addr_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sr_fix_addr_trace_t *rec = va_arg (*args, sr_fix_addr_trace_t *);
+  ip_lookup_main_t *lookup = &ip6_main.lookup_main;
+  vnet_main_t *vnet = vnet_get_main ();
+  vnet_hw_interface_t *out_hw = 0;
+  int was_dropped = (rec->next_index == SR_FIX_DST_ADDR_NEXT_DROP);
+
+  /* Resolve the output interface only when an adjacency was recorded */
+  if (rec->adj_index != ~0)
+    {
+      ip_adjacency_t *rec_adj = ip_get_adjacency (lookup, rec->adj_index);
+
+      out_hw =
+        vnet_get_sup_hw_interface (vnet, rec_adj->rewrite_header.sw_if_index);
+    }
+
+  s = format (s, "SR-FIX_ADDR: next %s ip6 src %U dst %U\n",
+              was_dropped ? "drop" : "output",
+              format_ip6_address, &rec->src, format_ip6_address, &rec->dst);
+
+  /* Dropped packets carry no SR header / interface details */
+  if (was_dropped)
+    return s;
+
+  s = format (s, "%U\n", format_ip6_sr_header, rec->sr, 1 /* print_hmac */ );
+  s = format (s, " output via %s",
+              out_hw ? (char *) (out_hw->name) : "Invalid adj");
+  return s;
+}
+
+/**
+ * @brief Fix SR destination address
+ *
+ * For each buffer (expected to point at an Ethernet header followed by an
+ * ip6 header and an SR header) the ip6 destination address is overwritten
+ * with sr->segments[0], and the buffer is sent to the next index stored in
+ * the TX adjacency's mcast_group_index.
+ *
+ * Packets without a routing header, or with segments_left == 0, are tagged
+ * with the matching error and sent to error-drop.
+ *
+ * @node sr-fix-dst-addr
+ * @param vm vlib_main_t *
+ * @param node vlib_node_runtime_t *
+ * @param from_frame vlib_frame_t *
+ *
+ * @return from_frame->n_vectors uword
+ */
+static uword
+sr_fix_dst_addr (vlib_main_t * vm,
+                 vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Single-buffer loop only; no dual-loop is implemented for this node */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          ip6_header_t *ip0;
+          ip_adjacency_t *adj0;
+          ip6_sr_header_t *sr0;
+          u32 next0 = SR_FIX_DST_ADDR_NEXT_DROP;
+          ip6_address_t *new_dst0;
+          ethernet_header_t *eh0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+
+          /* The output next index was stashed in mcast_group_index */
+          adj0 =
+            ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+          next0 = adj0->mcast_group_index;
+
+          /* We should be pointing at an Ethernet header... */
+          eh0 = vlib_buffer_get_current (b0);
+          ip0 = (ip6_header_t *) (eh0 + 1);
+          sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+          /* We'd better find an SR header... */
+          if (PREDICT_FALSE (ip0->protocol != IPPROTO_IPV6_ROUTE))
+            {
+              /* next0 already holds the output index: force the drop */
+              next0 = SR_FIX_DST_ADDR_NEXT_DROP;
+              b0->error = node->errors[SR_FIX_DST_ERROR_NO_SR_HEADER];
+              goto do_trace0;
+            }
+
+          /* Out of segments? Turf the packet */
+          if (PREDICT_FALSE (sr0->segments_left == 0))
+            {
+              next0 = SR_FIX_DST_ADDR_NEXT_DROP;
+              b0->error = node->errors[SR_FIX_DST_ERROR_NO_MORE_SEGMENTS];
+              goto do_trace0;
+            }
+
+          /*
+           * Rewrite the packet with the original dst address
+           * We assume that the last segment (in processing order) contains
+           * the original dst address. The list is reversed, so sr0->segments
+           * contains the original dst address.
+           */
+          new_dst0 = sr0->segments;
+          ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+          ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+        do_trace0:
+
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              sr_fix_addr_trace_t *t = vlib_add_trace (vm, node,
+                                                       b0, sizeof (*t));
+              t->next_index = next0;
+              t->adj_index = ~0;
+
+              /*
+               * The formatter prints src/dst for dropped packets too, so
+               * record them unconditionally.
+               */
+              clib_memcpy (t->src.as_u8, ip0->src_address.as_u8,
+                           sizeof (t->src.as_u8));
+              clib_memcpy (t->dst.as_u8, ip0->dst_address.as_u8,
+                           sizeof (t->dst.as_u8));
+
+              if (next0 != SR_FIX_DST_ADDR_NEXT_DROP)
+                {
+                  t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+                  clib_memcpy (t->sr, sr0, sizeof (t->sr));
+                }
+            }
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+
+/* *INDENT-OFF* */
+/* Graph node registration for "sr-fix-dst-addr". Only error-drop is listed
+ * statically; set_ip6_sr_rewrite_fn adds interface output nodes at runtime
+ * via vlib_node_add_next. */
+VLIB_REGISTER_NODE (sr_fix_dst_addr_node) = {
+ .function = sr_fix_dst_addr,
+ .name = "sr-fix-dst-addr",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_fix_addr_trace,
+ .format_buffer = format_ip6_sr_header_with_length,
+
+ .runtime_data_bytes = 0,
+
+ .n_errors = SR_FIX_DST_N_ERROR,
+ .error_strings = sr_fix_dst_error_strings,
+
+ .n_next_nodes = SR_FIX_DST_ADDR_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_FIX_DST_ADDR_NEXT_##s] = n,
+ foreach_sr_fix_dst_addr_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (sr_fix_dst_addr_node, sr_fix_dst_addr)
+/* *INDENT-ON* */
+
+/**
+ * @brief Initialise the SR subsystem
+ *
+ * Runs the ip init functions it depends on, creates the lookup hashes,
+ * registers sr-local as the handler for IPPROTO_IPV6_ROUTE, adds the SR
+ * nodes as next nodes of ip6-lookup / ip6-rewrite, and sets up the OpenSSL
+ * digest and HMAC context used for HMAC signatures.
+ *
+ * @param vm vlib_main_t *
+ *
+ * @return error clib_error_t * from a prerequisite init function, else 0
+ */
+static clib_error_t *
+sr_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ clib_error_t *error = 0;
+ vlib_node_t *ip6_lookup_node, *ip6_rewrite_node;
+
+ /* Make sure the ip init functions have run before we use their state */
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ sm->vlib_main = vm;
+ sm->vnet_main = vnet_get_main ();
+
+ /* Reserve key slot 0 with a non-NULL placeholder (never dereferenced here).
+ * NOTE(review): presumably so key id 0 is never handed out, since
+ * sr_validate_hmac treats key id 0 as "no signature" - confirm against
+ * find_or_add_shared_secret. */
+ vec_validate (sm->hmac_keys, 0);
+ sm->hmac_keys[0].shared_secret = (u8 *) 0xdeadbeef;
+
+ /* Tunnel lookup by (src, dst) key */
+ sm->tunnel_index_by_key =
+ hash_create_mem (0, sizeof (ip6_sr_tunnel_key_t), sizeof (uword));
+
+ sm->tunnel_index_by_name = hash_create_string (0, sizeof (uword));
+
+ sm->policy_index_by_policy_name = hash_create_string (0, sizeof (uword));
+
+ /* Policy lookup by ip6 multicast address */
+ sm->policy_index_by_multicast_address =
+ hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword));
+
+ sm->hmac_key_by_shared_secret = hash_create_string (0, sizeof (uword));
+
+ /* Packets whose ip6 protocol is the routing header go to sr-local */
+ ip6_register_protocol (IPPROTO_IPV6_ROUTE, sr_local_node.index);
+
+ ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup");
+ ASSERT (ip6_lookup_node);
+
+ ip6_rewrite_node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
+ ASSERT (ip6_rewrite_node);
+
+#if DPDK > 0 /* Cannot run replicate without DPDK */
+ /* Add a disposition to sr_replicate for the sr multicast replicate node */
+ sm->ip6_lookup_sr_replicate_index =
+ vlib_node_add_next (vm, ip6_lookup_node->index, sr_replicate_node.index);
+#endif /* DPDK */
+
+ /* Add a disposition to ip6_rewrite for the sr dst address hack node */
+ sm->ip6_rewrite_sr_next_index =
+ vlib_node_add_next (vm, ip6_rewrite_node->index,
+ sr_fix_dst_addr_node.index);
+
+ /* Make the digest table available, then look up SHA-1 for HMAC use.
+ * NOTE(review): the result of EVP_get_digestbyname is not checked here. */
+ OpenSSL_add_all_digests ();
+
+ sm->md = (void *) EVP_get_digestbyname ("sha1");
+ sm->hmac_ctx = clib_mem_alloc (sizeof (HMAC_CTX));
+
+ sr_dpo_type = dpo_register_new_type (&sr_vft, sr_nodes);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (sr_init);
+
+/**
+ * @brief Definition of next-nodes for SR local
+ *
+ * X-macro list: _(enum suffix, graph node name).
+ */
+#define foreach_sr_local_next \
+ _ (ERROR, "error-drop") \
+ _ (IP6_LOOKUP, "ip6-lookup")
+
+/**
+ * @brief Enum of next-nodes for SR local, generated from
+ * foreach_sr_local_next
+ */
+typedef enum
+{
+#define _(s,n) SR_LOCAL_NEXT_##s,
+ foreach_sr_local_next
+#undef _
+ SR_LOCAL_N_NEXT,
+} sr_local_next_t;
+
+/**
+ * @brief Struct for packet trace of SR local
+ *
+ * Filled in by sr_local for each traced buffer and rendered by
+ * format_sr_local_trace.
+ */
+typedef struct
+{
+ /* next index chosen for the packet */
+ u8 next_index;
+ /* non-zero when sr[] holds a copy of the SR header; zero once the header
+ * has been removed from the packet */
+ u8 sr_valid;
+ /* ip6 source and destination addresses copied from the packet */
+ ip6_address_t src, dst;
+ /* vlib_buffer_length_in_chain() of the buffer at trace time */
+ u16 length;
+ /* raw bytes copied from the start of the SR header */
+ u8 sr[256];
+} sr_local_trace_t;
+
+/**
+ * @brief Definition of SR local error-strings
+ *
+ * Built from sr_error.def; entry order matches sr_local_error_t because both
+ * are generated from the same file.
+ */
+static char *sr_local_error_strings[] = {
+#define sr_error(n,s) s,
+#include "sr_error.def"
+#undef sr_error
+};
+
+/**
+ * @brief Enum of SR local error codes
+ *
+ * Built from sr_error.def; SR_LOCAL_N_ERROR is the number of codes.
+ */
+typedef enum
+{
+#define sr_error(n,s) SR_LOCAL_ERROR_##n,
+#include "sr_error.def"
+#undef sr_error
+ SR_LOCAL_N_ERROR,
+} sr_local_error_t;
+
+/**
+ * @brief Render one sr-local trace record
+ *
+ * @param s u8 * vector being appended to
+ * @param args va_list * carrying (vlib_main_t *, vlib_node_t *,
+ *             sr_local_trace_t *)
+ *
+ * @return s u8 * with the formatted record appended
+ */
+u8 *
+format_sr_local_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sr_local_trace_t *rec = va_arg (*args, sr_local_trace_t *);
+
+  s = format (s, "SR-LOCAL: src %U dst %U len %u next_index %d",
+              format_ip6_address, &rec->src,
+              format_ip6_address, &rec->dst, rec->length, rec->next_index);
+
+  /* Without a captured header the SR header was removed from the packet */
+  if (!rec->sr_valid)
+    return format (s, "\n popped SR header");
+
+  return format (s, "\n %U", format_ip6_sr_header, rec->sr,
+                 1 /* print_hmac */ );
+}
+
+
+/* $$$$ fixme: smp, don't copy data, cache input, output (maybe) */
+/**
+ * @brief Validate the SR HMAC
+ *
+ * Rebuilds the signed byte string (ip6 source address, first_segment, the
+ * cleanup flag octet, the key id, then segments[0..first_segment]), computes
+ * its HMAC with the configured digest and the shared secret selected by
+ * sr->hmac_key, and compares it with the signature carried in the packet.
+ *
+ * Uses function-static scratch vectors, so it is not reentrant.
+ *
+ * @param sm ip6_sr_main_t *
+ * @param ip ip6_header_t *
+ * @param sr ip6_sr_header_t *
+ *
+ * @return retval int - 0 when the packet carries no signature (key id 0) or
+ *         the signature matches; non-zero when the key is unknown or
+ *         deleted, the HMAC could not be computed, or the signature differs
+ */
+static int
+sr_validate_hmac (ip6_sr_main_t * sm, ip6_header_t * ip, ip6_sr_header_t * sr)
+{
+  u32 key_index;
+  static u8 *keybuf;
+  u8 *copy_target;
+  int first_segment;
+  ip6_address_t *addrp;
+  int i;
+  ip6_sr_hmac_key_t *hmac_key;
+  static u8 *signature;
+  u32 sig_len = 0;
+  int hmac_ok = 1;
+
+  key_index = sr->hmac_key;
+
+  /* No signature? Pass... */
+  if (key_index == 0)
+    return 0;
+
+  /* We don't know about this key? Fail... */
+  if (key_index >= vec_len (sm->hmac_keys))
+    return 1;
+
+  hmac_key = sm->hmac_keys + key_index;
+
+  /*
+   * A deleted key leaves a slot with a NULL secret. Treating that as a
+   * zero-length key would let anyone forge a signature, so fail instead.
+   */
+  if (hmac_key->shared_secret == 0)
+    return 1;
+
+  vec_validate (signature, SHA256_DIGEST_LENGTH - 1);
+
+  vec_reset_length (keybuf);
+
+  /* pkt ip6 src address */
+  vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+  clib_memcpy (copy_target, ip->src_address.as_u8, sizeof (ip6_address_t));
+
+  /* last segment */
+  vec_add2 (keybuf, copy_target, 1);
+  copy_target[0] = sr->first_segment;
+
+  /* octet w/ bit 0 = "clean" flag */
+  vec_add2 (keybuf, copy_target, 1);
+  copy_target[0]
+    = (sr->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP))
+    ? 0x80 : 0;
+
+  /* hmac key id */
+  vec_add2 (keybuf, copy_target, 1);
+  copy_target[0] = sr->hmac_key;
+
+  first_segment = sr->first_segment;
+
+  addrp = sr->segments;
+
+  /* segments */
+  for (i = 0; i <= first_segment; i++)
+    {
+      vec_add2 (keybuf, copy_target, sizeof (ip6_address_t));
+      clib_memcpy (copy_target, addrp->as_u8, sizeof (ip6_address_t));
+      addrp++;
+    }
+
+  if (sm->is_debug)
+    clib_warning ("verify key index %d keybuf: %U", key_index,
+                  format_hex_bytes, keybuf, vec_len (keybuf));
+
+  /* shared secret */
+
+  /* SHA1 is shorter than SHA-256 */
+  memset (signature, 0, vec_len (signature));
+
+  HMAC_CTX_init (sm->hmac_ctx);
+  if (!HMAC_Init (sm->hmac_ctx, hmac_key->shared_secret,
+                  vec_len (hmac_key->shared_secret), sm->md))
+    {
+      clib_warning ("barf1");
+      hmac_ok = 0;
+    }
+  if (hmac_ok && !HMAC_Update (sm->hmac_ctx, keybuf, vec_len (keybuf)))
+    {
+      clib_warning ("barf2");
+      hmac_ok = 0;
+    }
+  if (hmac_ok && !HMAC_Final (sm->hmac_ctx, signature, &sig_len))
+    {
+      clib_warning ("barf3");
+      hmac_ok = 0;
+    }
+  HMAC_CTX_cleanup (sm->hmac_ctx);
+
+  /*
+   * Fail closed: without a computed digest the comparison below would be
+   * against an all-zero buffer.
+   */
+  if (!hmac_ok)
+    return 1;
+
+  if (sm->is_debug)
+    clib_warning ("computed signature len %d, value %U", sig_len,
+                  format_hex_bytes, signature, vec_len (signature));
+
+  /* Point at the SHA signature in the packet */
+  addrp++;
+  if (sm->is_debug)
+    clib_warning ("read signature %U", format_hex_bytes, addrp,
+                  SHA256_DIGEST_LENGTH);
+
+  /* Constant-time compare: the packet signature is attacker-controlled */
+  return CRYPTO_memcmp (signature, addrp, SHA256_DIGEST_LENGTH);
+}
+
+/** Signature of the optional application callback consulted by sr-local. */
+typedef u32 (sr_local_app_cb_t) (vlib_main_t *, vlib_node_runtime_t *,
+                                 vlib_buffer_t *, ip6_header_t *,
+                                 ip6_sr_header_t *);
+
+/**
+ * @brief Process one packet for the sr-local node
+ *
+ * Locates the SR header (directly after the ip6 header, or after a
+ * hop-by-hop options header), validates it, optionally consults the
+ * application callback, rewrites the ip6 destination address with the next
+ * segment and decrements segments_left. When the last segment is consumed
+ * and the cleanup flag is set, the SR header is removed by shifting the
+ * preceding headers right. Adds a trace record if the buffer is traced.
+ *
+ * @param vm vlib_main_t *
+ * @param node vlib_node_runtime_t *
+ * @param sm ip6_sr_main_t *
+ * @param sr_local_cb application callback, may be 0. A return value with
+ *        the top bit set is the bitwise complement of the desired next
+ *        index and suppresses the destination rewrite.
+ * @param b0 vlib_buffer_t * positioned at the ip6 header
+ *
+ * @return next index (sr_local_next_t) for the buffer
+ */
+static inline __attribute__ ((always_inline)) u32
+sr_local_process_one (vlib_main_t * vm, vlib_node_runtime_t * node,
+                      ip6_sr_main_t * sm, sr_local_app_cb_t * sr_local_cb,
+                      vlib_buffer_t * b0)
+{
+  ip6_header_t *ip0;
+  ip6_sr_header_t *sr0;
+  ip6_address_t *new_dst0;
+  u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP;
+
+  ip0 = vlib_buffer_get_current (b0);
+  sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+  /* A hop-by-hop options header may sit between the ip6 and SR headers */
+  if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+    {
+      ip6_hop_by_hop_ext_t *ext_hdr =
+        (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+      sr0 =
+        (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+                                                 ext_hdr);
+    }
+
+  if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR))
+    {
+      next0 = SR_LOCAL_NEXT_ERROR;
+      b0->error = node->errors[SR_LOCAL_ERROR_BAD_ROUTING_HEADER_TYPE];
+      goto do_trace;
+    }
+
+  /* Out of segments? Turf the packet */
+  if (PREDICT_FALSE (sr0->segments_left == 0))
+    {
+      next0 = SR_LOCAL_NEXT_ERROR;
+      b0->error = node->errors[SR_LOCAL_ERROR_NO_MORE_SEGMENTS];
+      goto do_trace;
+    }
+
+  if (PREDICT_FALSE (sm->validate_hmac))
+    {
+      if (sr_validate_hmac (sm, ip0, sr0))
+        {
+          next0 = SR_LOCAL_NEXT_ERROR;
+          b0->error = node->errors[SR_LOCAL_ERROR_HMAC_INVALID];
+          goto do_trace;
+        }
+    }
+
+  next0 = sr_local_cb ? sr_local_cb (vm, node, b0, ip0, sr0) : next0;
+
+  /*
+   * To suppress rewrite, return ~SR_LOCAL_NEXT_xxx
+   */
+  if (PREDICT_FALSE (next0 & 0x80000000))
+    {
+      next0 ^= 0xFFFFFFFF;
+      if (PREDICT_FALSE (next0 == SR_LOCAL_NEXT_ERROR))
+        b0->error = node->errors[SR_LOCAL_ERROR_APP_CALLBACK];
+    }
+  else
+    {
+      u32 segment_index0;
+
+      segment_index0 = sr0->segments_left - 1;
+
+      /* Rewrite the packet */
+      new_dst0 = (ip6_address_t *) (sr0->segments + segment_index0);
+      ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+      ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+      if (PREDICT_TRUE (sr0->segments_left > 0))
+        sr0->segments_left -= 1;
+    }
+
+  /* End of the path. Clean up the SR header, or not */
+  if (PREDICT_FALSE
+      (sr0->segments_left == 0 &&
+       (sr0->flags & clib_host_to_net_u16 (IP6_SR_HEADER_FLAG_CLEANUP))))
+    {
+      u64 *copy_dst0, *copy_src0;
+      u16 new_l0;
+      u32 copy_len_u64s0 = 0;
+      int i;
+
+      /*
+       * Copy the ip6 header right by the (real) length of the
+       * sr header.
+       */
+      if (PREDICT_FALSE
+          (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+        {
+          ip6_hop_by_hop_ext_t *ext_hdr =
+            (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+          copy_len_u64s0 =
+            (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+          ext_hdr->next_hdr = sr0->protocol;
+        }
+      else
+        {
+          ip0->protocol = sr0->protocol;
+        }
+      vlib_buffer_advance (b0, (sr0->length + 1) * 8);
+
+      new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+        (sr0->length + 1) * 8;
+      ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+      copy_src0 = (u64 *) ip0;
+      copy_dst0 = copy_src0 + (sr0->length + 1);
+
+      /* Highest word first: source and destination may overlap */
+      copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+      copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+      copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+      copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+      copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+      for (i = copy_len_u64s0 - 1; i >= 0; i--)
+        {
+          copy_dst0[i] = copy_src0[i];
+        }
+
+      /*
+       * The header now lives at copy_dst0; the old location may have been
+       * partly overwritten by the shift, so the trace must read the
+       * relocated header.
+       */
+      ip0 = (ip6_header_t *) copy_dst0;
+      sr0 = 0;
+    }
+
+do_trace:
+  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+    {
+      sr_local_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+      clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+                   sizeof (tr->src.as_u8));
+      clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+                   sizeof (tr->dst.as_u8));
+      tr->length = vlib_buffer_length_in_chain (vm, b0);
+      tr->next_index = next0;
+      tr->sr_valid = sr0 != 0;
+      if (tr->sr_valid)
+        clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+    }
+
+  return next0;
+}
+
+/**
+ * @brief SR local node
+ * @node sr-local
+ *
+ * Dual-loop / single-loop dispatcher; the per-packet work is done by
+ * sr_local_process_one.
+ *
+ * @param vm vlib_main_t *
+ * @param node vlib_node_runtime_t *
+ * @param from_frame vlib_frame_t *
+ *
+ * @return from_frame->n_vectors uword
+ */
+static uword
+sr_local (vlib_main_t * vm,
+          vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  ip6_sr_main_t *sm = &sr_main;
+  sr_local_app_cb_t *sr_local_cb;
+  sr_local_cb = sm->sr_local_cb;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          u32 bi0, bi1;
+          vlib_buffer_t *b0, *b1;
+          u32 next0, next1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+            CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+          }
+
+          bi0 = from[0];
+          bi1 = from[1];
+          to_next[0] = bi0;
+          to_next[1] = bi1;
+          from += 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+          n_left_from -= 2;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          next0 = sr_local_process_one (vm, node, sm, sr_local_cb, b0);
+
+          b1 = vlib_get_buffer (vm, bi1);
+          next1 = sr_local_process_one (vm, node, sm, sr_local_cb, b1);
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          vlib_buffer_t *b0;
+          u32 next0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          next0 = sr_local_process_one (vm, node, sm, sr_local_cb, b0);
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vlib_node_increment_counter (vm, sr_local_node.index,
+                               SR_LOCAL_ERROR_PKTS_PROCESSED,
+                               from_frame->n_vectors);
+  return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/* Graph node registration for "sr-local"; next nodes come from
+ * foreach_sr_local_next and error strings from sr_error.def. */
+VLIB_REGISTER_NODE (sr_local_node, static) = {
+ .function = sr_local,
+ .name = "sr-local",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_local_trace,
+
+ .runtime_data_bytes = 0,
+
+ .n_errors = SR_LOCAL_N_ERROR,
+ .error_strings = sr_local_error_strings,
+
+ .n_next_nodes = SR_LOCAL_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCAL_NEXT_##s] = n,
+ foreach_sr_local_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (sr_local_node, sr_local)
+/* *INDENT-ON* */
+
+/**
+ * @brief Return the SR main structure, running sr_init first
+ *
+ * The result of vlib_call_init_function is not inspected here.
+ *
+ * @param vm vlib_main_t *
+ *
+ * @return ip6_sr_main_t * pointer to the global sr_main
+ */
+ip6_sr_main_t *
+sr_get_main (vlib_main_t * vm)
+{
+ vlib_call_init_function (vm, sr_init);
+ ASSERT (sr_local_node.index);
+ return &sr_main;
+}
+
+/**
+ * @brief CLI handler for "set ip6 sr rewrite"
+ *
+ * Looks up the /128 FIB entry for the given ip6 address (optionally in the
+ * table named by "rx-table-id"), takes its SR-sourced rewrite adjacency and
+ * redirects it through the sr-fix-dst-addr node. The interface output node
+ * is added as a next of sr-fix-dst-addr and its next index is stored in the
+ * adjacency's mcast_group_index.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+set_ip6_sr_rewrite_fn (vlib_main_t * vm,
+                       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *srm = &sr_main;
+  vnet_main_t *vnet = vnet_get_main ();
+  fib_prefix_t host_pfx = {
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_len = 128,
+  };
+  u32 table_id = 0;
+  u32 table_index = 0;
+  fib_node_index_t entry;
+  u32 ai;
+  ip_adjacency_t *rw_adj;
+  vnet_hw_interface_t *out_hw;
+  u32 out_sw_if_index;
+
+  /* Mandatory argument: the ip6 address */
+  if (!unformat (input, "%U", unformat_ip6_address, &host_pfx.fp_addr.ip6))
+    return clib_error_return (0, "ip6 address missing in '%U'",
+                              format_unformat_error, input);
+
+  /* Optional argument: which table to look in (default index 0) */
+  if (unformat (input, "rx-table-id %d", &table_id))
+    {
+      table_index = fib_table_id_find_fib_index (FIB_PROTOCOL_IP6, table_id);
+      if (table_index == ~0)
+        return clib_error_return (0, "fib-id %d not found", table_id);
+    }
+
+  entry = fib_table_lookup_exact_match (table_index, &host_pfx);
+  if (FIB_NODE_INDEX_INVALID == entry)
+    return clib_error_return (0, "no match for %U",
+                              format_ip6_address, &host_pfx.fp_addr.ip6);
+
+  ai = fib_entry_get_adj_for_source (entry, FIB_SOURCE_SR);
+  if (ADJ_INDEX_INVALID == ai)
+    return clib_error_return (0, "%U not SR sourced",
+                              format_ip6_address, &host_pfx.fp_addr.ip6);
+
+  rw_adj = adj_get (ai);
+  if (rw_adj->lookup_next_index != IP_LOOKUP_NEXT_REWRITE)
+    return clib_error_return (0, "%U unresolved (not a rewrite adj)",
+                              format_ip6_address, &host_pfx.fp_addr.ip6);
+
+  /* Send rewritten packets to sr-fix-dst-addr */
+  rw_adj->rewrite_header.next_index = srm->ip6_rewrite_sr_next_index;
+
+  out_sw_if_index = rw_adj->rewrite_header.sw_if_index;
+  out_hw = vnet_get_sup_hw_interface (vnet, out_sw_if_index);
+  rw_adj->rewrite_header.node_index = sr_fix_dst_addr_node.index;
+
+  /* $$$$$ hack... steal the mcast group index */
+  rw_adj->mcast_group_index =
+    vlib_node_add_next (vm, sr_fix_dst_addr_node.index,
+                        out_hw->output_node_index);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "set ip6 sr rewrite" is handled by
+ * set_ip6_sr_rewrite_fn. The help text names the keyword the parser
+ * actually accepts ("rx-table-id"). */
+VLIB_CLI_COMMAND (set_ip6_sr_rewrite, static) = {
+  .path = "set ip6 sr rewrite",
+  .short_help = "set ip6 sr rewrite <ip6-address> [rx-table-id <id>]",
+  .function = set_ip6_sr_rewrite_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Register a callback routine to set next0 in sr_local
+ *
+ * The pointer is stored in sr_main.sr_local_cb. sr_local calls it, when
+ * non-NULL, as u32 cb (vlib_main_t *, vlib_node_runtime_t *,
+ * vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *) and uses the result
+ * as the next index.
+ *
+ * @param cb void * callback function pointer; 0 clears the callback
+ */
+void
+vnet_register_sr_app_callback (void *cb)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ sm->sr_local_cb = cb;
+}
+
+/**
+ * @brief Test routine for validation of HMAC
+ *
+ * CLI handler for "test sr hmac": "validate on" enables and "validate off"
+ * disables HMAC signature validation in sr-local. The keyword
+ * "chunk-offset off" that was previously (mistakenly) required to disable
+ * validation is still accepted.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+test_sr_hmac_validate_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *sm = &sr_main;
+
+  if (unformat (input, "validate on"))
+    sm->validate_hmac = 1;
+  else if (unformat (input, "validate off"))
+    sm->validate_hmac = 0;
+  else if (unformat (input, "chunk-offset off"))        /* legacy spelling */
+    sm->validate_hmac = 0;
+  else
+    return clib_error_return (0, "expected validate on|off in '%U'",
+                              format_unformat_error, input);
+
+  vlib_cli_output (vm, "hmac signature validation %s",
+                   sm->validate_hmac ? "on" : "off");
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "test sr hmac" is handled by test_sr_hmac_validate_fn. */
+VLIB_CLI_COMMAND (test_sr_hmac_validate, static) = {
+ .path = "test sr hmac",
+ .short_help = "test sr hmac validate [on|off]",
+ .function = test_sr_hmac_validate_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Add or Delete HMAC key
+ *
+ * Add: registers shared_secret under key_id through
+ * find_or_add_shared_secret. Delete: removes the key selected by key_id
+ * when key_id is non-zero, otherwise the key selected by shared_secret.
+ *
+ * @param sm ip6_sr_main_t *
+ * @param key_id u32
+ * @param shared_secret u8 *
+ * @param is_del u8
+ *
+ * @return retval i32 - 0 on success; -2 when adding to a key id that is
+ *         already in use; -3 when deleting a key id that is out of range or
+ *         already deleted, or when neither key id nor secret is given
+ */
+// $$$ fixme shouldn't return i32
+i32
+sr_hmac_add_del_key (ip6_sr_main_t * sm, u32 key_id, u8 * shared_secret,
+                     u8 is_del)
+{
+  u32 index;
+  ip6_sr_hmac_key_t *key;
+
+  if (is_del == 0)
+    {
+      /* Specific key in use? Fail. */
+      if (key_id && vec_len (sm->hmac_keys) > key_id
+          && sm->hmac_keys[key_id].shared_secret)
+        return -2;
+
+      index = key_id;
+      key = find_or_add_shared_secret (sm, shared_secret, &index);
+      ASSERT (index == key_id);
+      return 0;
+    }
+
+  /* delete */
+
+  if (key_id)                   /* delete by key ID */
+    {
+      if (vec_len (sm->hmac_keys) <= key_id)
+        return -3;
+
+      key = sm->hmac_keys + key_id;
+
+      /*
+       * Slot already empty (never set, or deleted before): there is nothing
+       * to remove, and a NULL key must not reach the string-keyed hash.
+       */
+      if (key->shared_secret == 0)
+        return -3;
+
+      hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret);
+      vec_free (key->shared_secret);
+      return 0;
+    }
+
+  /* delete by shared secret: one must have been supplied */
+  if (shared_secret == 0)
+    return -3;
+
+  index = 0;
+  key = find_or_add_shared_secret (sm, shared_secret, &index);
+  hash_unset_mem (sm->hmac_key_by_shared_secret, key->shared_secret);
+  vec_free (key->shared_secret);
+  return 0;
+}
+
+
+/**
+ * @brief CLI handler for "sr hmac": add or delete an HMAC key
+ *
+ * Accepts "del", "id <nn>" and "key <str>" in any order, then calls
+ * sr_hmac_add_del_key.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+sr_hmac_add_del_key_fn (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u8 *secret = 0;
+  u32 id = 0;
+  u8 have_id = 0;
+  u8 deleting = 0;
+  i32 result;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "del"))
+        {
+          deleting = 1;
+          continue;
+        }
+
+      if (unformat (input, "id %d", &id))
+        {
+          have_id = 1;
+          continue;
+        }
+
+      if (!unformat (input, "key %s", &secret))
+        break;
+
+      /* Do not include the trailing NULL byte. Guaranteed interop issue */
+      _vec_len (secret) -= 1;
+    }
+
+  if (!deleting && !secret)
+    return clib_error_return (0, "shared secret must be set to add a key");
+
+  if (!secret && !have_id)
+    return clib_error_return (0, "shared secret and key id both unset");
+
+  result = sr_hmac_add_del_key (&sr_main, id, secret, deleting);
+
+  vec_free (secret);
+
+  if (result != 0)
+    return clib_error_return (0, "sr_hmac_add_del_key returned %d", result);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "sr hmac" is handled by sr_hmac_add_del_key_fn. */
+VLIB_CLI_COMMAND (sr_hmac, static) = {
+ .path = "sr hmac",
+ .short_help = "sr hmac [del] id <nn> key <str>",
+ .function = sr_hmac_add_del_key_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "show sr hmac"
+ *
+ * Prints "[id]: secret" for every key slot from index 1 upwards that holds
+ * a shared secret; slot 0 is never listed.
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t * (always 0)
+ */
+static clib_error_t *
+show_sr_hmac_fn (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip6_sr_hmac_key_t *keys = sr_main.hmac_keys;
+  int slot = 1;
+
+  while (slot < vec_len (keys))
+    {
+      u8 *secret = keys[slot].shared_secret;
+
+      /* Empty slots are skipped silently */
+      if (secret)
+        vlib_cli_output (vm, "[%d]: %v", slot, secret);
+
+      slot++;
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "show sr hmac" is handled by show_sr_hmac_fn. */
+VLIB_CLI_COMMAND (show_sr_hmac, static) = {
+ .path = "show sr hmac",
+ .short_help = "show sr hmac",
+ .function = show_sr_hmac_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "test sr debug": switch SR debug output on or off
+ *
+ * @param vm vlib_main_t *
+ * @param input unformat_input_t *
+ * @param cmd vlib_cli_command_t *
+ *
+ * @return error clib_error_t *
+ */
+static clib_error_t *
+test_sr_debug_fn (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip6_sr_main_t *srm = &sr_main;
+  int want_on = unformat (input, "on") ? 1 : 0;
+
+  /* Anything other than "on" or "off" is rejected without changing state */
+  if (!want_on && !unformat (input, "off"))
+    return clib_error_return (0, "expected on|off in '%U'",
+                              format_unformat_error, input);
+
+  srm->is_debug = want_on;
+
+  vlib_cli_output (vm, "debug trace now %s", srm->is_debug ? "on" : "off");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* CLI registration: "test sr debug" is handled by test_sr_debug_fn. */
+VLIB_CLI_COMMAND (test_sr_debug, static) = {
+ .path = "test sr debug",
+ .short_help = "test sr debug on|off",
+ .function = test_sr_debug_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sr/sr.h b/src/vnet/sr/sr.h
new file mode 100644
index 00000000000..610b36996f3
--- /dev/null
+++ b/src/vnet/sr/sr.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Segment Routing header
+ *
+ * @note sr_replicate only works using DPDK today
+ */
+#ifndef included_vnet_sr_h
+#define included_vnet_sr_h
+
+#include <vnet/vnet.h>
+#include <vnet/sr/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <openssl/opensslconf.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+#include <openssl/hmac.h>
+
+/**
+ * @brief Segment Route tunnel key: the (src, dst) IPv6 address pair
+ */
+typedef struct
+{
+ ip6_address_t src; /**< source address */
+ ip6_address_t dst; /**< destination address */
+} ip6_sr_tunnel_key_t;
+
+/**
+ * @brief Segment Route tunnel
+ */
+typedef struct
+{
+ /** src, dst address */
+ ip6_sr_tunnel_key_t key;
+
+ /** Optional tunnel name */
+ u8 *name;
+
+ /** Mask width for FIB entry */
+ u32 dst_mask_width;
+
+ /** First hop, to save 1 elt in the segment list */
+ ip6_address_t first_hop;
+
+ /** RX FIB index */
+ u32 rx_fib_index;
+ /** TX FIB index */
+ u32 tx_fib_index;
+
+ /** Vector holding the actual ip6 SR header */
+ u8 *rewrite;
+
+ /** Indicates that this tunnel is part of a policy comprising
+ multiple tunnels. If == ~0 the tunnel is not part of a policy */
+ u32 policy_index;
+} ip6_sr_tunnel_t;
+
+/**
+ * @brief Shared secret for keyed-hash message authentication code (HMAC).
+ */
+typedef struct
+{
+ u8 *shared_secret; /**< secret bytes; 0 when the slot holds no key */
+} ip6_sr_hmac_key_t;
+
+/**
+ * @brief Args required for add/del tunnel.
+ *
+ * Bundled in one struct; otherwise we end up passing a LOT of parameters.
+ */
+typedef struct
+{
+ /** Key (header imposition case) */
+ ip6_address_t *src_address;
+ ip6_address_t *dst_address;
+ u32 dst_mask_width;
+ u32 rx_table_id;
+ u32 tx_table_id;
+
+ /** optional name argument - for referencing SR tunnel/policy by name */
+ u8 *name;
+
+ /** optional policy name */
+ u8 *policy_name;
+
+ /** segment list, when inserting an ip6 SR header */
+ ip6_address_t *segments;
+
+ /**
+ * "Tag" list, aka segments inserted at the end of the list,
+ * past last_seg
+ */
+ ip6_address_t *tags;
+
+ /** Shared secret => generate SHA-256 HMAC security fields */
+ u8 *shared_secret;
+
+ /** Flags, e.g. cleanup, policy-list flags (network byte order) */
+ u16 flags_net_byte_order;
+
+ /** Delete the tunnel? */
+ u8 is_del;
+} ip6_sr_add_del_tunnel_args_t;
+
+/**
+ * @brief Args for creating or deleting a policy.
+ *
+ * Typically used for multicast replication.
+ * i.e. a multicast address can be associated with a policy,
+ * then replicated across a number of unicast SR tunnels.
+ */
+typedef struct
+{
+ /** policy name */
+ u8 *name;
+
+ /** names of the tunnels that make up the policy */
+ u8 **tunnel_names;
+
+ /** Delete the policy? */
+ u8 is_del;
+} ip6_sr_add_del_policy_args_t;
+
+/**
+ * @brief Segment Routing policy.
+ *
+ * Typically used for multicast replication.
+ * i.e. a multicast address can be associated with a policy,
+ * then replicated across a number of unicast SR tunnels.
+ */
+typedef struct
+{
+ /** name of policy */
+ u8 *name;
+
+ /** vector of SR tunnel indices; sr-replicate emits one copy per entry */
+ u32 *tunnel_indices;
+
+} ip6_sr_policy_t;
+
+/**
+ * @brief Args for mapping of multicast address to policy name.
+ *
+ * Typically used for multicast replication.
+ * i.e. a multicast address can be associated with a policy,
+ * then replicated across a number of unicast SR tunnels.
+ */
+typedef struct
+{
+ /** multicast IP6 address */
+ ip6_address_t *multicast_address;
+
+ /** name of policy to map to */
+ u8 *policy_name;
+
+ /** Delete the mapping? */
+ u8 is_del;
+
+} ip6_sr_add_del_multicastmap_args_t;
+
+/**
+ * @brief Segment Routing state.
+ */
+typedef struct
+{
+ /** pool of tunnel instances, sr entry only */
+ ip6_sr_tunnel_t *tunnels;
+
+ /** find an sr "tunnel" by its outer-IP src/dst */
+ uword *tunnel_index_by_key;
+
+ /** find an sr "tunnel" by its name */
+ uword *tunnel_index_by_name;
+
+ /** policy pool */
+ ip6_sr_policy_t *policies;
+
+ /** find a policy by name */
+ uword *policy_index_by_policy_name;
+
+ /** multicast address to policy mapping */
+ uword *policy_index_by_multicast_address;
+
+ /** hmac key id by shared secret */
+ uword *hmac_key_by_shared_secret;
+
+ /** ip6-rewrite next index for reinstalling the original dst address */
+ u32 ip6_rewrite_sr_next_index;
+
+ /** ip6-replicate next index for multicast tunnel */
+ u32 ip6_lookup_sr_replicate_index;
+
+ /** application API callback */
+ void *sr_local_cb;
+
+ /** validate hmac keys */
+ u8 validate_hmac;
+
+ /** hmac keys; "show sr hmac" lists the entries from index 1 upwards */
+ ip6_sr_hmac_key_t *hmac_keys;
+
+ /** OpenSSL message digest (EVP_MD) */
+ EVP_MD *md;
+ /** OpenSSL HMAC context */
+ HMAC_CTX *hmac_ctx;
+
+ /** enable debug spew; toggled by "test sr debug on|off" */
+ u8 is_debug;
+
+ /** convenience */
+ vlib_main_t *vlib_main;
+ /** convenience */
+ vnet_main_t *vnet_main;
+} ip6_sr_main_t;
+
+ip6_sr_main_t sr_main; /* NOTE(review): no 'extern', so each includer gets a tentative definition - confirm one real definition exists, then consider 'extern' */
+
+format_function_t format_ip6_sr_header; /* formats an SR header for traces/CLI */
+format_function_t format_ip6_sr_header_with_length;
+
+vlib_node_registration_t ip6_sr_input_node; /* NOTE(review): also lacks 'extern' - verify where this node is registered */
+
+#if DPDK > 0
+extern vlib_node_registration_t sr_replicate_node; /* sr_replicate.c is compiled only when DPDK > 0 */
+#endif /* DPDK */
+
+int ip6_sr_add_del_tunnel (ip6_sr_add_del_tunnel_args_t * a); /* add/del an SR tunnel */
+int ip6_sr_add_del_policy (ip6_sr_add_del_policy_args_t * a); /* add/del an SR policy */
+int ip6_sr_add_del_multicastmap (ip6_sr_add_del_multicastmap_args_t * a); /* add/del a multicast address to policy mapping */
+
+void vnet_register_sr_app_callback (void *cb); /* presumably stored in sr_main.sr_local_cb - TODO confirm */
+
+void sr_fix_hmac (ip6_sr_main_t * sm, ip6_header_t * ip,
+ ip6_sr_header_t * sr); /* called by sr-replicate on each rewritten header */
+
+#endif /* included_vnet_sr_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sr/sr_error.def b/src/vnet/sr/sr_error.def
new file mode 100644
index 00000000000..62d021fd47b
--- /dev/null
+++ b/src/vnet/sr/sr_error.def
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sr_error (NONE, "no error")
+sr_error (BAD_ROUTING_HEADER_TYPE, "bad routing header type (not 4)")
+sr_error (NO_MORE_SEGMENTS, "out of SR segment drops")
+sr_error (PKTS_PROCESSED, "SR packets processed")
+sr_error (APP_CALLBACK, "SR application callback errors")
+sr_error (HMAC_INVALID, "SR packets with invalid HMAC signatures")
diff --git a/src/vnet/sr/sr_fix_dst_error.def b/src/vnet/sr/sr_fix_dst_error.def
new file mode 100644
index 00000000000..48fe7af6c98
--- /dev/null
+++ b/src/vnet/sr/sr_fix_dst_error.def
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sr_fix_dst_error (NONE, "no error")
+sr_fix_dst_error (NO_SR_HEADER, "no SR header present")
+sr_fix_dst_error (NO_MORE_SEGMENTS, "no more SR segments")
diff --git a/src/vnet/sr/sr_packet.h b/src/vnet/sr/sr_packet.h
new file mode 100644
index 00000000000..179b94c2dc7
--- /dev/null
+++ b/src/vnet/sr/sr_packet.h
@@ -0,0 +1,251 @@
+#ifndef included_vnet_sr_packet_h
+#define included_vnet_sr_packet_h
+
+#include <vnet/ip/ip.h>
+
+/*
+ * ipv6 segment-routing header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief The Segment Routing Header (SRH).
+ *
+ * The Segment Routing Header (SRH) is defined in the diagram below.
+ *
+ *
+ *     0                   1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |  Next Header  |  Hdr Ext Len  | Routing Type  | Segments Left |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    | First Segment |             Flags             |  HMAC Key ID  |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |            Segment List[0] (128 bits ipv6 address)            |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |                                                               |
+ *                                   ...
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |            Segment List[n] (128 bits ipv6 address)            |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |                   Policy List[0] (optional)                   |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |                   Policy List[1] (optional)                   |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |                   Policy List[2] (optional)                   |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                                                               |
+ *    |                                                               |
+ *    |                                                               |
+ *    |                        HMAC (256 bits)                        |
+ *    |                          (optional)                           |
+ *    |                                                               |
+ *    |                                                               |
+ *    |                                                               |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * where:
+ *
+ * o Next Header: 8-bit selector. Identifies the type of header
+ * immediately following the SRH.
+ *
+ * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ * header in 8-octet units, not including the first 8 octets.
+ *
+ * o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+ *
+ * o Segments Left. Defined in [RFC2460], it contains the index, in
+ * the Segment List, of the next segment to inspect. Segments Left
+ * is decremented at each segment and it is used as an index in the
+ * segment list.
+ *
+ * o First Segment: offset in the SRH, not including the first 8 octets
+ * and expressed in 16-octet units, pointing to the last element of
+ * the segment list, which is in fact the first segment of the
+ * segment routing path.
+ *
+ * o Flags: 16 bits of flags. Following flags are defined:
+ *
+ *                            1
+ *        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *       |C|P|R|R|    Policy Flags       |
+ *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * C-flag: Clean-up flag. Set when the SRH has to be removed from
+ * the packet when packet reaches the last segment.
+ *
+ * P-flag: Protected flag. Set when the packet has been rerouted
+ * through FRR mechanism by a SR endpoint node. See Section 6.3
+ * for more details.
+ *
+ * R-flags. Reserved and for future use.
+ *
+ * Policy Flags. Define the type of the IPv6 addresses encoded
+ * into the Policy List (see below). The following have been
+ * defined:
+ *
+ * Bits 4-6: determine the type of the first element after the
+ * segment list.
+ *
+ * Bits 7-9: determine the type of the second element.
+ *
+ * Bits 10-12: determine the type of the third element.
+ *
+ * Bits 13-15: determine the type of the fourth element.
+ *
+ * The following values are used for the type:
+ *
+ * 0x0: Not present. If value is set to 0x0, it means the
+ * element represented by these bits is not present.
+ *
+ * 0x1: SR Ingress.
+ *
+ * 0x2: SR Egress.
+ *
+ * 0x3: Original Source Address.
+ *
+ * o HMAC Key ID and HMAC field, and their use are defined in
+ * [I-D.vyncke-6man-segment-routing-security].
+ *
+ * o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ * segment in the Segment List. The Segment List is encoded starting
+ * from the last segment of the path. I.e., the first element of the
+ * segment list (Segment List [0]) contains the last segment of the
+ * path while the last segment of the Segment List (Segment List[n])
+ * contains the first segment of the path. The index contained in
+ * "Segments Left" identifies the current active segment.
+ *
+ * o Policy List. Optional addresses representing specific nodes in
+ * the SR path such as:
+ *
+ * SR Ingress: a 128 bit generic identifier representing the
+ * ingress in the SR domain (i.e.: it needs not to be a valid IPv6
+ * address).
+ *
+ * SR Egress: a 128 bit generic identifier representing the egress
+ * in the SR domain (i.e.: it needs not to be a valid IPv6
+ * address).
+ *
+ * Original Source Address: IPv6 address originally present in the
+ * SA field of the packet.
+ *
+ * The segments in the Policy List are encoded after the segment list
+ * and they are optional. If none are in the SRH, all bits of the
+ * Policy List Flags MUST be set to 0x0.
+ */
+
+#ifndef IPPROTO_IPV6_ROUTE
+#define IPPROTO_IPV6_ROUTE 43
+#endif
+
+#define ROUTING_HEADER_TYPE_SR 4
+/**
+ @brief SR header struct, laid out as in the SRH diagram above (packed).
+*/
+typedef struct
+{
+ /** Protocol for next header. */
+ u8 protocol;
+
+ /**
+ * Length of routing header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+
+ /** Type of routing header; type 4 = segment routing */
+ u8 type;
+
+ /** Index, in the segment list, of the next segment to inspect */
+ u8 segments_left;
+
+ /**
+ * First Segment: offset in the SRH of the last segment list element
+ * - in 16-octet units - not including the first 8 octets.
+ */
+ u8 first_segment;
+
+ /** C-flag: remove the SRH when the packet reaches the last segment */
+#define IP6_SR_HEADER_FLAG_CLEANUP (0x8000)
+ /** P-flag: packet was rerouted through FRR by an SR endpoint node */
+#define IP6_SR_HEADER_FLAG_PROTECTED (0x4000)
+ /** R-flags: reserved for future use */
+#define IP6_SR_HEADER_FLAG_RESERVED (0x3000)
+ /** Policy list element type: not present */
+#define IP6_SR_HEADER_FLAG_PL_ELT_NOT_PRESENT (0x0)
+ /** Policy list element type: SR Ingress */
+#define IP6_SR_HEADER_FLAG_PL_ELT_INGRESS_PE (0x1)
+ /** Policy list element type: SR Egress */
+#define IP6_SR_HEADER_FLAG_PL_ELT_EGRESS_PE (0x2)
+ /** Policy list element type: Original Source Address */
+#define IP6_SR_HEADER_FLAG_PL_ELT_ORIG_SRC_ADDR (0x3)
+ /** Flags word; element type values 0x4 - 0x7 are reserved */
+ u16 flags;
+ u8 hmac_key; /**< HMAC Key ID */
+
+ /** The segment + policy list elts */
+ ip6_address_t segments[0];
+} __attribute__ ((packed)) ip6_sr_header_t;
+
+static inline int
+ip6_sr_policy_list_shift_from_index (int pl_index)
+{
+  return 12 - (3 * pl_index);   /* one-origined index 1..4 -> shift 9, 6, 3, 0 */
+}
+
+/** Return the 3-bit policy list element type; pl_index is one-origined */
+static inline int
+ip6_sr_policy_list_flags (u16 flags_host_byte_order, int pl_index)
+{
+  const int pl_elt_mask = 7;    /* each element type is 3 bits wide */
+
+  /* Only four policy list slots are encoded in the flags */
+  if (pl_index < 1 || pl_index > 4)
+    return 0;
+
+  /* Slot 1 occupies host-order bits 11..9, slot 4 bits 2..0 */
+  return (flags_host_byte_order >>
+          ip6_sr_policy_list_shift_from_index (pl_index)) & pl_elt_mask;
+}
+
+#endif /* included_vnet_sr_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sr/sr_replicate.c b/src/vnet/sr/sr_replicate.c
new file mode 100644
index 00000000000..5f9de5042af
--- /dev/null
+++ b/src/vnet/sr/sr_replicate.c
@@ -0,0 +1,490 @@
+/*
+ * sr_replicate.c: ipv6 segment routing replicator for multicast
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief Functions for replicating packets across SR tunnels.
+ *
+ * Leverages rte_pktmbuf_clone() so there is no memcpy for
+ * invariant parts of the packet.
+ *
+ * @note Currently requires DPDK
+*/
+
+#if DPDK > 0 /* Cannot run replicate without DPDK */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/sr/sr.h>
+#include <vnet/devices/dpdk/dpdk.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/ip6_fib.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief sr_replicate state.
+ *
+*/
+typedef struct
+{
+ /* convenience pointers, filled in by sr_replicate_init() */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} sr_replicate_main_t;
+
+sr_replicate_main_t sr_replicate_main; /* the single instance of the state above */
+
+/**
+ * @brief Information to display in packet trace.
+ *
+*/
+typedef struct
+{
+ ip6_address_t src, dst; /* addresses copied from the rewritten ip6 header */
+ u16 length; /* ip6 payload length of the rewritten header, host byte order */
+ u32 next_index; /* next index in use when the trace was taken */
+ u32 tunnel_index; /* index into sr_main.tunnels of the last tunnel handled */
+ u8 sr[256]; /* first 256 bytes starting at the inserted SR header */
+} sr_replicate_trace_t;
+
+/**
+ * @brief packet trace format function.
+ *
+ * The tunnel is looked up at display time, so a stale or out-of-range
+ * index is reported rather than dereferencing a freed pool slot.
+ *
+ * @param *s u8 used for string output
+ * @param *args va_list structured input to va_arg to output @ref sr_replicate_trace_t
+ * @return *s u8 - formatted trace output
+*/
+static u8 *
+format_sr_replicate_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sr_replicate_trace_t *t = va_arg (*args, sr_replicate_trace_t *);
+  ip6_sr_main_t *sm = &sr_main;
+  ip6_sr_tunnel_t *tun;
+
+  s = format (s, "SR-REPLICATE: next %s ip6 src %U dst %U len %u\n",
+              "ip6-lookup", format_ip6_address, &t->src,
+              format_ip6_address, &t->dst, t->length);
+  if (pool_is_free_index (sm->tunnels, t->tunnel_index))
+    return format (s, " tunnel index %u is no longer valid", t->tunnel_index);
+
+  tun = pool_elt_at_index (sm->tunnels, t->tunnel_index);
+  return format (s, " rx-fib-id %d tx-fib-id %d\n%U",
+                 ip6_fib_get (tun->rx_fib_index)->table_id,
+                 ip6_fib_get (tun->tx_fib_index)->table_id,
+                 format_ip6_sr_header, t->sr, 0 /* print_hmac */ );
+}
+
+#define foreach_sr_replicate_error \
+_(REPLICATED, "sr packets replicated") \
+_(NO_BUFFERS, "error allocating buffers for replicas") \
+_(NO_REPLICAS, "no replicas were needed") \
+_(NO_BUFFER_DROPS, "sr no buffer drops")
+
+/**
+ * @brief SR replicate error codes, one per foreach_sr_replicate_error entry
+ */
+typedef enum
+{
+#define _(sym,str) SR_REPLICATE_ERROR_##sym,
+ foreach_sr_replicate_error
+#undef _
+ SR_REPLICATE_N_ERROR, /* number of error codes, not an error itself */
+} sr_replicate_error_t;
+
+/**
+ * @brief Error strings for SR replicate, in sr_replicate_error_t order
+ */
+static char *sr_replicate_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sr_replicate_error
+#undef _
+};
+
+/**
+ * @brief Defines next-nodes for packet processing.
+ *
+*/
+typedef enum
+{
+ SR_REPLICATE_NEXT_IP6_LOOKUP, /* mapped to "ip6-lookup" in the node registration */
+ SR_REPLICATE_N_NEXT, /* number of next nodes */
+} sr_replicate_next_t;
+
+/**
+ * @brief Single loop packet replicator.
+ *
+ * Emits one copy of each packet per tunnel of the SR policy indexed by
+ * vnet_buffer (b0)->ip.save_protocol; unreplicable packets are dropped.
+ *
+ * @node sr-replicate
+ * @param vm vlib_main_t
+ * @param node vlib_node_runtime_t
+ * @param frame vlib_frame_t
+ * @return frame->n_vectors uword
+*/
+static uword
+sr_replicate_node_fn (vlib_main_t * vm,
+                      vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  sr_replicate_next_t next_index;
+  int pkts_replicated = 0;
+  ip6_sr_main_t *sm = &sr_main;
+  int no_buffer_drops = 0;
+  vlib_buffer_free_list_t *fl;
+  unsigned socket_id = rte_socket_id ();
+  vlib_buffer_main_t *bm = vm->buffer_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0, hdr_bi0;
+          vlib_buffer_t *b0, *orig_b0;
+          struct rte_mbuf *orig_mb0 = 0, *hdr_mb0 = 0, *clone0 = 0;
+          struct rte_mbuf **hdr_vec = 0, **rte_mbuf_vec = 0;
+          ip6_sr_policy_t *pol0 = 0;
+          ip6_sr_tunnel_t *t0 = 0;
+          ip6_sr_header_t *hdr_sr0 = 0;
+          ip6_header_t *ip0 = 0, *hdr_ip0 = 0;
+          int num_replicas = 0;
+          int drop0 = 0;
+          int i;
+          u32 len_bytes = sizeof (ip6_header_t);
+          u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE;
+
+          /* Consume the input now so that every exit path makes progress */
+          bi0 = from[0];
+          from += 1;
+          n_left_from -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          orig_b0 = b0;
+
+          pol0 = pool_elt_at_index (sm->policies,
+                                    vnet_buffer (b0)->ip.save_protocol);
+
+          ip0 = vlib_buffer_get_current (b0);
+          /* Skip forward to the punch-in point */
+          vlib_buffer_advance (b0, sizeof (*ip0));
+          next_hdr = ip0->protocol;
+
+          /* HBH must immediately follow ipv6 header */
+          if (PREDICT_FALSE
+              (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              ip6_hop_by_hop_ext_t *ext_hdr =
+                (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+              u32 ext_hdr_len = 0;
+              ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+              len_bytes += ext_hdr_len;
+              next_hdr = ext_hdr->next_hdr;
+              ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+              ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+              /* Skip forward to the punch-in point */
+              vlib_buffer_advance (b0, ext_hdr_len);
+            }
+
+          orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
+
+          i16 delta0 = vlib_buffer_length_in_chain (vm, orig_b0)
+            - (i16) orig_mb0->pkt_len;
+          u16 new_data_len0 = (u16) ((i16) orig_mb0->data_len + delta0);
+          u16 new_pkt_len0 = (u16) ((i16) orig_mb0->pkt_len + delta0);
+
+          orig_mb0->data_len = new_data_len0;
+          orig_mb0->pkt_len = new_pkt_len0;
+          orig_mb0->data_off += (u16) (b0->current_data);
+
+          /*
+           * First pass: allocate a header mbuf and a payload clone for
+           * every tunnel. Nothing is enqueued unless all allocations
+           * succeed; the second pass then builds and enqueues the copies.
+           */
+          num_replicas = vec_len (pol0->tunnel_indices);
+
+          if (PREDICT_FALSE (num_replicas == 0))
+            {
+              vlib_node_increment_counter (vm, sr_replicate_node.index,
+                                           SR_REPLICATE_ERROR_NO_REPLICAS, 1);
+              drop0 = 1;
+              goto do_trace0;
+            }
+
+          for (i = 0; i < num_replicas; i++)
+            {
+              uint8_t nb_seg;
+              struct rte_mbuf *clone0i;
+              vlib_buffer_t *clone0_c, *clone_b0;
+
+              t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
+              hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
+
+              if (i < (num_replicas - 1))
+                {
+                  /* Not the last tunnel to process */
+                  clone0 = rte_pktmbuf_clone
+                    (orig_mb0, bm->pktmbuf_pools[socket_id]);
+                  if (clone0 == 0)
+                    goto clone_fail;
+                  nb_seg = 0;
+                  clone0i = clone0;
+                  clone0_c = NULL;
+                  while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs))
+                    {
+                      clone_b0 = vlib_buffer_from_rte_mbuf (clone0i);
+                      vlib_buffer_init_for_free_list (clone_b0, fl);
+                      ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) ==
+                              0);
+                      ASSERT (clone_b0->current_data == 0);
+
+                      clone_b0->current_data =
+                        (clone0i->buf_addr + clone0i->data_off) -
+                        (void *) clone_b0->data;
+                      clone_b0->current_length = clone0i->data_len;
+                      if (PREDICT_FALSE (clone0_c != NULL))
+                        {
+                          clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT;
+                          clone0_c->next_buffer =
+                            vlib_get_buffer_index (vm, clone_b0);
+                        }
+                      clone0_c = clone_b0;
+                      clone0i = clone0i->next;
+                      nb_seg++;
+                    }
+                }
+              else
+                /* Last tunnel to process, use original MB */
+                clone0 = orig_mb0;
+
+              if (PREDICT_FALSE (!clone0 || !hdr_mb0))
+                {
+                clone_fail:
+                  /* The pair being built is not in the vectors yet */
+                  if (hdr_mb0)
+                    rte_pktmbuf_free (hdr_mb0);
+                  if (clone0 && clone0 != orig_mb0)
+                    rte_pktmbuf_free (clone0);
+
+                  vec_foreach_index (i, rte_mbuf_vec)
+                  {
+                    rte_pktmbuf_free (rte_mbuf_vec[i]);
+                  }
+                  vec_free (rte_mbuf_vec);
+
+                  vec_foreach_index (i, hdr_vec)
+                  {
+                    rte_pktmbuf_free (hdr_vec[i]);
+                  }
+                  vec_free (hdr_vec);
+
+                  no_buffer_drops++;
+                  drop0 = 1;
+                  goto do_trace0;
+                }
+
+              vec_add1 (hdr_vec, hdr_mb0);
+              vec_add1 (rte_mbuf_vec, clone0);
+            }
+
+          for (i = 0; i < num_replicas; i++)
+            {
+              vlib_buffer_t *hdr_b0;
+              u16 new_l0 = 0;
+
+              t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
+              /* Our replicas */
+              hdr_mb0 = hdr_vec[i];
+              clone0 = rte_mbuf_vec[i];
+
+              hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite);
+              hdr_mb0->pkt_len = hdr_mb0->data_len +
+                vlib_buffer_length_in_chain (vm, orig_b0);
+
+              hdr_b0 = vlib_buffer_from_rte_mbuf (hdr_mb0);
+              vlib_buffer_init_for_free_list (hdr_b0, fl);
+              memcpy (hdr_b0->data, ip0, len_bytes);
+
+              hdr_b0->current_data = 0;
+              hdr_b0->current_length = len_bytes + vec_len (t0->rewrite);
+              hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
+              hdr_b0->trace_index = orig_b0->trace_index;
+              vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0;
+
+              hdr_b0->total_length_not_including_first_buffer =
+                hdr_mb0->pkt_len - hdr_b0->current_length;
+              vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
+
+              hdr_ip0 = (ip6_header_t *) hdr_b0->data;
+              new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
+                vec_len (t0->rewrite);
+              hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0);
+              hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes);
+              /* $$$ tune */
+              clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite));
+              hdr_sr0->protocol = next_hdr;
+              hdr_ip0->protocol = ip_next_hdr;
+
+              /* Copy dst address into the DA slot in the segment list */
+              clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64,
+                           sizeof (ip6_address_t));
+
+              /* Rewrite the ip6 dst address */
+              hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
+              hdr_ip0->dst_address.as_u64[1] = t0->first_hop.as_u64[1];
+
+              sr_fix_hmac (sm, hdr_ip0, hdr_sr0);
+
+              /* prepend new header to invariant piece */
+              hdr_mb0->next = clone0;
+              hdr_b0->next_buffer =
+                vlib_get_buffer_index (vm,
+                                       vlib_buffer_from_rte_mbuf (clone0));
+
+              /* update header's fields */
+              hdr_mb0->pkt_len =
+                (uint16_t) (hdr_mb0->data_len + clone0->pkt_len);
+              hdr_mb0->nb_segs = (uint8_t) (clone0->nb_segs + 1);
+
+              /* copy metadata from source packet */
+              hdr_mb0->port = clone0->port;
+              hdr_mb0->vlan_tci = clone0->vlan_tci;
+              hdr_mb0->vlan_tci_outer = clone0->vlan_tci_outer;
+              hdr_mb0->tx_offload = clone0->tx_offload;
+              hdr_mb0->hash = clone0->hash;
+              hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF);
+
+              __rte_mbuf_sanity_check (hdr_mb0, 1);
+
+              hdr_bi0 = vlib_get_buffer_index (vm, hdr_b0);
+              to_next[0] = hdr_bi0;
+              to_next += 1;
+              n_left_to_next -= 1;
+
+              if (n_left_to_next == 0)
+                {
+                  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+                  vlib_get_next_frame (vm, node, next_index,
+                                       to_next, n_left_to_next);
+                }
+              pkts_replicated++;
+            }
+
+          /* Per-packet scratch vectors are done with; do not leak them */
+          vec_free (hdr_vec);
+          vec_free (rte_mbuf_vec);
+
+        do_trace0:
+          if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              sr_replicate_trace_t *tr = vlib_add_trace (vm, node,
+                                                         b0, sizeof (*tr));
+              memset (tr, 0, sizeof (*tr));     /* drop paths set few fields */
+              tr->tunnel_index = t0 ? t0 - sm->tunnels : ~0;
+              if (hdr_ip0)
+                {
+                  memcpy (tr->src.as_u8, hdr_ip0->src_address.as_u8,
+                          sizeof (tr->src.as_u8));
+                  memcpy (tr->dst.as_u8, hdr_ip0->dst_address.as_u8,
+                          sizeof (tr->dst.as_u8));
+                  tr->length =
+                    clib_net_to_host_u16 (hdr_ip0->payload_length);
+                }
+              tr->next_index = next_index;
+              if (hdr_sr0)
+                memcpy (tr->sr, hdr_sr0, sizeof (tr->sr));
+            }
+
+          /* Nothing was forwarded: release the original buffer chain */
+          if (PREDICT_FALSE (drop0))
+            vlib_buffer_free (vm, &bi0, 1);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, sr_replicate_node.index,
+                               SR_REPLICATE_ERROR_REPLICATED,
+                               pkts_replicated);
+
+  vlib_node_increment_counter (vm, sr_replicate_node.index,
+                               SR_REPLICATE_ERROR_NO_BUFFER_DROPS,
+                               no_buffer_drops);
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_replicate_node) = { /* graph node registration for "sr-replicate" */
+ .function = sr_replicate_node_fn,
+ .name = "sr-replicate",
+ .vector_size = sizeof (u32), /* frame entries are u32 buffer indices */
+ .format_trace = format_sr_replicate_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(sr_replicate_error_strings),
+ .error_strings = sr_replicate_error_strings,
+
+ .n_next_nodes = SR_REPLICATE_N_NEXT,
+
+ .next_nodes = { /* the only next node: every replica goes to ip6-lookup */
+ [SR_REPLICATE_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (sr_replicate_node, sr_replicate_node_fn)
+/* *INDENT-ON* */
+
+/** @brief Init function: record the vlib/vnet main pointers. */
+clib_error_t *
+sr_replicate_init (vlib_main_t * vm)
+{
+  /* Cache the convenience pointers in the module state */
+  sr_replicate_main.vlib_main = vm;
+  sr_replicate_main.vnet_main = vnet_get_main ();
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_replicate_init);
+
+#endif /* DPDK */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */