Diffstat (limited to 'src/vnet/srv6')
-rwxr-xr-x  src/vnet/srv6/dir.dox                 25
-rwxr-xr-x  src/vnet/srv6/ietf_draft_05.txt     1564
-rw-r--r--  src/vnet/srv6/sr.api                 168
-rwxr-xr-x  src/vnet/srv6/sr.c                    57
-rwxr-xr-x  src/vnet/srv6/sr.h                   325
-rw-r--r--  src/vnet/srv6/sr_api.c               244
-rw-r--r--  src/vnet/srv6/sr_doc.md               55
-rwxr-xr-x  src/vnet/srv6/sr_localsid.c         1492
-rw-r--r--  src/vnet/srv6/sr_localsid.md          58
-rwxr-xr-x  src/vnet/srv6/sr_packet.h            159
-rw-r--r--  src/vnet/srv6/sr_policy.md            56
-rwxr-xr-x  src/vnet/srv6/sr_policy_rewrite.c   3227
-rwxr-xr-x  src/vnet/srv6/sr_steering.c          573
-rw-r--r--  src/vnet/srv6/sr_steering.md          11
14 files changed, 8014 insertions, 0 deletions
diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox
new file mode 100755
index 00000000000..3f539a58ef1
--- /dev/null
+++ b/src/vnet/srv6/dir.dox
@@ -0,0 +1,25 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing code
+
+ An implementation of Segment Routing as per:
+ draft-ietf-6man-segment-routing-header-05
+
+ @see ietf_draft_05.txt
+
+*/ \ No newline at end of file
diff --git a/src/vnet/srv6/ietf_draft_05.txt b/src/vnet/srv6/ietf_draft_05.txt
new file mode 100755
index 00000000000..e9bff04fa0a
--- /dev/null
+++ b/src/vnet/srv6/ietf_draft_05.txt
@@ -0,0 +1,1564 @@
+Network Working Group S. Previdi, Ed.
+Internet-Draft C. Filsfils
+Intended status: Standards Track Cisco Systems, Inc.
+Expires: August 5, 2017 B. Field
+ Comcast
+ I. Leung
+ Rogers Communications
+ J. Linkova
+ Google
+ E. Aries
+ Facebook
+ T. Kosugi
+ NTT
+ E. Vyncke
+ Cisco Systems, Inc.
+ D. Lebrun
+ Universite Catholique de Louvain
+ February 1, 2017
+
+
+ IPv6 Segment Routing Header (SRH)
+ draft-ietf-6man-segment-routing-header-05
+
+Abstract
+
+ Segment Routing (SR) allows a node to steer a packet through a
+ controlled set of instructions, called segments, by prepending an SR
+ header to the packet. A segment can represent any instruction,
+ topological or service-based. SR allows a flow to be enforced through
+ any path (topological, or application/service based) while
+ maintaining per-flow state only at the ingress node to the SR domain.
+
+ Segment Routing can be applied to the IPv6 data plane with the
+ addition of a new type of Routing Extension Header. This draft
+ describes the Segment Routing Extension Header Type and how it is
+ used by SR capable nodes.
+
+Requirements Language
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [RFC2119].
+
+Status of This Memo
+
+ This Internet-Draft is submitted in full conformance with the
+ provisions of BCP 78 and BCP 79.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 1]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ Internet-Drafts are working documents of the Internet Engineering
+ Task Force (IETF). Note that other groups may also distribute
+ working documents as Internet-Drafts. The list of current Internet-
+ Drafts is at http://datatracker.ietf.org/drafts/current/.
+
+ Internet-Drafts are draft documents valid for a maximum of six months
+ and may be updated, replaced, or obsoleted by other documents at any
+ time. It is inappropriate to use Internet-Drafts as reference
+ material or to cite them other than as "work in progress."
+
+ This Internet-Draft will expire on August 5, 2017.
+
+Copyright Notice
+
+ Copyright (c) 2017 IETF Trust and the persons identified as the
+ document authors. All rights reserved.
+
+ This document is subject to BCP 78 and the IETF Trust's Legal
+ Provisions Relating to IETF Documents
+ (http://trustee.ietf.org/license-info) in effect on the date of
+ publication of this document. Please review these documents
+ carefully, as they describe your rights and restrictions with respect
+ to this document. Code Components extracted from this document must
+ include Simplified BSD License text as described in Section 4.e of
+ the Trust Legal Provisions and are provided without warranty as
+ described in the Simplified BSD License.
+
+Table of Contents
+
+ 1. Segment Routing Documents . . . . . . . . . . . . . . . . . . 3
+ 2. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 3
+ 2.1. Data Planes supporting Segment Routing . . . . . . . . . 4
+ 2.2. Segment Routing (SR) Domain . . . . . . . . . . . . . . . 4
+ 2.2.1. SR Domain in a Service Provider Network . . . . . . . 5
+ 2.2.2. SR Domain in an Overlay Network . . . . . . . . . . . 6
+ 3. Segment Routing Extension Header (SRH) . . . . . . . . . . . 7
+ 3.1. SRH TLVs . . . . . . . . . . . . . . . . . . . . . . . . 9
+ 3.1.1. Ingress Node TLV . . . . . . . . . . . . . . . . . . 10
+ 3.1.2. Egress Node TLV . . . . . . . . . . . . . . . . . . . 11
+ 3.1.3. Opaque Container TLV . . . . . . . . . . . . . . . . 11
+ 3.1.4. Padding TLV . . . . . . . . . . . . . . . . . . . . . 12
+ 3.1.5. HMAC TLV . . . . . . . . . . . . . . . . . . . . . . 13
+ 3.2. SRH and RFC2460 behavior . . . . . . . . . . . . . . . . 14
+ 4. SRH Procedures . . . . . . . . . . . . . . . . . . . . . . . 14
+ 4.1. Source SR Node . . . . . . . . . . . . . . . . . . . . . 14
+ 4.2. Transit Node . . . . . . . . . . . . . . . . . . . . . . 15
+ 4.3. SR Segment Endpoint Node . . . . . . . . . . . . . . . . 16
+ 5. Security Considerations . . . . . . . . . . . . . . . . . . . 16
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 2]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ 5.1. Threat model . . . . . . . . . . . . . . . . . . . . . . 17
+ 5.1.1. Source routing threats . . . . . . . . . . . . . . . 17
+ 5.1.2. Applicability of RFC 5095 to SRH . . . . . . . . . . 17
+ 5.1.3. Service stealing threat . . . . . . . . . . . . . . . 18
+ 5.1.4. Topology disclosure . . . . . . . . . . . . . . . . . 18
+ 5.1.5. ICMP Generation . . . . . . . . . . . . . . . . . . . 18
+ 5.2. Security fields in SRH . . . . . . . . . . . . . . . . . 19
+ 5.2.1. Selecting a hash algorithm . . . . . . . . . . . . . 20
+ 5.2.2. Performance impact of HMAC . . . . . . . . . . . . . 21
+ 5.2.3. Pre-shared key management . . . . . . . . . . . . . . 21
+ 5.3. Deployment Models . . . . . . . . . . . . . . . . . . . . 22
+ 5.3.1. Nodes within the SR domain . . . . . . . . . . . . . 22
+ 5.3.2. Nodes outside of the SR domain . . . . . . . . . . . 22
+ 5.3.3. SR path exposure . . . . . . . . . . . . . . . . . . 23
+ 5.3.4. Impact of BCP-38 . . . . . . . . . . . . . . . . . . 23
+ 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 24
+ 7. Manageability Considerations . . . . . . . . . . . . . . . . 24
+ 8. Contributors . . . . . . . . . . . . . . . . . . . . . . . . 24
+ 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 24
+ 10. References . . . . . . . . . . . . . . . . . . . . . . . . . 25
+ 10.1. Normative References . . . . . . . . . . . . . . . . . . 25
+ 10.2. Informative References . . . . . . . . . . . . . . . . . 25
+ Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 27
+
+1. Segment Routing Documents
+
+ Segment Routing terminology is defined in
+ [I-D.ietf-spring-segment-routing].
+
+ Segment Routing use cases are described in [RFC7855] and
+ [I-D.ietf-spring-ipv6-use-cases].
+
+ Segment Routing protocol extensions are defined in
+ [I-D.ietf-isis-segment-routing-extensions], and
+ [I-D.ietf-ospf-ospfv3-segment-routing-extensions].
+
+2. Introduction
+
+ Segment Routing (SR), defined in [I-D.ietf-spring-segment-routing],
+ allows a node to steer a packet through a controlled set of
+ instructions, called segments, by prepending an SR header to the
+ packet. A segment can represent any instruction, topological or
+ service-based. SR allows a flow to be enforced through any path
+ (topological or service/application based) while maintaining per-flow
+ state only at the ingress node to the SR domain. Segments can be
+ derived from different components: IGP, BGP, Services, Contexts,
+ Locators, etc. The list of segments forming the path is called the
+ Segment List and is encoded in the packet header.
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 3]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ SR allows the use of strict and loose source based routing paradigms
+ without requiring any additional signaling protocols in the
+ infrastructure hence delivering an excellent scalability property.
+
+ The source based routing model described in
+ [I-D.ietf-spring-segment-routing] is inherited from the ones proposed
+ by [RFC1940] and [RFC2460]. The source based routing model offers
+ the support for explicit routing capability.
+
+2.1. Data Planes supporting Segment Routing
+
+ Segment Routing (SR) can be instantiated over MPLS
+ ([I-D.ietf-spring-segment-routing-mpls]) and IPv6. This document
+ defines its instantiation over the IPv6 data-plane based on the use-
+ cases defined in [I-D.ietf-spring-ipv6-use-cases].
+
+ This document defines a new type of Routing Header (originally
+ defined in [RFC2460]) called the Segment Routing Header (SRH) in
+ order to convey the Segment List in the packet header as defined in
+ [I-D.ietf-spring-segment-routing]. Mechanisms through which segments
+ are known and advertised are outside the scope of this document.
+
+ A segment is materialized by an IPv6 address. A segment identifies a
+ topological instruction or a service instruction. A segment can be
+ either:
+
+ o global: a global segment represents an instruction supported by
+ all nodes in the SR domain and it is instantiated through an IPv6
+ address globally known in the SR domain.
+
+ o local: a local segment represents an instruction supported only by
+ the node that originates it and it is instantiated through an IPv6
+ address that is known only by the local node.
+
+2.2. Segment Routing (SR) Domain
+
+ We define the concept of the Segment Routing Domain (SR Domain) as
+ the set of nodes participating in the source based routing model.
+ These nodes may be connected to the same physical infrastructure
+ (e.g.: a Service Provider's network) as well as nodes remotely
+ connected to each other (e.g.: an enterprise VPN or an overlay).
+
+ A non-exhaustive list of examples of SR Domains is:
+
+ o The network of an operator, service provider, content provider,
+ enterprise including nodes, links and Autonomous Systems.
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 4]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o A set of nodes connected as an overlay over one or more transit
+ providers. The overlay nodes exchange SR-enabled traffic with
+ segments belonging solely to the overlay routers (the SR domain).
+ None of the segments in the SR-enabled packets exchanged by the
+ overlay belong to the transit networks.
+
+ The source based routing model through its instantiation of the
+ Segment Routing Header (SRH) defined in this document equally applies
+ to all the above examples.
+
+ It is assumed in this document that the SRH is added to the packet by
+ its source, consistently with the source routing model defined in
+ [RFC2460]. For example:
+
+ o At the node originating the packet (host, server).
+
+ o At the ingress node of an SR domain where the ingress node
+ receives an IPv6 packet and encapsulates it into an outer IPv6
+ header followed by a Segment Routing header.
+
+2.2.1. SR Domain in a Service Provider Network
+
+ The following figure illustrates an SR domain consisting of an
+ operator's network infrastructure.
+
+ (-------------------------- Operator 1 -----------------------)
+ ( )
+ ( (-----AS 1-----) (-------AS 2-------) (----AS 3-------) )
+ ( ( ) ( ) ( ) )
+ A1--(--(--11---13--14-)--(-21---22---23--24-)--(-31---32---34--)--)--Z1
+ ( ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ ) )
+ A2--(--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--)--Z2
+ ( ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | ) )
+ ( ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| ) )
+ A3--(--(--15---17--18-)--(-25---26---27--28-)--(-35---36---38--)--)--Z3
+ ( ( ) ( ) ( ) )
+ ( (--------------) (------------------) (---------------) )
+ ( )
+ (-------------------------------------------------------------)
+
+ Figure 1: Service Provider SR Domain
+
+ Figure 1 describes an operator network including several ASes and
+ delivering connectivity between endpoints. In this scenario, Segment
+ Routing is used within the operator networks and across the ASes
+ boundaries (all being under the control of the same operator). In
+ this case segment routing can be used in order to address use cases
+ such as end-to-end traffic engineering, fast re-route, egress peer
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 5]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ engineering, data-center traffic engineering as described in
+ [RFC7855], [I-D.ietf-spring-ipv6-use-cases] and
+ [I-D.ietf-spring-resiliency-use-cases].
+
+ Typically, an IPv6 packet received at ingress (i.e.: from outside the
+ SR domain) is classified according to network operator policies and
+ such classification results in an outer header with an SRH applied
+ to the incoming packet. The SRH contains the list of segments
+ representing the path the packet must take inside the SR domain.
+ Thus, the SA of the packet is the ingress node, the DA (due to SRH
+ procedures described in Section 4) is set as the first segment of the
+ path and the last segment of the path is the egress node of the SR
+ domain.
+
+ The path may include intra-AS as well as inter-AS segments. It has
+ to be noted that all nodes within the SR domain are under control of
+ the same administration. When the packet reaches the egress point of
+ the SR domain, the outer header and its SRH are removed so that the
+ destination of the packet is unaware of the SR domain the packet has
+ traversed.
+
+ The outer header with the SRH is no different from any other
+ tunneling encapsulation mechanism and allows a network operator to
+ implement traffic engineering mechanisms so to efficiently steer
+ traffic across its infrastructure.
+
+2.2.2. SR Domain in an Overlay Network
+
+ The following figure illustrates an SR domain consisting of an
+ overlay network over multiple operator's networks.
+
+ (--Operator 1---) (-----Operator 2-----) (--Operator 3---)
+ ( ) ( ) ( )
+ A1--(--11---13--14--)--(--21---22---23--24--)--(-31---32---34--)--C1
+ ( /|\ /|\ /| ) ( |\ /|\ /|\ /| ) ( |\ /|\ /| \ )
+ A2--(/ | \/ | \/ | ) ( | \/ | \/ | \/ | ) ( | \/ | \/ | \)--C2
+ ( | /\ | /\ | ) ( | /\ | /\ | /\ | ) ( | /\ | /\ | )
+ ( |/ \|/ \| ) ( |/ \|/ \|/ \| ) ( |/ \|/ \| )
+ A3--(--15---17--18--)--(--25---26---27--28--)--(-35---36---38--)--C3
+ ( ) ( | | | ) ( )
+ (---------------) (--|----|---------|--) (---------------)
+ | | |
+ B1 B2 B3
+
+ Figure 2: Overlay SR Domain
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 6]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ Figure 2 describes an overlay consisting of nodes connected to three
+ different network operators and forming a single overlay network
+ where Segment routing packets are exchanged.
+
+ The overlay consists of nodes A1, A2, A3, B1, B2, B3, C1, C2 and C3.
+ These nodes are connected to their respective network operator and
+ form an overlay network.
+
+ Each node may originate packets with an SRH which contains, in the
+ segment list of the SRH or in the DA, segments identifying other
+ overlay nodes. This implies that packets with an SRH may traverse
+ operator's networks but, obviously, these SRHs cannot contain an
+ address/segment of the transit operators 1, 2 and 3. The SRH
+ originated by the overlay can only contain address/segment under the
+ administration of the overlay (e.g. address/segments supported by A1,
+ A2, A3, B1, B2, B3, C1,C2 or C3).
+
+ In this model, the operator network nodes are transit nodes and,
+ according to [RFC2460], MUST NOT inspect the routing extension header
+ since they are not the DA of the packet.
+
+ It is a common practice in operators' networks to filter out, at
+ ingress, any packet whose DA is the address of an internal node and
+ it is also possible that an operator would filter out any packet
+ destined to an internal address and having an extension header in it.
+
+ This common practice does not impact the SR-enabled traffic between
+ the overlay nodes as the intermediate transit networks never see a
+ destination address belonging to their infrastructure. These SR-
+ enabled overlay packets will thus never be filtered by the transit
+ operators.
+
+ In all cases, transit packets (i.e.: packets whose DA is outside the
+ domain of the operator's network) will be forwarded accordingly
+ without introducing any security concern in the operator's network.
+ This is similar to tunneled packets.
+
+3. Segment Routing Extension Header (SRH)
+
+ A new type of the Routing Header (originally defined in [RFC2460]) is
+ defined: the Segment Routing Header (SRH) which has a new Routing
+ Type (suggested value 4) to be assigned by IANA.
+
+ The Segment Routing Header (SRH) is defined as follows:
+
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 7]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | First Segment | Flags | RESERVED |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[0] (128 bits IPv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | |
+ ...
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Segment List[n] (128 bits IPv6 address) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ // //
+ // Optional Type Length Value objects (variable) //
+ // //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Next Header: 8-bit selector. Identifies the type of header
+ immediately following the SRH.
+
+ o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ header in 8-octet units, not including the first 8 octets.
+
+ o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+
+ o Segments Left. Defined in [RFC2460], it contains the index, in
+ the Segment List, of the next segment to inspect. Segments Left
+ is decremented at each segment.
+
+ o First Segment: contains the index, in the Segment List, of the
+ first segment of the path which is in fact the last element of the
+ Segment List.
+
+ o Flags: 8 bits of flags. Following flags are defined:
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 8]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ 0 1 2 3 4 5 6 7
+ +-+-+-+-+-+-+-+-+
+ |U|P|O|A|H| U |
+ +-+-+-+-+-+-+-+-+
+
+ U: Unused and for future use. SHOULD be unset on transmission
+ and MUST be ignored on receipt.
+
+ P-flag: Protected flag. Set when the packet has been rerouted
+ through FRR mechanism by an SR endpoint node.
+
+ O-flag: OAM flag. When set, it indicates that this packet is
+ an operations and management (OAM) packet.
+
+ A-flag: Alert flag. If present, it means important Type Length
+ Value (TLV) objects are present. See Section 3.1 for details
+ on TLVs objects.
+
+ H-flag: HMAC flag. If set, the HMAC TLV is present and is
+ encoded as the last TLV of the SRH. In other words, the last
+ 36 octets of the SRH represent the HMAC information. See
+ Section 3.1.5 for details on the HMAC TLV.
+
+ o RESERVED: SHOULD be unset on transmission and MUST be ignored on
+ receipt.
+
+ o Segment List[n]: 128-bit IPv6 address representing the nth
+ segment in the Segment List. The Segment List is encoded starting
+ from the last segment of the path. I.e., the first element of the
+ segment list (Segment List [0]) contains the last segment of the
+ path while the last segment of the Segment List (Segment List[n])
+ contains the first segment of the path. The index contained in
+ "Segments Left" identifies the current active segment.
+
+ o Type Length Value (TLV) objects are described in Section 3.1.
+
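+ As an illustration only (not part of this specification), a minimal C
+ sketch of the fixed part of the SRH described above; the type name,
+ field names and flag constants are illustrative, and bit 0 of the
+ flags octet is assumed to be its most significant bit:
+
+     #include <stdint.h>
+
+     /* Illustrative layout of the fixed part of the SRH. */
+     typedef struct __attribute__ ((packed))
+     {
+       uint8_t  next_header;   /* type of the header following the SRH   */
+       uint8_t  hdr_ext_len;   /* SRH length in 8-octet units, excluding
+                                  the first 8 octets                     */
+       uint8_t  routing_type;  /* to be assigned by IANA (suggested: 4)  */
+       uint8_t  segments_left; /* index of the next segment to inspect   */
+       uint8_t  first_segment; /* index of the first segment of the path */
+       uint8_t  flags;         /* U|P|O|A|H flag bits                    */
+       uint16_t reserved;
+       uint8_t  segments[0][16]; /* Segment List[0..n], then optional TLVs */
+     } srh_example_t;
+
+     #define SRH_FLAG_PROTECTED 0x40 /* P-flag */
+     #define SRH_FLAG_OAM       0x20 /* O-flag */
+     #define SRH_FLAG_ALERT     0x10 /* A-flag */
+     #define SRH_FLAG_HMAC      0x08 /* H-flag */
+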
+3.1. SRH TLVs
+
+ This section defines TLVs of the Segment Routing Header.
+
+ Type Length Value (TLV) objects contain optional information that may be used
+ by the node identified in the DA of the packet. It has to be noted
+ that the information carried in the TLVs is not intended to be used
+ by the routing layer. Typically, TLVs carry information that is
+ consumed by components other than the routing function (e.g.: OAM).
+
+ Each TLV has its own length, format and semantic. The code-point
+ allocated (by IANA) to each TLV defines both the format and the
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 9]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ semantic of the information carried in the TLV. Multiple TLVs may be
+ encoded in the same SRH.
+
+ The "Length" field of the TLV is primarily used to skip the TLV while
+ inspecting the SRH in case the node doesn't support or recognize the
+ TLV codepoint. The "Length" defines the TLV length in octets and not
+ including the "Type" and "Length" fields.
+
+ The primary scope of TLVs is to give the receiver of the packet
+ information related to the source routed path (e.g.: where the packet
+ entered the SR domain and where it is expected to exit).
+
+ Additional TLVs may be defined in the future.
+
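+ To make the Type/Length skipping rule concrete, the following
+ illustrative C fragment (not normative; the type values are the
+ suggested ones from the IANA considerations section) walks the TLV
+ area and skips TLVs it does not recognize:
+
+     #include <stdint.h>
+
+     enum srh_tlv_type
+     {
+       SRH_TLV_INGRESS_NODE = 1,
+       SRH_TLV_EGRESS_NODE = 2,
+       SRH_TLV_OPAQUE_CONTAINER = 3,
+       SRH_TLV_PADDING = 4,
+       SRH_TLV_HMAC = 5,
+     };
+
+     /* 'p' points to the first TLV, 'end' just past the SRH.  The
+      * Length field does not cover the Type and Length octets. */
+     static void
+     walk_srh_tlvs (const uint8_t * p, const uint8_t * end)
+     {
+       while (p + 2 <= end)
+         {
+           uint8_t type = p[0];
+           uint8_t length = p[1];
+           if (p + 2 + length > end)
+             break;              /* malformed TLV, stop parsing */
+           switch (type)
+             {
+             case SRH_TLV_HMAC:
+               /* consume the HMAC information */
+               break;
+             default:
+               /* unsupported or unknown TLV: skip it using Length */
+               break;
+             }
+           p += 2 + length;
+         }
+     }
+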
+3.1.1. Ingress Node TLV
+
+ The Ingress Node TLV is optional and identifies the node this packet
+ traversed when entering the SR domain. The Ingress Node TLV has the
+ following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Ingress Node (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 1).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Ingress Node: 128 bits. Defines the node where the packet is
+ expected to enter the SR domain. In the encapsulation case
+ described in Section 2.2.1, this information corresponds to the SA
+ of the encapsulating header.
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 10]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+3.1.2. Egress Node TLV
+
+ The Egress Node TLV is optional and identifies the node this packet
+ is expected to traverse when exiting the SR domain. The Egress Node
+ TLV has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Egress Node (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 2).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Egress Node: 128 bits. Defines the node where the packet is
+ expected to exit the SR domain. In the encapsulation case
+ described in Section 2.2.1, this information corresponds to the
+ last segment of the SRH in the encapsulating header.
+
+3.1.3. Opaque Container TLV
+
+ The Opaque Container TLV is optional and has the following format:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 11]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ | Opaque Container (16 octets) |
+ | |
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 3).
+
+ o Length: 18.
+
+ o RESERVED: 8 bits. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+ o Flags: 8 bits. No flags are defined in this document.
+
+ o Opaque Container: 128 bits of opaque data not relevant for the
+ routing layer. Typically, this information is consumed by a non-
+ routing component of the node receiving the packet (i.e.: the node
+ in the DA).
+
+3.1.4. Padding TLV
+
+ The Padding TLV is optional and is used to align the SRH
+ on an 8-octet boundary. The Padding TLV has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | Padding (variable) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ // Padding (variable) //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 4).
+
+ o Length: 1 to 7
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 12]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o Padding: from 1 to 7 octets of padding. Padding bits have no
+ semantic. They SHOULD be set to 0 on transmission and MUST be
+ ignored on receipt.
+
+ The following applies to the Padding TLV:
+
+ o Padding TLV is optional and MAY only appear once in the SRH. If
+ present, it MUST have a length between 1 and 7 octets.
+
+ o The Padding TLV is used in order to align the SRH total length on
+ an 8-octet boundary.
+
+ o When present, the Padding TLV MUST appear as the last TLV before
+ the HMAC TLV (if HMAC TLV is present).
+
+ o When present, the Padding TLV MUST have a length from 1 to 7 in
+ order to align the SRH total length on an 8-octet boundary.
+
+ o When a router inspecting the SRH encounters the Padding TLV, it
+ MUST assume that no other TLV (other than the HMAC) follows the
+ Padding TLV.
+
+3.1.5. HMAC TLV
+
+ The HMAC TLV is optional and contains the HMAC information. The HMAC TLV
+ has the following format:
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Length | RESERVED |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | HMAC Key ID (4 octets) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | //
+ | HMAC (32 octets) //
+ | //
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ where:
+
+ o Type: to be assigned by IANA (suggested value 5).
+
+ o Length: 38.
+
+ o RESERVED: 2 octets. SHOULD be unset on transmission and MUST be
+ ignored on receipt.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 13]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o HMAC Key ID: 4 octets.
+
+ o HMAC: 32 octets.
+
+ o HMAC and HMAC Key ID usage is described in Section 5
+
+ The Following applies to the HMAC TLV:
+
+ o When present, the HMAC TLV MUST be encoded as the last TLV of the
+ SRH.
+
+ o If the HMAC TLV is present, the SRH H-Flag (Figure 4) MUST be set.
+
+ o When the H-flag is set in the SRH, the router inspecting the SRH
+ MUST find the HMAC TLV in the last 38 octets of the SRH.
+
+3.2. SRH and RFC2460 behavior
+
+ The SRH, being a new type of Routing Header, shares the same
+ properties:
+
+ SHOULD only appear once in the packet.
+
+ Only the router whose address is in the DA field of the packet
+ header MUST inspect the SRH.
+
+ Therefore, Segment Routing in IPv6 networks implies that the segment
+ identifier (i.e.: the IPv6 address of the segment) is moved into the
+ DA of the packet.
+
+ The DA of the packet changes at each segment termination/completion
+ and therefore the final DA of the packet MUST be encoded as the last
+ segment of the path.
+
+4. SRH Procedures
+
+ In this section we describe the different procedures on the SRH.
+
+4.1. Source SR Node
+
+ A Source SR Node can be any node originating an IPv6 packet with its
+ IPv6 and Segment Routing Headers. This includes either:
+
+ A host originating an IPv6 packet.
+
+ An SR domain ingress router encapsulating a received IPv6 packet
+ into an outer IPv6 header followed by an SRH.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 14]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ The mechanism through which a Segment List is derived is outside of
+ the scope of this document. As an example, the Segment List may be
+ obtained through:
+
+ Local path computation.
+
+ Local configuration.
+
+ Interaction with a centralized controller delivering the path.
+
+ Any other mechanism.
+
+ The following are the steps of the creation of the SRH:
+
+ Next Header and Hdr Ext Len fields are set according to [RFC2460].
+
+ Routing Type field is set as TBD (to be allocated by IANA,
+ suggested value 4).
+
+ The Segment List is built with the FIRST segment of the path
+ encoded in the LAST element of the Segment List. Subsequent
+ segments are encoded on top of the first segment. Finally, the
+ LAST segment of the path is encoded in the FIRST element of the
+ Segment List. In other words, the Segment List is encoded in the
+ reverse order of the path.
+
+ The final DA of the packet is encoded as the last segment of the
+ path (encoded in the first element of the Segment List).
+
+ The DA of the packet is set with the value of the first segment
+ (found in the last element of the segment list).
+
+ The Segments Left field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The First Segment field is set to n-1 where n is the number of
+ elements in the Segment List.
+
+ The packet is sent out towards the first segment (i.e.:
+ represented in the packet DA).
+
+ HMAC TLV may be set according to Section 5.
+
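+ As an illustration of the encoding steps above (not part of this
+ specification; names are illustrative), a source node could fill the
+ Segment List, DA and index fields as follows:
+
+     #include <stdint.h>
+     #include <string.h>
+
+     /* 'path' holds the segments in forward order (path[0] is the first
+      * segment); 'seg_list' is the Segment List area of the SRH and 'da'
+      * the IPv6 destination address of the packet. */
+     static void
+     build_srh_example (uint8_t seg_list[][16], uint8_t da[16],
+                        const uint8_t path[][16], uint8_t n,
+                        uint8_t * segments_left, uint8_t * first_segment)
+     {
+       uint8_t i;
+
+       /* Segment List is encoded in the reverse order of the path:
+        * Segment List[0] holds the LAST segment of the path and
+        * Segment List[n-1] holds the FIRST one. */
+       for (i = 0; i < n; i++)
+         memcpy (seg_list[i], path[n - 1 - i], 16);
+
+       /* DA is set to the first segment, i.e. the last list element. */
+       memcpy (da, seg_list[n - 1], 16);
+
+       *segments_left = n - 1;
+       *first_segment = n - 1;
+     }
+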
+4.2. Transit Node
+
+ According to [RFC2460], the only node that is allowed to inspect the
+ Routing Extension Header (and therefore the SRH), is the node
+ corresponding to the DA of the packet. Any other transit node MUST
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 15]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ NOT inspect the underlying routing header and MUST forward the packet
+ towards the DA and according to the IPv6 routing table.
+
+ In the example case described in Section 2.2.2, when SR capable nodes
+ are connected through an overlay spanning multiple third-party
+ infrastructures, it is safe to send SRH packets (i.e.: packets having a
+ Segment Routing Header) between overlay/SR-capable nodes
+ as long as the segment list does not include any of the transit
+ provider nodes. In addition, as a generic security measure, any
+ service provider will block any packet destined to one of its
+ internal routers, especially if these packets have an extension header
+ in them.
+
+4.3. SR Segment Endpoint Node
+
+ The SR segment endpoint node is the node whose address is in the DA.
+ The segment endpoint node inspects the SRH and does the following:
+
+ 1. IF DA = myself (segment endpoint)
+ 2. IF Segments Left > 0 THEN
+ decrement Segments Left
+ update DA with Segment List[Segments Left]
+ 3. ELSE continue IPv6 processing of the packet
+ End of processing.
+ 4. Forward the packet out
+
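+ A compact C rendering of the pseudo-code above (illustrative only,
+ assuming the byte offsets of the header diagram in Section 3):
+
+     #include <stdint.h>
+     #include <string.h>
+
+     /* 'srh' points to the SRH, 'da' to the IPv6 DA of the packet.
+      * Returns 1 when the DA was updated with the next segment and the
+      * packet should be forwarded, 0 when normal IPv6 processing of the
+      * packet should continue. */
+     static int
+     srh_endpoint_process (uint8_t * srh, uint8_t * da)
+     {
+       uint8_t segments_left = srh[3];          /* Segments Left field */
+       const uint8_t *segment_list = srh + 8;   /* Segment List[0]     */
+
+       if (segments_left == 0)
+         return 0;                       /* continue IPv6 processing   */
+
+       segments_left--;
+       srh[3] = segments_left;
+       /* update DA with Segment List[Segments Left] */
+       memcpy (da, segment_list + (unsigned) segments_left * 16, 16);
+       return 1;                         /* forward the packet out     */
+     }
+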
+5. Security Considerations
+
+ This section analyzes the security threat model, the security issues
+ and proposed solutions related to the new Segment Routing Header.
+
+ The Segment Routing Header (SRH) is simply another type of the
+ routing header as described in RFC 2460 [RFC2460] and is:
+
+ o Added by an SR edge router when entering the segment routing
+ domain or by the originating host itself. The source host can
+ even be outside the SR domain;
+
+ o inspected and acted upon when reaching the destination address of
+ the IP header per RFC 2460 [RFC2460].
+
+ Per RFC2460 [RFC2460], routers on the path that simply forward an
+ IPv6 packet (i.e. the IPv6 destination address is none of theirs)
+ will never inspect and process the content of the SRH. Routers whose
+ one interface IPv6 address equals the destination address field of
+ the IPv6 packet MUST parse the SRH and, if supported and if the local
+ configuration allows it, MUST act accordingly to the SRH content.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 16]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ According to RFC2460 [RFC2460], the default behavior of a non SR-
+ capable router upon receipt of an IPv6 packet with SRH destined to one
+ of its addresses, is to:
+
+ o ignore the SRH completely if the Segment Left field is 0 and
+ proceed to process the next header in the IPv6 packet;
+
+ o discard the IPv6 packet if Segment Left field is greater than 0,
+ it MAY send a Parameter Problem ICMP message back to the Source
+ Address.
+
+5.1. Threat model
+
+5.1.1. Source routing threats
+
+ Using an SRH is similar to source routing, therefore it has some
+ well-known security issues as described in RFC4942 [RFC4942] section
+ 2.1.1 and RFC5095 [RFC5095]:
+
+ o amplification attacks: where a packet could be forged in such a
+ way to cause looping among a set of SR-enabled routers causing
+ unnecessary traffic, hence a Denial of Service (DoS) against
+ bandwidth;
+
+ o reflection attack: where a hacker could force an intermediate node
+ to appear as the immediate attacker, hence hiding the real
+ attacker from naive forensics;
+
+ o bypass attack: where an intermediate node could be used as a
+ stepping stone (for example in a De-Militarized Zone) to attack
+ another host (for example in the datacenter or any back-end
+ server).
+
+5.1.2. Applicability of RFC 5095 to SRH
+
+ First of all, the reader must remember this specific part of section
+ 1 of RFC5095 [RFC5095], "A side effect is that this also eliminates
+ benign RH0 use-cases; however, such applications may be facilitated
+ by future Routing Header specifications.". In short, it is not
+ forbidden to create new secure type of Routing Header; for example,
+ RFC 6554 (RPL) [RFC6554] also creates a new Routing Header type for a
+ specific application confined in a single network.
+
+ In the segment routing architecture described in
+ [I-D.ietf-spring-segment-routing] there are basically two kinds of
+ nodes (routers and hosts):
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 17]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o nodes within the SR domain, which is within one single
+ administrative domain, i.e., where all nodes are trusted; otherwise
+ the damage caused by those nodes could be worse than
+ amplification attacks: traffic interception, man-in-the-middle
+ attacks, more server DoS by dropping packets, and so on.
+
+ o nodes outside of the SR domain, which is outside of the
+ administrative segment routing domain hence they cannot be trusted
+ because there is no physical security for those nodes, i.e., they
+ can be replaced by hostile nodes or can be coerced in wrong
+ behaviors.
+
+ The main use case for SR consists of the single administrative domain
+ where only trusted nodes with SR enabled and configured participate
+ in SR: this is the same model as in RFC6554 [RFC6554]. Non-
+ trusted nodes do not participate, either because SR processing is not
+ enabled by default or because they only process SRH from nodes within
+ their domain.
+
+ Moreover, all SR nodes ignore SRH created by outsiders based on
+ topology information (received on a peering or internal interface) or
+ on presence and validity of the HMAC field. Therefore, if
+ intermediate nodes ONLY act on valid and authorized SRH (such as
+ within a single administrative domain), then there is no security
+ threat similar to RH-0. Hence, the RFC 5095 [RFC5095] attacks are
+ not applicable.
+
+5.1.3. Service stealing threat
+
+ Since segment routing is used for added-value services, there is also
+ a need to prevent non-participating nodes from using those services;
+ this is called 'service stealing prevention'.
+
+5.1.4. Topology disclosure
+
+ The SRH may also contain IPv6 addresses of some intermediate SR-
+ nodes in the path towards the destination; this obviously reveals
+ those addresses to the potentially hostile attackers if those
+ attackers are able to intercept packets containing SRH. On the other
+ hand, if the attacker can do a traceroute whose probes will be
+ forwarded along the SR path, then there is little learned by
+ intercepting the SRH itself.
+
+5.1.5. ICMP Generation
+
+ Per section 4.4 of RFC2460 [RFC2460], when destination nodes (i.e.
+ where the destination address is one of theirs) receive a Routing
+ Header with unsupported Routing Type, the required behavior is:
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 18]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o If Segments Left is zero, the node must ignore the Routing header
+ and proceed to process the next header in the packet.
+
+ o If Segments Left is non-zero, the node must discard the packet and
+ send an ICMP Parameter Problem, Code 0, message to the packet's
+ Source Address, pointing to the unrecognized Routing Type.
+
+ This required behavior could be used by an attacker to force the
+ generation of ICMP message by any node. The attacker could send
+ packets with SRH (with Segment Left set to 0) destined to a node not
+ supporting SRH. Per RFC2460 [RFC2460], the destination node could
+ generate an ICMP message, causing local CPU utilization; if the
+ source of the offending packet with SRH was spoofed, this could lead to a
+ reflection attack without any amplification.
+
+ It must be noted that this is a required behavior for any unsupported
+ Routing Type and not limited to SRH packets. So, it is not specific
+ to SRH and the usual rate limiting for ICMP generation is required
+ anyway for any IPv6 implementation and has been implemented and
+ deployed for many years.
+
+5.2. Security fields in SRH
+
+ This section summarizes the use of specific fields in the SRH. They
+ are based on a key-hashed message authentication code (HMAC).
+
+ The security-related fields in the SRH are instantiated by the HMAC
+ TLV, containing:
+
+ o HMAC Key-id, 32 bits wide;
+
+ o HMAC, 256 bits wide (optional, exists only if HMAC Key-id is not
+ 0).
+
+ The HMAC field is the output of the HMAC computation (per RFC 2104
+ [RFC2104]) using a pre-shared key identified by HMAC Key-id and of
+ the text which consists of the concatenation of:
+
+ o the source IPv6 address;
+
+ o First Segment field;
+
+ o an octet of bit flags;
+
+ o HMAC Key-id;
+
+ o all addresses in the Segment List.
+
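+ A sketch of that computation (illustrative only; the OpenSSL HMAC()
+ helper with SHA-256 is used here simply as an example of an RFC 2104
+ implementation, and the HMAC Key-id is assumed to be the 4-octet value
+ carried in the HMAC TLV):
+
+     #include <stdint.h>
+     #include <string.h>
+     #include <openssl/evp.h>
+     #include <openssl/hmac.h>
+
+     static void
+     srh_compute_hmac (const uint8_t src[16], uint8_t first_segment,
+                       uint8_t flags, const uint8_t key_id[4],
+                       const uint8_t seg_list[][16], unsigned n_segments,
+                       const uint8_t * psk, int psk_len,
+                       uint8_t hmac_out[32])
+     {
+       uint8_t text[22 + 256 * 16]; /* fixed fields + up to 256 segments */
+       size_t off = 0;
+       unsigned i, md_len = 0;
+
+       memcpy (text + off, src, 16);   off += 16; /* source IPv6 address */
+       text[off++] = first_segment;               /* First Segment field */
+       text[off++] = flags;                       /* octet of bit flags  */
+       memcpy (text + off, key_id, 4); off += 4;  /* HMAC Key-id         */
+       for (i = 0; i < n_segments; i++)           /* all segment addresses */
+         {
+           memcpy (text + off, seg_list[i], 16);
+           off += 16;
+         }
+
+       /* HMAC-SHA-256 output is 256 bits, the size of the HMAC field */
+       HMAC (EVP_sha256 (), psk, psk_len, text, off, hmac_out, &md_len);
+     }
+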
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 19]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ The purpose of the HMAC TLV is to verify the validity, the integrity
+ and the authorization of the SRH itself. If an outsider of the SR
+ domain does not have access to a current pre-shared secret, then it
+ cannot compute the right HMAC field and the first SR router on the
+ path processing the SRH and configured to check the validity of the
+ HMAC will simply reject the packet.
+
+ The HMAC TLV is located at the end of the SRH simply because only the
+ router on the ingress of the SR domain needs to process it, then all
+ other SR nodes can ignore it (based on local policy) because they
+ trust the upstream router. This is to speed up forwarding operations
+ because SR routers which do not validate the SRH do not need to parse
+ the SRH until the end.
+
+ The HMAC Key-id field allows for the simultaneous existence of
+ several hash algorithms (SHA-256, SHA3-256 ... or future ones) as
+ well as pre-shared keys. The HMAC Key-id field is opaque, i.e., it
+ has neither syntax nor semantic except as an index to the right
+ combination of pre-shared key and hash algorithm and except that a
+ value of 0 means that there is no HMAC field. Having an HMAC Key-id
+ field allows for pre-shared key roll-over when two pre-shared keys
+ are supported for a while until all SR nodes converge to a fresher
+ pre-shared key. It could also allow for interoperation among
+ different SR domains if allowed by local policy and assuming a
+ collision-free HMAC Key Id allocation.
+
+ When a specific SRH is linked to a time-related service (such as
+ turbo-QoS for a 1-hour period) where the DA, Segment ID (SID) are
+ identical, then it is important to refresh the shared-secret
+ frequently as the HMAC validity period expires only when the HMAC
+ Key-id and its associated shared-secret expires.
+
+5.2.1. Selecting a hash algorithm
+
+ The HMAC field in the HMAC TLV is 256 bits wide. Therefore, the HMAC
+ MUST be based on a hash function whose output is at least 256 bits.
+ If the output of the hash function is 256 bits, then this output is
+ simply inserted in the HMAC field. If the output of the hash function
+ is larger than 256 bits, then the output value is truncated to 256 bits by
+ taking the least-significant 256 bits and inserting them in the HMAC
+ field.
+
+ SRH implementations can support multiple hash functions but MUST
+ implement SHA-2 [FIPS180-4] in its SHA-256 variant.
+
+ NOTE: SHA-1 is currently used by some early implementations for
+ quick interoperability testing; the 160-bit hash value must then be
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 20]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ right-hand padded with 96 bits set to 0. The authors understand that
+ this is not secure but is ok for limited tests.
+
+5.2.2. Performance impact of HMAC
+
+ While adding an HMAC to each and every SR packet increases the
+ security, it has a performance impact. Nevertheless, it must be
+ noted that:
+
+ o the HMAC field is used only when SRH is added by a device (such as
+ a home set-up box) which is outside of the segment routing domain.
+ If the SRH is added by a router in the trusted segment routing
+ domain, then, there is no need for an HMAC field, hence no
+ performance impact.
+
+ o when present, the HMAC field MUST only be checked and validated by
+ the first router of the segment routing domain, this router is
+ named 'validating SR router'. Downstream routers may not inspect
+ the HMAC field.
+
+ o this validating router can also have a cache of <IPv6 header +
+ SRH, HMAC field value> to improve the performance. It is not the
+ same use case as in IPsec, where the HMAC value is unique per packet;
+ in SRH, the HMAC value is unique per flow.
+
+ o Last point, hash functions such as SHA-2 have been optimized for
+ security and performance and there are multiple implementations
+ with good performance.
+
+ With the above points in mind, the performance impact of using HMAC
+ is minimized.
+
+5.2.3. Pre-shared key management
+
+ The field HMAC Key-id allows for:
+
+ o key roll-over: when there is a need to change the key (the hash
+ pre-shared secret), then multiple pre-shared keys can be used
+ simultaneously. The validating router can have a table of <HMAC
+ Key-id, pre-shared secret> for the currently active and future
+ keys.
+
+ o different algorithms: by extending the previous table to <HMAC
+ Key-id, hash function, pre-shared secret>, the validating router
+ can also support simultaneously several hash algorithms (see
+ section Section 5.2.1)
+
+ The pre-shared secret distribution can be done:
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 21]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ o in the configuration of the validating routers, either by static
+ configuration or any SDN oriented approach;
+
+ o dynamically using a trusted key distribution such as [RFC6407]
+
+ The intent of this document is NOT to define yet-another-key-
+ distribution-protocol.
+
+5.3. Deployment Models
+
+5.3.1. Nodes within the SR domain
+
+ An SR domain is defined as a set of interconnected routers where all
+ routers at the perimeter are configured to add and act on SRH. Some
+ routers inside the SR domain can also act on SRH or simply forward
+ IPv6 packets.
+
+ The routers inside an SR domain can be trusted to generate SRH and to
+ process SRH received on interfaces that are part of the SR domain.
+ These nodes MUST drop all SRH packets received on an interface that
+ is not part of the SR domain and containing an SRH whose HMAC field
+ cannot be validated by local policies. This obviously includes
+ packets with an SRH generated by a non-cooperative SR domain.
+
+ If the validation fails, then these packets MUST be dropped, ICMP
+ error messages (parameter problem) SHOULD be generated (but rate
+ limited) and SHOULD be logged.
+
+5.3.2. Nodes outside of the SR domain
+
+ Nodes outside of the SR domain cannot be trusted for physical
+ security; hence, they need to request by some trusted means (outside
+ of the scope of this document) a complete SRH for each new connection
+ (i.e. new destination address). The received SRH MUST include an
+ HMAC TLV which is computed correctly (see Section 5.2).
+
+ When an outside node sends a packet with an SRH and towards an SR
+ domain ingress node, the packet MUST contain the HMAC TLV (with a
+ Key-id and HMAC fields) and the destination address MUST be an
+ address of an SR domain ingress node.
+
+ The ingress SR router, i.e., the router with an interface address
+ equal to the destination address, MUST verify the HMAC TLV.
+
+ If the validation is successful, then the packet is simply forwarded
+ as usual for an SR packet. As long as the packet travels within the
+ SR domain, no further HMAC check needs to be done. Subsequent
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 22]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ routers in the SR domain MAY verify the HMAC TLV when they process
+ the SRH (i.e. when they are the destination).
+
+ If the validation fails, then this packet MUST be dropped, an ICMP
+ error message (parameter problem) SHOULD be generated (but rate
+ limited) and SHOULD be logged.
+
+5.3.3. SR path exposure
+
+ As the intermediate SR nodes' addresses appear in the SRH, if this
+ SRH is visible to an outsider then he/she could reuse this knowledge
+ to launch an attack on the intermediate SR nodes or get some insider
+ knowledge on the topology. This is especially applicable when the
+ path between the source node and the first SR domain ingress router
+ is on the public Internet.
+
+ The first remark is to state that 'security by obscurity' is never
+ enough; in other words, the security policy of the SR domain MUST
+ assume that the internal topology and addressing is known by the
+ attacker. A simple traceroute will also give the same information
+ (with even more information as all intermediate nodes between SID
+ will also be exposed). IPsec Encapsulating Security Payload
+ [RFC4303] cannot be used to protect the SRH as per RFC4303 the ESP
+ header must appear after any routing header (including SRH).
+
+ To prevent a user from leveraging the knowledge gained by intercepting
+ the SRH, it is recommended to apply an infrastructure Access Control List
+ (iACL) at the edge of the SR domain. This iACL will drop all packets
+ from outside the SR-domain whose destination is any address of any
+ router inside the domain. This security policy should be tuned for
+ local operations.
+
+5.3.4. Impact of BCP-38
+
+ BCP-38 [RFC2827], also known as "Network Ingress Filtering", checks
+ whether the source address of packets received on an interface is
+ valid for this interface. The use of loose source routing such as
+ SRH forces packets to follow a path which differs from the expected
+ routing. Therefore, if BCP-38 was implemented in all routers inside
+ the SR domain, then SR packets could be received by an interface
+ which is not the expected one and the packets could be dropped.
+
+ As an SR domain is usually a subset of one administrative domain, and
+ as BCP-38 is only deployed at the ingress routers of this
+ administrative domain, and as packets arriving at those ingress
+ routers have been forwarded using the normal routing
+ information, there is no reason why this ingress router should
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 23]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ drop the SRH packet based on BCP-38. Routers inside the domain
+ commonly do not apply BCP-38; so, this is not a problem.
+
+6. IANA Considerations
+
+ This document makes the following registrations in the Internet
+ Protocol Version 6 (IPv6) Parameters "Routing Type" registry
+ maintained by IANA:
+
+ Suggested Description Reference
+ Value
+ ----------------------------------------------------------
+ 4 Segment Routing Header (SRH) This document
+
+ In addition, this document requests IANA to create and maintain a new
+ Registry: "Segment Routing Header Type-Value Objects". The following
+ code-points are requested from the registry:
+
+ Registry: Segment Routing Header Type-Value Objects
+
+ Suggested Description Reference
+ Value
+ -----------------------------------------------------
+ 1 Ingress Node TLV This document
+ 2 Egress Node TLV This document
+ 3 Opaque Container TLV This document
+ 4 Padding TLV This document
+ 5 HMAC TLV This document
+
+7. Manageability Considerations
+
+ TBD
+
+8. Contributors
+
+ Dave Barach, John Leddy, John Brzozowski, Pierre Francois, Nagendra
+ Kumar, Mark Townsley, Christian Martin, Roberta Maglione, James
+ Connolly, Aloys Augustin contributed to the content of this document.
+
+9. Acknowledgements
+
+ The authors would like to thank Ole Troan, Bob Hinden, Fred Baker,
+ Brian Carpenter, Alexandru Petrescu and Punit Kumar Jaiswal for their
+ comments to this document.
+
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 24]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+10. References
+
+10.1. Normative References
+
+ [FIPS180-4]
+ National Institute of Standards and Technology, "FIPS
+ 180-4 Secure Hash Standard (SHS)", March 2012,
+ <http://csrc.nist.gov/publications/fips/fips180-4/
+ fips-180-4.pdf>.
+
+ [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
+ Requirement Levels", BCP 14, RFC 2119,
+ DOI 10.17487/RFC2119, March 1997,
+ <http://www.rfc-editor.org/info/rfc2119>.
+
+ [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6
+ (IPv6) Specification", RFC 2460, DOI 10.17487/RFC2460,
+ December 1998, <http://www.rfc-editor.org/info/rfc2460>.
+
+ [RFC4303] Kent, S., "IP Encapsulating Security Payload (ESP)",
+ RFC 4303, DOI 10.17487/RFC4303, December 2005,
+ <http://www.rfc-editor.org/info/rfc4303>.
+
+ [RFC5095] Abley, J., Savola, P., and G. Neville-Neil, "Deprecation
+ of Type 0 Routing Headers in IPv6", RFC 5095,
+ DOI 10.17487/RFC5095, December 2007,
+ <http://www.rfc-editor.org/info/rfc5095>.
+
+ [RFC6407] Weis, B., Rowles, S., and T. Hardjono, "The Group Domain
+ of Interpretation", RFC 6407, DOI 10.17487/RFC6407,
+ October 2011, <http://www.rfc-editor.org/info/rfc6407>.
+
+10.2. Informative References
+
+ [I-D.ietf-isis-segment-routing-extensions]
+ Previdi, S., Filsfils, C., Bashandy, A., Gredler, H.,
+ Litkowski, S., Decraene, B., and j. jefftant@gmail.com,
+ "IS-IS Extensions for Segment Routing", draft-ietf-isis-
+ segment-routing-extensions-09 (work in progress), October
+ 2016.
+
+ [I-D.ietf-ospf-ospfv3-segment-routing-extensions]
+ Psenak, P., Previdi, S., Filsfils, C., Gredler, H.,
+ Shakir, R., Henderickx, W., and J. Tantsura, "OSPFv3
+ Extensions for Segment Routing", draft-ietf-ospf-ospfv3-
+ segment-routing-extensions-07 (work in progress), October
+ 2016.
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 25]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ [I-D.ietf-spring-ipv6-use-cases]
+ Brzozowski, J., Leddy, J., Townsley, W., Filsfils, C., and
+ R. Maglione, "IPv6 SPRING Use Cases", draft-ietf-spring-
+ ipv6-use-cases-08 (work in progress), January 2017.
+
+ [I-D.ietf-spring-resiliency-use-cases]
+ Filsfils, C., Previdi, S., Decraene, B., and R. Shakir,
+ "Resiliency use cases in SPRING networks", draft-ietf-
+ spring-resiliency-use-cases-08 (work in progress), October
+ 2016.
+
+ [I-D.ietf-spring-segment-routing]
+ Filsfils, C., Previdi, S., Decraene, B., Litkowski, S.,
+ and R. Shakir, "Segment Routing Architecture", draft-ietf-
+ spring-segment-routing-10 (work in progress), November
+ 2016.
+
+ [I-D.ietf-spring-segment-routing-mpls]
+ Filsfils, C., Previdi, S., Bashandy, A., Decraene, B.,
+ Litkowski, S., Horneffer, M., Shakir, R.,
+ jefftant@gmail.com, j., and E. Crabbe, "Segment Routing
+ with MPLS data plane", draft-ietf-spring-segment-routing-
+ mpls-06 (work in progress), January 2017.
+
+ [RFC1940] Estrin, D., Li, T., Rekhter, Y., Varadhan, K., and D.
+ Zappala, "Source Demand Routing: Packet Format and
+ Forwarding Specification (Version 1)", RFC 1940,
+ DOI 10.17487/RFC1940, May 1996,
+ <http://www.rfc-editor.org/info/rfc1940>.
+
+ [RFC2104] Krawczyk, H., Bellare, M., and R. Canetti, "HMAC: Keyed-
+ Hashing for Message Authentication", RFC 2104,
+ DOI 10.17487/RFC2104, February 1997,
+ <http://www.rfc-editor.org/info/rfc2104>.
+
+ [RFC2827] Ferguson, P. and D. Senie, "Network Ingress Filtering:
+ Defeating Denial of Service Attacks which employ IP Source
+ Address Spoofing", BCP 38, RFC 2827, DOI 10.17487/RFC2827,
+ May 2000, <http://www.rfc-editor.org/info/rfc2827>.
+
+ [RFC4942] Davies, E., Krishnan, S., and P. Savola, "IPv6 Transition/
+ Co-existence Security Considerations", RFC 4942,
+ DOI 10.17487/RFC4942, September 2007,
+ <http://www.rfc-editor.org/info/rfc4942>.
+
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 26]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ [RFC6554] Hui, J., Vasseur, JP., Culler, D., and V. Manral, "An IPv6
+ Routing Header for Source Routes with the Routing Protocol
+ for Low-Power and Lossy Networks (RPL)", RFC 6554,
+ DOI 10.17487/RFC6554, March 2012,
+ <http://www.rfc-editor.org/info/rfc6554>.
+
+ [RFC7855] Previdi, S., Ed., Filsfils, C., Ed., Decraene, B.,
+ Litkowski, S., Horneffer, M., and R. Shakir, "Source
+ Packet Routing in Networking (SPRING) Problem Statement
+ and Requirements", RFC 7855, DOI 10.17487/RFC7855, May
+ 2016, <http://www.rfc-editor.org/info/rfc7855>.
+
+Authors' Addresses
+
+ Stefano Previdi (editor)
+ Cisco Systems, Inc.
+ Via Del Serafico, 200
+ Rome 00142
+ Italy
+
+ Email: sprevidi@cisco.com
+
+
+ Clarence Filsfils
+ Cisco Systems, Inc.
+ Brussels
+ BE
+
+ Email: cfilsfil@cisco.com
+
+
+ Brian Field
+ Comcast
+ 4100 East Dry Creek Road
+ Centennial, CO 80122
+ US
+
+ Email: Brian_Field@cable.comcast.com
+
+
+ Ida Leung
+ Rogers Communications
+ 8200 Dixie Road
+ Brampton, ON L6T 0C1
+ CA
+
+ Email: Ida.Leung@rci.rogers.com
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 27]
+
+Internet-Draft IPv6 Segment Routing Header (SRH) February 2017
+
+
+ Jen Linkova
+ Google
+ 1600 Amphitheatre Parkway
+ Mountain View, CA 94043
+ US
+
+ Email: furry@google.com
+
+
+ Ebben Aries
+ Facebook
+ US
+
+ Email: exa@fb.com
+
+
+ Tomoya Kosugi
+ NTT
+ 3-9-11, Midori-Cho Musashino-Shi,
+ Tokyo 180-8585
+ JP
+
+ Email: kosugi.tomoya@lab.ntt.co.jp
+
+
+ Eric Vyncke
+ Cisco Systems, Inc.
+ De Kleetlaann 6A
+ Diegem 1831
+ Belgium
+
+ Email: evyncke@cisco.com
+
+
+ David Lebrun
+ Universite Catholique de Louvain
+ Place Ste Barbe, 2
+ Louvain-la-Neuve, 1348
+ Belgium
+
+ Email: david.lebrun@uclouvain.be
+
+
+
+
+
+
+
+
+
+
+Previdi, et al. Expires August 5, 2017 [Page 28] \ No newline at end of file
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
new file mode 100644
index 00000000000..9e9007418b5
--- /dev/null
+++ b/src/vnet/srv6/sr.api
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \brief IPv6 SR LocalSID add/del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_del Boolean of whether it is a delete instruction
+ @param localsid_addr IPv6 address of the localsid
+    @param end_psp Boolean of whether End.PSP (penultimate segment pop) applies to this localsid
+ @param behavior Type of behavior (function) for this localsid
+ @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ @param fib_table FIB table in which we should install the localsid entry
+ @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+autoreply define sr_localsid_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_del;
+ u8 localsid_addr[16];
+ u8 end_psp;
+ u8 behavior;
+ u32 sw_if_index;
+ u32 vlan_index;
+ u32 fib_table;
+ u8 nh_addr[16];
+};
+
+/** \brief IPv6 SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param weight is the weight of the sid list. optional.
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+ @param type is the type of the SR policy. (0.Default // 1.Spray)
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param segments is a vector of IPv6 address composing the segment list
+*/
+autoreply define sr_policy_add
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 weight;
+ u8 is_encap;
+ u8 type;
+ u32 fib_table;
+ u8 n_segments;
+ u8 segments[0];
+};
+
+/** \brief IPv6 SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param segments is a vector of IPv6 address composing the segment list
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param is_encap Mode. Encapsulation or SRH insertion.
+*/
+autoreply define sr_policy_mod
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+ u32 fib_table;
+ u8 operation;
+ u32 sl_index;
+ u32 weight;
+ u8 n_segments;
+ u8 segments[0];
+};
+
+/** \brief IPv6 SR policy deletion
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param index is the index of the SR policy
+*/
+autoreply define sr_policy_del
+{
+ u32 client_index;
+ u32 context;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+};
+
+/** \brief IPv6 SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_del Boolean of whether it is a delete instruction
+ @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ @param sr_policy is the index of the SR Policy (alt to bsid)
+ @param table_id is the VRF where to install the FIB entry for the BSID
+ @param prefix is the IPv4/v6 address for L3 traffic type
+ @param mask_width is the mask for L3 traffic type
+ @param sw_if_index is the incoming interface for L2 traffic
+ @param traffic_type describes the type of traffic
+*/
+autoreply define sr_steering_add_del
+{
+ u32 client_index;
+ u32 context;
+ u8 is_del;
+ u8 bsid_addr[16];
+ u32 sr_policy_index;
+ u32 table_id;
+ u8 prefix_addr[16];
+ u32 mask_width;
+ u32 sw_if_index;
+ u8 traffic_type;
+};
+
+/** \brief Dump the list of SR LocalSIDs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+/**define sr_localsids_dump
+{
+ u32 client_index;
+ u32 context;
+};*/
+
+/** \brief Details about a single SR LocalSID
+ @param context - returned sender context, to match reply w/ request
+ @param localsid_addr IPv6 address of the localsid
+ @param behavior Type of behavior (function) for this localsid
+    @param end_psp Boolean of whether End.PSP (penultimate segment pop) applies to this localsid
+ @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ @param fib_table FIB table in which we should install the localsid entry
+ @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+*/
+/**manual_endian define sr_localsid_details
+{
+ u32 context;
+ u8 localsid_addr[16];
+ u8 behavior;
+ u8 end_psp;
+ u32 sw_if_index;
+ u32 vlan_index;
+ u32 fib_table;
+ u8 nh_addr[16];
+};*/
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr.c b/src/vnet/srv6/sr.c
new file mode 100755
index 00000000000..eb4f09e7aae
--- /dev/null
+++ b/src/vnet/srv6/sr.c
@@ -0,0 +1,57 @@
+/*
+ * sr.c: ipv6 segment routing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing initialization
+ *
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+ip6_sr_main_t sr_main;
+
+/**
+ * @brief no-op lock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/**
+ * @brief no-op unlock function.
+ * The lifetime of the SR entry is managed by the control plane
+ */
+void
+sr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
new file mode 100755
index 00000000000..2014a23edae
--- /dev/null
+++ b/src/vnet/srv6/sr.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srv6_h
+#define included_vnet_srv6_h
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#define IPv6_DEFAULT_HEADER_LENGTH 40
+#define IPv6_DEFAULT_HOP_LIMIT 64
+#define IPv6_DEFAULT_MAX_MASK_WIDTH 128
+
+#define SR_BEHAVIOR_END 1
+#define SR_BEHAVIOR_X 2
+#define SR_BEHAVIOR_D_FIRST 3 /* Unused. Separator in between regular and D */
+#define SR_BEHAVIOR_DX2 4
+#define SR_BEHAVIOR_DX6 5
+#define SR_BEHAVIOR_DX4 6
+#define SR_BEHAVIOR_DT6 7
+#define SR_BEHAVIOR_DT4 8
+#define SR_BEHAVIOR_LAST 9 /* Must always be the last one */
+
+#define SR_STEER_L2 2
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+#define SR_FUNCTION_SIZE 4
+#define SR_ARGUMENT_SIZE 4
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+ ip6_address_t *segments; /**< SIDs (key) */
+
+ u32 weight; /**< SID list weight (wECMP / UCMP) */
+
+ u8 *rewrite; /**< Precomputed rewrite header */
+ u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */
+
+ dpo_id_t bsid_dpo; /**< DPO for Encaps/Insert for BSID */
+ dpo_id_t ip6_dpo; /**< DPO for Encaps/Insert IPv6 */
+  dpo_id_t ip4_dpo;			/**< DPO for Encaps IPv4 */
+} ip6_sr_sl_t;
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+/**
+ * @brief SR Policy
+ */
+typedef struct
+{
+ u32 *segments_lists; /**< SID lists indexes (vector) */
+
+ ip6_address_t bsid; /**< BindingSID (key) */
+
+ u8 type; /**< Type (default is 0) */
+ /* SR Policy specific DPO */
+ /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */
+ /* IF Type = SPRAY then Spray DPO with all SID lists */
+ dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */
+  dpo_id_t ip4_dpo;			/**< SR Policy specific DPO - IPv4 */
+  dpo_id_t ip6_dpo;			/**< SR Policy specific DPO - IPv6 */
+
+ u32 fib_table; /**< FIB table */
+
+ u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */
+} ip6_sr_policy_t;
+
+/**
+ * @brief SR LocalSID
+ */
+typedef struct
+{
+ ip6_address_t localsid; /**< LocalSID IPv6 address */
+
+ char end_psp; /**< Combined with End.PSP? */
+
+ u16 behavior; /**< Behavior associated to this localsid */
+
+ union
+ {
+ u32 sw_if_index; /**< xconnect only */
+ u32 vrf_index; /**< vrf only */
+ };
+
+ u32 fib_table; /**< FIB table where localsid is registered */
+
+ u32 vlan_index; /**< VLAN tag (not an index) */
+
+ ip46_address_t next_hop; /**< Next_hop for xconnect usage only */
+
+ u32 nh_adj; /**< Next_adj for xconnect usage only */
+
+ void *plugin_mem; /**< Memory to be used by the plugin callback functions */
+} ip6_sr_localsid_t;
+
+typedef int (sr_plugin_callback_t) (ip6_sr_localsid_t * localsid);
+
+/**
+ * @brief SR LocalSID behavior registration
+ */
+typedef struct
+{
+ u16 sr_localsid_function_number; /**< SR LocalSID plugin function (>SR_BEHAVIOR_LAST) */
+
+ u8 *function_name; /**< Function name. (key). */
+
+ u8 *keyword_str; /**< Behavior keyword (i.e. End.X) */
+
+ u8 *def_str; /**< Behavior definition (i.e. Endpoint with cross-connect) */
+
+ u8 *params_str; /**< Behavior parameters (i.e. <oif> <IP46next_hop>) */
+
+ dpo_type_t dpo; /**< DPO type registration */
+
+ format_function_t *ls_format; /**< LocalSID format function */
+
+ unformat_function_t *ls_unformat; /**< LocalSID unformat function */
+
+ sr_plugin_callback_t *creation; /**< Function within plugin that will be called after localsid creation*/
+
+ sr_plugin_callback_t *removal; /**< Function within plugin that will be called before localsid removal */
+} sr_localsid_fn_registration_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ * L2 is sw_if_index + vlan
+ */
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ip46_address_t prefix; /**< IP address of the prefix */
+ u32 mask_width; /**< Mask width of the prefix */
+ u32 fib_table; /**< VRF of the prefix */
+ } l3;
+ struct
+ {
+ u32 sw_if_index; /**< Incoming software interface */
+ } l2;
+ };
+ u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
+ u8 padding[3];
+} sr_steering_key_t;
+
+typedef struct
+{
+ sr_steering_key_t classify; /**< Traffic classification */
+ u32 sr_policy; /**< SR Policy index */
+} ip6_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+ /* L2-input -> SR rewrite next index */
+ u32 l2_sr_policy_rewrite_index;
+
+ /* SR SID lists */
+ ip6_sr_sl_t *sid_lists;
+
+ /* SRv6 policies */
+ ip6_sr_policy_t *sr_policies;
+
+ /* Hash table mapping BindingSID to SRv6 policy */
+ mhash_t sr_policies_index_hash;
+
+ /* Pool of SR localsid instances */
+ ip6_sr_localsid_t *localsids;
+
+ /* Hash table mapping LOC:FUNC to SR LocalSID instance */
+ mhash_t sr_localsids_index_hash;
+
+ /* Pool of SR steer policies instances */
+ ip6_sr_steering_policy_t *steer_policies;
+
+ /* Hash table mapping steering rules to SR steer instance */
+ mhash_t sr_steer_policies_hash;
+
+ /* L2 steering ifaces - sr_policies */
+ u32 *sw_iface_sr_policies;
+
+ /* Spray DPO */
+ dpo_type_t sr_pr_spray_dpo_type;
+
+ /* Plugin functions */
+ sr_localsid_fn_registration_t *plugin_functions;
+
+ /* Find plugin function by name */
+ uword *plugin_functions_by_key;
+
+ /* Counters */
+ vlib_combined_counter_main_t sr_ls_valid_counters;
+ vlib_combined_counter_main_t sr_ls_invalid_counters;
+
+ /* SR Policies FIBs */
+ u32 fib_table_ip6;
+ u32 fib_table_ip4;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_sr_main_t;
+
+extern ip6_sr_main_t sr_main;
+
+extern vlib_node_registration_t sr_policy_rewrite_encaps_node;
+extern vlib_node_registration_t sr_policy_rewrite_insert_node;
+extern vlib_node_registration_t sr_localsid_node;
+extern vlib_node_registration_t sr_localsid_d_node;
+
+extern void sr_dpo_lock (dpo_id_t * dpo);
+extern void sr_dpo_unlock (dpo_id_t * dpo);
+
+extern int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+ u8 * keyword_str, u8 * def_str,
+ u8 * params_str, dpo_type_t * dpo,
+ format_function_t * ls_format,
+ unformat_function_t * ls_unformat,
+ sr_plugin_callback_t * creation_fn,
+ sr_plugin_callback_t * removal_fn);
+
+extern int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+ u32 weight, u8 behavior, u32 fib_table, u8 is_encap);
+extern int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t * segments, u32 sl_index,
+ u32 weight);
+extern int sr_policy_del (ip6_address_t * bsid, u32 index);
+
+extern int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+ char end_psp, u8 behavior, u32 sw_if_index,
+ u32 vlan_index, u32 fib_table, ip46_address_t * nh_addr,
+ void *ls_plugin_mem);
+
+extern int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+ u32 table_id, ip46_address_t * prefix, u32 mask_width,
+ u32 sw_if_index, u8 traffic_type);
+
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
+ */
+static inline u8 *
+ip6_sr_compute_rewrite_string_insert (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+  header_length = sizeof (ip6_sr_header_t);
+  header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
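+  /* Note: (vec_len (sl) + 1) reserves one extra segment slot; on SRH
+   * insertion the original IPv6 destination address is expected to end up
+   * in segments[0] (it is not written by this helper). For example, with
+   * the 8-byte fixed part of the SRH, a 3-SID list yields a rewrite of
+   * 8 + 4 * 16 = 72 bytes and srh->length = 8.
+   */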
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl);
+ srh->first_segment = vec_len (sl);
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl);
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
+
+
+#endif /* included_vnet_srv6_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
new file mode 100644
index 00000000000..925b50a1689
--- /dev/null
+++ b/src/vnet/srv6/sr_api.c
@@ -0,0 +1,244 @@
+/*
+ *------------------------------------------------------------------
+ * sr_api.c - ipv6 segment routing api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SR_LOCALSID_ADD_DEL, sr_localsid_add_del) \
+_(SR_POLICY_DEL, sr_policy_del) \
+_(SR_STEERING_ADD_DEL, sr_steering_add_del)
+//_(SR_LOCALSIDS, sr_localsids_dump)
+//_(SR_LOCALSID_BEHAVIORS, sr_localsid_behaviors_dump)
+
+static void vl_api_sr_localsid_add_del_t_handler
+ (vl_api_sr_localsid_add_del_t * mp)
+{
+ vl_api_sr_localsid_add_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int sr_cli_localsid (char is_del, ip6_address_t *localsid_addr,
+ * char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index, u32 fib_table,
+ * ip46_address_t *nh_addr, void *ls_plugin_mem)
+ */
+ rv = sr_cli_localsid (mp->is_del,
+ (ip6_address_t *) & mp->localsid_addr,
+ mp->end_psp,
+ mp->behavior,
+ ntohl (mp->sw_if_index),
+ ntohl (mp->vlan_index),
+ ntohl (mp->fib_table),
+ (ip46_address_t *) & mp->nh_addr, NULL);
+
+ REPLY_MACRO (VL_API_SR_LOCALSID_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
+{
+ vl_api_sr_policy_add_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+/*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
+ */
+ int rv = 0;
+ rv = sr_policy_add ((ip6_address_t *) & mp->bsid_addr,
+ segments,
+ ntohl (mp->weight),
+ mp->type, ntohl (mp->fib_table), mp->is_encap);
+
+ REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
+{
+ vl_api_sr_policy_mod_reply_t *rmp;
+
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t *this_address = (ip6_address_t *) mp->segments;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ clib_memcpy (seg->as_u8, this_address->as_u8, sizeof (*this_address));
+ this_address++;
+ }
+
+ int rv = 0;
+/*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv = sr_policy_mod ((ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table),
+ mp->operation,
+ segments, ntohl (mp->sl_index), ntohl (mp->weight));
+
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_del_t_handler (vl_api_sr_policy_del_t * mp)
+{
+ vl_api_sr_policy_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int
+ * sr_policy_del (ip6_address_t *bsid, u32 index)
+ */
+ rv = sr_policy_del ((ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index));
+
+ REPLY_MACRO (VL_API_SR_POLICY_DEL_REPLY);
+}
+
+static void vl_api_sr_steering_add_del_t_handler
+ (vl_api_sr_steering_add_del_t * mp)
+{
+ vl_api_sr_steering_add_del_reply_t *rmp;
+ int rv = 0;
+/*
+ * int
+ * sr_steering_policy(int is_del, ip6_address_t *bsid, u32 sr_policy_index,
+ * u32 table_id, ip46_address_t *prefix, u32 mask_width, u32 sw_if_index,
+ * u8 traffic_type)
+ */
+ rv = sr_steering_policy (mp->is_del,
+ (ip6_address_t *) & mp->bsid_addr,
+ ntohl (mp->sr_policy_index),
+ ntohl (mp->table_id),
+ (ip46_address_t *) & mp->prefix_addr,
+ ntohl (mp->mask_width),
+ ntohl (mp->sw_if_index), mp->traffic_type);
+
+ REPLY_MACRO (VL_API_SR_STEERING_ADD_DEL_REPLY);
+}
+
+/*
+ * sr_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has alread mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_sr;
+#undef _
+}
+
+static clib_error_t *
+sr_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Manually register the sr policy add msg, so we trace
+ * enough bytes to capture a typical segment list
+ */
+ vl_msg_api_set_handlers (VL_API_SR_POLICY_ADD,
+ "sr_policy_add",
+ vl_api_sr_policy_add_t_handler,
+ vl_noop_handler,
+ vl_api_sr_policy_add_t_endian,
+ vl_api_sr_policy_add_t_print, 256, 1);
+
+ /*
+ * Manually register the sr policy mod msg, so we trace
+ * enough bytes to capture a typical segment list
+ */
+ vl_msg_api_set_handlers (VL_API_SR_POLICY_MOD,
+ "sr_policy_mod",
+ vl_api_sr_policy_mod_t_handler,
+ vl_noop_handler,
+ vl_api_sr_policy_mod_t_endian,
+ vl_api_sr_policy_mod_t_print, 256, 1);
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srv6/sr_doc.md b/src/vnet/srv6/sr_doc.md
new file mode 100644
index 00000000000..5cdfc9066b5
--- /dev/null
+++ b/src/vnet/srv6/sr_doc.md
@@ -0,0 +1,55 @@
+# SRv6: Segment Routing for IPv6 {#srv6_doc}
+
+This is a memo intended to contain the documentation of the VPP SRv6 implementation.
+Everything that is not directly obvious should be explained here.
+For any feedback on content that should be explained, please use mailto:pcamaril@cisco.com
+
+## Segment Routing
+
+Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
+
+Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SRv6 documentation.**
+
+## Segment Routing terminology
+
+* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
+* SegmentID (SID): is an IPv6 address.
+* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
+* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
+* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
+* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to such packet.
+
+## SRv6 Features in VPP
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/">SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*)</a> defines the SRv6 architecture.
+
+VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
+
+For further information and how to configure each specific function: @subpage srv6_localsid_doc
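+
+As a quick illustration, the `sr localsid` CLI added by this patch accepts commands of the following shape (the SIDs and the interface name below are purely illustrative):
+
+    sr localsid address cafe::1 behavior end
+    sr localsid address cafe::2 behavior end.dx6 GigabitEthernet0/0/0 2001:db8::1
+    sr localsid del address cafe::1
+    show sr localsids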
+
+
+The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
+
+VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
+
+For further information on how to create SR Policies: @subpage srv6_policy_doc
+
+For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
+
+## SRv6 LocalSID development framework
+
+One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with a specific function.
+
+However, the real enabler of network programmability is to allow any developer to **easily** create their own SRv6 LocalSID functions. That is why we have added API calls so that any developer can code their own SRv6 LocalSID behaviors as plugins and add them to the running SRv6 code.
+
+The principle is that the developer only codes the behavior (the graph node); all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
+
+For more information please refer to: @subpage srv6_plugin_doc
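+
+As a rough sketch of what such a plugin looks like, the registration call below uses the `sr_localsid_register_function()` prototype declared in `sr.h`; every `my_*` symbol is a hypothetical placeholder that the plugin itself would have to provide:
+
+    #include <vnet/vnet.h>
+    #include <vnet/srv6/sr.h>
+
+    /* Hypothetical plugin-provided items: a DPO type registered by the
+     * plugin, format/unformat functions for the behavior parameters, and
+     * the creation/removal callbacks invoked by the SRv6 code. */
+    extern dpo_type_t my_dpo_type;
+    extern format_function_t my_ls_format;
+    extern unformat_function_t my_ls_unformat;
+    extern int my_creation_fn (ip6_sr_localsid_t * localsid);
+    extern int my_removal_fn (ip6_sr_localsid_t * localsid);
+
+    static clib_error_t *
+    my_srv6_plugin_init (vlib_main_t * vm)
+    {
+      /* Register the new behavior keyword with the SRv6 code. */
+      int behavior = sr_localsid_register_function (vm, (u8 *) "my_behavior",
+                                                    (u8 *) "end.my",
+                                                    (u8 *) "Endpoint with custom processing",
+                                                    (u8 *) "<no parameters>",
+                                                    &my_dpo_type,
+                                                    my_ls_format,
+                                                    my_ls_unformat,
+                                                    my_creation_fn,
+                                                    my_removal_fn);
+      (void) behavior;
+      return 0;
+    }
+
+    VLIB_INIT_FUNCTION (my_srv6_plugin_init);
+
+After this, `sr localsid address <sid> behavior end.my` would be parsed through the plugin's unformat function, and packets matching the LocalSID would be handed to the graph node behind `my_dpo_type`.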
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
new file mode 100755
index 00000000000..bdc66386f32
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.c
@@ -0,0 +1,1492 @@
+/*
+ * sr_localsid.c: ipv6 segment routing Endpoint behaviors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Processing of packets with a SRH
+ *
+ * CLI to define new Segment Routing End processing functions.
+ * Graph node to support such functions.
+ *
+ * Each function associates an SRv6 segment (IPv6 address) with a specific
+ * Segment Routing function.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Dynamically added SR localsid DPO type
+ */
+static dpo_type_t sr_localsid_dpo_type;
+static dpo_type_t sr_localsid_d_dpo_type;
+
+/**
+ * @brief SR localsid add/del
+ *
+ * Function to add or delete SR LocalSIDs.
+ *
+ * @param is_del Boolean of whether it is a delete instruction
+ * @param localsid_addr IPv6 address of the localsid
+ * @param end_psp Boolean of whether End.PSP (penultimate segment pop) applies to this localsid
+ * @param behavior Type of behavior (function) for this localsid
+ * @param sw_if_index Only for L2/L3 xconnect. OIF. In VRF variant the fib_table.
+ * @param vlan_index Only for L2 xconnect. Outgoing VLAN tag.
+ * @param fib_table FIB table in which we should install the localsid entry
+ * @param nh_addr Next Hop IPv4/IPv6 address. Only for L2/L3 xconnect.
+ *
+ * @return 0 on success, error otherwise.
+ */
+int
+sr_cli_localsid (char is_del, ip6_address_t * localsid_addr,
+ char end_psp, u8 behavior, u32 sw_if_index, u32 vlan_index,
+ u32 fib_table, ip46_address_t * nh_addr, void *ls_plugin_mem)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ uword *p;
+ int rv;
+
+ ip6_sr_localsid_t *ls = 0;
+
+ dpo_id_t dpo = DPO_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_localsids_index_hash, localsid_addr);
+
+ if (p)
+ {
+ if (is_del)
+ {
+ /* Retrieve localsid */
+ ls = pool_elt_at_index (sm->localsids, p[0]);
+ /* Delete FIB entry */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *localsid_addr,
+ }
+ };
+
+ fib_table_entry_delete (fib_table_find (FIB_PROTOCOL_IP6,
+ fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* In case it is a Xconnect iface remove the (OIF, NHOP) adj */
+ if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX6
+ || ls->behavior == SR_BEHAVIOR_DX4)
+ adj_unlock (ls->nh_adj);
+
+ if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin = 0;
+ plugin = pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+
+ /* Callback plugin removal function */
+ rv = plugin->removal (ls);
+ }
+
+ /* Delete localsid registry */
+ pool_put (sm->localsids, ls);
+ mhash_unset (&sm->sr_localsids_index_hash, localsid_addr, NULL);
+ return 1;
+ }
+ else /* create with function already existing; complain */
+ return -1;
+ }
+ else
+ /* delete; localsid does not exist; complain */
+ if (is_del)
+ return -2;
+
+ /* Check whether there exists a FIB entry with such address */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ };
+
+ pfx.fp_addr.as_u64[0] = localsid_addr->as_u64[0];
+ pfx.fp_addr.as_u64[1] = localsid_addr->as_u64[1];
+
+ /* Lookup the FIB index associated to the table id provided */
+ u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, fib_table);
+ if (fib_index == ~0)
+ return -3;
+
+ /* Lookup the localsid in such FIB table */
+ fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ return -4; //There is an entry for such address (the localsid addr)
+
+ /* Create a new localsid registry */
+ pool_get (sm->localsids, ls);
+ memset (ls, 0, sizeof (*ls));
+
+ clib_memcpy (&ls->localsid, localsid_addr, sizeof (ip6_address_t));
+ ls->end_psp = end_psp;
+ ls->behavior = behavior;
+ ls->nh_adj = (u32) ~ 0;
+ ls->fib_table = fib_table;
+ switch (behavior)
+ {
+ case SR_BEHAVIOR_END:
+ break;
+ case SR_BEHAVIOR_X:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+ break;
+ case SR_BEHAVIOR_DX4:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip4, &nh_addr->ip4, sizeof (ip4_address_t));
+ break;
+ case SR_BEHAVIOR_DX6:
+ ls->sw_if_index = sw_if_index;
+ clib_memcpy (&ls->next_hop.ip6, &nh_addr->ip6, sizeof (ip6_address_t));
+ break;
+ case SR_BEHAVIOR_DT6:
+ ls->vrf_index = sw_if_index;
+ break;
+ case SR_BEHAVIOR_DX2:
+ ls->sw_if_index = sw_if_index;
+ ls->vlan_index = vlan_index;
+ break;
+ }
+
+ /* Figure out the adjacency magic for Xconnect variants */
+ if (ls->behavior == SR_BEHAVIOR_X || ls->behavior == SR_BEHAVIOR_DX4
+ || ls->behavior == SR_BEHAVIOR_DX6)
+ {
+ adj_index_t nh_adj_index = ADJ_INDEX_INVALID;
+
+ /* Retrieve the adjacency corresponding to the (OIF, next_hop) */
+ if (ls->behavior == SR_BEHAVIOR_DX6 || ls->behavior == SR_BEHAVIOR_X)
+ nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
+ nh_addr, sw_if_index);
+
+ else if (ls->behavior == SR_BEHAVIOR_DX4)
+ nh_adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
+ nh_addr, sw_if_index);
+
+ /* Check for ADJ creation error. If so panic */
+ if (nh_adj_index == ADJ_INDEX_INVALID)
+ {
+ pool_put (sm->localsids, ls);
+ return -5;
+ }
+
+ ls->nh_adj = nh_adj_index;
+ }
+
+ /* Set DPO */
+ if (ls->behavior == SR_BEHAVIOR_END || ls->behavior == SR_BEHAVIOR_X)
+ dpo_set (&dpo, sr_localsid_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+ else if (ls->behavior > SR_BEHAVIOR_D_FIRST
+ && ls->behavior < SR_BEHAVIOR_LAST)
+ dpo_set (&dpo, sr_localsid_d_dpo_type, DPO_PROTO_IP6, ls - sm->localsids);
+ else if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin = 0;
+ plugin = pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+ /* Copy the unformat memory result */
+ ls->plugin_mem = ls_plugin_mem;
+ /* Callback plugin creation function */
+ rv = plugin->creation (ls);
+ if (rv)
+ {
+ pool_put (sm->localsids, ls);
+ return -6;
+ }
+ dpo_set (&dpo, plugin->dpo, DPO_PROTO_IP6, ls - sm->localsids);
+ }
+
+ /* Set hash key for searching localsid by address */
+ mhash_set (&sm->sr_localsids_index_hash, localsid_addr, ls - sm->localsids,
+ NULL);
+
+ fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ dpo_reset (&dpo);
+
+ /* Set counter to zero */
+ vlib_validate_combined_counter (&(sm->sr_ls_valid_counters),
+ ls - sm->localsids);
+ vlib_validate_combined_counter (&(sm->sr_ls_invalid_counters),
+ ls - sm->localsids);
+
+ vlib_zero_combined_counter (&(sm->sr_ls_valid_counters),
+ ls - sm->localsids);
+ vlib_zero_combined_counter (&(sm->sr_ls_invalid_counters),
+ ls - sm->localsids);
+
+ return 0;
+}
+
+/**
+ * @brief SR LocalSID CLI function.
+ *
+ * @see sr_cli_localsid
+ */
+static clib_error_t *
+sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_sr_main_t *sm = &sr_main;
+ u32 sw_if_index = (u32) ~ 0, vlan_index = (u32) ~ 0, fib_index = 0;
+ int is_del = 0;
+ int end_psp = 0;
+ ip6_address_t resulting_address;
+ ip46_address_t next_hop;
+ char address_set = 0;
+ char behavior = 0;
+ void *ls_plugin_mem = 0;
+
+ int rv;
+
+ memset (&resulting_address, 0, sizeof (ip6_address_t));
+ ip46_address_reset (&next_hop);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!address_set
+ && unformat (input, "address %U", unformat_ip6_address,
+ &resulting_address))
+ address_set = 1;
+ else if (!address_set
+ && unformat (input, "addr %U", unformat_ip6_address,
+ &resulting_address))
+ address_set = 1;
+ else if (unformat (input, "fib-table %u", &fib_index));
+ else if (vlan_index == (u32) ~ 0
+ && unformat (input, "vlan %u", &vlan_index));
+ else if (!behavior && unformat (input, "behavior"))
+ {
+ if (unformat (input, "end.x %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &next_hop.ip6))
+ behavior = SR_BEHAVIOR_X;
+ else if (unformat (input, "end.dx6 %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip6_address, &next_hop.ip6))
+ behavior = SR_BEHAVIOR_DX6;
+ else if (unformat (input, "end.dx4 %U %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index,
+ unformat_ip4_address, &next_hop.ip4))
+ behavior = SR_BEHAVIOR_DX4;
+ else if (unformat (input, "end.dx2 %U",
+ unformat_vnet_sw_interface, vnm, &sw_if_index))
+ behavior = SR_BEHAVIOR_DX2;
+ else if (unformat (input, "end.dt6 %u", &sw_if_index))
+ behavior = SR_BEHAVIOR_DT6;
+ else if (unformat (input, "end.dt4 %u", &sw_if_index))
+ behavior = SR_BEHAVIOR_DT4;
+ else
+ {
+ /* Loop over all the plugin behavior format functions */
+ sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_localsid_fn_registration_t **plugin_it = 0;
+
+ /* Create a vector out of the plugin pool as recommended */
+ /* *INDENT-OFF* */
+ pool_foreach (plugin, sm->plugin_functions,
+ {
+ vec_add1 (vec_plugins, plugin);
+ });
+ /* *INDENT-ON* */
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (unformat
+ (input, "%U", (*plugin_it)->ls_unformat, &ls_plugin_mem))
+ {
+ behavior = (*plugin_it)->sr_localsid_function_number;
+ break;
+ }
+ }
+ }
+
+ if (!behavior)
+ {
+ if (unformat (input, "end"))
+ behavior = SR_BEHAVIOR_END;
+ else
+ break;
+ }
+ }
+ else if (!end_psp && unformat (input, "psp"))
+ end_psp = 1;
+ else
+ break;
+ }
+
+ if (!behavior && end_psp)
+ behavior = SR_BEHAVIOR_END;
+
+ if (!address_set)
+ return clib_error_return (0,
+ "Error: SRv6 LocalSID address is mandatory.");
+ if (!is_del && !behavior)
+ return clib_error_return (0,
+ "Error: SRv6 LocalSID behavior is mandatory.");
+ if (vlan_index != (u32) ~ 0)
+ return clib_error_return (0,
+			      "Error: SRv6 End.DX2 with VLAN tag rewrite is not yet supported.");
+ if (end_psp && !(behavior == SR_BEHAVIOR_END || behavior == SR_BEHAVIOR_X))
+ return clib_error_return (0,
+ "Error: SRv6 PSP only compatible with End and End.X");
+
+ rv = sr_cli_localsid (is_del, &resulting_address, end_psp, behavior,
+ sw_if_index, vlan_index, fib_index, &next_hop,
+ ls_plugin_mem);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0,
+ "Identical localsid already exists. Requested localsid not created.");
+ case -2:
+ return clib_error_return (0,
+ "The requested localsid could not be deleted. SR localsid not found");
+ case -3:
+ return clib_error_return (0, "FIB table %u does not exist", fib_index);
+ case -4:
+      return clib_error_return (0, "There is already one FIB entry for the "
+				"requested localsid that is not segment routing related");
+ case -5:
+ return clib_error_return (0,
+ "Could not create ARP/ND entry for such next_hop. Internal error.");
+ case -6:
+ return clib_error_return (0,
+ "Error on the plugin based localsid creation.");
+ default:
+ return clib_error_return (0, "BUG: sr localsid returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_localsid_command, static) = {
+ .path = "sr localsid",
+  .short_help = "sr localsid (del) address XX:XX::YY:YY "
+    "(fib-table 8) behavior STRING",
+  .long_help =
+    "Create an SR LocalSID and bind it to a particular behavior\n"
+ "Arguments:\n"
+ "\tlocalSID IPv6_addr(128b) LocalSID IPv6 address\n"
+ "\t(fib-table X) Optional. VRF where to install SRv6 localsid\n"
+ "\tbehavior STRING Specifies the behavior\n"
+ "\n\tBehaviors:\n"
+ "\tEnd\t-> Endpoint.\n"
+    "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+ "\t\tParameters: '<iface>'\n"
+ "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+ "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+ "\t\tParameters: '<ip6_fib_table>'\n"
+ "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+ "\t\tParameters: '<ip4_fib_table>'\n",
+ .function = sr_cli_localsid_command_fn,
+};
+/* *INDENT-ON* */
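+
+/*
+ * Example invocations accepted by the parser above (addresses, table ids
+ * and interface names are hypothetical):
+ *   sr localsid address cafe::100 fib-table 8 behavior end.dt6 8
+ *   sr localsid address cafe::101 behavior end.x GigabitEthernet0/0/0 2001:db8::1 psp
+ *   sr localsid del address cafe::100 fib-table 8
+ */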
+
+/**
+ * @brief CLI function to 'show' all SR LocalSIDs on console.
+ */
+static clib_error_t *
+show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_localsid_t **localsid_list = 0;
+ ip6_sr_localsid_t *ls;
+ int i;
+
+ vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
+ vlib_cli_output (vm, "=========================");
+ /* *INDENT-OFF* */
+ pool_foreach (ls, sm->localsids, ({ vec_add1 (localsid_list, ls); }));
+ /* *INDENT-ON* */
+ for (i = 0; i < vec_len (localsid_list); i++)
+ {
+ ls = localsid_list[i];
+ switch (ls->behavior)
+ {
+ case SR_BEHAVIOR_END:
+ vlib_cli_output (vm, "\tAddress: \t%U\n\tBehavior: \tEnd",
+ format_ip6_address, &ls->localsid);
+ break;
+ case SR_BEHAVIOR_X:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tX (Endpoint with Layer-3 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip6_address, &ls->next_hop.ip6);
+ break;
+ case SR_BEHAVIOR_DX4:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDX4 (Endpoint with decapsulation and IPv4 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip4_address, &ls->next_hop.ip4);
+ break;
+ case SR_BEHAVIOR_DX6:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDX6 (Endpoint with decapsulation and IPv6 cross-connect)"
+ "\n\tIface: \t%U\n\tNext hop: \t%U",
+ format_ip6_address, &ls->localsid,
+ format_vnet_sw_if_index_name, vnm, ls->sw_if_index,
+ format_ip6_address, &ls->next_hop.ip6);
+ break;
+ case SR_BEHAVIOR_DX2:
+ if (ls->vlan_index == (u32) ~ 0)
+ vlib_cli_output (vm,
+			     "\tAddress: \t%U\n\tBehavior: \tDX2 (Endpoint with decapsulation and Layer-2 cross-connect)"
+ "\n\tIface: \t%U", format_ip6_address,
+ &ls->localsid, format_vnet_sw_if_index_name, vnm,
+ ls->sw_if_index);
+ else
+ vlib_cli_output (vm,
+ "Unsupported yet. (DX2 with egress VLAN rewrite)");
+ break;
+ case SR_BEHAVIOR_DT6:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDT6 (Endpoint with decapsulation and specific IPv6 table lookup)"
+ "\n\tTable: %u", format_ip6_address, &ls->localsid,
+ ls->fib_table);
+ break;
+ case SR_BEHAVIOR_DT4:
+ vlib_cli_output (vm,
+ "\tAddress: \t%U\n\tBehavior: \tDT4 (Endpoint with decapsulation and specific IPv4 table lookup)"
+ "\n\tTable: \t%u", format_ip6_address,
+ &ls->localsid, ls->fib_table);
+ break;
+ default:
+ if (ls->behavior >= SR_BEHAVIOR_LAST)
+ {
+ sr_localsid_fn_registration_t *plugin =
+ pool_elt_at_index (sm->plugin_functions,
+ ls->behavior - SR_BEHAVIOR_LAST);
+
+ vlib_cli_output (vm, "\tAddress: \t%U\n"
+ "\tBehavior: \t%s (%s)\n\t%U",
+ format_ip6_address, &ls->localsid,
+ plugin->keyword_str, plugin->def_str,
+ plugin->ls_format, ls->plugin_mem);
+ }
+ else
+ //Should never get here...
+ vlib_cli_output (vm, "Internal error");
+ break;
+ }
+ if (ls->end_psp)
+ vlib_cli_output (vm, "\tPSP: \tTrue\n");
+
+ /* Print counters */
+ vlib_counter_t valid, invalid;
+ vlib_get_combined_counter (&(sm->sr_ls_valid_counters), i, &valid);
+ vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), i, &invalid);
+ vlib_cli_output (vm, "\tGood traffic: \t[%Ld packets : %Ld bytes]\n",
+ valid.packets, valid.bytes);
+ vlib_cli_output (vm, "\tBad traffic: \t[%Ld packets : %Ld bytes]\n",
+ invalid.packets, invalid.bytes);
+ vlib_cli_output (vm, "--------------------");
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
+ .path = "show sr localsids",
+ .short_help = "show sr localsids",
+ .function = show_sr_localsid_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Function to 'clear' ALL SR localsid counters
+ */
+static clib_error_t *
+clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ vlib_clear_combined_counters (&(sm->sr_ls_valid_counters));
+ vlib_clear_combined_counters (&(sm->sr_ls_invalid_counters));
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
+ .path = "clear sr localsid counters",
+ .short_help = "clear sr localsid counters",
+ .function = clear_sr_localsid_counters_command_fn,
+};
+/* *INDENT-ON* */
+
+/************************ SR LocalSID graphs node ****************************/
+/**
+ * @brief SR localsid node trace
+ */
+typedef struct
+{
+ u32 localsid_index;
+ ip6_address_t src, out_dst;
+ u8 sr[256];
+ u8 num_segments;
+ u8 segments_left;
+ //With SRv6 header update include flags here.
+} sr_localsid_trace_t;
+
+#define foreach_sr_localsid_error \
+_(NO_INNER_HEADER, "(SR-Error) No inner IP header") \
+_(NO_MORE_SEGMENTS, "(SR-Error) No more segments") \
+_(NO_SRH, "(SR-Error) No SR header") \
+_(NO_PSP, "(SR-Error) PSP Not available (segments left > 0)") \
+_(NOT_LS, "(SR-Error) Decaps not available (segments left > 0)") \
+_(L2, "(SR-Error) SRv6 decapsulated a L2 frame without dest")
+
+typedef enum
+{
+#define _(sym,str) SR_LOCALSID_ERROR_##sym,
+ foreach_sr_localsid_error
+#undef _
+ SR_LOCALSID_N_ERROR,
+} sr_localsid_error_t;
+
+static char *sr_localsid_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sr_localsid_error
+#undef _
+};
+
+#define foreach_sr_localsid_next \
+_(ERROR, "error-drop") \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(IP4_LOOKUP, "ip4-lookup") \
+_(IP6_REWRITE, "ip6-rewrite") \
+_(IP4_REWRITE, "ip4-rewrite") \
+_(INTERFACE_OUTPUT, "interface-output")
+
+typedef enum
+{
+#define _(s,n) SR_LOCALSID_NEXT_##s,
+ foreach_sr_localsid_next
+#undef _
+ SR_LOCALSID_N_NEXT,
+} sr_localsid_next_t;
+
+/**
+ * @brief SR LocalSID graph node trace function
+ *
+ * @see sr_localsid
+ */
+u8 *
+format_sr_localsid_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ ip6_sr_main_t *sm = &sr_main;
+ sr_localsid_trace_t *t = va_arg (*args, sr_localsid_trace_t *);
+
+ ip6_sr_localsid_t *ls =
+ pool_elt_at_index (sm->localsids, t->localsid_index);
+
+ s =
+ format (s, "SR-LOCALSID:\n\tLocalsid: %U\n", format_ip6_address,
+ &ls->localsid);
+ switch (ls->behavior)
+ {
+ case SR_BEHAVIOR_END:
+ s = format (s, "\tBehavior: End\n");
+ break;
+ case SR_BEHAVIOR_DX6:
+ s = format (s, "\tBehavior: Decapsulation with IPv6 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_DX4:
+ s = format (s, "\tBehavior: Decapsulation with IPv4 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_X:
+ s = format (s, "\tBehavior: IPv6 L3 xconnect\n");
+ break;
+ case SR_BEHAVIOR_DT6:
+ s = format (s, "\tBehavior: Decapsulation with IPv6 Table lookup\n");
+ break;
+ case SR_BEHAVIOR_DT4:
+ s = format (s, "\tBehavior: Decapsulation with IPv4 Table lookup\n");
+ break;
+ case SR_BEHAVIOR_DX2:
+ s = format (s, "\tBehavior: Decapsulation with L2 xconnect\n");
+ break;
+ default:
+ s = format (s, "\tBehavior: defined in plugin\n"); //TODO
+ break;
+ }
+ if (t->num_segments != 0xFF)
+ {
+ if (t->num_segments > 0)
+ {
+	  s = format (s, "\tSegments left: %d\n", t->segments_left);
+ s = format (s, "\tSID list: [in ietf order]");
+ int i = 0;
+ for (i = 0; i < t->num_segments; i++)
+ {
+ s = format (s, "\n\t-> %U", format_ip6_address,
+ (ip6_address_t *) & t->sr[i *
+ sizeof (ip6_address_t)]);
+ }
+ }
+ }
+ return s;
+}
+
+/**
+ * @brief Function doing End processing.
+ */
+static_always_inline void
+end_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0, u32 * next0)
+{
+ ip6_address_t *new_dst0;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (PREDICT_TRUE (sr0->segments_left != 0))
+ {
+ sr0->segments_left -= 1;
+ new_dst0 = (ip6_address_t *) (sr0->segments);
+ new_dst0 += sr0->segments_left;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ }
+ else
+ {
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_MORE_SEGMENTS];
+ }
+ }
+ else
+ {
+ /* Error. Routing header of type != SR */
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_SRH];
+ }
+}
+
+/*
+ * @brief Function doing SRH processing for D* variants
+ */
+//FixME. I must crosscheck that next_proto matches the localsid
+static_always_inline void
+end_decaps_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0, u32 * next0)
+{
+ /* Compute the size of the IPv6 header with all Ext. headers */
+ u8 next_proto;
+ ip6_ext_header_t *next_ext_header;
+ u16 total_size = 0;
+
+ next_proto = ip0->protocol;
+ next_ext_header = (void *) (ip0 + 1);
+ total_size = sizeof (ip6_header_t);
+ while (ip6_ext_hdr (next_proto))
+ {
+ total_size += ip6_ext_header_len (next_ext_header);
+ next_proto = next_ext_header->next_hdr;
+ next_ext_header = ip6_ext_next_header (next_ext_header);
+ }
+
+ /* Ensure this is the last segment. Otherwise drop. */
+ if (sr0 && sr0->segments_left != 0)
+ {
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NOT_LS];
+ return;
+ }
+
+ switch (next_proto)
+ {
+ case IP_PROTOCOL_IPV6:
+ /* Encap-End IPv6. Pop outer IPv6 header. */
+ if (ls0->behavior == SR_BEHAVIOR_DX6)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ return;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_DT6)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
+ return;
+ }
+ break;
+ case IP_PROTOCOL_IP_IN_IP:
+ /* Encap-End IPv4. Pop outer IPv6 header */
+ if (ls0->behavior == SR_BEHAVIOR_DX4)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP4_REWRITE;
+ return;
+ }
+ else if (ls0->behavior == SR_BEHAVIOR_DT4)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->fib_table;
+ *next0 = SR_LOCALSID_NEXT_IP4_LOOKUP;
+ return;
+ }
+ break;
+ case IP_PROTOCOL_IP6_NONXT:
+ /* L2 encaps */
+ if (ls0->behavior == SR_BEHAVIOR_DX2)
+ {
+ vlib_buffer_advance (b0, total_size);
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls0->sw_if_index;
+ *next0 = SR_LOCALSID_NEXT_INTERFACE_OUTPUT;
+ return;
+ }
+ break;
+ }
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_INNER_HEADER];
+ return;
+}
+
+/**
+ * @brief Function doing End processing with PSP (Penultimate Segment Popping of the SRH)
+ */
+static_always_inline void
+end_psp_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_ext_header_t * prev0,
+ ip6_sr_header_t * sr0,
+ ip6_sr_localsid_t * ls0, u32 * next0)
+{
+ u32 new_l0, sr_len;
+ u64 *copy_dst0, *copy_src0;
+ u32 copy_len_u64s0 = 0;
+ int i;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (PREDICT_TRUE (sr0->segments_left == 1))
+ {
+ ip0->dst_address.as_u64[0] = sr0->segments->as_u64[0];
+ ip0->dst_address.as_u64[1] = sr0->segments->as_u64[1];
+
+ /* Remove the SRH taking care of the rest of IPv6 ext header */
+ if (prev0)
+ prev0->next_hdr = sr0->protocol;
+ else
+ ip0->protocol = sr0->protocol;
+
+ sr_len = ip6_ext_header_len (sr0);
+ vlib_buffer_advance (b0, sr_len);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) - sr_len;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (sr0->length + 1);
+	  /* copy_len_u64s0 is the number of 8-octet units of extension
+	   * headers sitting between the IPv6 header and the SRH (zero when
+	   * the SRH immediately follows the IPv6 header). Together with the
+	   * five units of the 40-byte IPv6 header, this whole block is
+	   * copied forward over the removed SRH, highest addresses first so
+	   * that overlapping source and destination do not corrupt data.
+	   */
+ copy_len_u64s0 =
+ (((u8 *) sr0 - (u8 *) ip0) - sizeof (ip6_header_t)) >> 3;
+ copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+ copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+ copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+ copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+ copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+ for (i = copy_len_u64s0 - 1; i >= 0; i--)
+ {
+ copy_dst0[i] = copy_src0[i];
+ }
+
+ if (ls0->behavior == SR_BEHAVIOR_X)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ls0->nh_adj;
+ *next0 = SR_LOCALSID_NEXT_IP6_REWRITE;
+ }
+ return;
+ }
+ }
+ /* Error. Routing header of type != SR */
+ *next0 = SR_LOCALSID_NEXT_ERROR;
+ b0->error = node->errors[SR_LOCALSID_ERROR_NO_PSP];
+}
+
+/**
+ * @brief SR LocalSID graph node. Supports the decapsulation (End.D*) variants
+ */
+static uword
+sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ ip6_sr_main_t *sm = &sr_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+4 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+	  /* Each packet looks up its own LocalSID from its own adjacency */
+	  ls0 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ls1 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  ls2 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  ls3 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+
+ end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+ end_decaps_srh_processing (node, b1, ip1, sr1, ls1, &next1);
+ end_decaps_srh_processing (node, b2, ip2, sr2, ls2, &next2);
+ end_decaps_srh_processing (node, b3, ip3, sr3, ls3, &next3);
+
+ //TODO: trace.
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_increment_combined_counter
+ (((next1 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b1));
+
+ vlib_increment_combined_counter
+ (((next2 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b2));
+
+ vlib_increment_combined_counter
+ (((next3 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+      /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Lookup the SR End behavior based on IP DA (adj) */
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ /* Find SRH as well as previous header */
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+ /* SRH processing and End variants */
+ end_decaps_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ tr->localsid_index = ls0 - sm->localsids;
+
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->out_dst.as_u8));
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ tr->num_segments = 0xFF;
+ }
+
+ /* Increase the counters */
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_d_node) = {
+ .function = sr_localsid_d_fn,
+ .name = "sr-localsid-d",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_localsid_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_LOCALSID_N_ERROR,
+ .error_strings = sr_localsid_error_strings,
+ .n_next_nodes = SR_LOCALSID_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+ foreach_sr_localsid_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID graph node. Supports all default SR Endpoint variants
+ */
+static uword
+sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ ip6_sr_main_t *sm = &sr_main;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 thread_index = vlib_get_thread_index ();
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0, *ls1, *ls2, *ls3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+	    /* Prefetch the buffer header and packet for the N+4 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1, prev1, sr1, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2, prev2, sr2, IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3, prev3, sr3, IP_PROTOCOL_IPV6_ROUTE);
+
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+	  ls1 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+	  ls2 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+	  ls3 =
+	    pool_elt_at_index (sm->localsids,
+			       vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ if (ls0->end_psp)
+ end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
+ else
+ end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+ if (ls1->end_psp)
+ end_psp_srh_processing (node, b1, ip1, prev1, sr1, ls1, &next1);
+ else
+ end_srh_processing (node, b1, ip1, sr1, ls1, &next1);
+
+ if (ls2->end_psp)
+ end_psp_srh_processing (node, b2, ip2, prev2, sr2, ls2, &next2);
+ else
+ end_srh_processing (node, b2, ip2, sr2, ls2, &next2);
+
+ if (ls3->end_psp)
+ end_psp_srh_processing (node, b3, ip3, prev3, sr3, ls3, &next3);
+ else
+ end_srh_processing (node, b3, ip3, sr3, ls3, &next3);
+
+ //TODO: proper trace.
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_increment_combined_counter
+ (((next1 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls1 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b1));
+
+ vlib_increment_combined_counter
+ (((next2 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls2 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b2));
+
+ vlib_increment_combined_counter
+ (((next3 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls3 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+      /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ u32 next0 = SR_LOCALSID_NEXT_IP6_LOOKUP;
+ ip6_sr_localsid_t *ls0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (b0);
+ ip6_ext_header_find_t (ip0, prev0, sr0, IP_PROTOCOL_IPV6_ROUTE);
+
+ /* Lookup the SR End behavior based on IP DA (adj) */
+ ls0 =
+ pool_elt_at_index (sm->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ /* SRH processing */
+ if (ls0->end_psp)
+ end_psp_srh_processing (node, b0, ip0, prev0, sr0, ls0, &next0);
+ else
+ end_srh_processing (node, b0, ip0, sr0, ls0, &next0);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_localsid_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->num_segments = 0;
+ tr->localsid_index = ls0 - sm->localsids;
+
+ if (ip0 == vlib_buffer_get_current (b0))
+ {
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->out_dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->out_dst.as_u8));
+ if (ip0->protocol == IP_PROTOCOL_IPV6_ROUTE
+ && sr0->type == ROUTING_HEADER_TYPE_SR)
+ {
+ clib_memcpy (tr->sr, sr0->segments, sr0->length * 8);
+ tr->num_segments =
+ sr0->length * 8 / sizeof (ip6_address_t);
+ tr->segments_left = sr0->segments_left;
+ }
+ }
+ else
+ {
+ tr->num_segments = 0xFF;
+ }
+ }
+
+ vlib_increment_combined_counter
+ (((next0 ==
+ SR_LOCALSID_NEXT_ERROR) ? &(sm->sr_ls_invalid_counters) :
+ &(sm->sr_ls_valid_counters)), thread_index, ls0 - sm->localsids,
+ 1, vlib_buffer_length_in_chain (vm, b0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_localsid_node) = {
+ .function = sr_localsid_fn,
+ .name = "sr-localsid",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_localsid_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_LOCALSID_N_ERROR,
+ .error_strings = sr_localsid_error_strings,
+ .n_next_nodes = SR_LOCALSID_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_LOCALSID_NEXT_##s] = n,
+ foreach_sr_localsid_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+static u8 *
+format_sr_dpo (u8 * s, va_list * args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+
+ return (format (s, "SR: localsid_index:[%d]", index));
+}
+
+const static dpo_vft_t sr_loc_vft = {
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_dpo,
+};
+
+const static char *const sr_loc_ip6_nodes[] = {
+ "sr-localsid",
+ NULL,
+};
+
+const static char *const *const sr_loc_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_loc_ip6_nodes,
+};
+
+const static char *const sr_loc_d_ip6_nodes[] = {
+ "sr-localsid-d",
+ NULL,
+};
+
+const static char *const *const sr_loc_d_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_loc_d_ip6_nodes,
+};
+
+
+/*************************** SR LocalSID plugins ******************************/
+/**
+ * @brief SR LocalSID plugin registry
+ */
+int
+sr_localsid_register_function (vlib_main_t * vm, u8 * fn_name,
+ u8 * keyword_str, u8 * def_str,
+ u8 * params_str, dpo_type_t * dpo,
+ format_function_t * ls_format,
+ unformat_function_t * ls_unformat,
+ sr_plugin_callback_t * creation_fn,
+ sr_plugin_callback_t * removal_fn)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ uword *p;
+
+ sr_localsid_fn_registration_t *plugin;
+
+ /* Did this function exist? If so update it */
+ p = hash_get_mem (sm->plugin_functions_by_key, fn_name);
+ if (p)
+ {
+ plugin = pool_elt_at_index (sm->plugin_functions, p[0]);
+ }
+ /* Else create a new one and set hash key */
+ else
+ {
+ pool_get (sm->plugin_functions, plugin);
+ hash_set_mem (sm->plugin_functions_by_key, fn_name,
+ plugin - sm->plugin_functions);
+ }
+
+ memset (plugin, 0, sizeof (*plugin));
+
+ plugin->sr_localsid_function_number = (plugin - sm->plugin_functions);
+ plugin->sr_localsid_function_number += SR_BEHAVIOR_LAST;
+ plugin->ls_format = ls_format;
+ plugin->ls_unformat = ls_unformat;
+ plugin->creation = creation_fn;
+ plugin->removal = removal_fn;
+ clib_memcpy (&plugin->dpo, dpo, sizeof (dpo_type_t));
+ plugin->function_name = format (0, "%s%c", fn_name, 0);
+ plugin->keyword_str = format (0, "%s%c", keyword_str, 0);
+ plugin->def_str = format (0, "%s%c", def_str, 0);
+ plugin->params_str = format (0, "%s%c", params_str, 0);
+
+ return plugin->sr_localsid_function_number;
+}
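+
+/*
+ * Illustrative usage sketch (not part of this file): an SRv6 LocalSID plugin
+ * would typically register its behavior from its own init function. All the
+ * names below (my_dpo_type, my_dpo_vft, my_dpo_nodes, format_my_localsid,
+ * unformat_my_localsid, my_creation_fn, my_removal_fn) are hypothetical.
+ *
+ *   static dpo_type_t my_dpo_type;
+ *   my_dpo_type = dpo_register_new_type (&my_dpo_vft, my_dpo_nodes);
+ *   sr_localsid_register_function (vm, (u8 *) "my_behavior",
+ *                                  (u8 *) "end.mb", (u8 *) "My behavior",
+ *                                  (u8 *) "<param1> <param2>", &my_dpo_type,
+ *                                  format_my_localsid, unformat_my_localsid,
+ *                                  my_creation_fn, my_removal_fn);
+ */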
+
+/**
+ * @brief CLI function to 'show' all available SR LocalSID behaviors
+ */
+static clib_error_t *
+show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ sr_localsid_fn_registration_t *plugin;
+ sr_localsid_fn_registration_t **plugins_vec = 0;
+ int i;
+
+ vlib_cli_output (vm,
+ "SR LocalSIDs behaviors:\n-----------------------\n\n");
+
+ /* *INDENT-OFF* */
+ pool_foreach (plugin, sm->plugin_functions,
+ ({ vec_add1 (plugins_vec, plugin); }));
+ /* *INDENT-ON* */
+
+ /* Print static behaviors */
+ vlib_cli_output (vm, "Default behaviors:\n"
+ "\tEnd\t-> Endpoint.\n"
+		   "\tEnd.X\t-> Endpoint with Layer-3 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX2\t-> Endpoint with decapsulation and Layer-2 cross-connect.\n"
+ "\t\tParameters: '<iface>'\n"
+ "\tEnd.DX6\t-> Endpoint with decapsulation and IPv6 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip6_next_hop>'\n"
+ "\tEnd.DX4\t-> Endpoint with decapsulation and IPv4 cross-connect.\n"
+ "\t\tParameters: '<iface> <ip4_next_hop>'\n"
+ "\tEnd.DT6\t-> Endpoint with decapsulation and specific IPv6 table lookup.\n"
+ "\t\tParameters: '<ip6_fib_table>'\n"
+ "\tEnd.DT4\t-> Endpoint with decapsulation and specific IPv4 table lookup.\n"
+ "\t\tParameters: '<ip4_fib_table>'\n");
+ vlib_cli_output (vm, "Plugin behaviors:\n");
+ for (i = 0; i < vec_len (plugins_vec); i++)
+ {
+ plugin = plugins_vec[i];
+ vlib_cli_output (vm, "\t%s\t-> %s.\n", plugin->keyword_str,
+ plugin->def_str);
+ vlib_cli_output (vm, "\t\tParameters: '%s'\n", plugin->params_str);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
+ .path = "show sr localsids behaviors",
+ .short_help = "show sr localsids behaviors",
+ .function = show_sr_localsid_behaviors_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SR LocalSID initialization
+ */
+clib_error_t *
+sr_localsids_init (vlib_main_t * vm)
+{
+ /* Init memory for function keys */
+ ip6_sr_main_t *sm = &sr_main;
+ mhash_init (&sm->sr_localsids_index_hash, sizeof (uword),
+ sizeof (ip6_address_t));
+ /* Init SR behaviors DPO type */
+ sr_localsid_dpo_type = dpo_register_new_type (&sr_loc_vft, sr_loc_nodes);
+ /* Init SR behaviors DPO type */
+ sr_localsid_d_dpo_type =
+ dpo_register_new_type (&sr_loc_vft, sr_loc_d_nodes);
+ /* Init memory for localsid plugins */
+ sm->plugin_functions_by_key = hash_create_string (0, sizeof (uword));
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_localsids_init);
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_localsid.md b/src/vnet/srv6/sr_localsid.md
new file mode 100644
index 00000000000..340af4a31b0
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.md
@@ -0,0 +1,58 @@
+# SR LocalSIDs {#srv6_localsid_doc}
+
+A local SID is associated with a Segment Routing behavior (or function) on the current node.
+
+The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA.
+
+A local END SID is instantiated using the following CLI:
+
+ sr localsid (del) address XX::YY behavior end
+
+This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
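+
+For reference, the End processing described above essentially amounts to the following minimal sketch (illustrative only, error handling omitted; the actual implementation is the end_srh_processing function in sr_localsid.c):
+
+    /* End: activate the next SID in the SRH (sketch, no error handling) */
+    if (sr->segments_left > 0)
+      {
+        sr->segments_left -= 1;
+        /* the Segment List is encoded in reverse order */
+        ip->dst_address = sr->segments[sr->segments_left];
+      }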
+
+Other examples of local SIDs are the following:
+
+ sr localsid (del) address XX::YY behavior end
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
+ sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
+ sr localsid (del) address XX::YY behavior end.dt6 5
+    sr localsid (del) address XX::YY behavior end.dt4 5
+
+Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior.
+
+Note also that you can configure the PSP flavor of the End and End.X behaviors by typing:
+
+ sr localsid (del) address XX::YY behavior end psp
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
+
+Help on the available local SID behaviors and their usage can be obtained with:
+
+ help sr localsid
+
+Alternatively, they can be obtained with:
+
+    show sr localsids behaviors
+
+The difference between these two commands is that the former only displays the SR LocalSID behaviors built into VPP, while the latter also displays the behaviors added with the SR LocalSID Development Framework.
+
+
+VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
+
+The contents of the 'My LocalSID Table' are shown with:
+
+ vpp# show sr localsid
+ SRv6 - My LocalSID Table:
+ =========================
+ Address: c3::1
+ Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect)
+ Iface: GigabitEthernet0/5/0
+ Next hop: b:c3::b
+ Good traffic: [51277 packets : 5332808 bytes]
+ Bad traffic: [0 packets : 0 bytes]
+ --------------------
+
+The traffic counters can be reset with:
+
+ vpp# clear sr localsid counters
diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h
new file mode 100755
index 00000000000..7af4ad4d9c0
--- /dev/null
+++ b/src/vnet/srv6/sr_packet.h
@@ -0,0 +1,159 @@
+#ifndef included_vnet_sr_packet_h
+#define included_vnet_sr_packet_h
+
+#include <vnet/ip/ip.h>
+
+/*
+ * ipv6 segment-routing header format
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The Segment Routing Header (SRH) is defined as follows:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Next Header | Hdr Ext Len | Routing Type | Segments Left |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | First Segment | Flags | RESERVED |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[0] (128 bits IPv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | |
+ * ...
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * | Segment List[n] (128 bits IPv6 address) |
+ * | |
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * // //
+ * // Optional Type Length Value objects (variable) //
+ * // //
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * where:
+ *
+ * o Next Header: 8-bit selector. Identifies the type of header
+ * immediately following the SRH.
+ *
+ * o Hdr Ext Len: 8-bit unsigned integer, is the length of the SRH
+ * header in 8-octet units, not including the first 8 octets.
+ *
+ * o Routing Type: TBD, to be assigned by IANA (suggested value: 4).
+ *
+ * o Segments Left. Defined in [RFC2460], it contains the index, in
+ * the Segment List, of the next segment to inspect. Segments Left
+ * is decremented at each segment.
+ *
+ * o First Segment: contains the index, in the Segment List, of the
+ * first segment of the path which is in fact the last element of the
+ * Segment List.
+ *
+ * o Flags: 8 bits of flags. Following flags are defined:
+ *
+ * 0 1 2 3 4 5 6 7
+ * +-+-+-+-+-+-+-+-+
+ * |U|P|O|A|H| U |
+ * +-+-+-+-+-+-+-+-+
+ *
+ * U: Unused and for future use. SHOULD be unset on transmission
+ * and MUST be ignored on receipt.
+ *
+ * P-flag: Protected flag. Set when the packet has been rerouted
+ * through FRR mechanism by an SR endpoint node.
+ *
+ * O-flag: OAM flag. When set, it indicates that this packet is
+ * an operations and management (OAM) packet.
+ *
+ * A-flag: Alert flag. If present, it means important Type Length
+ * Value (TLV) objects are present. See Section 3.1 for details
+ * on TLVs objects.
+ *
+ * H-flag: HMAC flag. If set, the HMAC TLV is present and is
+ * encoded as the last TLV of the SRH. In other words, the last
+ * 36 octets of the SRH represent the HMAC information. See
+ * Section 3.1.5 for details on the HMAC TLV.
+ *
+ * o RESERVED: SHOULD be unset on transmission and MUST be ignored on
+ * receipt.
+ *
+ * o Segment List[n]: 128 bit IPv6 addresses representing the nth
+ * segment in the Segment List. The Segment List is encoded starting
+ * from the last segment of the path. I.e., the first element of the
+ * segment list (Segment List [0]) contains the last segment of the
+ * path while the last segment of the Segment List (Segment List[n])
+ * contains the first segment of the path. The index contained in
+ * "Segments Left" identifies the current active segment.
+ *
+ * o Type Length Value (TLV) are described in Section 3.1.
+ *
+ */
+
+#ifndef IPPROTO_IPV6_ROUTE
+#define IPPROTO_IPV6_ROUTE 43
+#endif
+
+#define ROUTING_HEADER_TYPE_SR 4
+
+typedef struct
+{
+ /* Protocol for next header. */
+ u8 protocol;
+ /*
+ * Length of routing header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+
+  /* Type of routing header; type 4 = segment routing */
+ u8 type;
+
+ /* Next segment in the segment list */
+ u8 segments_left;
+
+ /* Pointer to the first segment in the header */
+ u8 first_segment;
+
+ /* Flag bits */
+#define IP6_SR_HEADER_FLAG_PROTECTED (0x40)
+#define IP6_SR_HEADER_FLAG_OAM (0x20)
+#define IP6_SR_HEADER_FLAG_ALERT (0x10)
+#define IP6_SR_HEADER_FLAG_HMAC (0x80)
+
+ /* values 0x0, 0x4 - 0x7 are reserved */
+ u8 flags;
+ u16 reserved;
+
+ /* The segment elts */
+ ip6_address_t segments[0];
+} __attribute__ ((packed)) ip6_sr_header_t;
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
+
+#endif /* included_vnet_sr_packet_h */
diff --git a/src/vnet/srv6/sr_policy.md b/src/vnet/srv6/sr_policy.md
new file mode 100644
index 00000000000..521b84616c0
--- /dev/null
+++ b/src/vnet/srv6/sr_policy.md
@@ -0,0 +1,56 @@
+# Creating an SR Policy {#srv6_policy_doc}
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
+
+A new SR policy is created with a first SID list using:
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
+
+* The weight parameter is only used if more than one SID list is associated with the policy.
+* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
+
+An SR policy is deleted with:
+
+ sr policy del bsid 2001::1
+ sr policy del index 1
+
+The existing SR policies are listed with:
+
+ show sr policies
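+
+For the policy created above, the output looks roughly as follows (illustrative example; the exact formatting may differ):
+
+    SR policies:
+    [0].-	BSID: 2001::1
+    	Behavior: Encapsulation
+    	Type: Default
+    	FIB table: 3
+    	Segment Lists:
+      	[0].- < a1::, b1::, c1:: > weight: 5
+    -----------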
+
+## Adding/Removing SID Lists from an SR policy
+
+An additional SID list is associated with an existing SR policy with:
+
+ sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
+ sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+ sr policy mod bsid 2001::1 del sl index 1
+ sr policy mod index 3 del sl index 1
+
+Note that this cannot be used to remove the last SID list of a policy.
+
+The weight of a SID list can also be modified with:
+
+ sr policy mod bsid 2001::1 mod sl index 1 weight 4
+ sr policy mod index 3 mod sl index 1 weight 4
+
+## SR Policies: Spray policies
+
+Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored for this type of policy.
+
+A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
+
+Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.
+
+## Encapsulation SR policies
+
+When creating an encapsulation SR policy, an IPv6 source address must be specified for the encapsulated traffic. This is done with the following command:
+
+ set sr encaps source addr XXXX::YYYY
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
new file mode 100755
index 00000000000..7a37a66b402
--- /dev/null
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -0,0 +1,3227 @@
+/*
+ * sr_policy_rewrite.c: ipv6 sr policy creation
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief SR policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type.
+ * An SR policy has a list of SID lists attached to it.
+ * A default SR policy load balances the traffic among its SID lists.
+ * An SR policy has an associated BindingSID.
+ * Whenever a packet arrives with IPv6 DA == BindingSID, the SR policy
+ * associated with that BindingSID is applied to the packet.
+ *
+ * SR policies can be applied either by using IPv6 encapsulation or
+ * SRH insertion. Both methods are implemented in this file.
+ *
+ * Input traffic is usually IPv6 packets, but IPv4 packets and L2 frames
+ * (which are encapsulated into IPv6 with an SRH) are also supported.
+ *
+ * This file provides the appropriate VPP graph nodes for all of these
+ * methods.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief SR policy rewrite trace
+ */
+typedef struct
+{
+ ip6_address_t src, dst;
+} sr_policy_rewrite_trace_t;
+
+/* Graph arcs */
+#define foreach_sr_policy_rewrite_next \
+_(IP6_LOOKUP, "ip6-lookup") \
+_(ERROR, "error-drop")
+
+typedef enum
+{
+#define _(s,n) SR_POLICY_REWRITE_NEXT_##s,
+ foreach_sr_policy_rewrite_next
+#undef _
+ SR_POLICY_REWRITE_N_NEXT,
+} sr_policy_rewrite_next_t;
+
+/* SR rewrite errors */
+#define foreach_sr_policy_rewrite_error \
+_(INTERNAL_ERROR, "Segment Routing undefined error") \
+_(BSID_ZERO, "BSID with SL = 0") \
+_(COUNTER_TOTAL, "SR steered IPv6 packets") \
+_(COUNTER_ENCAP, "SR: Encaps packets") \
+_(COUNTER_INSERT, "SR: SRH inserted packets") \
+_(COUNTER_BSID, "SR: BindingSID steered packets")
+
+typedef enum
+{
+#define _(sym,str) SR_POLICY_REWRITE_ERROR_##sym,
+ foreach_sr_policy_rewrite_error
+#undef _
+ SR_POLICY_REWRITE_N_ERROR,
+} sr_policy_rewrite_error_t;
+
+static char *sr_policy_rewrite_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sr_policy_rewrite_error
+#undef _
+};
+
+/**
+ * @brief Dynamically added SR SL DPO type
+ */
+static dpo_type_t sr_pr_encaps_dpo_type;
+static dpo_type_t sr_pr_insert_dpo_type;
+static dpo_type_t sr_pr_bsid_encaps_dpo_type;
+static dpo_type_t sr_pr_bsid_insert_dpo_type;
+
+/**
+ * @brief IPv6 SA for encapsulated packets
+ */
+static ip6_address_t sr_pr_encaps_src;
+
+/******************* SR rewrite set encaps IPv6 source addr *******************/
+/* Note: This is temporary. It is not yet decided whether to keep this
+   approach or to use the IP address of a loopback interface or even the OIF */
+
+static clib_error_t *
+set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "addr %U", unformat_ip6_address, &sr_pr_encaps_src))
+ return 0;
+ else
+ return clib_error_return (0, "No address specified");
+ }
+ return clib_error_return (0, "No address specified");
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_sr_src_command, static) = {
+ .path = "set sr encaps source",
+ .short_help = "set sr encaps source addr <ip6_addr>",
+ .function = set_sr_src_command_fn,
+};
+/* *INDENT-ON* */
+
+/*********************** SR rewrite string computation ************************/
+/**
+ * @brief SR rewrite string computation for IPv6 encapsulation (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for encapsulation
+ */
+static inline u8 *
+compute_rewrite_encaps (ip6_address_t * sl)
+{
+ ip6_header_t *iph;
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += IPv6_DEFAULT_HEADER_LENGTH;
+ if (vec_len (sl) > 1)
+ {
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+ }
+
+ vec_validate (rs, header_length - 1);
+
+ iph = (ip6_header_t *) rs;
+ iph->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
+ iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
+ iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+ iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
+ iph->protocol = IP_PROTOCOL_IPV6;
+ iph->hop_limit = IPv6_DEFAULT_HOP_LIMIT;
+
+ srh = (ip6_sr_header_t *) (iph + 1);
+ iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ srh->protocol = IP_PROTOCOL_IPV6;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->first_segment = vec_len (sl) - 1;
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x00;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ iph->dst_address.as_u64[0] = sl->as_u64[0];
+ iph->dst_address.as_u64[1] = sl->as_u64[1];
+ return rs;
+}
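+
+/*
+ * Worked example (illustrative): for a Segment List of 3 SIDs the
+ * precomputed encapsulation rewrite above is
+ *   40 (outer IPv6) + 8 (fixed SRH) + 3 * 16 (segments) = 96 bytes,
+ * and srh->length = ((8 + 3 * 16) / 8) - 1 = 6 (8-octet units, not
+ * counting the first 8 octets).
+ */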
+
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion
+ */
+static inline u8 *
+compute_rewrite_insert (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl);
+ srh->first_segment = vec_len (sl);
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl);
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
+
+/**
+ * @brief SR rewrite string computation for SRH insertion with BSID (inline)
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion with BSID
+ */
+static inline u8 *
+compute_rewrite_bsid (ip6_address_t * sl)
+{
+ ip6_sr_header_t *srh;
+ ip6_address_t *addrp, *this_address;
+ u32 header_length = 0;
+ u8 *rs = NULL;
+
+ header_length = 0;
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+
+ vec_validate (rs, header_length - 1);
+
+ srh = (ip6_sr_header_t *) rs;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->first_segment = vec_len (sl) - 1;
+ srh->length = ((sizeof (ip6_sr_header_t) +
+ (vec_len (sl) * sizeof (ip6_address_t))) / 8) - 1;
+ srh->flags = 0x00;
+ srh->reserved = 0x0000;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+ addrp--;
+ }
+ return rs;
+}
+
+/*************************** SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Notice that the SL are
+ * not necessarily unique. Hence there might be two Segment List within the
+ * same SR Policy with exactly the same segments and same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param weight is the weight of the SegmentList (for load-balancing purposes)
+ * @param is_encap represents the mode (SRH insertion vs Encapsulation)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline ip6_sr_sl_t *
+create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
+ u8 is_encap)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_sl_t *segment_list;
+
+ pool_get (sm->sid_lists, segment_list);
+ memset (segment_list, 0, sizeof (*segment_list));
+
+ vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+ /* Fill in segment list */
+ segment_list->weight =
+ (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+ segment_list->segments = vec_dup (sl);
+
+ if (is_encap)
+ {
+ segment_list->rewrite = compute_rewrite_encaps (sl);
+ segment_list->rewrite_bsid = segment_list->rewrite;
+ }
+ else
+ {
+ segment_list->rewrite = compute_rewrite_insert (sl);
+ segment_list->rewrite_bsid = compute_rewrite_bsid (sl);
+ }
+
+ /* Create DPO */
+ dpo_reset (&segment_list->bsid_dpo);
+ dpo_reset (&segment_list->ip6_dpo);
+ dpo_reset (&segment_list->ip4_dpo);
+
+ if (is_encap)
+ {
+ dpo_set (&segment_list->ip6_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP6,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->ip4_dpo, sr_pr_encaps_dpo_type, DPO_PROTO_IP4,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_encaps_dpo_type,
+ DPO_PROTO_IP6, segment_list - sm->sid_lists);
+ }
+ else
+ {
+ dpo_set (&segment_list->ip6_dpo, sr_pr_insert_dpo_type, DPO_PROTO_IP6,
+ segment_list - sm->sid_lists);
+ dpo_set (&segment_list->bsid_dpo, sr_pr_bsid_insert_dpo_type,
+ DPO_PROTO_IP6, segment_list - sm->sid_lists);
+ }
+
+ return segment_list;
+}
+
+/**
+ * @brief Updates the Load Balancer after an SR Policy change
+ *
+ * @param sr_policy is the modified SR Policy
+ */
+static inline void
+update_lb (ip6_sr_policy_t * sr_policy)
+{
+ flow_hash_config_t fhc;
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list;
+ ip6_sr_main_t *sm = &sr_main;
+ load_balance_path_t path;
+ path.path_index = FIB_NODE_INDEX_INVALID;
+ load_balance_path_t *ip4_path_vector = 0;
+ load_balance_path_t *ip6_path_vector = 0;
+ load_balance_path_t *b_path_vector = 0;
+
+ /* In case LB does not exist, create it */
+ if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ };
+
+ /* Add FIB entry for BSID */
+ fhc = fib_table_get_flow_hash_config (sr_policy->fib_table,
+ dpo_proto_to_fib (DPO_PROTO_IP6));
+
+ dpo_set (&sr_policy->bsid_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+ load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+ dpo_set (&sr_policy->ip6_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP6,
+ load_balance_create (0, DPO_PROTO_IP6, fhc));
+
+ /* Update FIB entry's to point to the LB DPO in the main FIB and hidden one */
+ fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->bsid_dpo);
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip6_dpo);
+
+ if (sr_policy->is_encap)
+ {
+ dpo_set (&sr_policy->ip4_dpo, DPO_LOAD_BALANCE, DPO_PROTO_IP4,
+ load_balance_create (0, DPO_PROTO_IP4, fhc));
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip4_dpo);
+ }
+
+ }
+
+ /* Create the LB path vector */
+ //path_vector = vec_new(load_balance_path_t, vec_len(sr_policy->segments_lists));
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ path.path_dpo = segment_list->bsid_dpo;
+ path.path_weight = segment_list->weight;
+ vec_add1 (b_path_vector, path);
+ path.path_dpo = segment_list->ip6_dpo;
+ vec_add1 (ip6_path_vector, path);
+ if (sr_policy->is_encap)
+ {
+ path.path_dpo = segment_list->ip4_dpo;
+ vec_add1 (ip4_path_vector, path);
+ }
+ }
+
+ /* Update LB multipath */
+ load_balance_multipath_update (&sr_policy->bsid_dpo, b_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+ load_balance_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+ if (sr_policy->is_encap)
+ load_balance_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector,
+ LOAD_BALANCE_FLAG_NONE);
+
+ /* Cleanup */
+ vec_free (b_path_vector);
+ vec_free (ip6_path_vector);
+ vec_free (ip4_path_vector);
+
+}
+
+/**
+ * @brief Updates the Replicate DPO after an SR Policy change
+ *
+ * @param sr_policy is the modified SR Policy (type spray)
+ */
+static inline void
+update_replicate (ip6_sr_policy_t * sr_policy)
+{
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list;
+ ip6_sr_main_t *sm = &sr_main;
+ load_balance_path_t path;
+ path.path_index = FIB_NODE_INDEX_INVALID;
+ load_balance_path_t *b_path_vector = 0;
+ load_balance_path_t *ip6_path_vector = 0;
+ load_balance_path_t *ip4_path_vector = 0;
+
+ /* In case LB does not exist, create it */
+ if (!dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ dpo_set (&sr_policy->bsid_dpo, DPO_REPLICATE,
+ DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+ dpo_set (&sr_policy->ip6_dpo, DPO_REPLICATE,
+ DPO_PROTO_IP6, replicate_create (0, DPO_PROTO_IP6));
+
+ /* Update FIB entry's DPO to point to SR without LB */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ };
+ fib_table_entry_special_dpo_update (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->bsid_dpo);
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip6,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip6_dpo);
+
+ if (sr_policy->is_encap)
+ {
+ dpo_set (&sr_policy->ip4_dpo, DPO_REPLICATE, DPO_PROTO_IP4,
+ replicate_create (0, DPO_PROTO_IP4));
+
+ fib_table_entry_special_dpo_update (sm->fib_table_ip4,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_EXCLUSIVE,
+ &sr_policy->ip4_dpo);
+ }
+
+ }
+
+ /* Create the replicate path vector */
+ path.path_weight = 1;
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ path.path_dpo = segment_list->bsid_dpo;
+ vec_add1 (b_path_vector, path);
+ path.path_dpo = segment_list->ip6_dpo;
+ vec_add1 (ip6_path_vector, path);
+ if (sr_policy->is_encap)
+ {
+ path.path_dpo = segment_list->ip4_dpo;
+ vec_add1 (ip4_path_vector, path);
+ }
+ }
+
+ /* Update replicate multipath */
+ replicate_multipath_update (&sr_policy->bsid_dpo, b_path_vector);
+ replicate_multipath_update (&sr_policy->ip6_dpo, ip6_path_vector);
+ if (sr_policy->is_encap)
+ replicate_multipath_update (&sr_policy->ip4_dpo, ip4_path_vector);
+}
+
+/******************************* SR rewrite API *******************************/
+/* Three functions for handling sr policies:
+ * -> sr_policy_add
+ * -> sr_policy_del
+ * -> sr_policy_mod
+ * All of them are API. CLI function on sr_policy_command_fn */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of IPv6 address composing the segment list
+ * @param weight is the weight of the sid list. optional.
+ * @param behavior is the behavior of the SR policy. (default//spray)
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
+ u32 weight, u8 behavior, u32 fib_table, u8 is_encap)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ uword *p;
+
+ /* Search for existing keys (BSID) */
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ {
+ /* Add SR policy that already exists; complain */
+ return -12;
+ }
+
+ /* Search collision in FIB entries */
+ /* Explanation: It might be possible that some other entity has already
+ * created a route for the BSID. This in theory is impossible, but in
+   * practice we could see it. Assert it and scream if needed */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = *bsid,
+ }
+ };
+
+ /* Lookup the FIB index associated to the table selected */
+ u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6,
+ (fib_table != (u32) ~ 0 ? fib_table : 0));
+ if (fib_index == ~0)
+ return -13;
+
+ /* Lookup whether there exists an entry for the BSID */
+ fib_node_index_t fei = fib_table_lookup_exact_match (fib_index, &pfx);
+ if (FIB_NODE_INDEX_INVALID != fei)
+ return -12; //There is an entry for such lookup
+
+ /* Add an SR policy object */
+ pool_get (sm->sr_policies, sr_policy);
+ memset (sr_policy, 0, sizeof (*sr_policy));
+ clib_memcpy (&sr_policy->bsid, bsid, sizeof (ip6_address_t));
+ sr_policy->type = behavior;
+ sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ?
+ sr_policy->is_encap = is_encap;
+
+ /* Copy the key */
+ mhash_set (&sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies,
+ NULL);
+
+ /* Create a segment list and add the index to the SR policy */
+ create_sl (sr_policy, segments, weight, is_encap);
+
+  /* If the FIB tables don't exist, create them */
+ if (sm->fib_table_ip6 == (u32) ~ 0)
+ {
+ sm->fib_table_ip6 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+ "SRv6 steering of IP6 prefixes through BSIDs");
+ sm->fib_table_ip4 = fib_table_create_and_lock (FIB_PROTOCOL_IP6,
+ "SRv6 steering of IP4 prefixes through BSIDs");
+ }
+
+ /* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ return 0;
+}
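+
+/*
+ * Illustrative usage sketch (not part of this file): programmatically
+ * creating the policy shown in sr_policy.md, assuming 'bsid' holds 2001::1
+ * and 'segments' is a vector of three ip6_address_t (A1::, B1::, C1::):
+ *
+ *   int rv = sr_policy_add (&bsid, segments, 5,
+ *                           SR_POLICY_TYPE_DEFAULT, 3, 1);
+ *
+ * where 5 is the SID list weight, 3 the FIB table for the BSID and the
+ * final 1 selects encapsulation mode.
+ */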
+
+/**
+ * @brief Delete a SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_del (ip6_address_t * bsid, u32 index)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_sl_t *segment_list;
+ u32 *sl_index;
+ uword *p;
+
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ /* Remove BindingSID FIB entry */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = 128,
+ .fp_addr = {
+ .ip6 = sr_policy->bsid,
+ }
+ ,
+ };
+
+ fib_table_entry_special_remove (fib_table_find (FIB_PROTOCOL_IP6,
+ sr_policy->fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ fib_table_entry_special_remove (sm->fib_table_ip6, &pfx, FIB_SOURCE_SR);
+
+ if (sr_policy->is_encap)
+ fib_table_entry_special_remove (sm->fib_table_ip4, &pfx, FIB_SOURCE_SR);
+
+ if (dpo_id_is_valid (&sr_policy->bsid_dpo))
+ {
+ dpo_reset (&sr_policy->bsid_dpo);
+ dpo_reset (&sr_policy->ip4_dpo);
+ dpo_reset (&sr_policy->ip6_dpo);
+ }
+
+ /* Clean SID Lists */
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ vec_free (segment_list->segments);
+ vec_free (segment_list->rewrite);
+ vec_free (segment_list->rewrite_bsid);
+ pool_put_index (sm->sid_lists, *sl_index);
+ }
+
+ /* Remove SR policy entry */
+ mhash_unset (&sm->sr_policies_index_hash, &sr_policy->bsid, NULL);
+ pool_put (sm->sr_policies, sr_policy);
+
+ /* If FIB empty unlock it */
+ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+ {
+ fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
+ fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP6);
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param operation is the operation to perform (among the top ones)
+ * @param segments is a vector of IPv6 address composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the sid list. optional.
+ * @param is_encap Mode. Encapsulation or SRH insertion.
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t * segments, u32 sl_index,
+ u32 weight)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_sl_t *segment_list;
+ u32 *sl_index_iterate;
+ uword *p;
+
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+ }
+ else
+ {
+ sr_policy = pool_elt_at_index (sm->sr_policies, index);
+ if (!sr_policy)
+ return -1;
+ }
+
+ if (operation == 1) /* Add SR List to an existing SR policy */
+ {
+ /* Create the new SL */
+ segment_list =
+ create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+
+ /* Create a new LB DPO */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ }
+ else if (operation == 2) /* Delete SR List from an existing SR policy */
+ {
+ /* Check that currently there are more than one SID list */
+ if (vec_len (sr_policy->segments_lists) == 1)
+ return -21;
+
+ /* Check that the SR list does exist and is assigned to the sr policy */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -22;
+
+ /* Remove the lucky SR list that is being kicked out */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+ vec_free (segment_list->segments);
+ vec_free (segment_list->rewrite);
+ vec_free (segment_list->rewrite_bsid);
+ pool_put_index (sm->sid_lists, sl_index);
+ vec_del1 (sr_policy->segments_lists,
+ sl_index_iterate - sr_policy->segments_lists);
+
+ /* Create a new LB DPO */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
+ update_replicate (sr_policy);
+ }
+ else if (operation == 3) /* Modify the weight of an existing SR List */
+ {
+ /* Find the corresponding SL */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -32;
+
+ /* Change the weight */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+ segment_list->weight = weight;
+
+ /* Update LB */
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ update_lb (sr_policy);
+ }
+ else /* Incorrect op. */
+ return -1;
+
+ return 0;
+}
+
+/**
+ * @brief CLI for 'sr policies' command family
+ */
+static clib_error_t *
+sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = -1;
+ char is_del = 0, is_add = 0, is_mod = 0;
+ char policy_set = 0;
+ ip6_address_t bsid, next_address;
+ u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
+ u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
+ ip6_address_t *segments = 0, *this_seg;
+ u8 operation = 0;
+ char is_encap = 1;
+ char is_spray = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+ is_add = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+ is_del = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+ is_mod = 1;
+ else if (!policy_set
+ && unformat (input, "bsid %U", unformat_ip6_address, &bsid))
+ policy_set = 1;
+ else if (!is_add && !policy_set
+ && unformat (input, "index %d", &sr_policy_index))
+ policy_set = 1;
+ else if (unformat (input, "weight %d", &weight));
+ else
+ if (unformat (input, "next %U", unformat_ip6_address, &next_address))
+ {
+ vec_add2 (segments, this_seg, 1);
+ clib_memcpy (this_seg->as_u8, next_address.as_u8,
+ sizeof (*this_seg));
+ }
+ else if (unformat (input, "add sl"))
+ operation = 1;
+ else if (unformat (input, "del sl index %d", &sl_index))
+ operation = 2;
+ else if (unformat (input, "mod sl index %d", &sl_index))
+ operation = 3;
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else if (unformat (input, "encap"))
+ is_encap = 1;
+ else if (unformat (input, "insert"))
+ is_encap = 0;
+ else if (unformat (input, "spray"))
+ is_spray = 1;
+ else
+ break;
+ }
+
+ if (!is_add && !is_mod && !is_del)
+ return clib_error_return (0, "Incorrect CLI");
+
+ if (!policy_set)
+ return clib_error_return (0, "No SR policy BSID or index specified");
+
+ if (is_add)
+ {
+ if (vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ rv = sr_policy_add (&bsid, segments, weight,
+ (is_spray ? SR_POLICY_TYPE_SPRAY :
+ SR_POLICY_TYPE_DEFAULT), fib_table, is_encap);
+ }
+ else if (is_del)
+ rv = sr_policy_del ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ sr_policy_index);
+ else if (is_mod)
+ {
+ if (!operation)
+ return clib_error_return (0, "No SL modification specified");
+ if (operation != 1 && sl_index == (u32) ~ 0)
+ return clib_error_return (0, "No Segment List index specified");
+ if (operation == 1 && vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ if (operation == 3 && weight == (u32) ~ 0)
+ return clib_error_return (0, "No new weight for the SL specified");
+ rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ sr_policy_index, fib_table, operation, segments,
+ sl_index, weight);
+ }
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -12:
+ return clib_error_return (0,
+ "There is already a FIB entry for the BindingSID address.\n"
+ "The SR policy could not be created.");
+ case -13:
+ return clib_error_return (0, "The specified FIB table does not exist.");
+ case -21:
+ return clib_error_return (0,
+ "The selected SR policy only contains ONE segment list. "
+ "Please remove the SR policy instead");
+ case -22:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -32:
+ return clib_error_return (0,
+ "Could not modify the segment list. "
+ "The given SL is not associated with such SR policy.");
+ default:
+ return clib_error_return (0, "BUG: sr policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_policy_command, static) = {
+ .path = "sr policy",
+ .short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
+ "next A:: next B:: next C:: (weight 1) (fib-table 2) (encap|insert)",
+ .long_help =
+ "Manipulation of SR policies.\n"
+ "A Segment Routing policy may contain several SID lists. Each SID list has\n"
+ "an associated weight (default 1), which will result in wECMP (uECMP).\n"
+  "Segment Routing policies might be of type encapsulation or SRH insertion.\n"
+  "Each SR policy will be associated with a unique BindingSID.\n"
+  "A BindingSID is a locally allocated SegmentID. Every packet that arrives\n"
+  "with its IPv6 DA set to the BSID is steered into the SR policy.\n"
+  "The add command will create an SR policy with its first segment list (sl).\n"
+ "The mod command allows you to add, remove, or modify the existing segment lists\n"
+ "within an SR policy.\n"
+  "The del command allows you to delete an SR policy along with all its associated\n"
+ "SID lists.\n",
+ .function = sr_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI to display onscreen all the SR policies
+ */
+static clib_error_t *
+show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 *sl_index;
+ ip6_sr_sl_t *segment_list = 0;
+ ip6_sr_policy_t *sr_policy = 0;
+ ip6_sr_policy_t **vec_policies = 0;
+ ip6_address_t *addr;
+ u8 *s;
+ int i = 0;
+
+ vlib_cli_output (vm, "SR policies:");
+
+ /* *INDENT-OFF* */
+ pool_foreach (sr_policy, sm->sr_policies,
+ {vec_add1 (vec_policies, sr_policy); } );
+ /* *INDENT-ON* */
+
+ vec_foreach_index (i, vec_policies)
+ {
+ sr_policy = vec_policies[i];
+ vlib_cli_output (vm, "[%u].-\tBSID: %U",
+ (u32) (sr_policy - sm->sr_policies),
+ format_ip6_address, &sr_policy->bsid);
+ vlib_cli_output (vm, "\tBehavior: %s",
+ (sr_policy->is_encap ? "Encapsulation" :
+ "SRH insertion"));
+ vlib_cli_output (vm, "\tType: %s",
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ vlib_cli_output (vm, "\tFIB table: %u",
+ (sr_policy->fib_table !=
+ (u32) ~ 0 ? sr_policy->fib_table : 0));
+ vlib_cli_output (vm, "\tSegment Lists:");
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ s = NULL;
+ s = format (s, "\t[%u].- ", *sl_index);
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ s = format (s, "< ");
+ vec_foreach (addr, segment_list->segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, addr);
+ }
+ s = format (s, "\b\b > ");
+ s = format (s, "weight: %u", segment_list->weight);
+ vlib_cli_output (vm, " %s", s);
+ }
+ vlib_cli_output (vm, "-----------");
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
+ .path = "show sr policies",
+ .short_help = "show sr policies",
+ .function = show_sr_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+/*************************** SR rewrite graph node ****************************/
+/**
+ * @brief Trace for the SR Policy Rewrite graph node
+ */
+static u8 *
+format_sr_policy_rewrite_trace (u8 * s, va_list * args)
+{
+ //TODO
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ sr_policy_rewrite_trace_t *t = va_arg (*args, sr_policy_rewrite_trace_t *);
+
+ s = format
+ (s, "SR-policy-rewrite: src %U dst %U",
+ format_ip6_address, &t->src, format_ip6_address, &t->dst);
+
+ return s;
+}
+
+/**
+ * @brief IPv6 encapsulation processing as per RFC2473
+ */
+static_always_inline void
+encaps_processing_v6 (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0, ip6_header_t * ip0_encap)
+{
+ u32 new_l0;
+
+ ip0_encap->hop_limit -= 1;
+ new_l0 =
+ ip0->payload_length + sizeof (ip6_header_t) +
+ clib_net_to_host_u16 (ip0_encap->payload_length);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->ip_version_traffic_class_and_flow_label =
+ ip0_encap->ip_version_traffic_class_and_flow_label;
+}
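+
+/*
+ * Worked example of the length arithmetic above (illustrative figures,
+ * assuming the rewrite template's payload_length was pre-set to the SRH
+ * length when the rewrite was computed): with a 40-byte SRH in the template
+ * and an inner IPv6 packet carrying a 60-byte payload, the outer payload
+ * length becomes 40 + 40 + 60 = 140 bytes, i.e. the SRH plus the complete
+ * inner packet (its 40-byte header included).
+ */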
+
+/**
+ * @brief Graph node for applying an SR policy to an IPv6 packet (encapsulation)
+ */
+static uword
+sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0, *ip0_encap = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
+ .function = sr_policy_rewrite_encaps,
+ .name = "sr-pl-rewrite-encaps",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
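+
+/*
+ * Summary of the transformation performed by sr-pl-rewrite-encaps above
+ * (addresses are placeholders):
+ *
+ *   before:  IPv6 (SA=A, DA=B) | payload
+ *   after:   IPv6 (outer, from the rewrite) | SRH | IPv6 (SA=A, DA=B) | payload
+ *
+ * The pre-computed rewrite already carries the complete outer IPv6 header
+ * and SRH; the node only prepends it, fixes the outer payload length,
+ * decrements the inner hop limit and copies the inner traffic class and
+ * flow label into the outer header.
+ */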
+
+/**
+ * @brief IPv4 encapsulation processing as per RFC2473
+ */
+static_always_inline void
+encaps_processing_v4 (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0, ip4_header_t * ip0_encap)
+{
+ u32 new_l0;
+ ip6_sr_header_t *sr0;
+
+ u32 checksum0;
+
+ /* Inner IPv4: Decrement TTL & update checksum */
+ ip0_encap->ttl -= 1;
+ checksum0 = ip0_encap->checksum + clib_host_to_net_u16 (0x0100);
+ checksum0 += checksum0 >= 0xffff;
+ ip0_encap->checksum = checksum0;
+
+ /* Outer IPv6: Update length, FL, proto */
+ new_l0 = ip0->payload_length + clib_net_to_host_u16 (ip0_encap->length);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0 | ((6 & 0xF) << 28) |
+ ((ip0_encap->tos & 0xFF) << 20));
+ sr0 = (void *) (ip0 + 1);
+ sr0->protocol = IP_PROTOCOL_IP_IN_IP;
+}
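+
+/*
+ * Note on the TTL/checksum update above: the TTL decrement is folded into
+ * the IPv4 header checksum incrementally (add 0x0100 in network byte order
+ * and wrap the carry) instead of recomputing the checksum from scratch.
+ * The outer traffic class is taken from the inner TOS and the SRH
+ * next-header is set to IPv4-in-IPv6 (IP_PROTOCOL_IP_IN_IP).
+ */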
+
+/**
+ * @brief Graph node for applying an SR policy to an IPv4 packet (encapsulation)
+ */
+static uword
+sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip4_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v4 (node, b0, ip0, ip0_encap);
+ encaps_processing_v4 (node, b1, ip1, ip1_encap);
+ encaps_processing_v4 (node, b2, ip2, ip2_encap);
+ encaps_processing_v4 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip4_header_t *ip0_encap = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v4 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
+ .function = sr_policy_rewrite_encaps_v4,
+ .name = "sr-pl-rewrite-encaps-v4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
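+/**
+ * @brief Compute the default flow hash of an IPv4 or IPv6 packet, choosing
+ *        the address family from the version nibble of the first header byte
+ */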
+always_inline u32
+ip_flow_hash (void *data)
+{
+ ip4_header_t *iph = (ip4_header_t *) data;
+
+ if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
+ return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
+ else
+ return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
+}
+
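+/**
+ * @brief Fold a MAC address into a u64 for hashing. Reads 8 bytes starting
+ *        at m and masks the result to 48 bits; the value is only used as
+ *        flow-hash input, so the exact (endianness-dependent) byte selection
+ *        is not significant
+ */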
+always_inline u64
+mac_to_u64 (u8 * m)
+{
+ return (*((u64 *) m) & 0xffffffffffff);
+}
+
+always_inline u32
+l2_flow_hash (vlib_buffer_t * b0)
+{
+ ethernet_header_t *eh;
+ u64 a, b, c;
+ uword is_ip, eh_size;
+ u16 eh_type;
+
+ eh = vlib_buffer_get_current (b0);
+ eh_type = clib_net_to_host_u16 (eh->type);
+ eh_size = ethernet_buffer_header_size (b0);
+
+ is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
+
+ /* since we have 2 cache lines, use them */
+ if (is_ip)
+ a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
+ else
+ a = eh->type;
+
+ b = mac_to_u64 ((u8 *) eh->dst_address);
+ c = mac_to_u64 ((u8 *) eh->src_address);
+ hash_mix64 (a, b, c);
+
+ return (u32) c;
+}
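+
+/*
+ * The hash computed above is used by the L2 steering node below to pick one
+ * segment list of the SR policy:
+ *
+ *   sl = segments_lists[flow_hash & (vec_len (segments_lists) - 1)]
+ *
+ * Note that the mask spreads flows evenly only when the number of segment
+ * lists is a power of two.
+ */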
+
+/**
+ * @brief Graph node for applying an SR policy to an L2 frame (encapsulation)
+ */
+static uword
+sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ethernet_header_t *en0, *en1, *en2, *en3;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_policy_t *sp0, *sp1, *sp2, *sp3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sp0 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b0)->sw_if_index
+ [VLIB_RX]]);
+
+ sp1 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b1)->sw_if_index
+ [VLIB_RX]]);
+
+ sp2 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b2)->sw_if_index
+ [VLIB_RX]]);
+
+ sp3 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b3)->sw_if_index
+ [VLIB_RX]]);
+
+ if (vec_len (sp0->segments_lists) == 1)
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+ else
+ {
+ vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+ (vec_len (sp0->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp1->segments_lists) == 1)
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] = sp1->segments_lists[0];
+ else
+ {
+ vnet_buffer (b1)->ip.flow_hash = l2_flow_hash (b1);
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] =
+ sp1->segments_lists[(vnet_buffer (b1)->ip.flow_hash &
+ (vec_len (sp1->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp2->segments_lists) == 1)
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = sp2->segments_lists[0];
+ else
+ {
+ vnet_buffer (b2)->ip.flow_hash = l2_flow_hash (b2);
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] =
+ sp2->segments_lists[(vnet_buffer (b2)->ip.flow_hash &
+ (vec_len (sp2->segments_lists) - 1))];
+ }
+
+ if (vec_len (sp3->segments_lists) == 1)
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = sp3->segments_lists[0];
+ else
+ {
+ vnet_buffer (b3)->ip.flow_hash = l2_flow_hash (b3);
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] =
+ sp3->segments_lists[(vnet_buffer (b3)->ip.flow_hash &
+ (vec_len (sp3->segments_lists) - 1))];
+ }
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ en0 = vlib_buffer_get_current (b0);
+ en1 = vlib_buffer_get_current (b1);
+ en2 = vlib_buffer_get_current (b2);
+ en3 = vlib_buffer_get_current (b3);
+
+ clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+ vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) en1) - vec_len (sl1->rewrite), sl1->rewrite,
+ vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) en2) - vec_len (sl2->rewrite), sl2->rewrite,
+ vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) en3) - vec_len (sl3->rewrite), sl3->rewrite,
+ vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+ ip1->payload_length =
+ clib_host_to_net_u16 (b1->current_length - sizeof (ip6_header_t));
+ ip2->payload_length =
+ clib_host_to_net_u16 (b2->current_length - sizeof (ip6_header_t));
+ ip3->payload_length =
+ clib_host_to_net_u16 (b3->current_length - sizeof (ip6_header_t));
+
+ sr0 = (void *) (ip0 + 1);
+ sr1 = (void *) (ip1 + 1);
+ sr2 = (void *) (ip2 + 1);
+ sr3 = (void *) (ip3 + 1);
+
+ sr0->protocol = sr1->protocol = sr2->protocol = sr3->protocol =
+ IP_PROTOCOL_IP6_NONXT;
+
+ /* Which Traffic class and flow label do I set ? */
+ //ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0|((6&0xF)<<28)|((ip0_encap->tos&0xFF)<<20));
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0;
+ ethernet_header_t *en0;
+ ip6_sr_policy_t *sp0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ /* Find the SR policy */
+ sp0 = pool_elt_at_index (sm->sr_policies,
+ sm->sw_iface_sr_policies[vnet_buffer
+ (b0)->sw_if_index
+ [VLIB_RX]]);
+
+ /* In case there is more than one SL, LB among them */
+ if (vec_len (sp0->segments_lists) == 1)
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = sp0->segments_lists[0];
+ else
+ {
+ vnet_buffer (b0)->ip.flow_hash = l2_flow_hash (b0);
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ sp0->segments_lists[(vnet_buffer (b0)->ip.flow_hash &
+ (vec_len (sp0->segments_lists) - 1))];
+ }
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ en0 = vlib_buffer_get_current (b0);
+
+ clib_memcpy (((u8 *) en0) - vec_len (sl0->rewrite), sl0->rewrite,
+ vec_len (sl0->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ ip0->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+
+ sr0 = (void *) (ip0 + 1);
+ sr0->protocol = IP_PROTOCOL_IP6_NONXT;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
+ .function = sr_policy_rewrite_encaps_l2,
+ .name = "sr-pl-rewrite-encaps-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
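+
+/*
+ * Summary of the transformation performed by sr-pl-rewrite-encaps-l2 above:
+ * the entire Ethernet frame received on the steered interface becomes the
+ * payload of the IPv6 + SRH envelope taken from the selected segment list
+ * rewrite, with the SRH next-header set to IP_PROTOCOL_IP6_NONXT and the
+ * outer payload length derived from the resulting buffer length.
+ */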
+
+/**
+ * @brief Graph node for applying an SR policy to a packet (SRH insertion)
+ */
+static uword
+sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int insert_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+ u16 new_l0, new_l1, new_l2, new_l3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr1 =
+ (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+ ip6_ext_header_len (ip1 + 1));
+ else
+ sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+ if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr2 =
+ (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+ ip6_ext_header_len (ip2 + 1));
+ else
+ sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+ if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr3 =
+ (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+ ip6_ext_header_len (ip3 + 1));
+ else
+ sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite), (u8 *) ip1,
+ (void *) sr1 - (void *) ip1);
+ clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite), (u8 *) ip2,
+ (void *) sr2 - (void *) ip2);
+ clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite), (u8 *) ip3,
+ (void *) sr3 - (void *) ip3);
+
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+ vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite)), sl1->rewrite,
+ vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite)), sl2->rewrite,
+ vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite)), sl3->rewrite,
+ vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+ ip1 = ((void *) ip1) - vec_len (sl1->rewrite);
+ ip2 = ((void *) ip2) - vec_len (sl2->rewrite);
+ ip3 = ((void *) ip3) - vec_len (sl3->rewrite);
+
+ ip0->hop_limit -= 1;
+ ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
+
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite);
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) +
+ vec_len (sl1->rewrite);
+ new_l2 =
+ clib_net_to_host_u16 (ip2->payload_length) +
+ vec_len (sl2->rewrite);
+ new_l3 =
+ clib_net_to_host_u16 (ip3->payload_length) +
+ vec_len (sl3->rewrite);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+ ip2->payload_length = clib_host_to_net_u16 (new_l2);
+ ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+ sr1 = ((void *) sr1) - vec_len (sl1->rewrite);
+ sr2 = ((void *) sr2) - vec_len (sl2->rewrite);
+ sr3 = ((void *) sr3) - vec_len (sl3->rewrite);
+
+ sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+ sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+ sr1->segments->as_u64[0] = ip1->dst_address.as_u64[0];
+ sr1->segments->as_u64[1] = ip1->dst_address.as_u64[1];
+ sr2->segments->as_u64[0] = ip2->dst_address.as_u64[0];
+ sr2->segments->as_u64[1] = ip2->dst_address.as_u64[1];
+ sr3->segments->as_u64[0] = ip3->dst_address.as_u64[0];
+ sr3->segments->as_u64[1] = ip3->dst_address.as_u64[1];
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+ ip1->dst_address.as_u64[0] =
+ (sr1->segments + sr1->segments_left)->as_u64[0];
+ ip1->dst_address.as_u64[1] =
+ (sr1->segments + sr1->segments_left)->as_u64[1];
+ ip2->dst_address.as_u64[0] =
+ (sr2->segments + sr2->segments_left)->as_u64[0];
+ ip2->dst_address.as_u64[1] =
+ (sr2->segments + sr2->segments_left)->as_u64[1];
+ ip3->dst_address.as_u64[0] =
+ (sr3->segments + sr3->segments_left)->as_u64[0];
+ ip3->dst_address.as_u64[1] =
+ (sr3->segments + sr3->segments_left)->as_u64[1];
+
+ ip6_ext_header_t *ip_ext;
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip1 + 1 == (void *) sr1)
+ {
+ sr1->protocol = ip1->protocol;
+ ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip1 + 1);
+ sr1->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip2 + 1 == (void *) sr2)
+ {
+ sr2->protocol = ip2->protocol;
+ ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip2 + 1);
+ sr2->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip3 + 1 == (void *) sr3)
+ {
+ sr3->protocol = ip3->protocol;
+ ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip3 + 1);
+ sr3->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ insert_pkts += 4;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0 = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite)), sl0->rewrite,
+ vec_len (sl0->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite);
+ ip0->hop_limit -= 1;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite);
+ sr0->segments->as_u64[0] = ip0->dst_address.as_u64[0];
+ sr0->segments->as_u64[1] = ip0->dst_address.as_u64[1];
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ insert_pkts++;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ insert_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
+ .function = sr_policy_rewrite_insert,
+ .name = "sr-pl-rewrite-insert",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
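+
+/*
+ * Summary of the transformation performed by sr-pl-rewrite-insert above
+ * (addresses are placeholders):
+ *
+ *   before:  IPv6 (DA = D) | [HBH] | payload
+ *   after:   IPv6 (DA = active segment) | [HBH] | SRH | payload
+ *
+ * The original destination D is stored as the final entry (segments[0]) of
+ * the inserted SRH, the new destination is the segment indexed by
+ * segments_left, the payload length grows by the SRH size, the hop limit is
+ * decremented and the previous next-header value is moved into the SRH.
+ */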
+
+/**
+ * @brief Graph node for applying an SR policy to a BSID-steered packet (SRH insertion)
+ */
+static uword
+sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int insert_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+ u16 new_l0, new_l1, new_l2, new_l3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite_bsid));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite_bsid));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite_bsid));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite_bsid));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ if (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr1 =
+ (ip6_sr_header_t *) (((void *) (ip1 + 1)) +
+ ip6_ext_header_len (ip1 + 1));
+ else
+ sr1 = (ip6_sr_header_t *) (ip1 + 1);
+
+ if (ip2->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr2 =
+ (ip6_sr_header_t *) (((void *) (ip2 + 1)) +
+ ip6_ext_header_len (ip2 + 1));
+ else
+ sr2 = (ip6_sr_header_t *) (ip2 + 1);
+
+ if (ip3->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr3 =
+ (ip6_sr_header_t *) (((void *) (ip3 + 1)) +
+ ip6_ext_header_len (ip3 + 1));
+ else
+ sr3 = (ip6_sr_header_t *) (ip3 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy ((u8 *) ip1 - vec_len (sl1->rewrite_bsid), (u8 *) ip1,
+ (void *) sr1 - (void *) ip1);
+ clib_memcpy ((u8 *) ip2 - vec_len (sl2->rewrite_bsid), (u8 *) ip2,
+ (void *) sr2 - (void *) ip2);
+ clib_memcpy ((u8 *) ip3 - vec_len (sl3->rewrite_bsid), (u8 *) ip3,
+ (void *) sr3 - (void *) ip3);
+
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+ sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+ clib_memcpy (((u8 *) sr1 - vec_len (sl1->rewrite_bsid)),
+ sl1->rewrite_bsid, vec_len (sl1->rewrite_bsid));
+ clib_memcpy (((u8 *) sr2 - vec_len (sl2->rewrite_bsid)),
+ sl2->rewrite_bsid, vec_len (sl2->rewrite_bsid));
+ clib_memcpy (((u8 *) sr3 - vec_len (sl3->rewrite_bsid)),
+ sl3->rewrite_bsid, vec_len (sl3->rewrite_bsid));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite_bsid));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite_bsid));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite_bsid));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+ ip1 = ((void *) ip1) - vec_len (sl1->rewrite_bsid);
+ ip2 = ((void *) ip2) - vec_len (sl2->rewrite_bsid);
+ ip3 = ((void *) ip3) - vec_len (sl3->rewrite_bsid);
+
+ ip0->hop_limit -= 1;
+ ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
+
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite_bsid);
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) +
+ vec_len (sl1->rewrite_bsid);
+ new_l2 =
+ clib_net_to_host_u16 (ip2->payload_length) +
+ vec_len (sl2->rewrite_bsid);
+ new_l3 =
+ clib_net_to_host_u16 (ip3->payload_length) +
+ vec_len (sl3->rewrite_bsid);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+ ip2->payload_length = clib_host_to_net_u16 (new_l2);
+ ip3->payload_length = clib_host_to_net_u16 (new_l3);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+ sr1 = ((void *) sr1) - vec_len (sl1->rewrite_bsid);
+ sr2 = ((void *) sr2) - vec_len (sl2->rewrite_bsid);
+ sr3 = ((void *) sr3) - vec_len (sl3->rewrite_bsid);
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+ ip1->dst_address.as_u64[0] =
+ (sr1->segments + sr1->segments_left)->as_u64[0];
+ ip1->dst_address.as_u64[1] =
+ (sr1->segments + sr1->segments_left)->as_u64[1];
+ ip2->dst_address.as_u64[0] =
+ (sr2->segments + sr2->segments_left)->as_u64[0];
+ ip2->dst_address.as_u64[1] =
+ (sr2->segments + sr2->segments_left)->as_u64[1];
+ ip3->dst_address.as_u64[0] =
+ (sr3->segments + sr3->segments_left)->as_u64[0];
+ ip3->dst_address.as_u64[1] =
+ (sr3->segments + sr3->segments_left)->as_u64[1];
+
+ ip6_ext_header_t *ip_ext;
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip1 + 1 == (void *) sr1)
+ {
+ sr1->protocol = ip1->protocol;
+ ip1->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip1 + 1);
+ sr1->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip2 + 1 == (void *) sr2)
+ {
+ sr2->protocol = ip2->protocol;
+ ip2->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip2 + 1);
+ sr2->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (ip3 + 1 == (void *) sr3)
+ {
+ sr3->protocol = ip3->protocol;
+ ip3->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip_ext = (void *) (ip3 + 1);
+ sr3->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ insert_pkts += 4;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0;
+ ip6_sr_header_t *sr0 = 0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ u16 new_l0 = 0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite_bsid));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ sr0 =
+ (ip6_sr_header_t *) (((void *) (ip0 + 1)) +
+ ip6_ext_header_len (ip0 + 1));
+ else
+ sr0 = (ip6_sr_header_t *) (ip0 + 1);
+
+ clib_memcpy ((u8 *) ip0 - vec_len (sl0->rewrite_bsid), (u8 *) ip0,
+ (void *) sr0 - (void *) ip0);
+ clib_memcpy (((u8 *) sr0 - vec_len (sl0->rewrite_bsid)),
+ sl0->rewrite_bsid, vec_len (sl0->rewrite_bsid));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite_bsid));
+
+ ip0 = ((void *) ip0) - vec_len (sl0->rewrite_bsid);
+ ip0->hop_limit -= 1;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (sl0->rewrite_bsid);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ sr0 = ((void *) sr0) - vec_len (sl0->rewrite_bsid);
+
+ ip0->dst_address.as_u64[0] =
+ (sr0->segments + sr0->segments_left)->as_u64[0];
+ ip0->dst_address.as_u64[1] =
+ (sr0->segments + sr0->segments_left)->as_u64[1];
+
+ if (ip0 + 1 == (void *) sr0)
+ {
+ sr0->protocol = ip0->protocol;
+ ip0->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ }
+ else
+ {
+ ip6_ext_header_t *ip_ext = (void *) (ip0 + 1);
+ sr0->protocol = ip_ext->next_hdr;
+ ip_ext->next_hdr = IP_PROTOCOL_IPV6_ROUTE;
+ }
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ insert_pkts++;
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ insert_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_insert_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
+ .function = sr_policy_rewrite_b_insert,
+ .name = "sr-pl-rewrite-b-insert",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief SRH processing for packets steered into a BSID policy (encapsulation)
+ */
+static_always_inline void
+end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
+ vlib_buffer_t * b0,
+ ip6_header_t * ip0,
+ ip6_sr_header_t * sr0, u32 * next0)
+{
+ ip6_address_t *new_dst0;
+
+ if (PREDICT_FALSE (!sr0))
+ goto error_bsid_encaps;
+
+ if (PREDICT_TRUE (sr0->type == ROUTING_HEADER_TYPE_SR))
+ {
+ if (PREDICT_TRUE (sr0->segments_left != 0))
+ {
+ sr0->segments_left -= 1;
+ new_dst0 = (ip6_address_t *) (sr0->segments);
+ new_dst0 += sr0->segments_left;
+ ip0->dst_address.as_u64[0] = new_dst0->as_u64[0];
+ ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
+ return;
+ }
+ }
+
+error_bsid_encaps:
+ *next0 = SR_POLICY_REWRITE_NEXT_ERROR;
+ b0->error = node->errors[SR_POLICY_REWRITE_ERROR_BSID_ZERO];
+}
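+
+/*
+ * Example of the processing above: a steered packet arriving with an SRH
+ * whose segments_left is 2 has segments_left decremented to 1 and its IPv6
+ * destination rewritten to segments[1] before the BSID policy encapsulation
+ * is prepended; packets without an SRH, with a non-SR routing header, or
+ * with segments_left already 0 are sent to the error next node instead.
+ */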
+
+/**
+ * @brief Graph node for applying an SR policy to a BSID-steered packet (encapsulation)
+ */
+static uword
+sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ int encap_pkts = 0, bsid_pkts = 0;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Quad - Loop */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 next0, next1, next2, next3;
+ next0 = next1 = next2 = next3 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+ ip6_header_t *ip0, *ip1, *ip2, *ip3;
+ ip6_header_t *ip0_encap, *ip1_encap, *ip2_encap, *ip3_encap;
+ ip6_sr_header_t *sr0, *sr1, *sr2, *sr3;
+ ip6_ext_header_t *prev0, *prev1, *prev2, *prev3;
+ ip6_sr_sl_t *sl0, *sl1, *sl2, *sl3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ /* Prefetch the buffer header and packet for the N+2 loop iteration */
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p5->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p6->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p7->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next[2] = bi2 = from[2];
+ to_next[3] = bi3 = from[3];
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ sl1 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
+ sl2 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX]);
+ sl3 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+ ASSERT (b1->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl1->rewrite));
+ ASSERT (b2->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl2->rewrite));
+ ASSERT (b3->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl3->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip1_encap = vlib_buffer_get_current (b1);
+ ip2_encap = vlib_buffer_get_current (b2);
+ ip3_encap = vlib_buffer_get_current (b3);
+
+ ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip1_encap, prev1, sr1,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip2_encap, prev2, sr2,
+ IP_PROTOCOL_IPV6_ROUTE);
+ ip6_ext_header_find_t (ip3_encap, prev3, sr3,
+ IP_PROTOCOL_IPV6_ROUTE);
+
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+ end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1);
+ end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2);
+ end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3);
+
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ clib_memcpy (((u8 *) ip1_encap) - vec_len (sl1->rewrite),
+ sl1->rewrite, vec_len (sl1->rewrite));
+ clib_memcpy (((u8 *) ip2_encap) - vec_len (sl2->rewrite),
+ sl2->rewrite, vec_len (sl2->rewrite));
+ clib_memcpy (((u8 *) ip3_encap) - vec_len (sl3->rewrite),
+ sl3->rewrite, vec_len (sl3->rewrite));
+
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+ vlib_buffer_advance (b1, -(word) vec_len (sl1->rewrite));
+ vlib_buffer_advance (b2, -(word) vec_len (sl2->rewrite));
+ vlib_buffer_advance (b3, -(word) vec_len (sl3->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ ip2 = vlib_buffer_get_current (b2);
+ ip3 = vlib_buffer_get_current (b3);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap);
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip1->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip1->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip2->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip2->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip3->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip3->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
+
+ encap_pkts += 4;
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
+ }
+
+      /* Single loop for the remaining packets */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0 = 0, *ip0_encap = 0;
+ ip6_ext_header_t *prev0;
+ ip6_sr_header_t *sr0;
+ ip6_sr_sl_t *sl0;
+ u32 next0 = SR_POLICY_REWRITE_NEXT_IP6_LOOKUP;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 =
+ pool_elt_at_index (sm->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ASSERT (b0->current_data + VLIB_BUFFER_PRE_DATA_SIZE >=
+ vec_len (sl0->rewrite));
+
+ ip0_encap = vlib_buffer_get_current (b0);
+ ip6_ext_header_find_t (ip0_encap, prev0, sr0,
+ IP_PROTOCOL_IPV6_ROUTE);
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+
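+	  /* Prepend the precomputed rewrite (outer IPv6 header + SRH) of this
+	   * segment list in front of the packet and move current_data back
+	   * over it; encaps_processing_v6 () then fixes up the outer header. */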
+ clib_memcpy (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
+ sl0->rewrite, vec_len (sl0->rewrite));
+ vlib_buffer_advance (b0, -(word) vec_len (sl0->rewrite));
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ encaps_processing_v6 (node, b0, ip0, ip0_encap);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ sr_policy_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip0->src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip0->dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+
+ encap_pkts++;
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Update counters */
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_TOTAL,
+ encap_pkts);
+ vlib_node_increment_counter (vm, sr_policy_rewrite_encaps_node.index,
+ SR_POLICY_REWRITE_ERROR_COUNTER_BSID,
+ bsid_pkts);
+
+ return from_frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
+ .function = sr_policy_rewrite_b_encaps,
+ .name = "sr-pl-rewrite-b-encaps",
+ .vector_size = sizeof (u32),
+ .format_trace = format_sr_policy_rewrite_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = SR_POLICY_REWRITE_N_ERROR,
+ .error_strings = sr_policy_rewrite_error_strings,
+ .n_next_nodes = SR_POLICY_REWRITE_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [SR_POLICY_REWRITE_NEXT_##s] = n,
+ foreach_sr_policy_rewrite_next
+#undef _
+ },
+};
+/* *INDENT-ON* */
+
+/*************************** SR Segment Lists DPOs ****************************/
+static u8 *
+format_sr_segment_list_dpo (u8 * s, va_list * args)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_address_t *addr;
+ ip6_sr_sl_t *sl;
+
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+ s = format (s, "SR: Segment List index:[%d]", index);
+ s = format (s, "\n\tSegments:");
+
+ sl = pool_elt_at_index (sm->sid_lists, index);
+
+ s = format (s, "< ");
+ vec_foreach (addr, sl->segments)
+ {
+ s = format (s, "%U, ", format_ip6_address, addr);
+ }
+ s = format (s, "\b\b > - ");
+ s = format (s, "Weight: %u", sl->weight);
+
+ return s;
+}
+
+const static dpo_vft_t sr_policy_rewrite_vft = {
+ .dv_lock = sr_dpo_lock,
+ .dv_unlock = sr_dpo_unlock,
+ .dv_format = format_sr_segment_list_dpo,
+};
+
+const static char *const sr_pr_encaps_ip6_nodes[] = {
+ "sr-pl-rewrite-encaps",
+ NULL,
+};
+
+const static char *const sr_pr_encaps_ip4_nodes[] = {
+ "sr-pl-rewrite-encaps-v4",
+ NULL,
+};
+
+const static char *const *const sr_pr_encaps_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_encaps_ip6_nodes,
+ [DPO_PROTO_IP4] = sr_pr_encaps_ip4_nodes,
+};
+
+const static char *const sr_pr_insert_ip6_nodes[] = {
+ "sr-pl-rewrite-insert",
+ NULL,
+};
+
+const static char *const *const sr_pr_insert_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_insert_ip6_nodes[] = {
+ "sr-pl-rewrite-b-insert",
+ NULL,
+};
+
+const static char *const *const sr_pr_bsid_insert_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_bsid_insert_ip6_nodes,
+};
+
+const static char *const sr_pr_bsid_encaps_ip6_nodes[] = {
+ "sr-pl-rewrite-b-encaps",
+ NULL,
+};
+
+const static char *const *const sr_pr_bsid_encaps_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP6] = sr_pr_bsid_encaps_ip6_nodes,
+};
+
+/********************* SR Policy Rewrite initialization ***********************/
+/**
+ * @brief SR Policy Rewrite initialization
+ */
+clib_error_t *
+sr_policy_rewrite_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
+ mhash_init (&sm->sr_policies_index_hash, sizeof (uword),
+ sizeof (ip6_address_t));
+
+  /* Register the SR policy rewrite DPO types */
+ sr_pr_encaps_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_encaps_nodes);
+
+ sr_pr_insert_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_insert_nodes);
+
+ sr_pr_bsid_encaps_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_encaps_nodes);
+
+ sr_pr_bsid_insert_dpo_type =
+ dpo_register_new_type (&sr_policy_rewrite_vft, sr_pr_bsid_insert_nodes);
+
+ /* Register the L2 encaps node used in HW redirect */
+ sm->l2_sr_policy_rewrite_index = sr_policy_rewrite_encaps_node.index;
+
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_policy_rewrite_init);
+
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
new file mode 100755
index 00000000000..a7903751dda
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.c
@@ -0,0 +1,573 @@
+/*
+ * sr_steering.c: ipv6 segment routing steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR Policies
+ *
+ * This file is in charge of programming the FIB appropriately in order to
+ * steer packets through SR Policies as defined in 'sr_policy_rewrite.c'.
+ * Notice that only steering is done here; SR policy application is done in
+ * sr_policy_rewrite.c.
+ *
+ * Supports:
+ * - Steering of IPv6 traffic based on the Destination Address
+ * - Steering of IPv4 traffic based on the Destination Address
+ * - Steering of L2 frames based on the incoming interface (sw interface)
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+/**
+ * @brief Steer L2 and L3 traffic through a given SR policy
+ *
+ * @param is_del
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy is the index of the SR Policy (alt to bsid)
+ * @param table_id is the VRF where to install the FIB entry for the BSID
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param sw_if_index is the incoming interface for L2 traffic
+ * @param traffic_type describes the type of traffic
+ *
+ * @return 0 if correct, else error
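+ *
+ * Example (hypothetical values): steer the IPv6 prefix held in 'prefix'
+ * with a /64 mask into the SR policy identified by 'bsid', using the
+ * default FIB table:
+ *   sr_steering_policy (0, &bsid, (u32) ~0, (u32) ~0, &prefix, 64,
+ *                       (u32) ~0, SR_STEER_IPV6);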
+ */
+int
+sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
+ u32 table_id, ip46_address_t * prefix, u32 mask_width,
+ u32 sw_if_index, u8 traffic_type)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ sr_steering_key_t key;
+ ip6_sr_steering_policy_t *steer_pl;
+ fib_prefix_t pfx = { 0 };
+
+ ip6_sr_policy_t *sr_policy = 0;
+ uword *p = 0;
+
+ memset (&key, 0, sizeof (sr_steering_key_t));
+
+ /* Compute the steer policy key */
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ key.l3.prefix.as_u64[0] = prefix->as_u64[0];
+ key.l3.prefix.as_u64[1] = prefix->as_u64[1];
+ key.l3.mask_width = mask_width;
+ key.l3.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ key.l2.sw_if_index = sw_if_index;
+
+ /* Sanitise the SW_IF_INDEX */
+ if (pool_is_free_index (sm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return -3;
+
+ vnet_sw_interface_t *sw =
+ vnet_get_sw_interface (sm->vnet_main, sw_if_index);
+ if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ return -3;
+ }
+ else
+ return -1;
+
+ key.traffic_type = traffic_type;
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+ if (p)
+ {
+ /* Retrieve Steer Policy function */
+ steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+ if (is_del)
+ {
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.l3.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ /* Remove HW redirection */
+ vnet_feature_enable_disable ("device-input",
+					   "sr-pl-rewrite-encaps-l2",
+ sw_if_index, 0, 0, 0);
+ sm->sw_iface_sr_policies[sw_if_index] = ~(u32) 0;
+
+	      /* Remove promiscuous mode from the interface */
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif =
+ ethernet_get_interface (em, sw_if_index);
+
+ if (!eif)
+ goto cleanup_error_redirection;
+
+ ethernet_set_flags (vnm, sw_if_index, 0);
+ }
+
+ /* Delete SR steering policy entry */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+
+ /* If no more SR policies or steering policies */
+ if (!pool_elts (sm->sr_policies) && !pool_elts (sm->steer_policies))
+ {
+ fib_table_unlock (sm->fib_table_ip6, FIB_PROTOCOL_IP6);
+	  fib_table_unlock (sm->fib_table_ip4, FIB_PROTOCOL_IP4);
+ sm->fib_table_ip6 = (u32) ~ 0;
+ sm->fib_table_ip4 = (u32) ~ 0;
+ }
+
+ return 1;
+ }
+      else			/* The user requested to update an existing SR steering policy */
+ {
+ /* Retrieve SR steering policy */
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ if (!sr_policy)
+ return -2;
+
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Remove old FIB/hw redirection and create a new one */
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ /* Remove FIB entry */
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.l3.fib_table),
+ &pfx, FIB_SOURCE_SR);
+
+ /* Create a new one */
+ goto update_fib;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ /* Update L2-HW redirection */
+ goto update_fib;
+ }
+ }
+ }
+ else
+ /* delete; steering policy does not exist; complain */
+ if (is_del)
+ return -4;
+
+ /* Retrieve SR policy */
+ if (bsid)
+ {
+ p = mhash_get (&sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -2;
+ }
+ else
+ sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
+
+ /* Create a new steering policy */
+ pool_get (sm->steer_policies, steer_pl);
+ memset (steer_pl, 0, sizeof (*steer_pl));
+
+ if (traffic_type == SR_STEER_IPV4 || traffic_type == SR_STEER_IPV6)
+ {
+ clib_memcpy (&steer_pl->classify.l3.prefix, prefix,
+ sizeof (ip46_address_t));
+ steer_pl->classify.l3.mask_width = mask_width;
+ steer_pl->classify.l3.fib_table =
+ (table_id != (u32) ~ 0 ? table_id : 0);
+ steer_pl->classify.traffic_type = traffic_type;
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ steer_pl->classify.l2.sw_if_index = sw_if_index;
+ steer_pl->classify.traffic_type = traffic_type;
+ }
+ else
+ {
+ /* Incorrect API usage. Should never get here */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -1;
+ }
+ steer_pl->sr_policy = sr_policy - sm->sr_policies;
+
+ /* Create and store key */
+ mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+ NULL);
+
+ if (traffic_type == SR_STEER_L2)
+ {
+ if (!sr_policy->is_encap)
+ goto cleanup_error_encap;
+
+ if (vnet_feature_enable_disable
+ ("device-input", "sr-pl-rewrite-encaps-l2", sw_if_index, 1, 0, 0))
+ goto cleanup_error_redirection;
+
+      /* Set promiscuous mode on the interface */
+ vnet_main_t *vnm = vnet_get_main ();
+ ethernet_main_t *em = &ethernet_main;
+ ethernet_interface_t *eif = ethernet_get_interface (em, sw_if_index);
+
+ if (!eif)
+ goto cleanup_error_redirection;
+
+ ethernet_set_flags (vnm, sw_if_index,
+ ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ if (!sr_policy->is_encap)
+ goto cleanup_error_encap;
+
+update_fib:
+ /* FIB API calls - Recursive route through the BindingSID */
+ if (traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.l3.prefix.ip6;
+
+ fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP6,
+ (table_id !=
+ (u32) ~ 0 ?
+ table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ FIB_PROTOCOL_IP6,
+ (ip46_address_t *) & sr_policy->bsid, ~0,
+ sm->fib_table_ip6, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.l3.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.l3.prefix.ip4;
+
+ fib_table_entry_path_add (fib_table_find (FIB_PROTOCOL_IP4,
+ (table_id !=
+ (u32) ~ 0 ?
+ table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ FIB_PROTOCOL_IP6,
+ (ip46_address_t *) & sr_policy->bsid, ~0,
+ sm->fib_table_ip4, 1, NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ }
+ else if (traffic_type == SR_STEER_L2)
+ {
+ if (sw_if_index < vec_len (sm->sw_iface_sr_policies))
+ sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+ else
+ {
+ vec_resize (sm->sw_iface_sr_policies,
+ (pool_len (sm->vnet_main->interface_main.sw_interfaces)
+ - vec_len (sm->sw_iface_sr_policies)));
+ sm->sw_iface_sr_policies[sw_if_index] = steer_pl->sr_policy;
+ }
+ }
+
+ return 0;
+
+cleanup_error_encap:
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -5;
+
+cleanup_error_redirection:
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ return -3;
+}
+
+static clib_error_t *
+sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ int is_del = 0;
+
+ ip46_address_t prefix;
+ u32 dst_mask_width = 0;
+ u32 sw_if_index = (u32) ~ 0;
+ u8 traffic_type = 0;
+ u32 fib_table = (u32) ~ 0;
+
+ ip6_address_t bsid;
+ u32 sr_policy_index = (u32) ~ 0;
+
+ u8 sr_policy_set = 0;
+
+ memset (&prefix, 0, sizeof (ip46_address_t));
+
+ int rv;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip6_address,
+ &prefix.ip6, &dst_mask_width))
+ traffic_type = SR_STEER_IPV6;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip4_address,
+ &prefix.ip4, &dst_mask_width))
+ traffic_type = SR_STEER_IPV4;
+ else if (!traffic_type
+ && unformat (input, "l2 %U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ traffic_type = SR_STEER_L2;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy index %d",
+ &sr_policy_index))
+ sr_policy_set = 1;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy bsid %U",
+ unformat_ip6_address, &bsid))
+ sr_policy_set = 1;
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else
+ break;
+ }
+
+ if (!traffic_type)
+ return clib_error_return (0, "No L2/L3 traffic specified");
+ if (!sr_policy_set)
+ return clib_error_return (0, "No SR policy specified");
+
+ /* Make sure that the prefixes are clean */
+ if (traffic_type == SR_STEER_IPV4)
+ {
+ u32 mask =
+ (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
+ prefix.ip4.as_u32 &= mask;
+ }
+ else if (traffic_type == SR_STEER_IPV6)
+ {
+ ip6_address_t mask;
+ ip6_address_mask_from_width (&mask, dst_mask_width);
+ ip6_address_mask (&prefix.ip6, &mask);
+ }
+
+ rv =
+ sr_steering_policy (is_del, (sr_policy_index == ~(u32) 0 ? &bsid : NULL),
+ sr_policy_index, fib_table, &prefix, dst_mask_width,
+ sw_if_index, traffic_type);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0, "Incorrect API usage.");
+ case -2:
+ return clib_error_return (0,
+ "The requested SR policy could not be located. Review the BSID/index.");
+ case -3:
+ return clib_error_return (0,
+ "Unable to do SW redirect. Incorrect interface.");
+ case -4:
+ return clib_error_return (0,
+ "The requested SR steering policy could not be deleted.");
+ case -5:
+ return clib_error_return (0,
+ "The SR policy is not an encapsulation one.");
+ default:
+ return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
+ .path = "sr steer",
+  .short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sw_if>] "
+    "via sr policy [index <sr_policy_index>|bsid <bsid_ip6_addr>] "
+    "(fib-table <fib_table_index>)",
+ .long_help =
+    "\tSteer L2 or L3 traffic through an existing SR policy.\n"
+    "\tExamples:\n"
+    "\t\tsr steer l3 2001::/64 via sr policy index 5\n"
+    "\t\tsr steer l3 2001::/64 via sr policy bsid 2010::9999:1\n"
+    "\t\tsr steer l2 GigabitEthernet0/5/0 via sr policy index 5\n"
+    "\t\tsr steer del l3 2001::/64 via sr policy index 5\n",
+ .function = sr_steer_policy_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+show_sr_steering_policies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_steering_policy_t **steer_policies = 0;
+ ip6_sr_steering_policy_t *steer_pl;
+
+ vnet_main_t *vnm = vnet_get_main ();
+
+ ip6_sr_policy_t *pl = 0;
+ int i;
+
+ vlib_cli_output (vm, "SR steering policies:");
+ /* *INDENT-OFF* */
+ pool_foreach (steer_pl, sm->steer_policies, ({vec_add1(steer_policies, steer_pl);}));
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
+ for (i = 0; i < vec_len (steer_policies); i++)
+ {
+ steer_pl = steer_policies[i];
+ pl = pool_elt_at_index (sm->sr_policies, steer_pl->sr_policy);
+ if (steer_pl->classify.traffic_type == SR_STEER_L2)
+ {
+ vlib_cli_output (vm, "L2 %U\t%U",
+ format_vnet_sw_if_index_name, vnm,
+ steer_pl->classify.l2.sw_if_index,
+ format_ip6_address, &pl->bsid);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip4_address,
+ &steer_pl->classify.l3.prefix.ip4,
+ steer_pl->classify.l3.mask_width,
+ format_ip6_address, &pl->bsid);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ vlib_cli_output (vm, "L3 %U/%d\t%U",
+ format_ip6_address,
+ &steer_pl->classify.l3.prefix.ip6,
+ steer_pl->classify.l3.mask_width,
+ format_ip6_address, &pl->bsid);
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
+ .path = "show sr steering policies",
+ .short_help = "show sr steering policies",
+ .function = show_sr_steering_policies_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sr_steering_init (vlib_main_t * vm)
+{
+ ip6_sr_main_t *sm = &sr_main;
+
+ /* Init memory for function keys */
+ mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+ sizeof (sr_steering_key_t));
+
+ sm->sw_iface_sr_policies = 0;
+
+ sm->vnet_main = vnet_get_main ();
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_INIT_FUNCTION (sr_steering_init);
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
+{
+ .arc_name = "device-input",
+ .node_name = "sr-pl-rewrite-encaps-l2",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+/* *INDENT-ON* */
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/srv6/sr_steering.md b/src/vnet/srv6/sr_steering.md
new file mode 100644
index 00000000000..cf446f8171e
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.md
@@ -0,0 +1,11 @@
+# Steering packets into an SR Policy {#srv6_steering_doc}
+
+To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
+
+ sr steer l3 2001::/64 via sr policy index 1
+ sr steer l3 2001::/64 via sr policy bsid cafe::1
+ sr steer l3 2001::/64 via sr policy bsid cafe::1 fib-table 3
+ sr steer l3 10.0.0.0/16 via sr policy bsid cafe::1
+ sr steer l2 TenGE0/1/0 via sr policy bsid cafe::1
+
+Disclaimer: T.Encaps.L2 steers L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will automatically *put the interface into promiscuous mode*.
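+
+The configured steering policies, together with the BSID of the SR policy each one points to, can be inspected at any time with:
+
+    show sr steering policies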