aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/map
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-12-19 23:05:39 +0100
committerDamjan Marion <damarion@cisco.com>2016-12-28 12:25:14 +0100
commit7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 (patch)
tree5de62f8dbd3a752f5a676ca600e43d2652d1ff1a /src/vnet/map
parent696f1adec0df3b8f161862566dd9c86174302658 (diff)
Reorganize source tree to use single autotools instance
Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vnet/map')
-rwxr-xr-xsrc/vnet/map/examples/gen-rules.py186
-rw-r--r--src/vnet/map/examples/health_check.c109
-rwxr-xr-xsrc/vnet/map/examples/test_map.py141
-rwxr-xr-xsrc/vnet/map/gen-rules.py107
-rw-r--r--src/vnet/map/ip4_map.c813
-rw-r--r--src/vnet/map/ip4_map_t.c1363
-rw-r--r--src/vnet/map/ip6_map.c1269
-rw-r--r--src/vnet/map/ip6_map_t.c1517
-rw-r--r--src/vnet/map/map.api178
-rw-r--r--src/vnet/map/map.c2166
-rw-r--r--src/vnet/map/map.h591
-rw-r--r--src/vnet/map/map_api.c295
-rw-r--r--src/vnet/map/map_doc.md69
-rw-r--r--src/vnet/map/map_dpo.c191
-rw-r--r--src/vnet/map/map_dpo.h67
-rw-r--r--src/vnet/map/test.c205
16 files changed, 9267 insertions, 0 deletions
diff --git a/src/vnet/map/examples/gen-rules.py b/src/vnet/map/examples/gen-rules.py
new file mode 100755
index 00000000000..7964aa9a359
--- /dev/null
+++ b/src/vnet/map/examples/gen-rules.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+def_ip4_pfx = '192.0.2.0/24'
+def_ip6_pfx = '2001:db8::/32'
+def_ip6_src = '2001:db8::1'
+def_psid_offset = 6
+def_psid_len = 6
+def_ea_bits_len = 0
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+parser.add_argument('-f', action="store", dest="format", default="vpp")
+parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx)
+parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx)
+parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src)
+parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len)
+parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset)
+parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len)
+args = parser.parse_args()
+
+#
+# Print domain
+#
+def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len):
+ if format == 'vpp':
+ print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src +
+ " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
+ if format == 'confd':
+ print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src +
+ " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx +
+ " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
+ if format == 'xml':
+ print("<softwire-instance>")
+ print("<id>", i, "</id>");
+ print(" <br-ipv6>" + ip6_src + "</br-ipv6>")
+ print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>")
+ print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>")
+ print(" <ea-len>", eabits_len, "</ea-len>")
+ print(" <psid-len>", psid_len, "</psid-len>")
+ print(" <psid-offset>", psid_offset, "</psid-offset>")
+
+def domain_print_end():
+ if format == 'xml':
+ print("</softwire-instance>")
+
+def rule_print(i, psid, dst):
+ if format == 'vpp':
+ print("map add rule index", i, "psid", psid, "ip6-dst", dst)
+ if format == 'confd':
+ print("binding", psid, "ipv6-addr", dst)
+ if format == 'xml':
+ print(" <binding>")
+ print(" <psid>", psid, "</psid>")
+ print(" <ipv6-addr>", dst, "</ipv6-addr>")
+ print(" </binding>")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
+def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len)
+ domain_print_end()
+
+#
+# 1:1 Full IPv4 address
+#
+def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = ipaddress.ip_network(ip6_pfx_str)
+ psid_len = 0
+ mod = ip4_pfx.num_addresses / 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0)
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+#
+# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
+#
+def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = ipaddress.ip_network(ip6_pfx_str)
+ mod = ip4_pfx.num_addresses / 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
+ for psid in range(0x1 << int(psid_len)):
+ rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid]))
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
+def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
+ ip6_src = ipaddress.ip_address(ip6_src_str)
+ ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56))
+ mod = ip4_pfx.num_addresses / 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
+ for psid in range(0x1 << psid_len):
+ enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1]
+ rule_print(i, psid, enduserprefix[(i * (0x1<<psid_len)) + psid])
+ domain_print_end()
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+
+def xml_header_print():
+ print('''
+<?xml version="1.0" encoding="UTF-8"?>
+ <hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
+ <capabilities>
+ <capability>urn:ietf:params:netconf:base:1.0</capability>
+ </capabilities>
+ </hello>
+]]>]]>
+
+<?xml version="1.0" encoding="UTF-8"?>
+ <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="1">
+ <edit-config>
+ <target>
+ <candidate/>
+ </target>
+ <config>
+
+ <vpp xmlns="http://www.cisco.com/yang/cisco-vpp">
+ <softwire>
+ <softwire-instances>
+
+ ''')
+
+def xml_footer_print():
+ print('''
+</softwire-instances>
+</softwire>
+</vpp>
+ </config>
+ </edit-config>
+ </rpc>
+
+]]>]]>
+
+<?xml version="1.0" encoding="UTF-8"?>
+ <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="2">
+ <close-session/>
+ </rpc>
+
+]]>]]>
+ ''')
+
+
+format = args.format
+if format == 'xml':
+ xml_header_print()
+globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len)
+if format == 'xml':
+ xml_footer_print()
diff --git a/src/vnet/map/examples/health_check.c b/src/vnet/map/examples/health_check.c
new file mode 100644
index 00000000000..5f0d85fec08
--- /dev/null
+++ b/src/vnet/map/examples/health_check.c
@@ -0,0 +1,109 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <errno.h>
+
+static void
+usage (void) {
+ fprintf(stderr,
+ "Usage: health_check"
+ " -d debug"
+ " -I interface"
+ "\n");
+ exit(2);
+}
+
+int
+main (int argc, char **argv)
+{
+ int sd, ch;
+ uint8_t *opt, *pkt;
+ struct ifreq ifr;
+ char *interface = NULL;
+ bool debug = false;
+
+ while ((ch = getopt(argc, argv, "h?" "I:" "d")) != EOF) {
+ switch(ch) {
+ case 'I':
+ interface = optarg;
+ break;
+ case 'd':
+ debug = true;
+ break;
+ default:
+ usage();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (!interface)
+ usage();
+
+ /* Request a socket descriptor sd. */
+ if ((sd = socket (AF_INET6, SOCK_RAW, IPPROTO_IPIP)) < 0) {
+ perror ("Failed to get socket descriptor ");
+ exit (EXIT_FAILURE);
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", interface);
+
+ /* Bind socket to interface of this node. */
+ if (setsockopt (sd, SOL_SOCKET, SO_BINDTODEVICE, (void *) &ifr, sizeof (ifr)) < 0) {
+ perror ("SO_BINDTODEVICE failed");
+ exit (EXIT_FAILURE);
+ }
+ if (debug) printf("Binding to interface %s\n", interface);
+
+ while (1) {
+ struct sockaddr_in6 src_addr;
+ socklen_t addrlen = sizeof(src_addr);
+ char source[INET6_ADDRSTRLEN+1];
+ int len;
+ uint8_t inpack[IP_MAXPACKET];
+
+ if ((len = recvfrom(sd, inpack, sizeof(inpack), 0, (struct sockaddr *)&src_addr, &addrlen)) < 0) {
+ perror("recvfrom failed ");
+ }
+ if (inet_ntop(AF_INET6, &src_addr.sin6_addr, source, INET6_ADDRSTRLEN) == NULL) {
+ perror("inet_ntop() failed.");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Reply */
+ struct iphdr *ip = (struct iphdr *)inpack;
+ uint32_t saddr;
+ struct icmphdr *icmp;
+
+ saddr = ip->saddr;
+ ip->saddr = ip->daddr;
+ ip->daddr = saddr;
+
+ switch (ip->protocol) {
+ case 1:
+ if (debug) printf ("ICMP Echo request from %s\n", source);
+ icmp = (struct icmphdr *)&ip[1];
+ icmp->type = ICMP_ECHOREPLY;
+ break;
+ default:
+ fprintf(stderr, "Unsupported protocol %d", ip->protocol);
+ }
+ if (len = sendto(sd, inpack, len, 0, (struct sockaddr *)&src_addr, addrlen) < 0) {
+ perror("sendto failed ");
+ }
+ }
+
+ close (sd);
+
+ return (EXIT_SUCCESS);
+}
diff --git a/src/vnet/map/examples/test_map.py b/src/vnet/map/examples/test_map.py
new file mode 100755
index 00000000000..21388d49526
--- /dev/null
+++ b/src/vnet/map/examples/test_map.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+import time,argparse,sys,cmd, unittest
+from ipaddress import *
+
+parser = argparse.ArgumentParser(description='VPP MAP test')
+parser.add_argument('-i', nargs='*', action="store", dest="inputdir")
+args = parser.parse_args()
+
+for dir in args.inputdir:
+ sys.path.append(dir)
+from vpp_papi import *
+
+#
+# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
+#
+def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+ ip4_pfx = ip_network(ip4_pfx_str)
+ ip6_src = ip_address(ip6_src_str)
+ ip6_dst = ip_network(ip6_pfx_str)
+ ip6_nul = IPv6Address(u'0::0')
+ mod = ip4_pfx.num_addresses / 1024
+
+ for i in range(ip4_pfx.num_addresses):
+ a = time.clock()
+ t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0)
+ #print "Return from map_add_domain", t
+ if t == None:
+ print "map_add_domain failed"
+ continue
+ if t.retval != 0:
+ print "map_add_domain failed", t
+ continue
+ for psid in range(0x1 << int(psid_len)):
+ r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<<int(psid_len))) + psid]).packed, psid)
+ #print "Return from map_add_del_rule", r
+
+ if ip6_src_ecmp and not i % mod:
+ ip6_src = ip6_src + 1
+
+ print "Running time:", time.clock() - a
+
+class TestMAP(unittest.TestCase):
+ '''
+ def test_delete_all(self):
+ t = map_domain_dump(0)
+ self.assertNotEqual(t, None)
+ print "Number of domains configured: ", len(t)
+ for d in t:
+ ts = map_del_domain(0, d.domainindex)
+ self.assertNotEqual(ts, None)
+ t = map_domain_dump(0)
+ self.assertNotEqual(t, None)
+ print "Number of domains configured: ", len(t)
+ self.assertEqual(len(t), 0)
+
+ '''
+
+ def test_a_million_rules(self):
+ ip4_pfx = u'192.0.2.0/24'
+ ip6_pfx = u'2001:db8::/32'
+ ip6_src = u'2001:db8::1'
+ psid_offset = 6
+ psid_len = 6
+ ea_bits_len = 0
+ lw46_shared(ip4_pfx, ip6_pfx, ip6_src, ea_bits_len, psid_offset, psid_len)
+
+#
+# RX thread, that should sit on blocking vpe_api_read()
+
+#
+
+
+#
+#
+#
+import threading
+class RXThread (threading.Thread):
+ def __init__(self):
+ threading.Thread.__init__(self)
+
+ def run(self):
+ print "Starting "
+ i = 0
+ while True:
+ msg = vpe_api_read()
+ if msg:
+ #print msg
+ id = unpack('>H', msg[0:2])
+ size = unpack('>H', msg[2:4])
+ print "Received", id, "of size", size
+ i += 1
+ #del msg
+ continue
+
+ #time.sleep(0.001)
+ return
+
+# Create RX thread
+rxthread = RXThread()
+rxthread.setDaemon(True)
+
+print "Connect", connect_to_vpe("client124")
+import timeit
+rxthread.start()
+print "After thread started"
+
+#pneum_kill_thread()
+print "After thread killed"
+
+#t = show_version(0)
+#print "Result from show version", t
+
+print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version")
+time.sleep(10)
+#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
+
+
+disconnect_from_vpe()
+sys.exit()
+
+
+print t.program, t.version,t.builddate,t.builddirectory
+
+'''
+
+t = map_domain_dump(0)
+if not t:
+ print('show map domain failed')
+
+for d in t:
+ print("IP6 prefix:",str(IPv6Address(d.ip6prefix)))
+ print( "IP4 prefix:",str(IPv4Address(d.ip4prefix)))
+'''
+
+suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP)
+unittest.TextTestRunner(verbosity=2).run(suite)
+
+disconnect_from_vpe()
+
+
diff --git a/src/vnet/map/gen-rules.py b/src/vnet/map/gen-rules.py
new file mode 100755
index 00000000000..533a8e237f7
--- /dev/null
+++ b/src/vnet/map/gen-rules.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+args = parser.parse_args()
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
+def shared11br():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+
+#
+# 1:1 Shared IPv4 address
+#
+def shared11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+#
+# 1:1 Shared IPv4 address small
+#
+def smallshared11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/24')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 6
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ for psid in range(0x1 << psid_len):
+ print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+#
+# 1:1 Full IPv4 address
+#
+def full11():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 0
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i],
+ "ea-bits-len 0 psid-offset 0 psid-len 0")
+def full11br():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip6_dst = ipaddress.ip_network('bbbb::/32')
+ psid_len = 0
+ for i in range(ip4_pfx.num_addresses):
+ print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 0 psid-len 0")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
+def algo():
+ print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8")
+ print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0")
+
+#
+# IP4 forwarding
+#
+def ip4():
+ ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ for i in range(ip4_pfx.num_addresses):
+ print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
+
+
+globals()[args.mapmode]()
+
+
diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c
new file mode 100644
index 00000000000..9fd10f62eb1
--- /dev/null
+++ b/src/vnet/map/ip4_map.c
@@ -0,0 +1,813 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Defines used for testing various optimisation schemes
+ */
+#define MAP_ENCAP_DUAL 0
+
+#include "map.h"
+#include "../ip/ip_frag.h"
+
+vlib_node_registration_t ip4_map_reass_node;
+
+enum ip4_map_next_e
+{
+ IP4_MAP_NEXT_IP6_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+ IP4_MAP_NEXT_IP6_REWRITE,
+#endif
+ IP4_MAP_NEXT_IP4_FRAGMENT,
+ IP4_MAP_NEXT_IP6_FRAGMENT,
+ IP4_MAP_NEXT_REASS,
+ IP4_MAP_NEXT_ICMP_ERROR,
+ IP4_MAP_NEXT_DROP,
+ IP4_MAP_N_NEXT,
+};
+
+enum ip4_map_reass_next_t
+{
+ IP4_MAP_REASS_NEXT_IP6_LOOKUP,
+ IP4_MAP_REASS_NEXT_IP4_FRAGMENT,
+ IP4_MAP_REASS_NEXT_DROP,
+ IP4_MAP_REASS_N_NEXT,
+};
+
+typedef struct
+{
+ u32 map_domain_index;
+ u16 port;
+ u8 cached;
+} map_ip4_map_reass_trace_t;
+
+u8 *
+format_ip4_map_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *);
+ return format (s, "MAP domain index: %d L4 port: %u Status: %s",
+ t->map_domain_index, t->port,
+ t->cached ? "cached" : "forwarded");
+}
+
+/*
+ * ip4_map_get_port
+ */
+u16
+ip4_map_get_port (ip4_header_t * ip, map_dir_e dir)
+{
+ /* Find port information */
+ if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (ip + 1);
+ return (dir == MAP_SENDER ? udp->src_port : udp->dst_port);
+ }
+ else if (ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ /*
+ * 1) ICMP Echo request or Echo reply
+ * 2) ICMP Error with inner packet being UDP or TCP
+ * 3) ICMP Error with inner packet being ICMP Echo request or Echo reply
+ */
+ icmp46_header_t *icmp = (void *) (ip + 1);
+ if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
+ {
+ return *((u16 *) (icmp + 1));
+ }
+ else if (clib_net_to_host_u16 (ip->length) >= 56)
+ { // IP + ICMP + IP + L4 header
+ ip4_header_t *icmp_ip = (ip4_header_t *) (icmp + 2);
+ if (PREDICT_TRUE ((icmp_ip->protocol == IP_PROTOCOL_TCP) ||
+ (icmp_ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (icmp_ip + 1);
+ return (dir == MAP_SENDER ? udp->dst_port : udp->src_port);
+ }
+ else if (icmp_ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ icmp46_header_t *inner_icmp = (void *) (icmp_ip + 1);
+ if (inner_icmp->type == ICMP4_echo_request
+ || inner_icmp->type == ICMP4_echo_reply)
+ return (*((u16 *) (inner_icmp + 1)));
+ }
+ }
+ }
+ return (0);
+}
+
+static_always_inline u16
+ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip,
+ u32 * next, u8 * error)
+{
+ u16 port = 0;
+
+ if (d->psid_length > 0)
+ {
+ if (ip4_get_fragment_offset (ip) == 0)
+ {
+ if (PREDICT_FALSE
+ ((ip->ip_version_and_header_length != 0x45)
+ || clib_host_to_net_u16 (ip->length) < 28))
+ {
+ return 0;
+ }
+ port = ip4_map_get_port (ip, MAP_RECEIVER);
+ if (port)
+ {
+ /* Verify that port is not among the well-known ports */
+ if ((d->psid_offset > 0)
+ && (clib_net_to_host_u16 (port) <
+ (0x1 << (16 - d->psid_offset))))
+ {
+ *error = MAP_ERROR_ENCAP_SEC_CHECK;
+ }
+ else
+ {
+ if (ip4_get_fragment_more (ip))
+ *next = IP4_MAP_NEXT_REASS;
+ return (port);
+ }
+ }
+ else
+ {
+ *error = MAP_ERROR_BAD_PROTOCOL;
+ }
+ }
+ else
+ {
+ *next = IP4_MAP_NEXT_REASS;
+ }
+ }
+ return (0);
+}
+
+/*
+ * ip4_map_vtcfl
+ */
+static_always_inline u32
+ip4_map_vtcfl (ip4_header_t * ip4, vlib_buffer_t * p)
+{
+ map_main_t *mm = &map_main;
+ u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
+ u32 vtcfl = 0x6 << 28;
+ vtcfl |= tc << 20;
+ vtcfl |= vnet_buffer (p)->ip.flow_hash & 0x000fffff;
+
+ return (clib_host_to_net_u32 (vtcfl));
+}
+
+static_always_inline bool
+ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+ map_main_t *mm = &map_main;
+ u32 adj_index0 = mm->adj6_index;
+ if (adj_index0 > 0)
+ {
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip_adjacency_t *adj = ip_get_adjacency (lm6, mm->adj6_index);
+ if (adj->n_adj > 1)
+ {
+ u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT);
+ adj_index0 += (hash_c0 & (adj->n_adj - 1));
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ return (true);
+ }
+#endif
+ return (false);
+}
+
+/*
+ * ip4_map_ttl
+ */
+static inline void
+ip4_map_decrement_ttl (ip4_header_t * ip, u8 * error)
+{
+ i32 ttl = ip->ttl;
+
+ /* Input node should have reject packets with ttl 0. */
+ ASSERT (ip->ttl > 0);
+
+ u32 checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
+ checksum += checksum >= 0xffff;
+ ip->checksum = checksum;
+ ttl -= 1;
+ ip->ttl = ttl;
+ *error = ttl <= 0 ? IP4_ERROR_TIME_EXPIRED : *error;
+
+ /* Verify checksum. */
+ ASSERT (ip->checksum == ip4_header_checksum (ip));
+}
+
+static u32
+ip4_map_fragment (vlib_buffer_t * b, u16 mtu, bool df, u8 * error)
+{
+ map_main_t *mm = &map_main;
+
+ if (mm->frag_inner)
+ {
+ ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu,
+ IP4_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_FLAG_IP6_HEADER);
+ return (IP4_MAP_NEXT_IP4_FRAGMENT);
+ }
+ else
+ {
+ if (df && !mm->frag_ignore_df)
+ {
+ icmp4_error_set_vnet_buffer (b, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ mtu);
+ vlib_buffer_advance (b, sizeof (ip6_header_t));
+ *error = MAP_ERROR_DF_SET;
+ return (IP4_MAP_NEXT_ICMP_ERROR);
+ }
+ ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
+ IP_FRAG_FLAG_IP6_HEADER);
+ return (IP4_MAP_NEXT_IP6_FRAGMENT);
+ }
+}
+
+/*
+ * ip4_map
+ */
+static uword
+ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ map_domain_t *d0, *d1;
+ u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
+ ip4_header_t *ip40, *ip41;
+ u16 port0 = 0, port1 = 0;
+ ip6_header_t *ip6h0, *ip6h1;
+ u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+ u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 =
+ IP4_MAP_NEXT_IP6_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+ /* IPv4 + 8 = 28. possibly plus -40 */
+ CLIB_PREFETCH (p2->data - 40, 68, STORE);
+ CLIB_PREFETCH (p3->data - 40, 68, STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+ d0 =
+ ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ &map_domain_index0);
+ d1 =
+ ip4_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ &map_domain_index1);
+ ASSERT (d0);
+ ASSERT (d1);
+
+ /*
+ * Shared IPv4 address
+ */
+ port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
+ port1 = ip4_map_port_and_security_check (d1, ip41, &next1, &error1);
+
+ /* Decrement IPv4 TTL */
+ ip4_map_decrement_ttl (ip40, &error0);
+ ip4_map_decrement_ttl (ip41, &error1);
+ bool df0 =
+ ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+ bool df1 =
+ ip41->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ /* MAP calc */
+ u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
+ u32 da41 = clib_net_to_host_u32 (ip41->dst_address.as_u32);
+ u16 dp40 = clib_net_to_host_u16 (port0);
+ u16 dp41 = clib_net_to_host_u16 (port1);
+ u64 dal60 = map_get_pfx (d0, da40, dp40);
+ u64 dal61 = map_get_pfx (d1, da41, dp41);
+ u64 dar60 = map_get_sfx (d0, da40, dp40);
+ u64 dar61 = map_get_sfx (d1, da41, dp41);
+ if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
+ && next0 != IP4_MAP_NEXT_REASS)
+ error0 = MAP_ERROR_NO_BINDING;
+ if (dal61 == 0 && dar61 == 0 && error1 == MAP_ERROR_NONE
+ && next1 != IP4_MAP_NEXT_REASS)
+ error1 = MAP_ERROR_NO_BINDING;
+
+ /* construct ipv6 header */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+ ip6h0 = vlib_buffer_get_current (p0);
+ ip6h1 = vlib_buffer_get_current (p1);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ ip6h0->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip40, p0);
+ ip6h1->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip41, p1);
+ ip6h0->payload_length = ip40->length;
+ ip6h1->payload_length = ip41->length;
+ ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h0->hop_limit = 0x40;
+ ip6h1->hop_limit = 0x40;
+ ip6h0->src_address = d0->ip6_src;
+ ip6h1->src_address = d1->ip6_src;
+ ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
+ ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
+ ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64 (dal61);
+ ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64 (dar61);
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip6h0->payload_length) +
+ sizeof (*ip6h0) > d0->mtu)))
+ {
+ next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ }
+ else
+ {
+ next0 =
+ ip4_map_ip6_lookup_bypass (p0,
+ ip40) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next0;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip6h0->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next0 = IP4_MAP_NEXT_DROP;
+ }
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error1 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d1->mtu
+ && (clib_net_to_host_u16 (ip6h1->payload_length) +
+ sizeof (*ip6h1) > d1->mtu)))
+ {
+ next1 = ip4_map_fragment (p1, d1->mtu, df1, &error1);
+ }
+ else
+ {
+ next1 =
+ ip4_map_ip6_lookup_bypass (p1,
+ ip41) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next1;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ map_domain_index1, 1,
+ clib_net_to_host_u16
+ (ip6h1->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next1 = IP4_MAP_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->map_domain_index = map_domain_index1;
+ tr->port = port1;
+ }
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ map_domain_t *d0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip4_header_t *ip40;
+ u16 port0 = 0;
+ ip6_header_t *ip6h0;
+ u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
+ u32 map_domain_index0 = ~0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ d0 =
+ ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ &map_domain_index0);
+ ASSERT (d0);
+
+ /*
+ * Shared IPv4 address
+ */
+ port0 = ip4_map_port_and_security_check (d0, ip40, &next0, &error0);
+
+ /* Decrement IPv4 TTL */
+ ip4_map_decrement_ttl (ip40, &error0);
+ bool df0 =
+ ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+ /* MAP calc */
+ u32 da40 = clib_net_to_host_u32 (ip40->dst_address.as_u32);
+ u16 dp40 = clib_net_to_host_u16 (port0);
+ u64 dal60 = map_get_pfx (d0, da40, dp40);
+ u64 dar60 = map_get_sfx (d0, da40, dp40);
+ if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE
+ && next0 != IP4_MAP_NEXT_REASS)
+ error0 = MAP_ERROR_NO_BINDING;
+
+ /* construct ipv6 header */
+ vlib_buffer_advance (p0, -(sizeof (ip6_header_t)));
+ ip6h0 = vlib_buffer_get_current (p0);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+
+ ip6h0->ip_version_traffic_class_and_flow_label =
+ ip4_map_vtcfl (ip40, p0);
+ ip6h0->payload_length = ip40->length;
+ ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+ ip6h0->hop_limit = 0x40;
+ ip6h0->src_address = d0->ip6_src;
+ ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64 (dal60);
+ ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64 (dar60);
+
+ /*
+ * Determine next node. Can be one of:
+ * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+ */
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip6h0->payload_length) +
+ sizeof (*ip6h0) > d0->mtu)))
+ {
+ next0 = ip4_map_fragment (p0, d0->mtu, df0, &error0);
+ }
+ else
+ {
+ next0 =
+ ip4_map_ip6_lookup_bypass (p0,
+ ip40) ?
+ IP4_MAP_NEXT_IP6_REWRITE : next0;
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip6h0->payload_length) +
+ 40);
+ }
+ }
+ else
+ {
+ next0 = IP4_MAP_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * ip4_map_reass
+ */
+static uword
+ip4_map_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_reass_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_to_loopback = NULL;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ map_domain_t *d0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip4_header_t *ip40;
+ i32 port0 = 0;
+ ip6_header_t *ip60;
+ u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP;
+ u32 map_domain_index0;
+ u8 cached = 0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ ip40 = (ip4_header_t *) (ip60 + 1);
+ d0 =
+ ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ &map_domain_index0);
+
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
+ ip40->dst_address.as_u32,
+ ip40->fragment_id,
+ ip40->protocol,
+ &fragments_to_drop);
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
+ {
+ if (r->port >= 0)
+ {
+ // We know the port already
+ port0 = r->port;
+ }
+ else if (map_ip4_reass_add_fragment (r, pi0))
+ {
+ // Not enough space for caching
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ cached = 1;
+ }
+ }
+ else
+ if ((port0 =
+ ip4_get_port (ip40, MAP_RECEIVER, p0->current_length)) < 0)
+ {
+ // Could not find port. We'll free the reassembly.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ port0 = 0;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ r->port = port0;
+ map_ip4_reass_get_fragments (r, &fragments_to_loopback);
+ }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ if (!cached && r)
+ {
+ r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
+ if (!ip4_get_fragment_more (ip40))
+ r->expected_total =
+ ip4_get_fragment_offset (ip40) * 8 +
+ clib_host_to_net_u16 (ip40->length) - 20;
+ if (r->forwarded >= r->expected_total)
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+#endif
+
+ map_ip4_reass_unlock ();
+
+ // NOTE: Most operations have already been performed by ip4_map
+ // All we need is the right destination address
+ ip60->dst_address.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, port0);
+ ip60->dst_address.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, port0);
+
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+ next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip4_map_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ tr->cached = cached;
+ }
+
+ if (cached)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+ if (error0 == MAP_ERROR_NONE)
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index, map_domain_index0,
+ 1,
+ clib_net_to_host_u16
+ (ip60->payload_length) + 40);
+ next0 =
+ (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+ //Loopback when we reach the end of the inpu vector
+ if (n_left_from == 0 && vec_len (fragments_to_loopback))
+ {
+ from = vlib_frame_vector_args (frame);
+ u32 len = vec_len (fragments_to_loopback);
+ if (len <= VLIB_FRAME_SIZE)
+ {
+ clib_memcpy (from, fragments_to_loopback,
+ sizeof (u32) * len);
+ n_left_from = len;
+ vec_reset_length (fragments_to_loopback);
+ }
+ else
+ {
+ clib_memcpy (from,
+ fragments_to_loopback + (len -
+ VLIB_FRAME_SIZE),
+ sizeof (u32) * VLIB_FRAME_SIZE);
+ n_left_from = VLIB_FRAME_SIZE;
+ _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+ }
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP4_MAP_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_to_loopback);
+ return frame->n_vectors;
+}
+
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_node) = {
+ .function = ip4_map,
+ .name = "ip4-map",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP4_MAP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+ [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite",
+#endif
+ [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag",
+ [IP4_MAP_NEXT_REASS] = "ip4-map-reass",
+ [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP4_MAP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_reass_node) = {
+ .function = ip4_map_reass,
+ .name = "ip4-map-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip4_map_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP4_MAP_REASS_N_NEXT,
+ .next_nodes = {
+ [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP4_MAP_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c
new file mode 100644
index 00000000000..15974d8a46e
--- /dev/null
+++ b/src/vnet/map/ip4_map_t.c
@@ -0,0 +1,1363 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+#define IP4_MAP_T_DUAL_LOOP 1
+
+typedef enum
+{
+ IP4_MAPT_NEXT_MAPT_TCP_UDP,
+ IP4_MAPT_NEXT_MAPT_ICMP,
+ IP4_MAPT_NEXT_MAPT_FRAGMENTED,
+ IP4_MAPT_NEXT_DROP,
+ IP4_MAPT_N_NEXT
+} ip4_mapt_next_t;
+
+typedef enum
+{
+ IP4_MAPT_ICMP_NEXT_IP6_LOOKUP,
+ IP4_MAPT_ICMP_NEXT_IP6_FRAG,
+ IP4_MAPT_ICMP_NEXT_DROP,
+ IP4_MAPT_ICMP_N_NEXT
+} ip4_mapt_icmp_next_t;
+
+typedef enum
+{
+ IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP,
+ IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,
+ IP4_MAPT_TCP_UDP_NEXT_DROP,
+ IP4_MAPT_TCP_UDP_N_NEXT
+} ip4_mapt_tcp_udp_next_t;
+
+typedef enum
+{
+ IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP,
+ IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,
+ IP4_MAPT_FRAGMENTED_NEXT_DROP,
+ IP4_MAPT_FRAGMENTED_N_NEXT
+} ip4_mapt_fragmented_next_t;
+
+//This is used to pass information within the buffer data.
+//Buffer structure being too small to contain big structures like this.
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ ip6_address_t daddr;
+ ip6_address_t saddr;
+ //IPv6 header + Fragmentation header will be here
+ //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
+ u8 unused[28];
+}) ip4_mapt_pseudo_header_t;
+/* *INDENT-ON* */
+
+#define frag_id_4to6(id) (id)
+
+//TODO: Find the right place in memory for this.
+/* *INDENT-OFF* */
+static u8 icmp_to_icmp6_updater_pointer_table[] =
+ { 0, 1, 4, 4, ~0,
+ ~0, ~0, ~0, 7, 6,
+ ~0, ~0, 8, 8, 8,
+ 8, 24, 24, 24, 24
+ };
+/* *INDENT-ON* */
+
+
+static_always_inline int
+ip4_map_fragment_cache (ip4_header_t * ip4, u16 port)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r =
+ map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
+ ip4->fragment_id,
+ (ip4->protocol ==
+ IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+ &ignore);
+ if (r)
+ r->port = port;
+
+ map_ip4_reass_unlock ();
+ return !r;
+}
+
+static_always_inline i32
+ip4_map_fragment_get_port (ip4_header_t * ip4)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r =
+ map_ip4_reass_get (ip4->src_address.as_u32, ip4->dst_address.as_u32,
+ ip4->fragment_id,
+ (ip4->protocol ==
+ IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+ &ignore);
+ i32 ret = r ? r->port : -1;
+ map_ip4_reass_unlock ();
+ return ret;
+}
+
+
+/* Statelessly translates an ICMP packet into ICMPv6.
+ *
+ * Warning: The checksum will need to be recomputed.
+ *
+ */
+static_always_inline int
+ip4_icmp_to_icmp6_in_place (icmp46_header_t * icmp, u32 icmp_len,
+ i32 * receiver_port, ip4_header_t ** inner_ip4)
+{
+ *inner_ip4 = NULL;
+ switch (icmp->type)
+ {
+ case ICMP4_echo_reply:
+ *receiver_port = ((u16 *) icmp)[2];
+ icmp->type = ICMP6_echo_reply;
+ break;
+ case ICMP4_echo_request:
+ *receiver_port = ((u16 *) icmp)[2];
+ icmp->type = ICMP6_echo_request;
+ break;
+ case ICMP4_destination_unreachable:
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+ *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8);
+
+ switch (icmp->code)
+ {
+ case ICMP4_destination_unreachable_destination_unreachable_net: //0
+ case ICMP4_destination_unreachable_destination_unreachable_host: //1
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+ break;
+ case ICMP4_destination_unreachable_protocol_unreachable: //2
+ icmp->type = ICMP6_parameter_problem;
+ icmp->code = ICMP6_parameter_problem_unrecognized_next_header;
+ break;
+ case ICMP4_destination_unreachable_port_unreachable: //3
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_port_unreachable;
+ break;
+ case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4
+ icmp->type =
+ ICMP6_packet_too_big;
+ icmp->code = 0;
+ {
+ u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ if (advertised_mtu)
+ advertised_mtu += 20;
+ else
+ advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value)
+
+ //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20)
+ *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu);
+ }
+ break;
+
+ case ICMP4_destination_unreachable_source_route_failed: //5
+ case ICMP4_destination_unreachable_destination_network_unknown: //6
+ case ICMP4_destination_unreachable_destination_host_unknown: //7
+ case ICMP4_destination_unreachable_source_host_isolated: //8
+ case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11
+ case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12
+ icmp->type =
+ ICMP6_destination_unreachable;
+ icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+ break;
+ case ICMP4_destination_unreachable_network_administratively_prohibited: //9
+ case ICMP4_destination_unreachable_host_administratively_prohibited: //10
+ case ICMP4_destination_unreachable_communication_administratively_prohibited: //13
+ case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15
+ icmp->type = ICMP6_destination_unreachable;
+ icmp->code =
+ ICMP6_destination_unreachable_destination_administratively_prohibited;
+ break;
+ case ICMP4_destination_unreachable_host_precedence_violation: //14
+ default:
+ return -1;
+ }
+ break;
+
+ case ICMP4_time_exceeded: //11
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+ *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8);
+ icmp->type = ICMP6_time_exceeded;
+ //icmp->code = icmp->code //unchanged
+ break;
+
+ case ICMP4_parameter_problem:
+ *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+ *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8);
+
+ switch (icmp->code)
+ {
+ case ICMP4_parameter_problem_pointer_indicates_error:
+ case ICMP4_parameter_problem_bad_length:
+ icmp->type = ICMP6_parameter_problem;
+ icmp->code = ICMP6_parameter_problem_erroneous_header_field;
+ {
+ u8 ptr =
+ icmp_to_icmp6_updater_pointer_table[*((u8 *) (icmp + 1))];
+ if (ptr == 0xff)
+ return -1;
+
+ *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr);
+ }
+ break;
+ default:
+ //All other codes cause dropping the packet
+ return -1;
+ }
+ break;
+
+ default:
+ //All other types cause dropping the packet
+ return -1;
+ break;
+ }
+ return 0;
+}
+
+static_always_inline void
+_ip4_map_t_icmp (map_domain_t * d, vlib_buffer_t * p, u8 * error)
+{
+ ip4_header_t *ip4, *inner_ip4;
+ ip6_header_t *ip6, *inner_ip6;
+ u32 ip_len;
+ icmp46_header_t *icmp;
+ i32 recv_port;
+ ip_csum_t csum;
+ u16 *inner_L4_checksum = 0;
+ ip6_frag_hdr_t *inner_frag;
+ u32 inner_frag_id;
+ u32 inner_frag_offset;
+ u8 inner_frag_more;
+
+ ip4 = vlib_buffer_get_current (p);
+ ip_len = clib_net_to_host_u16 (ip4->length);
+ ASSERT (ip_len <= p->current_length);
+
+ icmp = (icmp46_header_t *) (ip4 + 1);
+ if (ip4_icmp_to_icmp6_in_place (icmp, ip_len - sizeof (*ip4),
+ &recv_port, &inner_ip4))
+ {
+ *error = MAP_ERROR_ICMP;
+ return;
+ }
+
+ if (recv_port < 0)
+ {
+ // In case of 1:1 mapping, we don't care about the port
+ if (d->ea_bits_len == 0 && d->rules)
+ {
+ recv_port = 0;
+ }
+ else
+ {
+ *error = MAP_ERROR_ICMP;
+ return;
+ }
+ }
+
+ if (inner_ip4)
+ {
+ //We have 2 headers to translate.
+ //We need to make some room in the middle of the packet
+
+ if (PREDICT_FALSE (ip4_is_fragment (inner_ip4)))
+ {
+ //Here it starts getting really tricky
+ //We will add a fragmentation header in the inner packet
+
+ if (!ip4_is_first_fragment (inner_ip4))
+ {
+ //For now we do not handle unless it is the first fragment
+ //Ideally we should handle the case as we are in slow path already
+ *error = MAP_ERROR_FRAGMENTED;
+ return;
+ }
+
+ vlib_buffer_advance (p,
+ -2 * (sizeof (*ip6) - sizeof (*ip4)) -
+ sizeof (*inner_frag));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8);
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) (ip4 + 1);
+
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*inner_frag));
+ inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length,
+ sizeof (*ip6) - 2 * sizeof (*ip4) +
+ sizeof (*inner_frag));
+ inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id);
+ inner_frag_offset = ip4_get_fragment_offset (inner_ip4);
+ inner_frag_more =
+ ! !(inner_ip4->flags_and_fragment_offset &
+ clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+ }
+ else
+ {
+ vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4)));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8);
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4));
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4));
+ inner_frag = NULL;
+ }
+
+ if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP))
+ {
+ inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum;
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP))
+ {
+ inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum;
+ if (!*inner_L4_checksum)
+ {
+ //The inner packet was first translated, and therefore came from IPv6.
+ //As the packet was an IPv6 packet, the UDP checksum can't be NULL
+ *error = MAP_ERROR_ICMP;
+ return;
+ }
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //We have an ICMP inside an ICMP
+ //It needs to be translated, but not for error ICMP messages
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+ csum = inner_icmp->checksum;
+ //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by ip4_icmp_to_icmp6_in_place
+ csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp));
+ inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ?
+ ICMP6_echo_request : ICMP6_echo_reply;
+ csum = ip_csum_add_even (csum, *((u16 *) inner_icmp));
+ csum =
+ ip_csum_add_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6));
+ csum =
+ ip_csum_add_even (csum, inner_ip4->length - sizeof (*inner_ip4));
+ inner_icmp->checksum = ip_csum_fold (csum);
+ inner_L4_checksum = &inner_icmp->checksum;
+ inner_ip4->protocol = IP_PROTOCOL_ICMP6;
+ }
+ else
+ {
+ /* To shut up Coverity */
+ os_panic ();
+ }
+
+ //FIXME: Security check with the port found in the inner packet
+
+ csum = *inner_L4_checksum; //Initial checksum of the inner L4 header
+ //FIXME: Shouldn't we remove ip addresses from there ?
+
+ inner_ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20));
+ inner_ip6->payload_length =
+ u16_net_add (inner_ip4->length, -sizeof (*inner_ip4));
+ inner_ip6->hop_limit = inner_ip4->ttl;
+ inner_ip6->protocol = inner_ip4->protocol;
+
+ //Note that the source address is within the domain
+ //while the destination address is the one outside the domain
+ ip4_map_t_embedded_address (d, &inner_ip6->dst_address,
+ &inner_ip4->dst_address);
+ inner_ip6->src_address.as_u64[0] =
+ map_get_pfx_net (d, inner_ip4->src_address.as_u32, recv_port);
+ inner_ip6->src_address.as_u64[1] =
+ map_get_sfx_net (d, inner_ip4->src_address.as_u32, recv_port);
+
+ if (PREDICT_FALSE (inner_frag != NULL))
+ {
+ inner_frag->next_hdr = inner_ip6->protocol;
+ inner_frag->identification = inner_frag_id;
+ inner_frag->rsv = 0;
+ inner_frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more);
+ inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ inner_ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16
+ (inner_ip6->payload_length) +
+ sizeof (*inner_frag));
+ }
+
+ csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
+ *inner_L4_checksum = ip_csum_fold (csum);
+
+ }
+ else
+ {
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ ip6 = vlib_buffer_get_current (p);
+ ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ }
+
+ //Translate outer IPv6
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = IP_PROTOCOL_ICMP6;
+
+ ip4_map_t_embedded_address (d, &ip6->src_address, &ip4->src_address);
+ ip6->dst_address.as_u64[0] =
+ map_get_pfx_net (d, ip4->dst_address.as_u32, recv_port);
+ ip6->dst_address.as_u64[1] =
+ map_get_sfx_net (d, ip4->dst_address.as_u32, recv_port);
+
+ //Truncate when the packet exceeds the minimal IPv6 MTU
+ if (p->current_length > 1280)
+ {
+ ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6));
+ p->current_length = 1280; //Looks too simple to be correct...
+ }
+
+ //TODO: We could do an easy diff-checksum for echo requests/replies
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, ip6->payload_length);
+ csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, icmp,
+ clib_net_to_host_u16 (ip6->payload_length));
+ icmp->checksum = ~ip_csum_fold (csum);
+}
+
+static uword
+ip4_map_t_icmp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_icmp_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_mapt_icmp_next_t next0;
+ u8 error0;
+ map_domain_t *d0;
+ u16 len0;
+
+ next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ vlib_buffer_advance (p0, sizeof (ip4_mapt_pseudo_header_t)); //The pseudo-header is not used
+ len0 =
+ clib_net_to_host_u16 (((ip4_header_t *)
+ vlib_buffer_get_current (p0))->length);
+ d0 =
+ pool_elt_at_index (map_main.domains,
+ vnet_buffer (p0)->map_t.map_domain_index);
+ _ip4_map_t_icmp (d0, p0, &error0);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
+ }
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ vnet_buffer (p0)->map_t.
+ map_domain_index, 1, len0);
+ }
+ else
+ {
+ next0 = IP4_MAPT_ICMP_NEXT_DROP;
+ }
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip4_map_t_fragmented (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ ip4_mapt_pseudo_header_t *pheader0;
+ ip4_mapt_fragmented_next_t next0;
+
+ next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+
+ //Accessing ip4 header
+ ip40 = vlib_buffer_get_current (p0);
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) - sizeof (*frag0));
+ ip60 =
+ (ip6_header_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) - sizeof (*frag0) -
+ sizeof (*ip60));
+ vlib_buffer_advance (p0,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+
+ //We know that the protocol was one of ICMP, TCP or UDP
+ //because the first fragment was found and cached
+ frag0->next_hdr =
+ (ip40->protocol ==
+ IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip40->protocol;
+ frag0->identification = frag_id_4to6 (ip40->fragment_id);
+ frag0->rsv = 0;
+ frag0->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip40),
+ clib_net_to_host_u16
+ (ip40->flags_and_fragment_offset) &
+ IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ ip60->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20));
+ ip60->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip40->length) -
+ sizeof (*ip40) + sizeof (*frag0));
+ ip60->hop_limit = ip40->ttl;
+ ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+ ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+ ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+ ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip4_map_t_tcp_udp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip40, *ip41;
+ ip6_header_t *ip60, *ip61;
+ ip_csum_t csum0, csum1;
+ u16 *checksum0, *checksum1;
+ ip6_frag_hdr_t *frag0, *frag1;
+ u32 frag_id0, frag_id1;
+ ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+ ip4_mapt_tcp_udp_next_t next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ pheader1 = vlib_buffer_get_current (p1);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+ vlib_buffer_advance (p1, sizeof (*pheader1));
+
+ //Accessing ip4 header
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+ checksum0 =
+ (u16 *) u8_ptr_add (ip40,
+ vnet_buffer (p0)->map_t.checksum_offset);
+ checksum1 =
+ (u16 *) u8_ptr_add (ip41,
+ vnet_buffer (p1)->map_t.checksum_offset);
+
+ //UDP checksum is optional over IPv4 but mandatory for IPv6
+ //We do not check udp->length sanity but use our safe computed value instead
+ if (PREDICT_FALSE
+ (!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP))
+ {
+ u16 udp_len =
+ clib_host_to_net_u16 (ip40->length) - sizeof (*ip40);
+ udp_header_t *udp =
+ (udp_header_t *) u8_ptr_add (ip40, sizeof (*ip40));
+ ip_csum_t csum;
+ csum = ip_incremental_checksum (0, udp, udp_len);
+ csum =
+ ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+ csum =
+ ip_csum_with_carry (csum, *((u64 *) (&ip40->src_address)));
+ *checksum0 = ~ip_csum_fold (csum);
+ }
+ if (PREDICT_FALSE
+ (!*checksum1 && ip41->protocol == IP_PROTOCOL_UDP))
+ {
+ u16 udp_len =
+ clib_host_to_net_u16 (ip41->length) - sizeof (*ip40);
+ udp_header_t *udp =
+ (udp_header_t *) u8_ptr_add (ip41, sizeof (*ip40));
+ ip_csum_t csum;
+ csum = ip_incremental_checksum (0, udp, udp_len);
+ csum =
+ ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+ csum =
+ ip_csum_with_carry (csum, *((u64 *) (&ip41->src_address)));
+ *checksum1 = ~ip_csum_fold (csum);
+ }
+
+ csum0 = ip_csum_sub_even (*checksum0, ip40->src_address.as_u32);
+ csum1 = ip_csum_sub_even (*checksum1, ip41->src_address.as_u32);
+ csum0 = ip_csum_sub_even (csum0, ip40->dst_address.as_u32);
+ csum1 = ip_csum_sub_even (csum1, ip41->dst_address.as_u32);
+
+ // Deal with fragmented packets
+ if (PREDICT_FALSE (ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16
+ (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+ {
+ ip60 =
+ (ip6_header_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) -
+ sizeof (*frag0));
+ frag_id0 = frag_id_4to6 (ip40->fragment_id);
+ vlib_buffer_advance (p0,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ }
+ else
+ {
+ ip60 =
+ (ip6_header_t *) (((u8 *) ip40) + sizeof (*ip40) -
+ sizeof (*ip60));
+ vlib_buffer_advance (p0, sizeof (*ip40) - sizeof (*ip60));
+ frag0 = NULL;
+ }
+
+ if (PREDICT_FALSE (ip41->flags_and_fragment_offset &
+ clib_host_to_net_u16
+ (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+ {
+ ip61 =
+ (ip6_header_t *) u8_ptr_add (ip41,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ frag1 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip41,
+ sizeof (*ip40) -
+ sizeof (*frag0));
+ frag_id1 = frag_id_4to6 (ip41->fragment_id);
+ vlib_buffer_advance (p1,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ }
+ else
+ {
+ ip61 =
+ (ip6_header_t *) (((u8 *) ip41) + sizeof (*ip40) -
+ sizeof (*ip60));
+ vlib_buffer_advance (p1, sizeof (*ip40) - sizeof (*ip60));
+ frag1 = NULL;
+ }
+
+ ip60->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20));
+ ip61->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip41->tos << 20));
+ ip60->payload_length = u16_net_add (ip40->length, -sizeof (*ip40));
+ ip61->payload_length = u16_net_add (ip41->length, -sizeof (*ip40));
+ ip60->hop_limit = ip40->ttl;
+ ip61->hop_limit = ip41->ttl;
+ ip60->protocol = ip40->protocol;
+ ip61->protocol = ip41->protocol;
+
+ if (PREDICT_FALSE (frag0 != NULL))
+ {
+ frag0->next_hdr = ip60->protocol;
+ frag0->identification = frag_id0;
+ frag0->rsv = 0;
+ frag0->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (0, 1);
+ ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip60->payload_length =
+ u16_net_add (ip60->payload_length, sizeof (*frag0));
+ }
+
+ if (PREDICT_FALSE (frag1 != NULL))
+ {
+ frag1->next_hdr = ip61->protocol;
+ frag1->identification = frag_id1;
+ frag1->rsv = 0;
+ frag1->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (0, 1);
+ ip61->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip61->payload_length =
+ u16_net_add (ip61->payload_length, sizeof (*frag0));
+ }
+
+ //Finally copying the address
+ ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+ ip61->dst_address.as_u64[0] = pheader1->daddr.as_u64[0];
+ ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+ ip61->dst_address.as_u64[1] = pheader1->daddr.as_u64[1];
+ ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+ ip61->src_address.as_u64[0] = pheader1->saddr.as_u64[0];
+ ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+ ip61->src_address.as_u64[1] = pheader1->saddr.as_u64[1];
+
+ csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[0]);
+ csum1 = ip_csum_add_even (csum1, ip61->src_address.as_u64[0]);
+ csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[1]);
+ csum1 = ip_csum_add_even (csum1, ip61->src_address.as_u64[1]);
+ csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[0]);
+ csum1 = ip_csum_add_even (csum1, ip61->dst_address.as_u64[0]);
+ csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[1]);
+ csum1 = ip_csum_add_even (csum1, ip61->dst_address.as_u64[1]);
+ *checksum0 = ip_csum_fold (csum0);
+ *checksum1 = ip_csum_fold (csum1);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ ip_csum_t csum0;
+ u16 *checksum0;
+ ip6_frag_hdr_t *frag0;
+ u32 frag_id0;
+ ip4_mapt_pseudo_header_t *pheader0;
+ ip4_mapt_tcp_udp_next_t next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+
+ //Accessing pseudo header
+ pheader0 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (*pheader0));
+
+ //Accessing ip4 header
+ ip40 = vlib_buffer_get_current (p0);
+ checksum0 =
+ (u16 *) u8_ptr_add (ip40,
+ vnet_buffer (p0)->map_t.checksum_offset);
+
+ //UDP checksum is optional over IPv4 but mandatory for IPv6
+ //We do not check udp->length sanity but use our safe computed value instead
+ if (PREDICT_FALSE
+ (!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP))
+ {
+ u16 udp_len =
+ clib_host_to_net_u16 (ip40->length) - sizeof (*ip40);
+ udp_header_t *udp =
+ (udp_header_t *) u8_ptr_add (ip40, sizeof (*ip40));
+ ip_csum_t csum;
+ csum = ip_incremental_checksum (0, udp, udp_len);
+ csum =
+ ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+ csum =
+ ip_csum_with_carry (csum, *((u64 *) (&ip40->src_address)));
+ *checksum0 = ~ip_csum_fold (csum);
+ }
+
+ csum0 = ip_csum_sub_even (*checksum0, ip40->src_address.as_u32);
+ csum0 = ip_csum_sub_even (csum0, ip40->dst_address.as_u32);
+
+ // Deal with fragmented packets
+ if (PREDICT_FALSE (ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16
+ (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+ {
+ ip60 =
+ (ip6_header_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip40,
+ sizeof (*ip40) -
+ sizeof (*frag0));
+ frag_id0 = frag_id_4to6 (ip40->fragment_id);
+ vlib_buffer_advance (p0,
+ sizeof (*ip40) - sizeof (*ip60) -
+ sizeof (*frag0));
+ }
+ else
+ {
+ ip60 =
+ (ip6_header_t *) (((u8 *) ip40) + sizeof (*ip40) -
+ sizeof (*ip60));
+ vlib_buffer_advance (p0, sizeof (*ip40) - sizeof (*ip60));
+ frag0 = NULL;
+ }
+
+ ip60->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20));
+ ip60->payload_length = u16_net_add (ip40->length, -sizeof (*ip40));
+ ip60->hop_limit = ip40->ttl;
+ ip60->protocol = ip40->protocol;
+
+ if (PREDICT_FALSE (frag0 != NULL))
+ {
+ frag0->next_hdr = ip60->protocol;
+ frag0->identification = frag_id0;
+ frag0->rsv = 0;
+ frag0->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (0, 1);
+ ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ ip60->payload_length =
+ u16_net_add (ip60->payload_length, sizeof (*frag0));
+ }
+
+ //Finally copying the address
+ ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+ ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+ ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+ ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+
+ csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[0]);
+ csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[1]);
+ csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[0]);
+ csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[1]);
+ *checksum0 = ip_csum_fold (csum0);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+ next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+static_always_inline void
+ip4_map_t_classify (vlib_buffer_t * p0, map_domain_t * d0,
+ ip4_header_t * ip40, u16 ip4_len0, i32 * dst_port0,
+ u8 * error0, ip4_mapt_next_t * next0)
+{
+ if (PREDICT_FALSE (ip4_get_fragment_offset (ip40)))
+ {
+ *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *dst_port0 = 0;
+ }
+ else
+ {
+ *dst_port0 = ip4_map_fragment_get_port (ip40);
+ *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0;
+ }
+ }
+ else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_TCP))
+ {
+ vnet_buffer (p0)->map_t.checksum_offset = 36;
+ *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+ *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
+ }
+ else if (PREDICT_TRUE (ip40->protocol == IP_PROTOCOL_UDP))
+ {
+ vnet_buffer (p0)->map_t.checksum_offset = 26;
+ *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+ *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 2));
+ }
+ else if (ip40->protocol == IP_PROTOCOL_ICMP)
+ {
+ *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ *dst_port0 = 0;
+ else if (((icmp46_header_t *) u8_ptr_add (ip40, sizeof (*ip40)))->code
+ == ICMP4_echo_reply
+ || ((icmp46_header_t *)
+ u8_ptr_add (ip40,
+ sizeof (*ip40)))->code == ICMP4_echo_request)
+ *dst_port0 = (i32) * ((u16 *) u8_ptr_add (ip40, sizeof (*ip40) + 6));
+ }
+ else
+ {
+ *error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+}
+
+static uword
+ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_map_t_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip40, *ip41;
+ map_domain_t *d0, *d1;
+ ip4_mapt_next_t next0 = 0, next1 = 0;
+ u16 ip4_len0, ip4_len1;
+ u8 error0, error1;
+ i32 dst_port0, dst_port1;
+ ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ error0 = MAP_ERROR_NONE;
+ error1 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+ ip4_len0 = clib_host_to_net_u16 (ip40->length);
+ ip4_len1 = clib_host_to_net_u16 (ip41->length);
+
+ if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
+ ip40->ip_version_and_header_length != 0x45))
+ {
+ error0 = MAP_ERROR_UNKNOWN;
+ next0 = IP4_MAPT_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (p1->current_length < ip4_len1 ||
+ ip41->ip_version_and_header_length != 0x45))
+ {
+ error1 = MAP_ERROR_UNKNOWN;
+ next1 = IP4_MAPT_NEXT_DROP;
+ }
+
+ d0 = ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ &vnet_buffer (p0)->map_t.map_domain_index);
+ d1 = ip4_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ &vnet_buffer (p1)->map_t.map_domain_index);
+
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+ vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+ dst_port0 = -1;
+ dst_port1 = -1;
+
+ ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
+ &next0);
+ ip4_map_t_classify (p1, d1, ip41, ip4_len1, &dst_port1, &error1,
+ &next1);
+
+ //Add MAP-T pseudo header in front of the packet
+ vlib_buffer_advance (p0, -sizeof (*pheader0));
+ vlib_buffer_advance (p1, -sizeof (*pheader1));
+ pheader0 = vlib_buffer_get_current (p0);
+ pheader1 = vlib_buffer_get_current (p1);
+
+ //Save addresses within the packet
+ ip4_map_t_embedded_address (d0, &pheader0->saddr,
+ &ip40->src_address);
+ ip4_map_t_embedded_address (d1, &pheader1->saddr,
+ &ip41->src_address);
+ pheader0->daddr.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader0->daddr.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader1->daddr.as_u64[0] =
+ map_get_pfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1);
+ pheader1->daddr.as_u64[1] =
+ map_get_sfx_net (d1, ip41->dst_address.as_u32, (u16) dst_port1);
+
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip40) && (dst_port0 != -1)
+ && (d0->ea_bits_len != 0 || !d0->rules)
+ && ip4_map_fragment_cache (ip40, dst_port0)))
+ {
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip41) && (dst_port1 != -1)
+ && (d1->ea_bits_len != 0 || !d1->rules)
+ && ip4_map_fragment_cache (ip41, dst_port1)))
+ {
+ error1 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ vnet_buffer (p0)->map_t.
+ map_domain_index, 1,
+ clib_net_to_host_u16 (ip40->
+ length));
+ }
+
+ if (PREDICT_TRUE
+ (error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ vnet_buffer (p1)->map_t.
+ map_domain_index, 1,
+ clib_net_to_host_u16 (ip41->
+ length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+ next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1;
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip40;
+ map_domain_t *d0;
+ ip4_mapt_next_t next0;
+ u16 ip4_len0;
+ u8 error0;
+ i32 dst_port0;
+ ip4_mapt_pseudo_header_t *pheader0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ ip4_len0 = clib_host_to_net_u16 (ip40->length);
+ if (PREDICT_FALSE (p0->current_length < ip4_len0 ||
+ ip40->ip_version_and_header_length != 0x45))
+ {
+ error0 = MAP_ERROR_UNKNOWN;
+ next0 = IP4_MAPT_NEXT_DROP;
+ }
+
+ d0 = ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ &vnet_buffer (p0)->map_t.map_domain_index);
+
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+ dst_port0 = -1;
+ ip4_map_t_classify (p0, d0, ip40, ip4_len0, &dst_port0, &error0,
+ &next0);
+
+ //Add MAP-T pseudo header in front of the packet
+ vlib_buffer_advance (p0, -sizeof (*pheader0));
+ pheader0 = vlib_buffer_get_current (p0);
+
+ //Save addresses within the packet
+ ip4_map_t_embedded_address (d0, &pheader0->saddr,
+ &ip40->src_address);
+ pheader0->daddr.as_u64[0] =
+ map_get_pfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+ pheader0->daddr.as_u64[1] =
+ map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0);
+
+ //It is important to cache at this stage because the result might be necessary
+ //for packets within the same vector.
+ //Actually, this approach even provides some limited out-of-order fragments support
+ if (PREDICT_FALSE
+ (ip4_is_first_fragment (ip40) && (dst_port0 != -1)
+ && (d0->ea_bits_len != 0 || !d0->rules)
+ && ip4_map_fragment_cache (ip40, dst_port0)))
+ {
+ error0 = MAP_ERROR_UNKNOWN;
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX,
+ cpu_index,
+ vnet_buffer (p0)->map_t.
+ map_domain_index, 1,
+ clib_net_to_host_u16 (ip40->
+ length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
+ .function = ip4_map_t_fragmented,
+ .name = "ip4-map-t-fragmented",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
+ .function = ip4_map_t_icmp,
+ .name = "ip4-map-t-icmp",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
+ .function = ip4_map_t_tcp_udp,
+ .name = "ip4-map-t-tcp-udp",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+ [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip4_map_t_node) = {
+ .function = ip4_map_t,
+ .name = "ip4-map-t",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP4_MAPT_N_NEXT,
+ .next_nodes = {
+ [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
+ [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
+ [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
+ [IP4_MAPT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c
new file mode 100644
index 00000000000..d2945059df7
--- /dev/null
+++ b/src/vnet/map/ip6_map.c
@@ -0,0 +1,1269 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+enum ip6_map_next_e
+{
+ IP6_MAP_NEXT_IP4_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+ IP6_MAP_NEXT_IP4_REWRITE,
+#endif
+ IP6_MAP_NEXT_IP6_REASS,
+ IP6_MAP_NEXT_IP4_REASS,
+ IP6_MAP_NEXT_IP4_FRAGMENT,
+ IP6_MAP_NEXT_IP6_ICMP_RELAY,
+ IP6_MAP_NEXT_IP6_LOCAL,
+ IP6_MAP_NEXT_DROP,
+ IP6_MAP_NEXT_ICMP,
+ IP6_MAP_N_NEXT,
+};
+
+enum ip6_map_ip6_reass_next_e
+{
+ IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
+ IP6_MAP_IP6_REASS_NEXT_DROP,
+ IP6_MAP_IP6_REASS_N_NEXT,
+};
+
+enum ip6_map_ip4_reass_next_e
+{
+ IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
+ IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
+ IP6_MAP_IP4_REASS_NEXT_DROP,
+ IP6_MAP_IP4_REASS_N_NEXT,
+};
+
+enum ip6_icmp_relay_next_e
+{
+ IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
+ IP6_ICMP_RELAY_NEXT_DROP,
+ IP6_ICMP_RELAY_N_NEXT,
+};
+
+vlib_node_registration_t ip6_map_ip4_reass_node;
+vlib_node_registration_t ip6_map_ip6_reass_node;
+static vlib_node_registration_t ip6_map_icmp_relay_node;
+
+typedef struct
+{
+ u32 map_domain_index;
+ u16 port;
+ u8 cached;
+} map_ip6_map_ip4_reass_trace_t;
+
+u8 *
+format_ip6_map_ip4_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip6_map_ip4_reass_trace_t *t =
+ va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
+ return format (s, "MAP domain index: %d L4 port: %u Status: %s",
+ t->map_domain_index, t->port,
+ t->cached ? "cached" : "forwarded");
+}
+
+typedef struct
+{
+ u16 offset;
+ u16 frag_len;
+ u8 out;
+} map_ip6_map_ip6_reass_trace_t;
+
+u8 *
+format_ip6_map_ip6_reass_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_ip6_map_ip6_reass_trace_t *t =
+ va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
+ return format (s, "Offset: %d Fragment length: %d Status: %s", t->offset,
+ t->frag_len, t->out ? "out" : "in");
+}
+
+/*
+ * ip6_map_sec_check
+ */
+static_always_inline bool
+ip6_map_sec_check (map_domain_t * d, u16 port, ip4_header_t * ip4,
+ ip6_header_t * ip6)
+{
+ u16 sp4 = clib_net_to_host_u16 (port);
+ u32 sa4 = clib_net_to_host_u32 (ip4->src_address.as_u32);
+ u64 sal6 = map_get_pfx (d, sa4, sp4);
+ u64 sar6 = map_get_sfx (d, sa4, sp4);
+
+ if (PREDICT_FALSE
+ (sal6 != clib_net_to_host_u64 (ip6->src_address.as_u64[0])
+ || sar6 != clib_net_to_host_u64 (ip6->src_address.as_u64[1])))
+ return (false);
+ return (true);
+}
+
+static_always_inline void
+ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4,
+ ip6_header_t * ip6, u32 * next, u8 * error)
+{
+ map_main_t *mm = &map_main;
+ if (d->ea_bits_len || d->rules)
+ {
+ if (d->psid_length > 0)
+ {
+ if (!ip4_is_fragment (ip4))
+ {
+ u16 port = ip4_map_get_port (ip4, MAP_SENDER);
+ if (port)
+ {
+ if (mm->sec_check)
+ *error =
+ ip6_map_sec_check (d, port, ip4,
+ ip6) ? MAP_ERROR_NONE :
+ MAP_ERROR_DECAP_SEC_CHECK;
+ }
+ else
+ {
+ *error = MAP_ERROR_BAD_PROTOCOL;
+ }
+ }
+ else
+ {
+ *next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
+ }
+ }
+ }
+}
+
+static_always_inline bool
+ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+ map_main_t *mm = &map_main;
+ u32 adj_index0 = mm->adj4_index;
+ if (adj_index0 > 0)
+ {
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip_adjacency_t *adj = ip_get_adjacency (lm4, mm->adj4_index);
+ if (adj->n_adj > 1)
+ {
+ u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT);
+ adj_index0 += (hash_c0 & (adj->n_adj - 1));
+ }
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0;
+ return (true);
+ }
+#endif
+ return (false);
+}
+
+/*
+ * ip6_map
+ */
+static uword
+ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_node.index);
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ u8 error0 = MAP_ERROR_NONE;
+ u8 error1 = MAP_ERROR_NONE;
+ map_domain_t *d0 = 0, *d1 = 0;
+ ip4_header_t *ip40, *ip41;
+ ip6_header_t *ip60, *ip61;
+ u16 port0 = 0, port1 = 0;
+ u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+ u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+ u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ /* IPv6 + IPv4 header + 8 bytes of ULP */
+ CLIB_PREFETCH (p2->data, 68, LOAD);
+ CLIB_PREFETCH (p3->data, 68, LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ vlib_buffer_advance (p0, sizeof (ip6_header_t));
+ vlib_buffer_advance (p1, sizeof (ip6_header_t));
+ ip40 = vlib_buffer_get_current (p0);
+ ip41 = vlib_buffer_get_current (p1);
+
+ /*
+ * Encapsulated IPv4 packet
+ * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+ * - Lookup/Rewrite or Fragment node in case of packet > MTU
+ * Fragmented IPv6 packet
+ * ICMP IPv6 packet
+ * - Error -> Pass to ICMPv6/ICMPv4 relay
+ * - Info -> Pass to IPv6 local
+ * Anything else -> drop
+ */
+ if (PREDICT_TRUE
+ (ip60->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip60->payload_length) > 20))
+ {
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->src_address.
+ as_u32, &map_domain_index0, &error0);
+ }
+ else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip60->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip60 + 1);
+ next0 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ next0 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+ if (PREDICT_TRUE
+ (ip61->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip61->payload_length) > 20))
+ {
+ d1 =
+ ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip41->src_address.
+ as_u32, &map_domain_index1, &error1);
+ }
+ else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip61->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip61 + 1);
+ next1 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ next1 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error1 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ if (d0)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
+ next0 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next0 =
+ ip6_map_ip4_lookup_bypass (p0,
+ ip40) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next0;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+ }
+ if (d1)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d1, ip41, ip61, &next1, &error1);
+
+ if (PREDICT_TRUE (error1 == MAP_ERROR_NONE &&
+ next1 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d1->mtu
+ && (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
+ {
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.flags = 0;
+ vnet_buffer (p1)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p1)->ip_frag.mtu = d1->mtu;
+ next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next1 =
+ ip6_map_ip4_lookup_bypass (p1,
+ ip41) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next1;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ map_domain_index1, 1,
+ clib_net_to_host_u16
+ (ip41->length));
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ }
+
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->map_domain_index = map_domain_index1;
+ tr->port = port1;
+ }
+
+ if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next0 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
+ }
+
+ if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled)
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p1, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next1 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP;
+ }
+
+ /* Reset packet */
+ if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ if (next1 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p1, -sizeof (ip6_header_t));
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ map_domain_t *d0 = 0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ i32 port0 = 0;
+ u32 map_domain_index0 = ~0;
+ u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ vlib_buffer_advance (p0, sizeof (ip6_header_t));
+ ip40 = vlib_buffer_get_current (p0);
+
+ /*
+ * Encapsulated IPv4 packet
+ * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+ * - Lookup/Rewrite or Fragment node in case of packet > MTU
+ * Fragmented IPv6 packet
+ * ICMP IPv6 packet
+ * - Error -> Pass to ICMPv6/ICMPv4 relay
+ * - Info -> Pass to IPv6 local
+ * Anything else -> drop
+ */
+ if (PREDICT_TRUE
+ (ip60->protocol == IP_PROTOCOL_IP_IN_IP
+ && clib_net_to_host_u16 (ip60->payload_length) > 20))
+ {
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->src_address.
+ as_u32, &map_domain_index0, &error0);
+ }
+ else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+ clib_net_to_host_u16 (ip60->payload_length) >
+ sizeof (icmp46_header_t))
+ {
+ icmp46_header_t *icmp = (void *) (ip60 + 1);
+ next0 = (icmp->type == ICMP6_echo_request
+ || icmp->type ==
+ ICMP6_echo_reply) ? IP6_MAP_NEXT_IP6_LOCAL :
+ IP6_MAP_NEXT_IP6_ICMP_RELAY;
+ }
+ else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
+ (((ip6_frag_hdr_t *) (ip60 + 1))->next_hdr ==
+ IP_PROTOCOL_IP_IN_IP))
+ {
+ next0 = IP6_MAP_NEXT_IP6_REASS;
+ }
+ else
+ {
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ if (d0)
+ {
+ /* MAP inbound security check */
+ ip6_map_security_check (d0, ip40, ip60, &next0, &error0);
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE &&
+ next0 == IP6_MAP_NEXT_IP4_LOOKUP))
+ {
+ if (PREDICT_FALSE
+ (d0->mtu
+ && (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index =
+ IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+ }
+ else
+ {
+ next0 =
+ ip6_map_ip4_lookup_bypass (p0,
+ ip40) ?
+ IP6_MAP_NEXT_IP4_REWRITE : next0;
+ }
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ map_domain_index0, 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = (u16) port0;
+ }
+
+ if (mm->icmp6_enabled &&
+ (error0 == MAP_ERROR_DECAP_SEC_CHECK
+ || error0 == MAP_ERROR_NO_DOMAIN))
+ {
+ /* Set ICMP parameters */
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+ icmp6_error_set_vnet_buffer (p0, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_source_address_failed_policy,
+ 0);
+ next0 = IP6_MAP_NEXT_ICMP;
+ }
+ else
+ {
+ next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
+ }
+
+ /* Reset packet */
+ if (next0 == IP6_MAP_NEXT_IP6_LOCAL)
+ vlib_buffer_advance (p0, -sizeof (ip6_header_t));
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+
+static_always_inline void
+ip6_map_ip6_reass_prepare (vlib_main_t * vm, vlib_node_runtime_t * node,
+ map_ip6_reass_t * r, u32 ** fragments_ready,
+ u32 ** fragments_to_drop)
+{
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ vlib_buffer_t *p0;
+
+ if (!r->ip4_header.ip_version_and_header_length)
+ return;
+
+ //The IP header is here, we need to check for packets
+ //that can be forwarded
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ if (r->fragments[i].pi == ~0 ||
+ ((!r->fragments[i].next_data_len)
+ && (r->fragments[i].next_data_offset != (0xffff))))
+ continue;
+
+ p0 = vlib_get_buffer (vm, r->fragments[i].pi);
+ ip60 = vlib_buffer_get_current (p0);
+ frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
+ ip40 = (ip4_header_t *) (frag0 + 1);
+
+ if (ip6_frag_hdr_offset (frag0))
+ {
+ //Not first fragment, add the IPv4 header
+ clib_memcpy (ip40, &r->ip4_header, 20);
+ }
+
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ r->forwarded +=
+ clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
+#endif
+
+ if (ip6_frag_hdr_more (frag0))
+ {
+ //Not last fragment, we copy end of next
+ clib_memcpy (u8_ptr_add (ip60, p0->current_length),
+ r->fragments[i].next_data, 20);
+ p0->current_length += 20;
+ ip60->payload_length = u16_net_add (ip60->payload_length, 20);
+ }
+
+ if (!ip4_is_fragment (ip40))
+ {
+ ip40->fragment_id = frag_id_6to4 (frag0->identification);
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (ip6_frag_hdr_offset (frag0));
+ }
+ else
+ {
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (ip4_get_fragment_offset (ip40) +
+ ip6_frag_hdr_offset (frag0));
+ }
+
+ if (ip6_frag_hdr_more (frag0))
+ ip40->flags_and_fragment_offset |=
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+ ip40->length =
+ clib_host_to_net_u16 (p0->current_length - sizeof (*ip60) -
+ sizeof (*frag0));
+ ip40->checksum = ip4_header_checksum (ip40);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip6_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->offset = ip4_get_fragment_offset (ip40);
+ tr->frag_len = clib_net_to_host_u16 (ip40->length) - sizeof (*ip40);
+ tr->out = 1;
+ }
+
+ vec_add1 (*fragments_ready, r->fragments[i].pi);
+ r->fragments[i].pi = ~0;
+ r->fragments[i].next_data_len = 0;
+ r->fragments[i].next_data_offset = 0;
+ map_main.ip6_reass_buffered_counter--;
+
+ //TODO: Best solution would be that ip6_map handles extension headers
+ // and ignores atomic fragment. But in the meantime, let's just copy the header.
+
+ u8 protocol = frag0->next_hdr;
+ memmove (u8_ptr_add (ip40, -sizeof (*ip60)), ip60, sizeof (*ip60));
+ ((ip6_header_t *) u8_ptr_add (ip40, -sizeof (*ip60)))->protocol =
+ protocol;
+ vlib_buffer_advance (p0, sizeof (*frag0));
+ }
+}
+
+void
+map_ip6_drop_pi (u32 pi)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *n =
+ vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
+ vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi);
+}
+
+void
+map_ip4_drop_pi (u32 pi)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *n =
+ vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
+ vlib_set_next_frame_buffer (vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi);
+}
+
+/*
+ * ip6_reass
+ * TODO: We should count the number of successfully
+ * transmitted fragment bytes and compare that to the last fragment
+ * offset such that we can free the reassembly structure when all fragments
+ * have been forwarded.
+ */
+static uword
+ip6_map_ip6_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_ip6_reass_node.index);
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_ready = NULL;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ u16 offset;
+ u16 next_offset;
+ u16 frag_len;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ frag0 = (ip6_frag_hdr_t *) (ip60 + 1);
+ offset =
+ clib_host_to_net_u16 (frag0->fragment_offset_and_more) & (~7);
+ frag_len =
+ clib_net_to_host_u16 (ip60->payload_length) - sizeof (*frag0);
+ next_offset =
+ ip6_frag_hdr_more (frag0) ? (offset + frag_len) : (0xffff);
+
+ //FIXME: Support other extension headers, maybe
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip6_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->offset = offset;
+ tr->frag_len = frag_len;
+ tr->out = 0;
+ }
+
+ map_ip6_reass_lock ();
+ map_ip6_reass_t *r =
+ map_ip6_reass_get (&ip60->src_address, &ip60->dst_address,
+ frag0->identification, frag0->next_hdr,
+ &fragments_to_drop);
+ //FIXME: Use better error codes
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_FALSE ((frag_len <= 20 &&
+ (ip6_frag_hdr_more (frag0) || (!offset)))))
+ {
+ //Very small fragment are restricted to the last one and
+ //can't be the first one
+ error0 = MAP_ERROR_FRAGMENT_MALFORMED;
+ }
+ else
+ if (map_ip6_reass_add_fragment
+ (r, pi0, offset, next_offset, (u8 *) (frag0 + 1), frag_len))
+ {
+ map_ip6_reass_free (r, &fragments_to_drop);
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else
+ {
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ if (!ip6_frag_hdr_more (frag0))
+ r->expected_total = offset + frag_len;
+#endif
+ ip6_map_ip6_reass_prepare (vm, node, r, &fragments_ready,
+ &fragments_to_drop);
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ if (r->forwarded >= r->expected_total)
+ map_ip6_reass_free (r, &fragments_to_drop);
+#endif
+ }
+ map_ip6_reass_unlock ();
+
+ if (error0 == MAP_ERROR_NONE)
+ {
+ if (frag_len > 20)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+ //All data from that packet was copied no need to keep it, but this is not an error
+ p0->error = error_node->errors[MAP_ERROR_NONE];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0,
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+ }
+ }
+ else
+ {
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0,
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ map_send_all_to_node (vm, fragments_ready, node,
+ &error_node->errors[MAP_ERROR_NONE],
+ IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP6_MAP_IP6_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_ready);
+ return frame->n_vectors;
+}
+
+/*
+ * ip6_ip4_virt_reass
+ */
+static uword
+ip6_map_ip4_reass (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_ip4_reass_node.index);
+ map_main_t *mm = &map_main;
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+ u32 *fragments_to_drop = NULL;
+ u32 *fragments_to_loopback = NULL;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ map_domain_t *d0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ i32 port0 = 0;
+ u32 map_domain_index0 = ~0;
+ u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
+ u8 cached = 0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip40 = vlib_buffer_get_current (p0);
+ ip60 = ((ip6_header_t *) ip40) - 1;
+
+ d0 =
+ ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & ip40->src_address.as_u32,
+ &map_domain_index0, &error0);
+
+ map_ip4_reass_lock ();
+ //This node only deals with fragmented ip4
+ map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32,
+ ip40->dst_address.as_u32,
+ ip40->fragment_id,
+ ip40->protocol,
+ &fragments_to_drop);
+ if (PREDICT_FALSE (!r))
+ {
+ // Could not create a caching entry
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ }
+ else if (PREDICT_TRUE (ip4_get_fragment_offset (ip40)))
+ {
+ // This is a fragment
+ if (r->port >= 0)
+ {
+ // We know the port already
+ port0 = r->port;
+ }
+ else if (map_ip4_reass_add_fragment (r, pi0))
+ {
+ // Not enough space for caching
+ error0 = MAP_ERROR_FRAGMENT_MEMORY;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ cached = 1;
+ }
+ }
+ else
+ if ((port0 =
+ ip4_get_port (ip40, MAP_SENDER, p0->current_length)) < 0)
+ {
+ // Could not find port from first fragment. Stop reassembling.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ port0 = 0;
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+ else
+ {
+ // Found port. Remember it and loopback saved fragments
+ r->port = port0;
+ map_ip4_reass_get_fragments (r, &fragments_to_loopback);
+ }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ if (!cached && r)
+ {
+ r->forwarded += clib_host_to_net_u16 (ip40->length) - 20;
+ if (!ip4_get_fragment_more (ip40))
+ r->expected_total =
+ ip4_get_fragment_offset (ip40) * 8 +
+ clib_host_to_net_u16 (ip40->length) - 20;
+ if (r->forwarded >= r->expected_total)
+ map_ip4_reass_free (r, &fragments_to_drop);
+ }
+#endif
+
+ map_ip4_reass_unlock ();
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ error0 =
+ ip6_map_sec_check (d0, port0, ip40,
+ ip60) ? MAP_ERROR_NONE :
+ MAP_ERROR_DECAP_SEC_CHECK;
+
+ if (PREDICT_FALSE
+ (d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
+ && error0 == MAP_ERROR_NONE && !cached))
+ {
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.flags = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
+ next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_ip6_map_ip4_reass_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = map_domain_index0;
+ tr->port = port0;
+ tr->cached = cached;
+ }
+
+ if (cached)
+ {
+ //Dequeue the packet
+ n_left_to_next++;
+ to_next--;
+ }
+ else
+ {
+ if (error0 == MAP_ERROR_NONE)
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index, map_domain_index0,
+ 1,
+ clib_net_to_host_u16
+ (ip40->length));
+ next0 =
+ (error0 ==
+ MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+
+ //Loopback when we reach the end of the inpu vector
+ if (n_left_from == 0 && vec_len (fragments_to_loopback))
+ {
+ from = vlib_frame_vector_args (frame);
+ u32 len = vec_len (fragments_to_loopback);
+ if (len <= VLIB_FRAME_SIZE)
+ {
+ clib_memcpy (from, fragments_to_loopback,
+ sizeof (u32) * len);
+ n_left_from = len;
+ vec_reset_length (fragments_to_loopback);
+ }
+ else
+ {
+ clib_memcpy (from,
+ fragments_to_loopback + (len -
+ VLIB_FRAME_SIZE),
+ sizeof (u32) * VLIB_FRAME_SIZE);
+ n_left_from = VLIB_FRAME_SIZE;
+ _vec_len (fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+ }
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ map_send_all_to_node (vm, fragments_to_drop, node,
+ &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+ IP6_MAP_IP4_REASS_NEXT_DROP);
+
+ vec_free (fragments_to_drop);
+ vec_free (fragments_to_loopback);
+ return frame->n_vectors;
+}
+
+/*
+ * ip6_icmp_relay
+ */
+static uword
+ip6_map_icmp_relay (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_icmp_relay_node.index);
+ map_main_t *mm = &map_main;
+ u32 cpu_index = os_get_cpu_number ();
+ u16 *fragment_ids, *fid;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids =
+ clib_random_buffer_get_data (&vm->random_buffer,
+ n_left_from * sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Single loop */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0 = MAP_ERROR_NONE;
+ ip6_header_t *ip60;
+ u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
+ u32 mtu;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ u16 tlen = clib_net_to_host_u16 (ip60->payload_length);
+
+ /*
+ * In:
+ * IPv6 header (40)
+ * ICMPv6 header (8)
+ * IPv6 header (40)
+ * Original IPv4 header / packet
+ * Out:
+ * New IPv4 header
+ * New ICMP header
+ * Original IPv4 header / packet
+ */
+
+ /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
+ if (tlen < 76)
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
+ icmp46_header_t *icmp60 = (icmp46_header_t *) (ip60 + 1);
+ ip6_header_t *inner_ip60 = (ip6_header_t *) (icmp60 + 2);
+
+ if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP)
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
+ ip4_header_t *inner_ip40 = (ip4_header_t *) (inner_ip60 + 1);
+ vlib_buffer_advance (p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
+ ip4_header_t *new_ip40 = vlib_buffer_get_current (p0);
+ icmp46_header_t *new_icmp40 = (icmp46_header_t *) (new_ip40 + 1);
+
+ /*
+ * Relay according to RFC2473, section 8.3
+ */
+ switch (icmp60->type)
+ {
+ case ICMP6_destination_unreachable:
+ case ICMP6_time_exceeded:
+ case ICMP6_parameter_problem:
+ /* Type 3 - destination unreachable, Code 1 - host unreachable */
+ new_icmp40->type = ICMP4_destination_unreachable;
+ new_icmp40->code =
+ ICMP4_destination_unreachable_destination_unreachable_host;
+ break;
+
+ case ICMP6_packet_too_big:
+ /* Type 3 - destination unreachable, Code 4 - packet too big */
+ /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
+ mtu = clib_net_to_host_u32 (*((u32 *) (icmp60 + 1)));
+
+ /* Check DF flag */
+ if (!
+ (inner_ip40->flags_and_fragment_offset &
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)))
+ {
+ error0 = MAP_ERROR_ICMP_RELAY;
+ goto error;
+ }
+
+ new_icmp40->type = ICMP4_destination_unreachable;
+ new_icmp40->code =
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
+ *((u32 *) (new_icmp40 + 1)) =
+ clib_host_to_net_u32 (mtu < 1280 ? 1280 : mtu);
+ break;
+
+ default:
+ error0 = MAP_ERROR_ICMP_RELAY;
+ break;
+ }
+
+ /*
+ * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
+ */
+ new_ip40->ip_version_and_header_length = 0x45;
+ new_ip40->tos = 0;
+ u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
+ new_ip40->length = clib_host_to_net_u16 (nlen);
+ new_ip40->fragment_id = fid[0];
+ fid++;
+ new_ip40->ttl = 64;
+ new_ip40->protocol = IP_PROTOCOL_ICMP;
+ new_ip40->src_address = mm->icmp4_src_address;
+ new_ip40->dst_address = inner_ip40->src_address;
+ new_ip40->checksum = ip4_header_checksum (new_ip40);
+
+ new_icmp40->checksum = 0;
+ ip_csum_t sum = ip_incremental_checksum (0, new_icmp40, nlen - 20);
+ new_icmp40->checksum = ~ip_csum_fold (sum);
+
+ vlib_increment_simple_counter (&mm->icmp_relayed, cpu_index, 0, 1);
+
+ error:
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ map_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->map_domain_index = 0;
+ tr->port = 0;
+ }
+
+ next0 =
+ (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+
+}
+
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_node) = {
+ .function = ip6_map,
+ .name = "ip6-map",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+
+ .n_next_nodes = IP6_MAP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+ [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite",
+#endif
+ [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
+ [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
+ [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
+ [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
+ [IP6_MAP_NEXT_DROP] = "error-drop",
+ [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
+ .function = ip6_map_ip6_reass,
+ .name = "ip6-map-ip6-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip6_map_ip6_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
+ [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
+ .function = ip6_map_ip4_reass,
+ .name = "ip6-map-ip4-reass",
+ .vector_size = sizeof(u32),
+ .format_trace = format_ip6_map_ip4_reass_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
+ .next_nodes = {
+ [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+ [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
+ .function = ip6_map_icmp_relay,
+ .name = "ip6-map-icmp-relay",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace, //FIXME
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_error_strings,
+ .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
+ .next_nodes = {
+ [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c
new file mode 100644
index 00000000000..eb3996c2467
--- /dev/null
+++ b/src/vnet/map/ip6_map_t.c
@@ -0,0 +1,1517 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+#define IP6_MAP_T_DUAL_LOOP
+
+typedef enum
+{
+ IP6_MAPT_NEXT_MAPT_TCP_UDP,
+ IP6_MAPT_NEXT_MAPT_ICMP,
+ IP6_MAPT_NEXT_MAPT_FRAGMENTED,
+ IP6_MAPT_NEXT_DROP,
+ IP6_MAPT_N_NEXT
+} ip6_mapt_next_t;
+
+typedef enum
+{
+ IP6_MAPT_ICMP_NEXT_IP4_LOOKUP,
+ IP6_MAPT_ICMP_NEXT_IP4_FRAG,
+ IP6_MAPT_ICMP_NEXT_DROP,
+ IP6_MAPT_ICMP_N_NEXT
+} ip6_mapt_icmp_next_t;
+
+typedef enum
+{
+ IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP,
+ IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG,
+ IP6_MAPT_TCP_UDP_NEXT_DROP,
+ IP6_MAPT_TCP_UDP_N_NEXT
+} ip6_mapt_tcp_udp_next_t;
+
+typedef enum
+{
+ IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP,
+ IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG,
+ IP6_MAPT_FRAGMENTED_NEXT_DROP,
+ IP6_MAPT_FRAGMENTED_N_NEXT
+} ip6_mapt_fragmented_next_t;
+
+static_always_inline int
+ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag,
+ map_domain_t * d, u16 port)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address),
+ ip6_map_t_embedded_address (d,
+ &ip6->
+ dst_address),
+ frag_id_6to4 (frag->identification),
+ (ip6->protocol ==
+ IP_PROTOCOL_ICMP6) ?
+ IP_PROTOCOL_ICMP : ip6->protocol,
+ &ignore);
+ if (r)
+ r->port = port;
+
+ map_ip4_reass_unlock ();
+ return !r;
+}
+
+/* Returns the associated port or -1 */
+static_always_inline i32
+ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag,
+ map_domain_t * d)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock ();
+ map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address),
+ ip6_map_t_embedded_address (d,
+ &ip6->
+ dst_address),
+ frag_id_6to4 (frag->identification),
+ (ip6->protocol ==
+ IP_PROTOCOL_ICMP6) ?
+ IP_PROTOCOL_ICMP : ip6->protocol,
+ &ignore);
+ i32 ret = r ? r->port : -1;
+ map_ip4_reass_unlock ();
+ return ret;
+}
+
+static_always_inline u8
+ip6_translate_tos (const ip6_header_t * ip6)
+{
+#ifdef IP6_MAP_T_OVERRIDE_TOS
+ return IP6_MAP_T_OVERRIDE_TOS;
+#else
+ return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label)
+ & 0x0ff00000) >> 20;
+#endif
+}
+
+//TODO: Find right place in memory for that
+/* *INDENT-OFF* */
+static u8 icmp6_to_icmp_updater_pointer_table[] =
+ { 0, 1, ~0, ~0,
+ 2, 2, 9, 8,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 12, 12, 12, 12,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24,
+ 24, 24, 24, 24
+ };
+/* *INDENT-ON* */
+
+static_always_inline int
+ip6_icmp_to_icmp6_in_place (icmp46_header_t * icmp, u32 icmp_len,
+ i32 * sender_port, ip6_header_t ** inner_ip6)
+{
+ *inner_ip6 = NULL;
+ switch (icmp->type)
+ {
+ case ICMP6_echo_request:
+ *sender_port = ((u16 *) icmp)[2];
+ icmp->type = ICMP4_echo_request;
+ break;
+ case ICMP6_echo_reply:
+ *sender_port = ((u16 *) icmp)[2];
+ icmp->type = ICMP4_echo_reply;
+ break;
+ case ICMP6_destination_unreachable:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+ *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len);
+
+ switch (icmp->code)
+ {
+ case ICMP6_destination_unreachable_no_route_to_destination: //0
+ case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2
+ case ICMP6_destination_unreachable_address_unreachable: //3
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_destination_unreachable_host;
+ break;
+ case ICMP6_destination_unreachable_destination_administratively_prohibited: //1
+ icmp->type =
+ ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_communication_administratively_prohibited;
+ break;
+ case ICMP6_destination_unreachable_port_unreachable:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ default:
+ return -1;
+ }
+ break;
+ case ICMP6_packet_too_big:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+ *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len);
+
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = 4;
+ {
+ u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ advertised_mtu -= 20;
+ //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20)
+ ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu);
+ }
+ break;
+
+ case ICMP6_time_exceeded:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+ *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len);
+
+ icmp->type = ICMP4_time_exceeded;
+ break;
+
+ case ICMP6_parameter_problem:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+ *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len);
+
+ switch (icmp->code)
+ {
+ case ICMP6_parameter_problem_erroneous_header_field:
+ icmp->type = ICMP4_parameter_problem;
+ icmp->code = ICMP4_parameter_problem_pointer_indicates_error;
+ u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ if (pointer >= 40)
+ return -1;
+
+ ((u8 *) (icmp + 1))[0] =
+ icmp6_to_icmp_updater_pointer_table[pointer];
+ break;
+ case ICMP6_parameter_problem_unrecognized_next_header:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ case ICMP6_parameter_problem_unrecognized_option:
+ default:
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return 0;
+}
+
+static_always_inline void
+_ip6_map_t_icmp (map_domain_t * d, vlib_buffer_t * p, u8 * error)
+{
+ ip6_header_t *ip6, *inner_ip6;
+ ip4_header_t *ip4, *inner_ip4;
+ u32 ip6_pay_len;
+ icmp46_header_t *icmp;
+ i32 sender_port;
+ ip_csum_t csum;
+ u32 ip4_sadr, inner_ip4_dadr;
+
+ ip6 = vlib_buffer_get_current (p);
+ ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length);
+ icmp = (icmp46_header_t *) (ip6 + 1);
+ ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length);
+
+ if (ip6->protocol != IP_PROTOCOL_ICMP6)
+ {
+ //No extensions headers allowed here
+ //TODO: SR header
+ *error = MAP_ERROR_MALFORMED;
+ return;
+ }
+
+ //There are no fragmented ICMP messages, so no extension header for now
+
+ if (ip6_icmp_to_icmp6_in_place
+ (icmp, ip6_pay_len, &sender_port, &inner_ip6))
+ {
+ //TODO: In case of 1:1 mapping it is not necessary to have the sender port
+ *error = MAP_ERROR_ICMP;
+ return;
+ }
+
+ if (sender_port < 0)
+ {
+ // In case of 1:1 mapping, we don't care about the port
+ if (d->ea_bits_len == 0 && d->rules)
+ {
+ sender_port = 0;
+ }
+ else
+ {
+ *error = MAP_ERROR_ICMP;
+ return;
+ }
+ }
+
+ //Security check
+ //Note that this prevents an intermediate IPv6 router from answering the request
+ ip4_sadr = map_get_ip4 (&ip6->src_address);
+ if (ip6->src_address.as_u64[0] != map_get_pfx_net (d, ip4_sadr, sender_port)
+ || ip6->src_address.as_u64[1] != map_get_sfx_net (d, ip4_sadr,
+ sender_port))
+ {
+ *error = MAP_ERROR_SEC_CHECK;
+ return;
+ }
+
+ if (inner_ip6)
+ {
+ u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset,
+ inner_frag_id;
+ u8 *inner_l4, inner_protocol;
+
+ //We have two headers to translate
+ // FROM
+ // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ...
+ // Handled cases:
+ // [ IPv6 ][IC][ IPv6 ][L4 header ...
+ // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ...
+ // TO
+ // [ IPv4][IC][ IPv4][L4 header ...
+
+ //TODO: This was already done deep in ip6_icmp_to_icmp6_in_place
+ //We shouldn't have to do it again
+ if (ip6_parse (inner_ip6, ip6_pay_len - 8,
+ &inner_protocol, &inner_l4_offset, &inner_frag_offset))
+ {
+ *error = MAP_ERROR_MALFORMED;
+ return;
+ }
+
+ inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset);
+ inner_ip4 =
+ (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4));
+ if (inner_frag_offset)
+ {
+ ip6_frag_hdr_t *inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset);
+ inner_frag_id = frag_id_6to4 (inner_frag->identification);
+ }
+ else
+ {
+ inner_frag_id = 0;
+ }
+
+ //Do the translation of the inner packet
+ if (inner_protocol == IP_PROTOCOL_TCP)
+ {
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16);
+ }
+ else if (inner_protocol == IP_PROTOCOL_UDP)
+ {
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6);
+ }
+ else if (inner_protocol == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+ csum = inner_icmp->checksum;
+ csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp));
+ //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded
+ inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ?
+ ICMP4_echo_request : ICMP4_echo_reply;
+ csum = ip_csum_add_even (csum, *((u16 *) inner_icmp));
+ inner_icmp->checksum = ip_csum_fold (csum);
+ inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later
+ inner_L4_checksum = &inner_icmp->checksum;
+ }
+ else
+ {
+ *error = MAP_ERROR_BAD_PROTOCOL;
+ return;
+ }
+
+ csum = *inner_L4_checksum;
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
+
+ //Sanity check of the outer destination address
+ if (ip6->dst_address.as_u64[0] != inner_ip6->src_address.as_u64[0] &&
+ ip6->dst_address.as_u64[1] != inner_ip6->src_address.as_u64[1])
+ {
+ *error = MAP_ERROR_SEC_CHECK;
+ return;
+ }
+
+ //Security check of inner packet
+ inner_ip4_dadr = map_get_ip4 (&inner_ip6->dst_address);
+ if (inner_ip6->dst_address.as_u64[0] !=
+ map_get_pfx_net (d, inner_ip4_dadr, sender_port)
+ || inner_ip6->dst_address.as_u64[1] != map_get_sfx_net (d,
+ inner_ip4_dadr,
+ sender_port))
+ {
+ *error = MAP_ERROR_SEC_CHECK;
+ return;
+ }
+
+ inner_ip4->dst_address.as_u32 = inner_ip4_dadr;
+ inner_ip4->src_address.as_u32 =
+ ip6_map_t_embedded_address (d, &inner_ip6->src_address);
+ inner_ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ inner_ip4->tos = ip6_translate_tos (inner_ip6);
+ inner_ip4->length =
+ u16_net_add (inner_ip6->payload_length,
+ sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset);
+ inner_ip4->fragment_id = inner_frag_id;
+ inner_ip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ inner_ip4->ttl = inner_ip6->hop_limit;
+ inner_ip4->protocol = inner_protocol;
+ inner_ip4->checksum = ip4_header_checksum (inner_ip4);
+
+ if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //Remove remainings of the pseudo-header in the csum
+ csum =
+ ip_csum_sub_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6));
+ csum =
+ ip_csum_sub_even (csum, inner_ip4->length - sizeof (*inner_ip4));
+ }
+ else
+ {
+ //Update to new pseudo-header
+ csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32);
+ csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32);
+ }
+ *inner_L4_checksum = ip_csum_fold (csum);
+
+ //Move up icmp header
+ ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8);
+ clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8);
+ icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8);
+ }
+ else
+ {
+ //Only one header to translate
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ }
+ vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6)));
+
+ ip4->dst_address.as_u32 = ip6_map_t_embedded_address (d, &ip6->dst_address);
+ ip4->src_address.as_u32 = ip4_sadr;
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->fragment_id = 0;
+ ip4->flags_and_fragment_offset = 0;
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol = IP_PROTOCOL_ICMP;
+ //TODO fix the length depending on offset length
+ ip4->length = u16_net_add (ip6->payload_length,
+ (inner_ip6 ==
+ NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) -
+ sizeof (*ip6)));
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ //TODO: We could do an easy diff-checksum for echo requests/replies
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum =
+ ip_incremental_checksum (0, icmp,
+ clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ icmp->checksum = ~ip_csum_fold (csum);
+}
+
+static uword
+ip6_map_t_icmp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ u8 error0;
+ ip6_mapt_icmp_next_t next0;
+ map_domain_t *d0;
+ u16 len0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+ next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ len0 =
+ clib_net_to_host_u16 (((ip6_header_t *)
+ vlib_buffer_get_current
+ (p0))->payload_length);
+ d0 =
+ pool_elt_at_index (map_main.domains,
+ vnet_buffer (p0)->map_t.map_domain_index);
+ _ip6_map_t_icmp (d0, p0, &error0);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
+ }
+
+ if (PREDICT_TRUE (error0 == MAP_ERROR_NONE))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ len0);
+ }
+ else
+ {
+ next0 = IP6_MAPT_ICMP_NEXT_DROP;
+ }
+
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip6_map_t_fragmented (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip60, *ip61;
+ ip6_frag_hdr_t *frag0, *frag1;
+ ip4_header_t *ip40, *ip41;
+ u16 frag_id0, frag_offset0, frag_id1, frag_offset1;
+ u8 frag_more0, frag_more1;
+ u32 next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+ frag1 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.
+ v6.frag_offset);
+ ip40 =
+ (ip4_header_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ ip41 =
+ (ip4_header_t *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+ vlib_buffer_advance (p1,
+ vnet_buffer (p1)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+
+ frag_id0 = frag_id_6to4 (frag0->identification);
+ frag_id1 = frag_id_6to4 (frag1->identification);
+ frag_more0 = ip6_frag_hdr_more (frag0);
+ frag_more1 = ip6_frag_hdr_more (frag1);
+ frag_offset0 = ip6_frag_hdr_offset (frag0);
+ frag_offset1 = ip6_frag_hdr_offset (frag1);
+
+ ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr;
+ ip41->dst_address.as_u32 = vnet_buffer (p1)->map_t.v6.daddr;
+ ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr;
+ ip41->src_address.as_u32 = vnet_buffer (p1)->map_t.v6.saddr;
+ ip40->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip41->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip40->tos = ip6_translate_tos (ip60);
+ ip41->tos = ip6_translate_tos (ip61);
+ ip40->length = u16_net_add (ip60->payload_length,
+ sizeof (*ip40) -
+ vnet_buffer (p0)->map_t.v6.l4_offset +
+ sizeof (*ip60));
+ ip41->length =
+ u16_net_add (ip61->payload_length,
+ sizeof (*ip40) -
+ vnet_buffer (p1)->map_t.v6.l4_offset +
+ sizeof (*ip60));
+ ip40->fragment_id = frag_id0;
+ ip41->fragment_id = frag_id1;
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (frag_offset0 |
+ (frag_more0 ? IP4_HEADER_FLAG_MORE_FRAGMENTS
+ : 0));
+ ip41->flags_and_fragment_offset =
+ clib_host_to_net_u16 (frag_offset1 |
+ (frag_more1 ? IP4_HEADER_FLAG_MORE_FRAGMENTS
+ : 0));
+ ip40->ttl = ip60->hop_limit;
+ ip41->ttl = ip61->hop_limit;
+ ip40->protocol =
+ (vnet_buffer (p0)->map_t.v6.l4_protocol ==
+ IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p0)->
+ map_t.v6.l4_protocol;
+ ip41->protocol =
+ (vnet_buffer (p1)->map_t.v6.l4_protocol ==
+ IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p1)->
+ map_t.v6.l4_protocol;
+ ip40->checksum = ip4_header_checksum (ip40);
+ ip41->checksum = ip4_header_checksum (ip41);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip60;
+ ip6_frag_hdr_t *frag0;
+ ip4_header_t *ip40;
+ u16 frag_id0;
+ u8 frag_more0;
+ u16 frag_offset0;
+ u32 next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+ ip40 =
+ (ip4_header_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+
+ frag_id0 = frag_id_6to4 (frag0->identification);
+ frag_more0 = ip6_frag_hdr_more (frag0);
+ frag_offset0 = ip6_frag_hdr_offset (frag0);
+
+ ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr;
+ ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr;
+ ip40->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip40->tos = ip6_translate_tos (ip60);
+ ip40->length = u16_net_add (ip60->payload_length,
+ sizeof (*ip40) -
+ vnet_buffer (p0)->map_t.v6.l4_offset +
+ sizeof (*ip60));
+ ip40->fragment_id = frag_id0;
+ ip40->flags_and_fragment_offset =
+ clib_host_to_net_u16 (frag_offset0 |
+ (frag_more0 ? IP4_HEADER_FLAG_MORE_FRAGMENTS
+ : 0));
+ ip40->ttl = ip60->hop_limit;
+ ip40->protocol =
+ (vnet_buffer (p0)->map_t.v6.l4_protocol ==
+ IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p0)->
+ map_t.v6.l4_protocol;
+ ip40->checksum = ip4_header_checksum (ip40);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static uword
+ip6_map_t_tcp_udp (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip60, *ip61;
+ ip_csum_t csum0, csum1;
+ ip4_header_t *ip40, *ip41;
+ u16 fragment_id0, flags0, *checksum0,
+ fragment_id1, flags1, *checksum1;
+ ip6_mapt_tcp_udp_next_t next0, next1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+ ip40 =
+ (ip4_header_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ ip41 =
+ (ip4_header_t *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+ vlib_buffer_advance (p1,
+ vnet_buffer (p1)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+ checksum0 =
+ (u16 *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.checksum_offset);
+ checksum1 =
+ (u16 *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.checksum_offset);
+
+ csum0 = ip_csum_sub_even (*checksum0, ip60->src_address.as_u64[0]);
+ csum1 = ip_csum_sub_even (*checksum1, ip61->src_address.as_u64[0]);
+ csum0 = ip_csum_sub_even (csum0, ip60->src_address.as_u64[1]);
+ csum1 = ip_csum_sub_even (csum1, ip61->src_address.as_u64[1]);
+ csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[0]);
+ csum1 = ip_csum_sub_even (csum0, ip61->dst_address.as_u64[0]);
+ csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[1]);
+ csum1 = ip_csum_sub_even (csum1, ip61->dst_address.as_u64[1]);
+ csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.daddr);
+ csum1 = ip_csum_add_even (csum1, vnet_buffer (p1)->map_t.v6.daddr);
+ csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.saddr);
+ csum1 = ip_csum_add_even (csum1, vnet_buffer (p1)->map_t.v6.saddr);
+ *checksum0 = ip_csum_fold (csum0);
+ *checksum1 = ip_csum_fold (csum1);
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset))
+ {
+ ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->
+ map_t.
+ v6.frag_offset);
+ fragment_id0 = frag_id_6to4 (hdr->identification);
+ flags0 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ }
+ else
+ {
+ fragment_id0 = 0;
+ flags0 = 0;
+ }
+
+ if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset))
+ {
+ ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer
+ (p1)->
+ map_t.
+ v6.frag_offset);
+ fragment_id1 = frag_id_6to4 (hdr->identification);
+ flags1 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ }
+ else
+ {
+ fragment_id1 = 0;
+ flags1 = 0;
+ }
+
+ ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr;
+ ip41->dst_address.as_u32 = vnet_buffer (p1)->map_t.v6.daddr;
+ ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr;
+ ip41->src_address.as_u32 = vnet_buffer (p1)->map_t.v6.saddr;
+ ip40->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip41->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip40->tos = ip6_translate_tos (ip60);
+ ip41->tos = ip6_translate_tos (ip61);
+ ip40->length = u16_net_add (ip60->payload_length,
+ sizeof (*ip40) + sizeof (*ip60) -
+ vnet_buffer (p0)->map_t.v6.l4_offset);
+ ip41->length =
+ u16_net_add (ip61->payload_length,
+ sizeof (*ip40) + sizeof (*ip60) -
+ vnet_buffer (p1)->map_t.v6.l4_offset);
+ ip40->fragment_id = fragment_id0;
+ ip41->fragment_id = fragment_id1;
+ ip40->flags_and_fragment_offset = flags0;
+ ip41->flags_and_fragment_offset = flags1;
+ ip40->ttl = ip60->hop_limit;
+ ip41->ttl = ip61->hop_limit;
+ ip40->protocol = vnet_buffer (p0)->map_t.v6.l4_protocol;
+ ip41->protocol = vnet_buffer (p1)->map_t.v6.l4_protocol;
+ ip40->checksum = ip4_header_checksum (ip40);
+ ip41->checksum = ip4_header_checksum (ip41);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+
+ if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
+ {
+ vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
+ vnet_buffer (p1)->ip_frag.header_offset = 0;
+ vnet_buffer (p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip60;
+ u16 *checksum0;
+ ip_csum_t csum0;
+ ip4_header_t *ip40;
+ u16 fragment_id0;
+ u16 flags0;
+ ip6_mapt_tcp_udp_next_t next0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ ip40 =
+ (ip4_header_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.l4_offset - sizeof (*ip40));
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->map_t.v6.l4_offset -
+ sizeof (*ip40));
+ checksum0 =
+ (u16 *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.checksum_offset);
+
+ //TODO: This can probably be optimized
+ csum0 = ip_csum_sub_even (*checksum0, ip60->src_address.as_u64[0]);
+ csum0 = ip_csum_sub_even (csum0, ip60->src_address.as_u64[1]);
+ csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[0]);
+ csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[1]);
+ csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.daddr);
+ csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.saddr);
+ *checksum0 = ip_csum_fold (csum0);
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset))
+ {
+ //Only the first fragment
+ ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->
+ map_t.
+ v6.frag_offset);
+ fragment_id0 = frag_id_6to4 (hdr->identification);
+ flags0 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ }
+ else
+ {
+ fragment_id0 = 0;
+ flags0 = 0;
+ }
+
+ ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr;
+ ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr;
+ ip40->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip40->tos = ip6_translate_tos (ip60);
+ ip40->length = u16_net_add (ip60->payload_length,
+ sizeof (*ip40) + sizeof (*ip60) -
+ vnet_buffer (p0)->map_t.v6.l4_offset);
+ ip40->fragment_id = fragment_id0;
+ ip40->flags_and_fragment_offset = flags0;
+ ip40->ttl = ip60->hop_limit;
+ ip40->protocol = vnet_buffer (p0)->map_t.v6.l4_protocol;
+ ip40->checksum = ip4_header_checksum (ip40);
+
+ if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
+ {
+ //Send to fragmentation node if necessary
+ vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
+ vnet_buffer (p0)->ip_frag.header_offset = 0;
+ vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+ next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static_always_inline void
+ip6_map_t_classify (vlib_buffer_t * p0, ip6_header_t * ip60,
+ map_domain_t * d0, i32 * src_port0,
+ u8 * error0, ip6_mapt_next_t * next0,
+ u32 l4_len0, ip6_frag_hdr_t * frag0)
+{
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ ip6_frag_hdr_offset (frag0)))
+ {
+ *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *src_port0 = 0;
+ }
+ else
+ {
+ *src_port0 = ip6_map_fragment_get (ip60, frag0, d0);
+ *error0 = (*src_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED;
+ }
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP))
+ {
+ *error0 =
+ l4_len0 < sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 16;
+ *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ *src_port0 =
+ (i32) *
+ ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP))
+ {
+ *error0 =
+ l4_len0 < sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6;
+ *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ *src_port0 =
+ (i32) *
+ ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else if (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)
+ {
+ *error0 =
+ l4_len0 < sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : *error0;
+ *next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+ if (d0->ea_bits_len == 0 && d0->rules)
+ {
+ *src_port0 = 0;
+ }
+ else
+ if (((icmp46_header_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.l4_offset))->code ==
+ ICMP6_echo_reply
+ || ((icmp46_header_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.l4_offset))->code ==
+ ICMP6_echo_request)
+ {
+ *src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset + 6));
+ }
+ }
+ else
+ {
+ //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+ *error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+}
+
+static uword
+ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_map_t_node.index);
+ vlib_combined_counter_main_t *cm = map_main.domain_counters;
+ u32 cpu_index = os_get_cpu_number ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip60, *ip61;
+ u8 error0, error1;
+ ip6_mapt_next_t next0, next1;
+ u32 l4_len0, l4_len1;
+ i32 src_port0, src_port1;
+ map_domain_t *d0, *d1;
+ ip6_frag_hdr_t *frag0, *frag1;
+ u32 saddr0, saddr1;
+ next0 = next1 = 0; //Because compiler whines
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ error0 = MAP_ERROR_NONE;
+ error1 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ ip60 = vlib_buffer_get_current (p0);
+ ip61 = vlib_buffer_get_current (p1);
+
+ saddr0 = map_get_ip4 (&ip60->src_address);
+ saddr1 = map_get_ip4 (&ip61->src_address);
+ d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr0,
+ &vnet_buffer (p0)->map_t.map_domain_index,
+ &error0);
+ d1 =
+ ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr1,
+ &vnet_buffer (p1)->map_t.map_domain_index,
+ &error1);
+
+ vnet_buffer (p0)->map_t.v6.saddr = saddr0;
+ vnet_buffer (p1)->map_t.v6.saddr = saddr1;
+ vnet_buffer (p0)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d0, &ip60->dst_address);
+ vnet_buffer (p1)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d1, &ip61->dst_address);
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+ vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+ if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length,
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p0)->map_t.
+ v6.frag_offset))))
+ {
+ error0 = MAP_ERROR_MALFORMED;
+ next0 = IP6_MAPT_NEXT_DROP;
+ }
+
+ if (PREDICT_FALSE (ip6_parse (ip61, p1->current_length,
+ &(vnet_buffer (p1)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p1)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p1)->map_t.
+ v6.frag_offset))))
+ {
+ error1 = MAP_ERROR_MALFORMED;
+ next1 = IP6_MAPT_NEXT_DROP;
+ }
+
+ src_port0 = src_port1 = -1;
+ l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset;
+ l4_len1 = (u32) clib_net_to_host_u16 (ip61->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p1)->map_t.v6.l4_offset;
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+ frag1 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer (p1)->map_t.
+ v6.frag_offset);
+
+ ip6_map_t_classify (p0, ip60, d0, &src_port0, &error0, &next0,
+ l4_len0, frag0);
+ ip6_map_t_classify (p1, ip61, d1, &src_port1, &error1, &next1,
+ l4_len1, frag1);
+
+ if (PREDICT_FALSE
+ ((src_port0 != -1)
+ && (ip60->src_address.as_u64[0] !=
+ map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr,
+ src_port0)
+ || ip60->src_address.as_u64[1] != map_get_sfx_net (d0,
+ vnet_buffer
+ (p0)->map_t.v6.saddr,
+ src_port0))))
+ {
+ error0 = MAP_ERROR_SEC_CHECK;
+ }
+
+ if (PREDICT_FALSE
+ ((src_port1 != -1)
+ && (ip61->src_address.as_u64[0] !=
+ map_get_pfx_net (d1, vnet_buffer (p1)->map_t.v6.saddr,
+ src_port1)
+ || ip61->src_address.as_u64[1] != map_get_sfx_net (d1,
+ vnet_buffer
+ (p1)->map_t.v6.saddr,
+ src_port1))))
+ {
+ error1 = MAP_ERROR_SEC_CHECK;
+ }
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset)))
+ && (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules)
+ && (error0 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip60,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset),
+ d0, src_port0);
+ }
+
+ if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip61,
+ vnet_buffer
+ (p1)->map_t.
+ v6.frag_offset)))
+ && (src_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules)
+ && (error1 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip61,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip61,
+ vnet_buffer
+ (p1)->map_t.
+ v6.frag_offset),
+ d1, src_port1);
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip60->payload_length));
+ }
+
+ if (PREDICT_TRUE
+ (error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ vnet_buffer (p1)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip61->payload_length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+ next1 = (error1 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next1;
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1, next0,
+ next1);
+ }
+#endif
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip6_header_t *ip60;
+ u8 error0;
+ u32 l4_len0;
+ i32 src_port0;
+ map_domain_t *d0;
+ ip6_frag_hdr_t *frag0;
+ ip6_mapt_next_t next0 = 0;
+ u32 saddr;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ error0 = MAP_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip60 = vlib_buffer_get_current (p0);
+ //Save saddr in a different variable to not overwrite ip.adj_index
+ saddr = map_get_ip4 (&ip60->src_address);
+ d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ (ip4_address_t *) & saddr,
+ &vnet_buffer (p0)->map_t.map_domain_index,
+ &error0);
+
+ //FIXME: What if d0 is null
+ vnet_buffer (p0)->map_t.v6.saddr = saddr;
+ vnet_buffer (p0)->map_t.v6.daddr =
+ ip6_map_t_embedded_address (d0, &ip60->dst_address);
+ vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+ if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length,
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_protocol),
+ &(vnet_buffer (p0)->map_t.
+ v6.l4_offset),
+ &(vnet_buffer (p0)->map_t.
+ v6.frag_offset))))
+ {
+ error0 = MAP_ERROR_MALFORMED;
+ next0 = IP6_MAPT_NEXT_DROP;
+ }
+
+ src_port0 = -1;
+ l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) +
+ sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset;
+ frag0 =
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.
+ v6.frag_offset);
+
+
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ ip6_frag_hdr_offset (frag0)))
+ {
+ src_port0 = ip6_map_fragment_get (ip60, frag0, d0);
+ error0 = (src_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY;
+ next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP))
+ {
+ error0 =
+ l4_len0 <
+ sizeof (tcp_header_t) ? MAP_ERROR_MALFORMED : error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 16;
+ next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else
+ if (PREDICT_TRUE
+ (vnet_buffer (p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP))
+ {
+ error0 =
+ l4_len0 <
+ sizeof (udp_header_t) ? MAP_ERROR_MALFORMED : error0;
+ vnet_buffer (p0)->map_t.checksum_offset =
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6;
+ next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset));
+ }
+ else if (vnet_buffer (p0)->map_t.v6.l4_protocol ==
+ IP_PROTOCOL_ICMP6)
+ {
+ error0 =
+ l4_len0 <
+ sizeof (icmp46_header_t) ? MAP_ERROR_MALFORMED : error0;
+ next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+ if (((icmp46_header_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.l4_offset))->code ==
+ ICMP6_echo_reply
+ || ((icmp46_header_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.
+ l4_offset))->code == ICMP6_echo_request)
+ src_port0 =
+ (i32) *
+ ((u16 *)
+ u8_ptr_add (ip60,
+ vnet_buffer (p0)->map_t.v6.l4_offset + 6));
+ }
+ else
+ {
+ //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+ error0 = MAP_ERROR_BAD_PROTOCOL;
+ }
+
+ //Security check
+ if (PREDICT_FALSE
+ ((src_port0 != -1)
+ && (ip60->src_address.as_u64[0] !=
+ map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr,
+ src_port0)
+ || ip60->src_address.as_u64[1] != map_get_sfx_net (d0,
+ vnet_buffer
+ (p0)->map_t.v6.saddr,
+ src_port0))))
+ {
+ //Security check when src_port0 is not zero (non-first fragment, UDP or TCP)
+ error0 = MAP_ERROR_SEC_CHECK;
+ }
+
+ //Fragmented first packet needs to be cached for following packets
+ if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset &&
+ !ip6_frag_hdr_offset ((ip6_frag_hdr_t *)
+ u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset)))
+ && (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules)
+ && (error0 == MAP_ERROR_NONE))
+ {
+ ip6_map_fragment_cache (ip60,
+ (ip6_frag_hdr_t *) u8_ptr_add (ip60,
+ vnet_buffer
+ (p0)->map_t.
+ v6.frag_offset),
+ d0, src_port0);
+ }
+
+ if (PREDICT_TRUE
+ (error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP))
+ {
+ vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX,
+ cpu_index,
+ vnet_buffer (p0)->
+ map_t.map_domain_index, 1,
+ clib_net_to_host_u16
+ (ip60->payload_length));
+ }
+
+ next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, pi0,
+ next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
+ .function = ip6_map_t_fragmented,
+ .name = "ip6-map-t-fragmented",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
+ .function = ip6_map_t_icmp,
+ .name = "ip6-map-t-icmp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_ICMP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
+ .function = ip6_map_t_tcp_udp,
+ .name = "ip6-map-t-tcp-udp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE(ip6_map_t_node) = {
+ .function = ip6_map_t,
+ .name = "ip6-map-t",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp",
+ [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp",
+ [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented",
+ [IP6_MAPT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map.api b/src/vnet/map/map.api
new file mode 100644
index 00000000000..4e4be85effa
--- /dev/null
+++ b/src/vnet/map/map.api
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/** \brief Add MAP domains
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip6_prefix - Rule IPv6 prefix
+ @param ip4_prefix - Rule IPv4 prefix
+ @param ip6_src - MAP domain IPv6 BR address / Tunnel source
+ @param ip6_prefix_len - Rule IPv6 prefix length
+ @param ip4_prefix_len - Rule IPv4 prefix length
+ @param ea_bits_len - Embedded Address bits length
+ @param psid_offset - Port Set Identifider (PSID) offset
+ @param psid_length - PSID length
+ @param is_translation - MAP-E / MAP-T
+ @param mtu - MTU
+*/
+define map_add_domain
+{
+ u32 client_index;
+ u32 context;
+ u8 ip6_prefix[16];
+ u8 ip4_prefix[4];
+ u8 ip6_src[16];
+ u8 ip6_prefix_len;
+ u8 ip4_prefix_len;
+ u8 ip6_src_prefix_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+ u8 is_translation;
+ u16 mtu;
+};
+
+/** \brief Reply for MAP domain add
+ @param context - returned sender context, to match reply w/ request
+ @param index - MAP domain index
+ @param retval - return code
+*/
+define map_add_domain_reply
+{
+ u32 context;
+ u32 index;
+ i32 retval;
+};
+
+/** \brief Delete MAP domain
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param index - MAP Domain index
+*/
+define map_del_domain
+{
+ u32 client_index;
+ u32 context;
+ u32 index;
+};
+
+/** \brief Reply for MAP domain del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define map_del_domain_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Add or Delete MAP rule from a domain (Only used for shared IPv4 per subscriber)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param index - MAP Domain index
+ @param is_add - If 1 add rule, if 0 delete rule
+ @param ip6_dst - MAP CE IPv6 address
+ @param psid - Rule PSID
+*/
+define map_add_del_rule
+{
+ u32 client_index;
+ u32 context;
+ u32 index;
+ u32 is_add;
+ u8 ip6_dst[16];
+ u16 psid;
+};
+
+/** \brief Reply for MAP rule add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+*/
+define map_add_del_rule_reply
+{
+ u32 context;
+ i32 retval;
+};
+
+/** \brief Get list of map domains
+ @param client_index - opaque cookie to identify the sender
+*/
+define map_domain_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define map_domain_details
+{
+ u32 context;
+ u32 domain_index;
+ u8 ip6_prefix[16];
+ u8 ip4_prefix[4];
+ u8 ip6_src[16];
+ u8 ip6_prefix_len;
+ u8 ip4_prefix_len;
+ u8 ip6_src_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+ u8 flags;
+ u16 mtu;
+ u8 is_translation;
+};
+
+define map_rule_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 domain_index;
+};
+
+define map_rule_details
+{
+ u32 context;
+ u8 ip6_dst[16];
+ u16 psid;
+};
+
+/** \brief Request for a single block of summary stats
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define map_summary_stats
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for map_summary_stats request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+ @param total_bindings -
+ @param total_pkts -
+ @param total_ip4_fragments -
+ @param total_security_check -
+*/
+define map_summary_stats_reply
+{
+ u32 context;
+ i32 retval;
+ u64 total_bindings;
+ u64 total_pkts[2];
+ u64 total_bytes[2];
+ u64 total_ip4_fragments;
+ u64 total_security_check[2];
+};
diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c
new file mode 100644
index 00000000000..aeec6a946c9
--- /dev/null
+++ b/src/vnet/map/map.c
@@ -0,0 +1,2166 @@
+/*
+ * map.c : MAP support
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+
+#include "map.h"
+
+#ifdef __SSE4_2__
+static inline u32
+crc_u32 (u32 data, u32 value)
+{
+ __asm__ volatile ("crc32l %[data], %[value];":[value] "+r" (value):[data]
+ "rm" (data));
+ return value;
+}
+#else
+#include <vppinfra/xxhash.h>
+
+static inline u32
+crc_u32 (u32 data, u32 value)
+{
+ u64 tmp = ((u64) data << 32) | (u64) value;
+ return (u32) clib_xxhash (tmp);
+}
+#endif
+
+/*
+ * This code supports the following MAP modes:
+ *
+ * Algorithmic Shared IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix > 32
+ * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic Full IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix = 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic IPv4 prefix (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix < 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ *
+ * Independent Shared IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length > 0
+ * Rule IPv6 address = 128, Rule PSID Set
+ * Independent Full IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length = 0, ip6_prefix = 128
+ * Independent IPv4 prefix (ea_bits_len = 0):
+ * ip4_prefix < 32
+ * psid_length = 0, ip6_prefix = 128
+ *
+ */
+
+/*
+ * This code supports MAP-T:
+ *
+ * With DMR prefix length equal to 96.
+ *
+ */
+
+
+i32
+ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len)
+{
+ //TODO: use buffer length
+ if (ip->ip_version_and_header_length != 0x45 ||
+ ip4_get_fragment_offset (ip))
+ return -1;
+
+ if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (ip + 1);
+ return (dir == MAP_SENDER) ? udp->src_port : udp->dst_port;
+ }
+ else if (ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ icmp46_header_t *icmp = (void *) (ip + 1);
+ if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply)
+ {
+ return *((u16 *) (icmp + 1));
+ }
+ else if (clib_net_to_host_u16 (ip->length) >= 64)
+ {
+ ip = (ip4_header_t *) (icmp + 2);
+ if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP)))
+ {
+ udp_header_t *udp = (void *) (ip + 1);
+ return (dir == MAP_SENDER) ? udp->dst_port : udp->src_port;
+ }
+ else if (ip->protocol == IP_PROTOCOL_ICMP)
+ {
+ icmp46_header_t *icmp = (void *) (ip + 1);
+ if (icmp->type == ICMP4_echo_request ||
+ icmp->type == ICMP4_echo_reply)
+ {
+ return *((u16 *) (icmp + 1));
+ }
+ }
+ }
+ }
+ return -1;
+}
+
+i32
+ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len)
+{
+ u8 l4_protocol;
+ u16 l4_offset;
+ u16 frag_offset;
+ u8 *l4;
+
+ if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
+ return -1;
+
+ //TODO: Use buffer length
+
+ if (frag_offset &&
+ ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
+ u8_ptr_add (ip6, frag_offset))))
+ return -1; //Can't deal with non-first fragment for now
+
+ l4 = u8_ptr_add (ip6, l4_offset);
+ if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP)
+ {
+ return (dir ==
+ MAP_SENDER) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t
+ *)
+ (l4))->dst_port;
+ }
+ else if (l4_protocol == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *icmp = (icmp46_header_t *) (l4);
+ if (icmp->type == ICMP6_echo_request)
+ {
+ return (dir == MAP_SENDER) ? ((u16 *) (icmp))[2] : -1;
+ }
+ else if (icmp->type == ICMP6_echo_reply)
+ {
+ return (dir == MAP_SENDER) ? -1 : ((u16 *) (icmp))[2];
+ }
+ }
+ return -1;
+}
+
+
+int
+map_create_domain (ip4_address_t * ip4_prefix,
+ u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix,
+ u8 ip6_prefix_len,
+ ip6_address_t * ip6_src,
+ u8 ip6_src_len,
+ u8 ea_bits_len,
+ u8 psid_offset,
+ u8 psid_length, u32 * map_domain_index, u16 mtu, u8 flags)
+{
+ u8 suffix_len, suffix_shift;
+ map_main_t *mm = &map_main;
+ dpo_id_t dpo_v4 = DPO_INVALID;
+ dpo_id_t dpo_v6 = DPO_INVALID;
+ fib_node_index_t fei;
+ map_domain_t *d;
+
+ /* Sanity check on the src prefix length */
+ if (flags & MAP_DOMAIN_TRANSLATION)
+ {
+ if (ip6_src_len != 96)
+ {
+ clib_warning ("MAP-T only supports ip6_src_len = 96 for now.");
+ return -1;
+ }
+ }
+ else
+ {
+ if (ip6_src_len != 128)
+ {
+ clib_warning
+ ("MAP-E requires a BR address, not a prefix (ip6_src_len should "
+ "be 128).");
+ return -1;
+ }
+ }
+
+ /* How many, and which bits to grab from the IPv4 DA */
+ if (ip4_prefix_len + ea_bits_len < 32)
+ {
+ flags |= MAP_DOMAIN_PREFIX;
+ suffix_shift = 32 - ip4_prefix_len - ea_bits_len;
+ suffix_len = ea_bits_len;
+ }
+ else
+ {
+ suffix_shift = 0;
+ suffix_len = 32 - ip4_prefix_len;
+ }
+
+ /* EA bits must be within the first 64 bits */
+ if (ea_bits_len > 0 && ((ip6_prefix_len + ea_bits_len) > 64 ||
+ ip6_prefix_len + suffix_len + psid_length > 64))
+ {
+ clib_warning
+ ("Embedded Address bits must be within the first 64 bits of "
+ "the IPv6 prefix");
+ return -1;
+ }
+
+ /* Get domain index */
+ pool_get_aligned (mm->domains, d, CLIB_CACHE_LINE_BYTES);
+ memset (d, 0, sizeof (*d));
+ *map_domain_index = d - mm->domains;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip6_src = *ip6_src;
+ d->ip6_src_len = ip6_src_len;
+ d->ea_bits_len = ea_bits_len;
+ d->psid_offset = psid_offset;
+ d->psid_length = psid_length;
+ d->mtu = mtu;
+ d->flags = flags;
+ d->suffix_shift = suffix_shift;
+ d->suffix_mask = (1 << suffix_len) - 1;
+
+ d->psid_shift = 16 - psid_length - psid_offset;
+ d->psid_mask = (1 << d->psid_length) - 1;
+ d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
+
+ /* MAP data-plane object */
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+ else
+ map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4);
+
+ /* Create ip4 route */
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_dpo_add (0, &pfx,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4);
+ dpo_reset (&dpo_v4);
+
+ /*
+ * Multiple MAP domains may share same source IPv6 TEP.
+ * In this case the route will exist and be MAP sourced.
+ * Find the adj (if any) already contributed and modify it
+ */
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr = {
+ .ip6 = d->ip6_src,
+ }
+ ,
+ };
+ fei = fib_table_lookup_exact_match (0, &pfx6);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+
+ if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_MAP, &dpo))
+ {
+ /*
+ * modify the existing MAP to indicate it's shared
+ * skip to route add.
+ */
+ const dpo_id_t *md_dpo;
+ map_dpo_t *md;
+
+ ASSERT (DPO_LOAD_BALANCE == dpo.dpoi_type);
+
+ md_dpo = load_balance_get_bucket (dpo.dpoi_index, 0);
+ md = map_dpo_get (md_dpo->dpoi_index);
+
+ md->md_domain = ~0;
+ dpo_copy (&dpo_v6, md_dpo);
+ dpo_reset (&dpo);
+
+ goto route_add;
+ }
+ }
+
+ if (d->flags & MAP_DOMAIN_TRANSLATION)
+ map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
+ else
+ map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6);
+
+route_add:
+ /*
+ * Create ip6 route. This is a reference counted add. If the prefix
+ * already exists and is MAP sourced, it is now MAP source n+1 times
+ * and will need to be removed n+1 times.
+ */
+ fib_table_entry_special_dpo_add (0, &pfx6,
+ FIB_SOURCE_MAP,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6);
+ dpo_reset (&dpo_v6);
+
+ /* Validate packet/byte counters */
+ map_domain_counter_lock (mm);
+ int i;
+ for (i = 0; i < vec_len (mm->simple_domain_counters); i++)
+ {
+ vlib_validate_simple_counter (&mm->simple_domain_counters[i],
+ *map_domain_index);
+ vlib_zero_simple_counter (&mm->simple_domain_counters[i],
+ *map_domain_index);
+ }
+ for (i = 0; i < vec_len (mm->domain_counters); i++)
+ {
+ vlib_validate_combined_counter (&mm->domain_counters[i],
+ *map_domain_index);
+ vlib_zero_combined_counter (&mm->domain_counters[i], *map_domain_index);
+ }
+ map_domain_counter_unlock (mm);
+
+ return 0;
+}
+
+/*
+ * map_delete_domain
+ */
+int
+map_delete_domain (u32 map_domain_index)
+{
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ clib_warning ("MAP domain delete: domain does not exist: %d",
+ map_domain_index);
+ return -1;
+ }
+
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = d->ip4_prefix_len,
+ .fp_addr = {
+ .ip4 = d->ip4_prefix,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP);
+
+ fib_prefix_t pfx6 = {
+ .fp_proto = FIB_PROTOCOL_IP6,
+ .fp_len = d->ip6_src_len,
+ .fp_addr = {
+ .ip6 = d->ip6_src,
+ }
+ ,
+ };
+ fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP);
+
+ /* Deleting rules */
+ if (d->rules)
+ clib_mem_free (d->rules);
+
+ pool_put (mm->domains, d);
+
+ return 0;
+}
+
+int
+map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
+ u8 is_add)
+{
+ map_domain_t *d;
+ map_main_t *mm = &map_main;
+
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ clib_warning ("MAP rule: domain does not exist: %d", map_domain_index);
+ return -1;
+ }
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+
+ /* Rules are only used in 1:1 independent case */
+ if (d->ea_bits_len > 0)
+ return (-1);
+
+ if (!d->rules)
+ {
+ u32 l = (0x1 << d->psid_length) * sizeof (ip6_address_t);
+ d->rules = clib_mem_alloc_aligned (l, CLIB_CACHE_LINE_BYTES);
+ if (!d->rules)
+ return -1;
+ memset (d->rules, 0, l);
+ }
+
+ if (psid >= (0x1 << d->psid_length))
+ {
+ clib_warning ("MAP rule: PSID outside bounds: %d [%d]", psid,
+ 0x1 << d->psid_length);
+ return -1;
+ }
+
+ if (is_add)
+ {
+ d->rules[psid] = *tep;
+ }
+ else
+ {
+ memset (&d->rules[psid], 0, sizeof (ip6_address_t));
+ }
+ return 0;
+}
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+static void
+map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6)
+{
+ map_main_t *mm = &map_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)
+ {
+ // FIXME NOT an ADJ
+ mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6);
+ clib_warning ("FIB lookup results in: %u", mm->adj6_index);
+ }
+ if (ip4->as_u32 != 0)
+ {
+ // FIXME NOT an ADJ
+ mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4);
+ clib_warning ("FIB lookup results in: %u", mm->adj4_index);
+ }
+}
+#endif
+
+static clib_error_t *
+map_security_check_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "off"))
+ mm->sec_check = false;
+ else if (unformat (line_input, "on"))
+ mm->sec_check = true;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+ return 0;
+}
+
+static clib_error_t *
+map_security_check_frag_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "off"))
+ mm->sec_check_frag = false;
+ else if (unformat (line_input, "on"))
+ mm->sec_check_frag = true;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+ return 0;
+}
+
+static clib_error_t *
+map_add_domain_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+ u32 ip6_prefix_len = 0, ip4_prefix_len = 0, map_domain_index, ip6_src_len;
+ u32 num_m_args = 0;
+ /* Optional arguments */
+ u32 ea_bits_len = 0, psid_offset = 0, psid_length = 0;
+ u32 mtu = 0;
+ u8 flags = 0;
+ ip6_src_len = 128;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix,
+ &ip4_prefix_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix,
+ &ip6_prefix_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src,
+ &ip6_src_len))
+ num_m_args++;
+ else
+ if (unformat
+ (line_input, "ip6-src %U", unformat_ip6_address, &ip6_src))
+ num_m_args++;
+ else if (unformat (line_input, "ea-bits-len %d", &ea_bits_len))
+ num_m_args++;
+ else if (unformat (line_input, "psid-offset %d", &psid_offset))
+ num_m_args++;
+ else if (unformat (line_input, "psid-len %d", &psid_length))
+ num_m_args++;
+ else if (unformat (line_input, "mtu %d", &mtu))
+ num_m_args++;
+ else if (unformat (line_input, "map-t"))
+ flags |= MAP_DOMAIN_TRANSLATION;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (num_m_args < 3)
+ return clib_error_return (0, "mandatory argument(s) missing");
+
+ map_create_domain (&ip4_prefix, ip4_prefix_len,
+ &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len,
+ ea_bits_len, psid_offset, psid_length, &map_domain_index,
+ mtu, flags);
+
+ return 0;
+}
+
+static clib_error_t *
+map_del_domain_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 num_m_args = 0;
+ u32 map_domain_index;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &map_domain_index))
+ num_m_args++;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (num_m_args != 1)
+ return clib_error_return (0, "mandatory argument(s) missing");
+
+ map_delete_domain (map_domain_index);
+
+ return 0;
+}
+
+static clib_error_t *
+map_add_rule_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip6_address_t tep;
+ u32 num_m_args = 0;
+ u32 psid = 0, map_domain_index;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &map_domain_index))
+ num_m_args++;
+ else if (unformat (line_input, "psid %d", &psid))
+ num_m_args++;
+ else
+ if (unformat (line_input, "ip6-dst %U", unformat_ip6_address, &tep))
+ num_m_args++;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (num_m_args != 3)
+ return clib_error_return (0, "mandatory argument(s) missing");
+
+ if (map_add_del_psid (map_domain_index, psid, &tep, 1) != 0)
+ {
+ return clib_error_return (0, "Failing to add Mapping Rule");
+ }
+ return 0;
+}
+
+#if MAP_SKIP_IP6_LOOKUP
+static clib_error_t *
+map_pre_resolve_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4nh;
+ ip6_address_t ip6nh;
+ map_main_t *mm = &map_main;
+
+ memset (&ip4nh, 0, sizeof (ip4nh));
+ memset (&ip6nh, 0, sizeof (ip6nh));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh))
+ mm->preresolve_ip4 = ip4nh;
+ else
+ if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh))
+ mm->preresolve_ip6 = ip6nh;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ map_pre_resolve (&ip4nh, &ip6nh);
+
+ return 0;
+}
+#endif
+
+static clib_error_t *
+map_icmp_relay_source_address_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t icmp_src_address;
+ map_main_t *mm = &map_main;
+
+ mm->icmp4_src_address.as_u32 = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (line_input, "%U", unformat_ip4_address, &icmp_src_address))
+ mm->icmp4_src_address = icmp_src_address;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ return 0;
+}
+
+static clib_error_t *
+map_icmp_unreachables_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ int num_m_args = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ num_m_args++;
+ if (unformat (line_input, "on"))
+ mm->icmp6_enabled = true;
+ else if (unformat (line_input, "off"))
+ mm->icmp6_enabled = false;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+
+ if (num_m_args != 1)
+ return clib_error_return (0, "mandatory argument(s) missing");
+
+ return 0;
+}
+
+static clib_error_t *
+map_fragment_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "inner"))
+ mm->frag_inner = true;
+ else if (unformat (line_input, "outer"))
+ mm->frag_inner = false;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ return 0;
+}
+
+static clib_error_t *
+map_fragment_df_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ mm->frag_ignore_df = true;
+ else if (unformat (line_input, "off"))
+ mm->frag_ignore_df = false;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ return 0;
+}
+
+static clib_error_t *
+map_traffic_class_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ u32 tc = 0;
+
+ mm->tc_copy = false;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "copy"))
+ mm->tc_copy = true;
+ else if (unformat (line_input, "%x", &tc))
+ mm->tc = tc & 0xff;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ return 0;
+}
+
+static u8 *
+format_map_domain (u8 * s, va_list * args)
+{
+ map_domain_t *d = va_arg (*args, map_domain_t *);
+ bool counters = va_arg (*args, int);
+ map_main_t *mm = &map_main;
+ ip6_address_t ip6_prefix;
+
+ if (d->rules)
+ memset (&ip6_prefix, 0, sizeof (ip6_prefix));
+ else
+ ip6_prefix = d->ip6_prefix;
+
+ s = format (s,
+ "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s",
+ d - mm->domains,
+ format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len,
+ format_ip6_address, &ip6_prefix, d->ip6_prefix_len,
+ format_ip6_address, &d->ip6_src, d->ip6_src_len,
+ d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu,
+ (d->flags & MAP_DOMAIN_TRANSLATION) ? "map-t" : "");
+
+ if (counters)
+ {
+ map_domain_counter_lock (mm);
+ vlib_counter_t v;
+ vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_TX],
+ d - mm->domains, &v);
+ s = format (s, " TX: %lld/%lld", v.packets, v.bytes);
+ vlib_get_combined_counter (&mm->domain_counters[MAP_DOMAIN_COUNTER_RX],
+ d - mm->domains, &v);
+ s = format (s, " RX: %lld/%lld", v.packets, v.bytes);
+ map_domain_counter_unlock (mm);
+ }
+ s = format (s, "\n");
+
+ if (d->rules)
+ {
+ int i;
+ ip6_address_t dst;
+ for (i = 0; i < (0x1 << d->psid_length); i++)
+ {
+ dst = d->rules[i];
+ if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0)
+ continue;
+ s = format (s,
+ " rule psid: %d ip6-dst %U\n", i, format_ip6_address,
+ &dst);
+ }
+ }
+ return s;
+}
+
+static u8 *
+format_map_ip4_reass (u8 * s, va_list * args)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *r = va_arg (*args, map_ip4_reass_t *);
+ map_ip4_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now (mm->vlib_main);
+ f64 lifetime = (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000);
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format (s,
+ "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n",
+ format_ip4_address, &k->src.as_u8, format_ip4_address,
+ &k->dst.as_u8, k->protocol,
+ clib_net_to_host_u16 (k->fragment_id),
+ (r->port >= 0) ? clib_net_to_host_u16 (r->port) : -1, dt);
+ return s;
+}
+
+static u8 *
+format_map_ip6_reass (u8 * s, va_list * args)
+{
+ map_main_t *mm = &map_main;
+ map_ip6_reass_t *r = va_arg (*args, map_ip6_reass_t *);
+ map_ip6_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now (mm->vlib_main);
+ f64 lifetime = (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000);
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format (s,
+ "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n",
+ format_ip6_address, &k->src.as_u8, format_ip6_address,
+ &k->dst.as_u8, k->protocol,
+ clib_net_to_host_u32 (k->fragment_id), dt);
+ return s;
+}
+
+static clib_error_t *
+show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ bool counters = false;
+ u32 map_domain_index = ~0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "counters"))
+ counters = true;
+ else if (unformat (line_input, "index %d", &map_domain_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output (vm, "No MAP domains are configured...");
+
+ if (map_domain_index == ~0)
+ {
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);}));
+ /* *INDENT-ON* */
+ }
+ else
+ {
+ if (pool_is_free_index (mm->domains, map_domain_index))
+ {
+ return clib_error_return (0, "MAP domain does not exists %d",
+ map_domain_index);
+ }
+
+ d = pool_elt_at_index (mm->domains, map_domain_index);
+ vlib_cli_output (vm, "%U", format_map_domain, d, counters);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+show_map_fragments_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *f4;
+ map_ip6_reass_t *f6;
+
+ /* *INDENT-OFF* */
+ pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);}));
+ /* *INDENT-ON* */
+ /* *INDENT-OFF* */
+ pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);}));
+ /* *INDENT-ON* */
+ return (0);
+}
+
+u64
+map_error_counter_get (u32 node_index, map_error_t map_error)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_error_t e = error_node->errors[map_error];
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ u32 ci;
+
+ ci = vlib_error_get_code (e);
+ ASSERT (ci < n->n_errors);
+ ci += n->error_heap_index;
+
+ return (em->counters[ci]);
+}
+
+static clib_error_t *
+show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ int domains = 0, rules = 0, domaincount = 0, rulecount = 0;
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output (vm, "No MAP domains are configured...");
+
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains, ({
+ if (d->rules) {
+ rulecount+= 0x1 << d->psid_length;
+ rules += sizeof(ip6_address_t) * 0x1 << d->psid_length;
+ }
+ domains += sizeof(*d);
+ domaincount++;
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t));
+ vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains);
+ vlib_cli_output (vm, "MAP rules: %d (%d bytes)\n", rulecount, rules);
+ vlib_cli_output (vm, "Total: %d bytes)\n", rules + domains);
+
+#if MAP_SKIP_IP6_LOOKUP
+ vlib_cli_output (vm,
+ "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n",
+ format_ip6_address, &mm->preresolve_ip6, mm->adj6_index,
+ format_ip4_address, &mm->preresolve_ip4, mm->adj4_index);
+#endif
+
+ if (mm->tc_copy)
+ vlib_cli_output (vm, "MAP traffic-class: copy");
+ else
+ vlib_cli_output (vm, "MAP traffic-class: %x", mm->tc);
+
+ vlib_cli_output (vm,
+ "MAP IPv6 inbound security check: %s, fragmented packet security check: %s",
+ mm->sec_check ? "enabled" : "disabled",
+ mm->sec_check_frag ? "enabled" : "disabled");
+
+ vlib_cli_output (vm, "ICMP-relay IPv4 source address: %U\n",
+ format_ip4_address, &mm->icmp4_src_address);
+ vlib_cli_output (vm, "ICMP6 unreachables sent for unmatched packets: %s\n",
+ mm->icmp6_enabled ? "enabled" : "disabled");
+ vlib_cli_output (vm, "Inner fragmentation: %s\n",
+ mm->frag_inner ? "enabled" : "disabled");
+ vlib_cli_output (vm, "Fragment packets regardless of DF flag: %s\n",
+ mm->frag_ignore_df ? "enabled" : "disabled");
+
+ /*
+ * Counters
+ */
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u64 total_pkts[MAP_N_DOMAIN_COUNTER];
+ u64 total_bytes[MAP_N_DOMAIN_COUNTER];
+ int which, i;
+ vlib_counter_t v;
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ map_domain_counter_lock (mm);
+ vec_foreach (cm, mm->domain_counters)
+ {
+ which = cm - mm->domain_counters;
+
+ for (i = 0; i < vec_len (cm->maxi); i++)
+ {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+ map_domain_counter_unlock (mm);
+
+ vlib_cli_output (vm, "Encapsulated packets: %lld bytes: %lld\n",
+ total_pkts[MAP_DOMAIN_COUNTER_TX],
+ total_bytes[MAP_DOMAIN_COUNTER_TX]);
+ vlib_cli_output (vm, "Decapsulated packets: %lld bytes: %lld\n",
+ total_pkts[MAP_DOMAIN_COUNTER_RX],
+ total_bytes[MAP_DOMAIN_COUNTER_RX]);
+
+ vlib_cli_output (vm, "ICMP relayed packets: %d\n",
+ vlib_get_simple_counter (&mm->icmp_relayed, 0));
+
+ return 0;
+}
+
+static clib_error_t *
+map_params_reass_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 lifetime = ~0;
+ f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1);
+ u32 pool_size = ~0;
+ u64 buffers = ~(0ull);
+ u8 ip4 = 0, ip6 = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "lifetime %u", &lifetime))
+ ;
+ else if (unformat (line_input, "ht-ratio %lf", &ht_ratio))
+ ;
+ else if (unformat (line_input, "pool-size %u", &pool_size))
+ ;
+ else if (unformat (line_input, "buffers %llu", &buffers))
+ ;
+ else if (unformat (line_input, "ip4"))
+ ip4 = 1;
+ else if (unformat (line_input, "ip6"))
+ ip6 = 1;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "invalid input");
+ }
+ }
+ unformat_free (line_input);
+
+ if (!ip4 && !ip6)
+ return clib_error_return (0, "must specify ip4 and/or ip6");
+
+ if (ip4)
+ {
+ if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return (0, "invalid ip4-reass pool-size ( > %d)",
+ MAP_IP4_REASS_CONF_POOL_SIZE_MAX);
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)
+ && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return (0, "invalid ip4-reass ht-ratio ( > %d)",
+ MAP_IP4_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return (0, "invalid ip4-reass lifetime ( > %d)",
+ MAP_IP4_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return (0, "invalid ip4-reass buffers ( > %ld)",
+ MAP_IP4_REASS_CONF_BUFFERS_MAX);
+ }
+
+ if (ip6)
+ {
+ if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return (0, "invalid ip6-reass pool-size ( > %d)",
+ MAP_IP6_REASS_CONF_POOL_SIZE_MAX);
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1)
+ && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return (0, "invalid ip6-reass ht-log2len ( > %d)",
+ MAP_IP6_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return (0, "invalid ip6-reass lifetime ( > %d)",
+ MAP_IP6_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return (0, "invalid ip6-reass buffers ( > %ld)",
+ MAP_IP6_REASS_CONF_BUFFERS_MAX);
+ }
+
+ if (ip4)
+ {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0)
+ {
+ if (map_ip4_reass_conf_pool_size (pool_size, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip4-reass pool-size");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1))
+ {
+ if (map_ip4_reass_conf_ht_ratio (ht_ratio, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip4-reass ht-log2len");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (lifetime != ~0)
+ {
+ if (map_ip4_reass_conf_lifetime (lifetime))
+ vlib_cli_output (vm, "Could not set ip4-reass lifetime");
+ else
+ vlib_cli_output (vm, "Setting ip4-reass lifetime");
+ }
+ if (buffers != ~(0ull))
+ {
+ if (map_ip4_reass_conf_buffers (buffers))
+ vlib_cli_output (vm, "Could not set ip4-reass buffers");
+ else
+ vlib_cli_output (vm, "Setting ip4-reass buffers");
+ }
+
+ if (map_main.ip4_reass_conf_buffers >
+ map_main.ip4_reass_conf_pool_size *
+ MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY)
+ {
+ vlib_cli_output (vm,
+ "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ if (ip6)
+ {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0)
+ {
+ if (map_ip6_reass_conf_pool_size (pool_size, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip6-reass pool-size");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX + 1))
+ {
+ if (map_ip6_reass_conf_ht_ratio (ht_ratio, &reass, &packets))
+ {
+ vlib_cli_output (vm, "Could not set ip6-reass ht-log2len");
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)",
+ reass, packets);
+ }
+ }
+ if (lifetime != ~0)
+ {
+ if (map_ip6_reass_conf_lifetime (lifetime))
+ vlib_cli_output (vm, "Could not set ip6-reass lifetime");
+ else
+ vlib_cli_output (vm, "Setting ip6-reass lifetime");
+ }
+ if (buffers != ~(0ull))
+ {
+ if (map_ip6_reass_conf_buffers (buffers))
+ vlib_cli_output (vm, "Could not set ip6-reass buffers");
+ else
+ vlib_cli_output (vm, "Setting ip6-reass buffers");
+ }
+
+ if (map_main.ip6_reass_conf_buffers >
+ map_main.ip6_reass_conf_pool_size *
+ MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY)
+ {
+ vlib_cli_output (vm,
+ "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * packet trace format function
+ */
+u8 *
+format_map_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ map_trace_t *t = va_arg (*args, map_trace_t *);
+ u32 map_domain_index = t->map_domain_index;
+ u16 port = t->port;
+
+ s =
+ format (s, "MAP domain index: %d L4 port: %u", map_domain_index,
+ clib_net_to_host_u16 (port));
+
+ return s;
+}
+
+static_always_inline map_ip4_reass_t *
+map_ip4_reass_lookup (map_ip4_reass_key_t * k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip4_reass_hash_table[bucket];
+ while (ri != MAP_REASS_INDEX_NONE)
+ {
+ map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri);
+ if (r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ now < r->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000))
+ {
+ return r;
+ }
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool)
+
+void
+map_ip4_reass_free (map_ip4_reass_t * r, u32 ** pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_get_fragments (r, pi_to_drop);
+
+ // Unlink in hash bucket
+ map_ip4_reass_t *r2 = NULL;
+ u32 r2i = mm->ip4_reass_hash_table[r->bucket];
+ while (r2i != map_ip4_reass_pool_index (r))
+ {
+ ASSERT (r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index (mm->ip4_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2)
+ {
+ r2->bucket_next = r->bucket_next;
+ }
+ else
+ {
+ mm->ip4_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip4_reass_pool_index (r))
+ {
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+ else
+ {
+ if (mm->ip4_reass_fifo_last == map_ip4_reass_pool_index (r))
+ mm->ip4_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next =
+ r->fifo_next;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev =
+ r->fifo_prev;
+ }
+
+ pool_put (mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated--;
+}
+
+map_ip4_reass_t *
+map_ip4_reass_get (u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 ** pi_to_drop)
+{
+ map_ip4_reass_t *r;
+ map_main_t *mm = &map_main;
+ map_ip4_reass_key_t k = {.src.data_u32 = src,
+ .dst.data_u32 = dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol
+ };
+
+ u32 h = 0;
+ h = crc_u32 (k.as_u32[0], h);
+ h = crc_u32 (k.as_u32[1], h);
+ h = crc_u32 (k.as_u32[2], h);
+ h = crc_u32 (k.as_u32[3], h);
+ h = h >> (32 - mm->ip4_reass_ht_log2len);
+
+ f64 now = vlib_time_now (mm->vlib_main);
+
+ //Cache garbage collection
+ while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ map_ip4_reass_t *last =
+ pool_elt_at_index (mm->ip4_reass_pool, mm->ip4_reass_fifo_last);
+ if (last->ts + (((f64) mm->ip4_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip4_reass_free (last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip4_reass_lookup (&k, h, now)))
+ return r;
+
+ if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size)
+ return NULL;
+
+ pool_get (mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated++;
+ int i;
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ r->fragments[i] = ~0;
+
+ u32 ri = map_ip4_reass_pool_index (r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip4_reass_hash_table[h];
+ mm->ip4_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ r->fifo_next =
+ pool_elt_at_index (mm->ip4_reass_pool,
+ mm->ip4_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip4_reass_fifo_last;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index (mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri;
+ }
+ else
+ {
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip4_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->port = -1;
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+
+ return r;
+}
+
+int
+map_ip4_reass_add_fragment (map_ip4_reass_t * r, u32 pi)
+{
+ if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers)
+ return -1;
+
+ int i;
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i] == ~0)
+ {
+ r->fragments[i] = pi;
+ map_main.ip4_reass_buffered_counter++;
+ return 0;
+ }
+ return -1;
+}
+
+static_always_inline map_ip6_reass_t *
+map_ip6_reass_lookup (map_ip6_reass_key_t * k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip6_reass_hash_table[bucket];
+ while (ri != MAP_REASS_INDEX_NONE)
+ {
+ map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri);
+ if (now < r->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) &&
+ r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ r->key.as_u64[2] == k->as_u64[2] &&
+ r->key.as_u64[3] == k->as_u64[3] &&
+ r->key.as_u64[4] == k->as_u64[4])
+ return r;
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool)
+
+void
+map_ip6_reass_free (map_ip6_reass_t * r, u32 ** pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i].pi != ~0)
+ {
+ vec_add1 (*pi_to_drop, r->fragments[i].pi);
+ r->fragments[i].pi = ~0;
+ map_main.ip6_reass_buffered_counter--;
+ }
+
+ // Unlink in hash bucket
+ map_ip6_reass_t *r2 = NULL;
+ u32 r2i = mm->ip6_reass_hash_table[r->bucket];
+ while (r2i != map_ip6_reass_pool_index (r))
+ {
+ ASSERT (r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index (mm->ip6_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2)
+ {
+ r2->bucket_next = r->bucket_next;
+ }
+ else
+ {
+ mm->ip6_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip6_reass_pool_index (r))
+ {
+ //Single element in the list, list is now empty
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+ else
+ {
+ if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index (r)) //First element
+ mm->ip6_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next =
+ r->fifo_next;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev =
+ r->fifo_prev;
+ }
+
+ // Free from pool if necessary
+ pool_put (mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated--;
+}
+
+map_ip6_reass_t *
+map_ip6_reass_get (ip6_address_t * src, ip6_address_t * dst, u32 fragment_id,
+ u8 protocol, u32 ** pi_to_drop)
+{
+ map_ip6_reass_t *r;
+ map_main_t *mm = &map_main;
+ map_ip6_reass_key_t k = {
+ .src = *src,
+ .dst = *dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol
+ };
+
+ u32 h = 0;
+ int i;
+ for (i = 0; i < 10; i++)
+ h = crc_u32 (k.as_u32[i], h);
+ h = h >> (32 - mm->ip6_reass_ht_log2len);
+
+ f64 now = vlib_time_now (mm->vlib_main);
+
+ //Cache garbage collection
+ while (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ map_ip6_reass_t *last =
+ pool_elt_at_index (mm->ip6_reass_pool, mm->ip6_reass_fifo_last);
+ if (last->ts + (((f64) mm->ip6_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip6_reass_free (last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip6_reass_lookup (&k, h, now)))
+ return r;
+
+ if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size)
+ return NULL;
+
+ pool_get (mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated++;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ r->fragments[i].pi = ~0;
+ r->fragments[i].next_data_len = 0;
+ r->fragments[i].next_data_offset = 0;
+ }
+
+ u32 ri = map_ip6_reass_pool_index (r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip6_reass_hash_table[h];
+ mm->ip6_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ r->fifo_next =
+ pool_elt_at_index (mm->ip6_reass_pool,
+ mm->ip6_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip6_reass_fifo_last;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index (mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri;
+ }
+ else
+ {
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip6_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->ip4_header.ip_version_and_header_length = 0;
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+ return r;
+}
+
+int
+map_ip6_reass_add_fragment (map_ip6_reass_t * r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 * data_start, u16 data_len)
+{
+ map_ip6_fragment_t *f = NULL, *prev_f = NULL;
+ u16 copied_len = (data_len > 20) ? 20 : data_len;
+
+ if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers)
+ return -1;
+
+ //Lookup for fragments for the current buffer
+ //and the one before that
+ int i;
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ {
+ if (data_offset && r->fragments[i].next_data_offset == data_offset)
+ {
+ prev_f = &r->fragments[i]; // This is buffer for previous packet
+ }
+ else if (r->fragments[i].next_data_offset == next_data_offset)
+ {
+ f = &r->fragments[i]; // This is a buffer for the current packet
+ }
+ else if (r->fragments[i].next_data_offset == 0)
+ { //Available
+ if (f == NULL)
+ f = &r->fragments[i];
+ else if (prev_f == NULL)
+ prev_f = &r->fragments[i];
+ }
+ }
+
+ if (!f || f->pi != ~0)
+ return -1;
+
+ if (data_offset)
+ {
+ if (!prev_f)
+ return -1;
+
+ clib_memcpy (prev_f->next_data, data_start, copied_len);
+ prev_f->next_data_len = copied_len;
+ prev_f->next_data_offset = data_offset;
+ }
+ else
+ {
+ if (((ip4_header_t *) data_start)->ip_version_and_header_length != 0x45)
+ return -1;
+
+ if (r->ip4_header.ip_version_and_header_length == 0)
+ clib_memcpy (&r->ip4_header, data_start, sizeof (ip4_header_t));
+ }
+
+ if (data_len > 20)
+ {
+ f->next_data_offset = next_data_offset;
+ f->pi = pi;
+ map_main.ip6_reass_buffered_counter++;
+ }
+ return 0;
+}
+
+void
+map_ip4_reass_reinit (u32 * trashed_reass, u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ int i;
+
+ if (dropped_packets)
+ *dropped_packets = mm->ip4_reass_buffered_counter;
+ if (trashed_reass)
+ *trashed_reass = mm->ip4_reass_allocated;
+ if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ u16 ri = mm->ip4_reass_fifo_last;
+ do
+ {
+ map_ip4_reass_t *r = pool_elt_at_index (mm->ip4_reass_pool, ri);
+ for (i = 0; i < MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i] != ~0)
+ map_ip4_drop_pi (r->fragments[i]);
+
+ ri = r->fifo_next;
+ pool_put (mm->ip4_reass_pool, r);
+ }
+ while (ri != mm->ip4_reass_fifo_last);
+ }
+
+ vec_free (mm->ip4_reass_hash_table);
+ vec_resize (mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len);
+ for (i = 0; i < (1 << mm->ip4_reass_ht_log2len); i++)
+ mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free (mm->ip4_reass_pool);
+ pool_alloc (mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size);
+
+ mm->ip4_reass_allocated = 0;
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ mm->ip4_reass_buffered_counter = 0;
+}
+
+u8
+map_get_ht_log2len (f32 ht_ratio, u16 pool_size)
+{
+ u32 desired_size = (u32) (pool_size * ht_ratio);
+ u8 i;
+ for (i = 1; i < 31; i++)
+ if ((1 << i) >= desired_size)
+ return i;
+ return 4;
+}
+
+int
+map_ip4_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip4_reass_lock ();
+ mm->ip4_reass_conf_ht_ratio = ht_ratio;
+ mm->ip4_reass_ht_log2len =
+ map_get_ht_log2len (ht_ratio, mm->ip4_reass_conf_pool_size);
+ map_ip4_reass_reinit (trashed_reass, dropped_packets);
+ map_ip4_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip4_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip4_reass_lock ();
+ mm->ip4_reass_conf_pool_size = pool_size;
+ map_ip4_reass_reinit (trashed_reass, dropped_packets);
+ map_ip4_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip4_reass_conf_lifetime (u16 lifetime_ms)
+{
+ map_main.ip4_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+int
+map_ip4_reass_conf_buffers (u32 buffers)
+{
+ map_main.ip4_reass_conf_buffers = buffers;
+ return 0;
+}
+
+void
+map_ip6_reass_reinit (u32 * trashed_reass, u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (dropped_packets)
+ *dropped_packets = mm->ip6_reass_buffered_counter;
+ if (trashed_reass)
+ *trashed_reass = mm->ip6_reass_allocated;
+ int i;
+ if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE)
+ {
+ u16 ri = mm->ip6_reass_fifo_last;
+ do
+ {
+ map_ip6_reass_t *r = pool_elt_at_index (mm->ip6_reass_pool, ri);
+ for (i = 0; i < MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i].pi != ~0)
+ map_ip6_drop_pi (r->fragments[i].pi);
+
+ ri = r->fifo_next;
+ pool_put (mm->ip6_reass_pool, r);
+ }
+ while (ri != mm->ip6_reass_fifo_last);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+
+ vec_free (mm->ip6_reass_hash_table);
+ vec_resize (mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len);
+ for (i = 0; i < (1 << mm->ip6_reass_ht_log2len); i++)
+ mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free (mm->ip6_reass_pool);
+ pool_alloc (mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size);
+
+ mm->ip6_reass_allocated = 0;
+ mm->ip6_reass_buffered_counter = 0;
+}
+
+int
+map_ip6_reass_conf_ht_ratio (f32 ht_ratio, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip6_reass_lock ();
+ mm->ip6_reass_conf_ht_ratio = ht_ratio;
+ mm->ip6_reass_ht_log2len =
+ map_get_ht_log2len (ht_ratio, mm->ip6_reass_conf_pool_size);
+ map_ip6_reass_reinit (trashed_reass, dropped_packets);
+ map_ip6_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip6_reass_conf_pool_size (u16 pool_size, u32 * trashed_reass,
+ u32 * dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip6_reass_lock ();
+ mm->ip6_reass_conf_pool_size = pool_size;
+ map_ip6_reass_reinit (trashed_reass, dropped_packets);
+ map_ip6_reass_unlock ();
+ return 0;
+}
+
+int
+map_ip6_reass_conf_lifetime (u16 lifetime_ms)
+{
+ map_main.ip6_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+int
+map_ip6_reass_conf_buffers (u32 buffers)
+{
+ map_main.ip6_reass_conf_buffers = buffers;
+ return 0;
+}
+
+/* *INDENT-OFF* */
+
+/*?
+ * Configure MAP reassembly behaviour
+ *
+ * @cliexpar
+ * @cliexstart{map params reassembly}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = {
+ .path = "map params reassembly",
+ .short_help = "map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] "
+ "[pool-size <pool-size>] [buffers <buffers>] "
+ "[ht-ratio <ht-ratio>]",
+ .function = map_params_reass_command_fn,
+};
+
+/*?
+ * Set or copy the IP TOS/Traffic Class field
+ *
+ * @cliexpar
+ * @cliexstart{map params traffic-class}
+ *
+ * This command is used to set the traffic-class field in translated
+ * or encapsulated packets. If copy is specifed (the default) then the
+ * traffic-class/TOS field is copied from the original packet to the
+ * translated / encapsulating header.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_traffic_class_command, static) = {
+ .path = "map params traffic-class",
+ .short_help = "map params traffic-class {0x0-0xff | copy}",
+ .function = map_traffic_class_command_fn,
+};
+
+/*?
+ * Bypass IP4/IP6 lookup
+ *
+ * @cliexpar
+ * @cliexstart{map params pre-resolve}
+ *
+ * Bypass a second FIB lookup of the translated or encapsulated
+ * packet, and forward the packet directly to the specified
+ * next-hop. This optimization trades forwarding flexibility for
+ * performance.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_pre_resolve_command, static) = {
+ .path = "map params pre-resolve",
+ .short_help = " map params pre-resolve {ip4-nh <address>} "
+ "| {ip6-nh <address>}",
+ .function = map_pre_resolve_command_fn,
+};
+
+/*?
+ * Enable or disable the MAP-E inbound security check
+ *
+ * @cliexpar
+ * @cliexstart{map params security-check}
+ *
+ * By default, a decapsulated packet's IPv4 source address will be
+ * verified against the outer header's IPv6 source address. Disabling
+ * this feature will allow IPv4 source address spoofing.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_security_check_command, static) = {
+ .path = "map params security-check",
+ .short_help = "map params security-check on|off",
+ .function = map_security_check_command_fn,
+};
+
+/*?
+ * Specifiy the IPv4 source address used for relayed ICMP error messages
+ *
+ * @cliexpar
+ * @cliexstart{map params icmp source-address}
+ *
+ * This command specifies which IPv4 source address (must be local to
+ * the system), that is used for relayed received IPv6 ICMP error
+ * messages.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = {
+ .path = "map params icmp source-address",
+ .short_help = "map params icmp source-address <ip4-address>",
+ .function = map_icmp_relay_source_address_command_fn,
+};
+
+/*?
+ * Send IPv6 ICMP unreachables
+ *
+ * @cliexpar
+ * @cliexstart{map params icmp6 unreachables}
+ *
+ * Send IPv6 ICMP unreachable messages back if security check fails or
+ * no MAP domain exists.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_icmp_unreachables_command, static) = {
+ .path = "map params icmp6 unreachables",
+ .short_help = "map params icmp6 unreachables {on|off}",
+ .function = map_icmp_unreachables_command_fn,
+};
+
+/*?
+ * Configure MAP fragmentation behaviour
+ *
+ * @cliexpar
+ * @cliexstart{map params fragment}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_fragment_command, static) = {
+ .path = "map params fragment",
+ .short_help = "map params fragment inner|outer",
+ .function = map_fragment_command_fn,
+};
+
+/*?
+ * Ignore the IPv4 Don't fragment bit
+ *
+ * @cliexpar
+ * @cliexstart{map params fragment ignore-df}
+ *
+ * Allows fragmentation of the IPv4 packet even if the DF bit is
+ * set. The choice between inner or outer fragmentation of tunnel
+ * packets is complicated. The benefit of inner fragmentation is that
+ * the ultimate endpoint must reassemble, instead of the tunnel
+ * endpoint.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_fragment_df_command, static) = {
+ .path = "map params fragment ignore-df",
+ .short_help = "map params fragment ignore-df on|off",
+ .function = map_fragment_df_command_fn,
+};
+
+/*?
+ * Specifiy if the inbound security check should be done on fragments
+ *
+ * @cliexpar
+ * @cliexstart{map params security-check fragments}
+ *
+ * Typically the inbound on-decapsulation security check is only done
+ * on the first packet. The packet that contains the L4
+ * information. While a security check on every fragment is possible,
+ * it has a cost. State must be created on the first fragment.
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_security_check_frag_command, static) = {
+ .path = "map params security-check fragments",
+ .short_help = "map params security-check fragments on|off",
+ .function = map_security_check_frag_command_fn,
+};
+
+/*?
+ * Add MAP domain
+ *
+ * @cliexpar
+ * @cliexstart{map add domain}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_add_domain_command, static) = {
+ .path = "map add domain",
+ .short_help = "map add domain ip4-pfx <ip4-pfx> ip6-pfx <ip6-pfx> "
+ "ip6-src <ip6-pfx> ea-bits-len <n> psid-offset <n> psid-len <n> "
+ "[map-t] [mtu <mtu>]",
+ .function = map_add_domain_command_fn,
+};
+
+/*?
+ * Add MAP rule to a domain
+ *
+ * @cliexpar
+ * @cliexstart{map add rule}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_add_rule_command, static) = {
+ .path = "map add rule",
+ .short_help = "map add rule index <domain> psid <psid> ip6-dst <ip6-addr>",
+ .function = map_add_rule_command_fn,
+};
+
+/*?
+ * Delete MAP domain
+ *
+ * @cliexpar
+ * @cliexstart{map del domain}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(map_del_command, static) = {
+ .path = "map del domain",
+ .short_help = "map del domain index <domain>",
+ .function = map_del_domain_command_fn,
+};
+
+/*?
+ * Show MAP domains
+ *
+ * @cliexpar
+ * @cliexstart{show map domain}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_domain_command, static) = {
+ .path = "show map domain",
+ .short_help = "show map domain index <n> [counters]",
+ .function = show_map_domain_command_fn,
+};
+
+/*?
+ * Show MAP statistics
+ *
+ * @cliexpar
+ * @cliexstart{show map stats}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_stats_command, static) = {
+ .path = "show map stats",
+ .short_help = "show map stats",
+ .function = show_map_stats_command_fn,
+};
+
+/*?
+ * Show MAP fragmentation information
+ *
+ * @cliexpar
+ * @cliexstart{show map fragments}
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND(show_map_fragments_command, static) = {
+ .path = "show map fragments",
+ .short_help = "show map fragments",
+ .function = show_map_fragments_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * map_init
+ */
+clib_error_t *
+map_init (vlib_main_t * vm)
+{
+ map_main_t *mm = &map_main;
+ mm->vnet_main = vnet_get_main ();
+ mm->vlib_main = vm;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+ memset (&mm->preresolve_ip4, 0, sizeof (mm->preresolve_ip4));
+ memset (&mm->preresolve_ip6, 0, sizeof (mm->preresolve_ip6));
+ mm->adj4_index = 0;
+ mm->adj6_index = 0;
+#endif
+
+ /* traffic class */
+ mm->tc = 0;
+ mm->tc_copy = true;
+
+ /* Inbound security check */
+ mm->sec_check = true;
+ mm->sec_check_frag = false;
+
+ /* ICMP6 Type 1, Code 5 for security check failure */
+ mm->icmp6_enabled = false;
+
+ /* Inner or outer fragmentation */
+ mm->frag_inner = false;
+ mm->frag_ignore_df = false;
+
+ vec_validate (mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1);
+ mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx";
+ mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx";
+
+ vlib_validate_simple_counter (&mm->icmp_relayed, 0);
+ vlib_zero_simple_counter (&mm->icmp_relayed, 0);
+
+ /* IP4 virtual reassembly */
+ mm->ip4_reass_hash_table = 0;
+ mm->ip4_reass_pool = 0;
+ mm->ip4_reass_lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT;
+ mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT;
+ mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT;
+ mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT;
+ mm->ip4_reass_ht_log2len =
+ map_get_ht_log2len (mm->ip4_reass_conf_ht_ratio,
+ mm->ip4_reass_conf_pool_size);
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip4_reass_reinit (NULL, NULL);
+
+ /* IP6 virtual reassembly */
+ mm->ip6_reass_hash_table = 0;
+ mm->ip6_reass_pool = 0;
+ mm->ip6_reass_lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT;
+ mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT;
+ mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT;
+ mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT;
+ mm->ip6_reass_ht_log2len =
+ map_get_ht_log2len (mm->ip6_reass_conf_ht_ratio,
+ mm->ip6_reass_conf_pool_size);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip6_reass_reinit (NULL, NULL);
+
+ map_dpo_module_init ();
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (map_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h
new file mode 100644
index 00000000000..f446b739a93
--- /dev/null
+++ b/src/vnet/map/map.h
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdbool.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_types.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/adj/adj.h>
+#include <vnet/map/map_dpo.h>
+#include <vnet/dpo/load_balance.h>
+
+#define MAP_SKIP_IP6_LOOKUP 1
+
+typedef enum
+{
+ MAP_SENDER,
+ MAP_RECEIVER
+} map_dir_e;
+
+int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
+ ip6_address_t * ip6_src, u8 ip6_src_len,
+ u8 ea_bits_len, u8 psid_offset, u8 psid_length,
+ u32 * map_domain_index, u16 mtu, u8 flags);
+int map_delete_domain (u32 map_domain_index);
+int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep,
+ u8 is_add);
+u8 *format_map_trace (u8 * s, va_list * args);
+i32 ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len);
+i32 ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len);
+u16 ip4_map_get_port (ip4_header_t * ip, map_dir_e dir);
+
+typedef enum __attribute__ ((__packed__))
+{
+ MAP_DOMAIN_PREFIX = 1 << 0, MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T
+} map_domain_flags_e;
+
+/**
+ * IP4 reassembly logic:
+ * One virtually reassembled flow requires a map_ip4_reass_t structure in order
+ * to keep the first-fragment port number and, optionally, cache out of sequence
+ * packets.
+ * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures.
+ * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets.
+ * When a new structure needs to be used, it is allocated from available ones.
+ * If there is no structure available, the oldest in use is selected and used if and
+ * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago.
+ * In case no structure can be allocated, the fragment is dropped.
+ */
+
+#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly
+
+#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5
+
+#define MAP_IP6_REASS_COUNT_BYTES
+#define MAP_IP4_REASS_COUNT_BYTES
+
+//#define IP6_MAP_T_OVERRIDE_TOS 0
+
+/*
+ * This structure _MUST_ be no larger than a single cache line (64 bytes).
+ * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive.
+ */
+typedef struct
+{
+ ip6_address_t ip6_src;
+ ip6_address_t ip6_prefix;
+ ip6_address_t *rules;
+ u32 suffix_mask;
+ ip4_address_t ip4_prefix;
+ u16 psid_mask;
+ u16 mtu;
+ map_domain_flags_e flags;
+ u8 ip6_prefix_len;
+ u8 ip6_src_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+
+ /* helpers */
+ u8 psid_shift;
+ u8 suffix_shift;
+ u8 ea_shift;
+
+ /* not used by forwarding */
+ u8 ip4_prefix_len;
+} map_domain_t;
+
+STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES),
+ "MAP domain fits in one cacheline");
+
+#define MAP_REASS_INDEX_NONE ((u16)0xffff)
+
+/*
+ * Hash key, padded out to 16 bytes for fast compare
+ */
+/* *INDENT-OFF* */
+typedef union {
+ CLIB_PACKED (struct {
+ ip4_address_t src;
+ ip4_address_t dst;
+ u16 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[2];
+ u32 as_u32[4];
+} map_ip4_reass_key_t;
+/* *INDENT-ON* */
+
+typedef struct
+{
+ map_ip4_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ i32 port;
+ u16 bucket;
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip4_reass_t;
+
+/*
+ * MAP domain counters
+ */
+typedef enum
+{
+ /* Simple counters */
+ MAP_DOMAIN_IPV4_FRAGMENT = 0,
+ /* Combined counters */
+ MAP_DOMAIN_COUNTER_RX = 0,
+ MAP_DOMAIN_COUNTER_TX,
+ MAP_N_DOMAIN_COUNTER
+} map_domain_counter_t;
+
+/*
+ * main_main_t
+ */
+/* *INDENT-OFF* */
+typedef union {
+ CLIB_PACKED (struct {
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[5];
+ u32 as_u32[10];
+} map_ip6_reass_key_t;
+/* *INDENT-OFF* */
+
+typedef struct {
+ u32 pi; //Cached packet or ~0
+ u16 next_data_offset; //The data offset of the additional 20 bytes or ~0
+ u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment)
+ u8 next_data[20]; //The 20 additional bytes
+} map_ip6_fragment_t;
+
+typedef struct {
+ map_ip6_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ u16 bucket; //What hash bucket this element is linked in
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ ip4_header_t ip4_header;
+ map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip6_reass_t;
+
+typedef struct {
+ /* pool of MAP domains */
+ map_domain_t *domains;
+
+ /* MAP Domain packet/byte counters indexed by map domain index */
+ vlib_simple_counter_main_t *simple_domain_counters;
+ vlib_combined_counter_main_t *domain_counters;
+ volatile u32 *counter_lock;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+ /* pre-presolve */
+ u32 adj6_index, adj4_index;
+ ip4_address_t preresolve_ip4;
+ ip6_address_t preresolve_ip6;
+#endif
+
+ /* Traffic class: zero, copy (~0) or fixed value */
+ u8 tc;
+ bool tc_copy;
+
+ bool sec_check; /* Inbound security check */
+ bool sec_check_frag; /* Inbound security check for (subsequent) fragments */
+ bool icmp6_enabled; /* Send destination unreachable for security check failure */
+
+ /* ICMPv6 -> ICMPv4 relay parameters */
+ ip4_address_t icmp4_src_address;
+ vlib_simple_counter_main_t icmp_relayed;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /*
+ * IPv4 encap and decap reassembly
+ */
+ /* Configuration */
+ f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly
+
+ /* Runtime */
+ map_ip4_reass_t *ip4_reass_pool;
+ u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip4_reass_allocated;
+ u16 *ip4_reass_hash_table;
+ u16 ip4_reass_fifo_last;
+ volatile u32 *ip4_reass_lock;
+
+ /* Counters */
+ u32 ip4_reass_buffered_counter;
+
+ bool frag_inner; /* Inner or outer fragmentation */
+ bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */
+
+ /*
+ * IPv6 decap reassembly
+ */
+ /* Configuration */
+ f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly
+
+ /* Runtime */
+ map_ip6_reass_t *ip6_reass_pool;
+ u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip6_reass_allocated;
+ u16 *ip6_reass_hash_table;
+ u16 ip6_reass_fifo_last;
+ volatile u32 *ip6_reass_lock;
+
+ /* Counters */
+ u32 ip6_reass_buffered_counter;
+
+} map_main_t;
+
+/*
+ * MAP Error counters/messages
+ */
+#define foreach_map_error \
+ /* Must be first. */ \
+ _(NONE, "valid MAP packets") \
+ _(BAD_PROTOCOL, "bad protocol") \
+ _(SEC_CHECK, "security check failed") \
+ _(ENCAP_SEC_CHECK, "encap security check failed") \
+ _(DECAP_SEC_CHECK, "decap security check failed") \
+ _(ICMP, "unable to translate ICMP") \
+ _(ICMP_RELAY, "unable to relay ICMP") \
+ _(UNKNOWN, "unknown") \
+ _(NO_BINDING, "no binding") \
+ _(NO_DOMAIN, "no domain") \
+ _(FRAGMENTED, "packet is a fragment") \
+ _(FRAGMENT_MEMORY, "could not cache fragment") \
+ _(FRAGMENT_MALFORMED, "fragment has unexpected format")\
+ _(FRAGMENT_DROPPED, "dropped cached fragment") \
+ _(MALFORMED, "malformed packet") \
+ _(DF_SET, "can't fragment, DF set")
+
+typedef enum {
+#define _(sym,str) MAP_ERROR_##sym,
+ foreach_map_error
+#undef _
+ MAP_N_ERROR,
+ } map_error_t;
+
+u64 map_error_counter_get(u32 node_index, map_error_t map_error);
+
+typedef struct {
+ u32 map_domain_index;
+ u16 port;
+} map_trace_t;
+
+map_main_t map_main;
+
+extern vlib_node_registration_t ip4_map_node;
+extern vlib_node_registration_t ip6_map_node;
+
+extern vlib_node_registration_t ip4_map_t_node;
+extern vlib_node_registration_t ip4_map_t_fragmented_node;
+extern vlib_node_registration_t ip4_map_t_tcp_udp_node;
+extern vlib_node_registration_t ip4_map_t_icmp_node;
+
+extern vlib_node_registration_t ip6_map_t_node;
+extern vlib_node_registration_t ip6_map_t_fragmented_node;
+extern vlib_node_registration_t ip6_map_t_tcp_udp_node;
+extern vlib_node_registration_t ip6_map_t_icmp_node;
+
+/*
+ * map_get_pfx
+ */
+static_always_inline u64
+map_get_pfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[0]);
+
+ u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask;
+ u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid;
+
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift;
+}
+
+static_always_inline u64
+map_get_pfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+/*
+ * map_get_sfx
+ */
+static_always_inline u64
+map_get_sfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ /* Shared 1:1 mode. */
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[1]);
+ if (d->ip6_prefix_len == 128)
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]);
+
+ /* IPv4 prefix */
+ if (d->flags & MAP_DOMAIN_PREFIX)
+ return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16;
+
+ /* Shared or full IPv4 address */
+ return ((u64) addr << 16) | psid;
+}
+
+static_always_inline u64
+map_get_sfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+static_always_inline u32
+map_get_ip4 (ip6_address_t *addr)
+{
+ return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16);
+}
+
+/*
+ * Get the MAP domain from an IPv4 lookup adjacency.
+ */
+static_always_inline map_domain_t *
+ip4_map_get_domain (u32 mdi,
+ u32 *map_domain_index)
+{
+ map_main_t *mm = &map_main;
+ map_dpo_t *md;
+
+ md = map_dpo_get(mdi);
+
+ ASSERT(md);
+ *map_domain_index = md->md_domain;
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+}
+
+/*
+ * Get the MAP domain from an IPv6 lookup adjacency.
+ * If the IPv6 address or prefix is not shared, no lookup is required.
+ * The IPv4 address is used otherwise.
+ */
+static_always_inline map_domain_t *
+ip6_map_get_domain (u32 mdi, ip4_address_t *addr,
+ u32 *map_domain_index, u8 *error)
+{
+ map_main_t *mm = &map_main;
+ map_dpo_t *md;
+
+ /*
+ * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA.
+ * (That's done implicitly when MAP domain is looked up in the IPv4 FIB)
+ */
+#ifdef MAP_NONSHARED_DOMAIN_ENABLED
+ md = map_dpo_get(mdi);
+
+ ASSERT(md);
+ *map_domain_index = md->md_domain;
+ if (*map_domain_index != ~0)
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+#endif
+
+ u32 lbi = ip4_fib_forwarding_lookup(0, addr);
+ const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0);
+ if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type ||
+ dpo->dpoi_type == map_t_dpo_type))
+ {
+ md = map_dpo_get(dpo->dpoi_index);
+ *map_domain_index = md->md_domain;
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+ }
+ *error = MAP_ERROR_NO_DOMAIN;
+ return NULL;
+}
+
+map_ip4_reass_t *
+map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {}
+#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0)
+
+static_always_inline void
+map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi)
+{
+ int i;
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if(r->fragments[i] != ~0) {
+ vec_add1(*pi, r->fragments[i]);
+ r->fragments[i] = ~0;
+ map_main.ip4_reass_buffered_counter--;
+ }
+}
+
+int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi);
+
+map_ip6_reass_t *
+map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {}
+#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0)
+
+int
+map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 *data_start, u16 data_len);
+
+void map_ip4_drop_pi(u32 pi);
+
+int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100
+int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip4_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip4_reass_conf_buffers(u32 buffers);
+#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+void map_ip6_drop_pi(u32 pi);
+
+
+int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100
+int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip6_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip6_reass_conf_buffers(u32 buffers);
+#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+static_always_inline
+int ip6_parse(const ip6_header_t *ip6, u32 buff_len,
+ u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset)
+{
+ if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
+ *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr;
+ *frag_hdr_offset = sizeof(*ip6);
+ *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t);
+ } else {
+ *l4_protocol = ip6->protocol;
+ *frag_hdr_offset = 0;
+ *l4_offset = sizeof(*ip6);
+ }
+
+ return (buff_len < (*l4_offset + 4)) ||
+ (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6)));
+}
+
+
+#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index)
+#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val))
+
+#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
+
+static_always_inline void
+ip4_map_t_embedded_address (map_domain_t *d,
+ ip6_address_t *ip6, const ip4_address_t *ip4)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ ip6->as_u64[0] = d->ip6_src.as_u64[0];
+ ip6->as_u32[2] = d->ip6_src.as_u32[2];
+ ip6->as_u32[3] = ip4->as_u32;
+}
+
+static_always_inline u32
+ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ return addr->as_u32[3];
+}
+
+static inline void
+map_domain_counter_lock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ while (__sync_lock_test_and_set(mm->counter_lock, 1))
+ /* zzzz */ ;
+}
+static inline void
+map_domain_counter_unlock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ *mm->counter_lock = 0;
+}
+
+
+static_always_inline void
+map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector,
+ vlib_node_runtime_t *node, vlib_error_t *error,
+ u32 next)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ //Deal with fragments that are ready
+ from = pi_vector;
+ n_left_from = vec_len(pi_vector);
+ next_index = node->cached_next_index;
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0);
+ p0->error = *error;
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map_api.c b/src/vnet/map/map_api.c
new file mode 100644
index 00000000000..7febeb3d7a3
--- /dev/null
+++ b/src/vnet/map/map_api.c
@@ -0,0 +1,295 @@
+/*
+ *------------------------------------------------------------------
+ * map_api.c - vnet map api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include "map.h"
+#include <vnet/api_errno.h>
+#include <vnet/ip/ip.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(MAP_ADD_DOMAIN, map_add_domain) \
+_(MAP_DEL_DOMAIN, map_del_domain) \
+_(MAP_ADD_DEL_RULE, map_add_del_rule) \
+_(MAP_DOMAIN_DUMP, map_domain_dump) \
+_(MAP_RULE_DUMP, map_rule_dump) \
+_(MAP_SUMMARY_STATS, map_summary_stats)
+
+static void
+vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp)
+{
+ vl_api_map_add_domain_reply_t *rmp;
+ int rv = 0;
+ u32 index;
+ u8 flags = mp->is_translation ? MAP_DOMAIN_TRANSLATION : 0;
+ rv =
+ map_create_domain ((ip4_address_t *) & mp->ip4_prefix, mp->ip4_prefix_len,
+ (ip6_address_t *) & mp->ip6_prefix, mp->ip6_prefix_len,
+ (ip6_address_t *) & mp->ip6_src,
+ mp->ip6_src_prefix_len, mp->ea_bits_len,
+ mp->psid_offset, mp->psid_length, &index,
+ ntohs (mp->mtu), flags);
+
+ /* *INDENT-OFF* */
+ REPLY_MACRO2(VL_API_MAP_ADD_DOMAIN_REPLY,
+ ({
+ rmp->index = ntohl(index);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_map_del_domain_t_handler (vl_api_map_del_domain_t * mp)
+{
+ vl_api_map_del_domain_reply_t *rmp;
+ int rv = 0;
+
+ rv = map_delete_domain (ntohl (mp->index));
+
+ REPLY_MACRO (VL_API_MAP_DEL_DOMAIN_REPLY);
+}
+
+static void
+vl_api_map_add_del_rule_t_handler (vl_api_map_add_del_rule_t * mp)
+{
+ vl_api_map_del_domain_reply_t *rmp;
+ int rv = 0;
+
+ rv =
+ map_add_del_psid (ntohl (mp->index), ntohs (mp->psid),
+ (ip6_address_t *) mp->ip6_dst, mp->is_add);
+
+ REPLY_MACRO (VL_API_MAP_ADD_DEL_RULE_REPLY);
+}
+
+static void
+vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp)
+{
+ vl_api_map_domain_details_t *rmp;
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ unix_shared_memory_queue_t *q;
+
+ if (pool_elts (mm->domains) == 0)
+ return;
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ /* *INDENT-OFF* */
+ pool_foreach(d, mm->domains,
+ ({
+ /* Make sure every field is initiated (or don't skip the memset()) */
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs(VL_API_MAP_DOMAIN_DETAILS);
+ rmp->domain_index = htonl(d - mm->domains);
+ rmp->ea_bits_len = d->ea_bits_len;
+ rmp->psid_offset = d->psid_offset;
+ rmp->psid_length = d->psid_length;
+ clib_memcpy(rmp->ip4_prefix, &d->ip4_prefix, sizeof(rmp->ip4_prefix));
+ rmp->ip4_prefix_len = d->ip4_prefix_len;
+ clib_memcpy(rmp->ip6_prefix, &d->ip6_prefix, sizeof(rmp->ip6_prefix));
+ rmp->ip6_prefix_len = d->ip6_prefix_len;
+ clib_memcpy(rmp->ip6_src, &d->ip6_src, sizeof(rmp->ip6_src));
+ rmp->ip6_src_len = d->ip6_src_len;
+ rmp->mtu = htons(d->mtu);
+ rmp->is_translation = (d->flags & MAP_DOMAIN_TRANSLATION);
+ rmp->context = mp->context;
+
+ vl_msg_api_send_shmem (q, (u8 *)&rmp);
+ }));
+ /* *INDENT-ON* */
+}
+
+static void
+vl_api_map_rule_dump_t_handler (vl_api_map_rule_dump_t * mp)
+{
+ unix_shared_memory_queue_t *q;
+ u16 i;
+ ip6_address_t dst;
+ vl_api_map_rule_details_t *rmp;
+ map_main_t *mm = &map_main;
+ u32 domain_index = ntohl (mp->domain_index);
+ map_domain_t *d;
+
+ if (pool_elts (mm->domains) == 0)
+ return;
+
+ d = pool_elt_at_index (mm->domains, domain_index);
+ if (!d || !d->rules)
+ {
+ return;
+ }
+
+ q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q == 0)
+ {
+ return;
+ }
+
+ for (i = 0; i < (0x1 << d->psid_length); i++)
+ {
+ dst = d->rules[i];
+ if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0)
+ {
+ continue;
+ }
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MAP_RULE_DETAILS);
+ rmp->psid = htons (i);
+ clib_memcpy (rmp->ip6_dst, &dst, sizeof (rmp->ip6_dst));
+ rmp->context = mp->context;
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+ }
+}
+
+static void
+vl_api_map_summary_stats_t_handler (vl_api_map_summary_stats_t * mp)
+{
+ vl_api_map_summary_stats_reply_t *rmp;
+ vlib_combined_counter_main_t *cm;
+ vlib_counter_t v;
+ int i, which;
+ u64 total_pkts[VLIB_N_RX_TX];
+ u64 total_bytes[VLIB_N_RX_TX];
+ map_main_t *mm = &map_main;
+ unix_shared_memory_queue_t *q =
+ vl_api_client_index_to_input_queue (mp->client_index);
+
+ if (!q)
+ return;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_MAP_SUMMARY_STATS_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = 0;
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ map_domain_counter_lock (mm);
+ vec_foreach (cm, mm->domain_counters)
+ {
+ which = cm - mm->domain_counters;
+
+ for (i = 0; i < vec_len (cm->maxi); i++)
+ {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+
+ map_domain_counter_unlock (mm);
+
+ /* Note: in network byte order! */
+ rmp->total_pkts[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_RX]);
+ rmp->total_bytes[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_RX]);
+ rmp->total_pkts[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (total_pkts[MAP_DOMAIN_COUNTER_TX]);
+ rmp->total_bytes[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (total_bytes[MAP_DOMAIN_COUNTER_TX]);
+ rmp->total_bindings = clib_host_to_net_u64 (pool_elts (mm->domains));
+ rmp->total_ip4_fragments = 0; // Not yet implemented. Should be a simple counter.
+ rmp->total_security_check[MAP_DOMAIN_COUNTER_TX] =
+ clib_host_to_net_u64 (map_error_counter_get
+ (ip4_map_node.index, MAP_ERROR_ENCAP_SEC_CHECK));
+ rmp->total_security_check[MAP_DOMAIN_COUNTER_RX] =
+ clib_host_to_net_u64 (map_error_counter_get
+ (ip4_map_node.index, MAP_ERROR_DECAP_SEC_CHECK));
+
+ vl_msg_api_send_shmem (q, (u8 *) & rmp);
+}
+
+/*
+ * vpe_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has alread mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+#define vl_msg_name_crc_list
+#include <vnet/vnet_all_api_h.h>
+#undef vl_msg_name_crc_list
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_map;
+#undef _
+}
+
+static clib_error_t *
+map_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = &api_main;
+
+#define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (map_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/map/map_doc.md b/src/vnet/map/map_doc.md
new file mode 100644
index 00000000000..17f3c51174b
--- /dev/null
+++ b/src/vnet/map/map_doc.md
@@ -0,0 +1,69 @@
+# VPP MAP and Lw4o6 implementation {#map_doc}
+
+This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations.
+Everything that is not directly obvious should come here.
+
+
+
+## MAP-E Virtual Reassembly
+
+The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments.
+
+Fragment caching and handling is not always necessary. It is performed when:
+* An IPv4 fragment is received and the destination IPv4 address is shared.
+* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on.
+* An IPv6 fragment is received.
+
+There are 3 dedicated nodes:
+* ip4-map-reass
+* ip6-map-ip4-reass
+* ip6-map-ip6-reass
+
+ip4-map sends all fragments to ip4-map-reass.
+ip6-map sends all inner-fragments to ip6-map-ip4-reass.
+ip6-map sends all outer-fragments to ip6-map-ip6-reass.
+
+IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes.
+
+An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received.
+
+#### Virtual Reassembly configuration
+
+IPv4 and IPv6 virtual reassembly support the following configuration:
+ map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
+
+lifetime:
+ The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 people. Those values are not realistic for high-throughput cases.
+
+buffers:
+ The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool.
+
+pool-size:
+ The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total.
+
+ht-ratio:
+ The amount of buckets in the hash-table is pool-size * ht-ratio.
+
+
+Any time pool-size and ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost.
+
+
+##### Additional considerations
+
+Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of loosing state for fragments that are wide appart.
+
+Let:
+R be the packet rate at which fragments are received.
+F be the number of fragments per packet.
+
+Assuming the first fragment is always received last. We should have:
+buffers > lifetime * R / F * (F - 1)
+pool-size > lifetime * R/F
+
+This is a worst case. Receiving the first fragment earlier helps reducing the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reducing 'pool-size'.
+
+But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, 5Mpps packet rate would require buffering up to 2.5 millions fragments.
+
+If you want to do that, be prepared to configure a lot of fragments.
+
+
diff --git a/src/vnet/map/map_dpo.c b/src/vnet/map/map_dpo.c
new file mode 100644
index 00000000000..df2b5fa4197
--- /dev/null
+++ b/src/vnet/map/map_dpo.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/map/map_dpo.h>
+
+/**
+ * pool of all MPLS Label DPOs
+ */
+map_dpo_t *map_dpo_pool;
+
+/**
+ * The register MAP DPO type
+ */
+dpo_type_t map_dpo_type;
+dpo_type_t map_t_dpo_type;
+
+static map_dpo_t *
+map_dpo_alloc (void)
+{
+ map_dpo_t *md;
+
+ pool_get_aligned(map_dpo_pool, md, CLIB_CACHE_LINE_BYTES);
+ memset(md, 0, sizeof(*md));
+
+ return (md);
+}
+
+static index_t
+map_dpo_get_index (map_dpo_t *md)
+{
+ return (md - map_dpo_pool);
+}
+
+void
+map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_alloc();
+ md->md_domain = domain_index;
+ md->md_proto = dproto;
+
+ dpo_set(dpo,
+ map_dpo_type,
+ dproto,
+ map_dpo_get_index(md));
+}
+
+void
+map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_alloc();
+ md->md_domain = domain_index;
+ md->md_proto = dproto;
+
+ dpo_set(dpo,
+ map_t_dpo_type,
+ dproto,
+ map_dpo_get_index(md));
+}
+
+
+u8*
+format_map_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ map_dpo_t *md;
+
+ md = map_dpo_get(index);
+
+ return (format(s, "map:[%d]:%U domain:%d",
+ index,
+ format_dpo_proto, md->md_proto,
+ md->md_domain));
+}
+
+u8*
+format_map_t_dpo (u8 *s, va_list *args)
+{
+ index_t index = va_arg (*args, index_t);
+ CLIB_UNUSED(u32 indent) = va_arg (*args, u32);
+ map_dpo_t *md;
+
+ md = map_dpo_get(index);
+
+ return (format(s, "map-t:[%d]:%U domain:%d",
+ index,
+ format_dpo_proto, md->md_proto,
+ md->md_domain));
+}
+
+
+static void
+map_dpo_lock (dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_get(dpo->dpoi_index);
+
+ md->md_locks++;
+}
+
+static void
+map_dpo_unlock (dpo_id_t *dpo)
+{
+ map_dpo_t *md;
+
+ md = map_dpo_get(dpo->dpoi_index);
+
+ md->md_locks--;
+
+ if (0 == md->md_locks)
+ {
+ pool_put(map_dpo_pool, md);
+ }
+}
+
+const static dpo_vft_t md_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_dpo,
+};
+
+const static char* const map_ip4_nodes[] =
+{
+ "ip4-map",
+ NULL,
+};
+const static char* const map_ip6_nodes[] =
+{
+ "ip6-map",
+ NULL,
+};
+
+const static char* const * const map_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_ip4_nodes,
+ [DPO_PROTO_IP6] = map_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+const static dpo_vft_t md_t_vft = {
+ .dv_lock = map_dpo_lock,
+ .dv_unlock = map_dpo_unlock,
+ .dv_format = format_map_t_dpo,
+};
+
+const static char* const map_t_ip4_nodes[] =
+{
+ "ip4-map-t",
+ NULL,
+};
+const static char* const map_t_ip6_nodes[] =
+{
+ "ip6-map-t",
+ NULL,
+};
+
+const static char* const * const map_t_nodes[DPO_PROTO_NUM] =
+{
+ [DPO_PROTO_IP4] = map_t_ip4_nodes,
+ [DPO_PROTO_IP6] = map_t_ip6_nodes,
+ [DPO_PROTO_MPLS] = NULL,
+};
+
+void
+map_dpo_module_init (void)
+{
+ map_dpo_type = dpo_register_new_type(&md_vft, map_nodes);
+ map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes);
+}
diff --git a/src/vnet/map/map_dpo.h b/src/vnet/map/map_dpo.h
new file mode 100644
index 00000000000..be510dbaea6
--- /dev/null
+++ b/src/vnet/map/map_dpo.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MAP_DPO_H__
+#define __MAP_DPO_H__
+
+#include <vnet/vnet.h>
+#include <vnet/dpo/dpo.h>
+
+/**
+ * A representation of a MAP DPO
+ */
+typedef struct map_dpo_t
+{
+ /**
+ * The dat-plane protocol
+ */
+ dpo_proto_t md_proto;
+
+ /**
+ * the MAP domain index
+ */
+ u32 md_domain;
+
+ /**
+ * Number of locks/users of the label
+ */
+ u16 md_locks;
+} map_dpo_t;
+
+extern void map_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+extern void map_t_dpo_create (dpo_proto_t dproto,
+ u32 domain_index,
+ dpo_id_t *dpo);
+
+extern u8* format_map_dpo(u8 *s, va_list *args);
+
+/*
+ * Encapsulation violation for fast data-path access
+ */
+extern map_dpo_t *map_dpo_pool;
+extern dpo_type_t map_dpo_type;
+extern dpo_type_t map_t_dpo_type;
+
+static inline map_dpo_t *
+map_dpo_get (index_t index)
+{
+ return (pool_elt_at_index(map_dpo_pool, index));
+}
+
+extern void map_dpo_module_init(void);
+
+#endif
diff --git a/src/vnet/map/test.c b/src/vnet/map/test.c
new file mode 100644
index 00000000000..f3c893a7a31
--- /dev/null
+++ b/src/vnet/map/test.c
@@ -0,0 +1,205 @@
+/*
+ * test.c : MAP unit tests
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <assert.h>
+#include "map.h"
+
+static map_domain_t *
+get_domain(ip4_address_t * ip4_prefix, u8 ip4_prefix_len,
+ ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
+ ip6_address_t * ip6_src, u8 ip6_src_len,
+ u8 ea_bits_len, u8 psid_offset,
+ u8 psid_length, u16 mtu, u8 flags)
+{
+ map_domain_t * d = malloc(sizeof(*d));
+ u8 suffix_len;
+
+ /* EA bits must be within the first 64 bits */
+ if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64)
+ return NULL;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip6_src = *ip6_src;
+ d->ip6_src_len = ip6_src_len;
+ d->ea_bits_len = ea_bits_len;
+ d->psid_offset = psid_offset;
+ d->psid_length = psid_length;
+ d->mtu = mtu;
+ d->flags = flags;
+
+ /* How many, and which bits to grab from the IPv4 DA */
+ if (ip4_prefix_len + ea_bits_len < 32)
+ {
+ d->flags |= MAP_DOMAIN_PREFIX;
+ d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len;
+ suffix_len = ea_bits_len;
+ }
+ else
+ {
+ d->suffix_shift = 0;
+ suffix_len = 32 - ip4_prefix_len;
+ }
+ d->suffix_mask = (1 << suffix_len) - 1;
+
+ d->psid_shift = 16 - psid_length - psid_offset;
+ d->psid_mask = (1 << d->psid_length) - 1;
+
+ if (ip6_prefix_len + suffix_len + d->psid_length > 64)
+ return NULL;
+
+ d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
+
+ return d;
+}
+
+
+/*
+ * VPP-340:
+ * map_add_domain ip4-pfx 20.0.0.0/8 ip6-pfx 2001:db8::/40 ip6-src 2001:db8:ffff::/96 ea-bits-len 24 psid-offset 0 psid-len 0 map-t
+ * IPv4 src = 100.0.0.1
+ * IPv4 dst = 20.169.201.219
+ * UDP dest port = 1232
+ * IPv6 src = 2001:db8:ffff::6400:1
+ * IPv6 dst = a9c9:dfb8::14a9:c9db:0
+ * a9c9:dfb8::14a9:c9db:0 != 2001:db8:a9:c9db:0:14a9:c9db:0
+ */
+static void
+test_map_t_destaddr (void)
+{
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+
+ ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000);
+ ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000);
+ ip6_prefix.as_u64[1] = 0;
+ ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000);
+ map_domain_t * d = get_domain (&ip4_prefix, 8, &ip6_prefix, 40, &ip6_src, 96, 24, 0, 0, 0, MAP_DOMAIN_TRANSLATION);
+
+ ip6_address_t dst6;
+
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db800a9c9db);
+ assert(dst6.as_u64[1] == 0x000014a9c9db0000);
+}
+
+/*
+ * VPP-228
+ * ip4-pfx 20.0.0.0/8
+ * ip6-pfx 2001:db8::/<n>
+ * ip6-src 2001:db8:ffff::1
+ * ea-bits-len 16 psid-offset 6 psid-len 8
+ * 20.169.201.219 port 1232
+ */
+static void
+test_map_eabits (void)
+{
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip6_address_t ip6_src;
+ ip6_address_t dst6;
+
+ ip4_prefix.as_u32 = clib_host_to_net_u32(0x14000000);
+ ip6_prefix.as_u64[0] = clib_host_to_net_u64(0x20010db800000000);
+ ip6_prefix.as_u64[1] = 0;
+ ip6_src.as_u64[0] = clib_host_to_net_u64(0x20010db8ffff0000);
+ ip6_src.as_u64[1] = clib_host_to_net_u64(0x0000000000000001);
+ map_domain_t * d = get_domain (&ip4_prefix, 16, &ip6_prefix, 48, &ip6_src,
+ 128, 16, 6, 8, 0, 0);
+ assert(!d);
+
+ //20.0.0.0/8 2001:db8::/32 4 2001:db8:a000::14a0:0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 4, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a0000000);
+ assert(dst6.as_u64[1] == 0x000014a000000000);
+
+ //20.0.0.0/8 2001:db8::/32 8 2001:db8:a900::14a9:0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 8, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9000000);
+ assert(dst6.as_u64[1] == 0x000014a900000000);
+
+ //20.0.0.0/8 2001:db8::/32 10 2001:db8:a9c0::14a9:c000:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 10, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c00000);
+ assert(dst6.as_u64[1] == 0x000014a9c0000000);
+
+ //20.0.0.0/8 2001:db8::/32 16 2001:db8:a9c9::14a9:c900:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 16, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c90000);
+ assert(dst6.as_u64[1] == 0x000014a9c9000000);
+
+ //20.0.0.0/8 2001:db8::/32 20 2001:db8:a9c9:d000:0:14a9:c9d0:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 20, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c9d000);
+ assert(dst6.as_u64[1] == 0x000014a9c9d00000);
+
+ //20.0.0.0/8 2001:db8::/32 23 2001:db8:a9c9:da00:0:14a9:c9da:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 23, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a9c9da00);
+ assert(dst6.as_u64[1] == 0x000014a9c9da0000);
+
+ //20.169.201.0/24 2001:db8::/32 7 2001:db8:da00::14a9:c9da:0
+ d = get_domain (&ip4_prefix, 8, &ip6_prefix, 32, &ip6_src,
+ 128, 7, 0, 0, 0, 0);
+ dst6.as_u64[0] = map_get_pfx(d, 0x14a9c9db, 1232);
+ dst6.as_u64[1] = map_get_sfx(d, 0x14a9c9db, 1232);
+ assert(dst6.as_u64[0] == 0x20010db8a8000000);
+ assert(dst6.as_u64[1] == 0x000014a800000000);
+}
+
+#define foreach_test_case \
+ _(map_t_destaddr) \
+ _(map_eabits)
+
+static void
+run_tests (void)
+{
+#define _(_test_name) \
+ test_ ## _test_name ();
+
+ foreach_test_case
+#undef _
+}
+
+int main()
+{
+ run_tests ();
+ return 0;
+}