summaryrefslogtreecommitdiffstats
path: root/vnet/vnet/map
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/map')
-rwxr-xr-xvnet/vnet/map/examples/gen-rules.py213
-rwxr-xr-xvnet/vnet/map/examples/map-test.py214
-rw-r--r--vnet/vnet/map/examples/mapalgs.py327
-rw-r--r--vnet/vnet/map/examples/mt-test.py80
-rwxr-xr-xvnet/vnet/map/gen-rules.py107
-rw-r--r--vnet/vnet/map/ip4_map.c591
-rw-r--r--vnet/vnet/map/ip4_map_t.c1092
-rw-r--r--vnet/vnet/map/ip4_sixrd.c127
-rw-r--r--vnet/vnet/map/ip6_map.c966
-rw-r--r--vnet/vnet/map/ip6_map_t.c1141
-rw-r--r--vnet/vnet/map/ip6_sixrd.c129
-rw-r--r--vnet/vnet/map/map.c1634
-rw-r--r--vnet/vnet/map/map.h556
-rw-r--r--vnet/vnet/map/map_doc.md69
-rw-r--r--vnet/vnet/map/sixrd.c355
-rw-r--r--vnet/vnet/map/sixrd.h144
16 files changed, 7745 insertions, 0 deletions
diff --git a/vnet/vnet/map/examples/gen-rules.py b/vnet/vnet/map/examples/gen-rules.py
new file mode 100755
index 00000000000..d6746f79af4
--- /dev/null
+++ b/vnet/vnet/map/examples/gen-rules.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python3.4
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+args = parser.parse_args()
+
+#
+# 1:1 Shared IPv4 address, shared BR, Terastream
+#
def terastream():
    """Terastream layout: one /32 MAP domain per address in 20.0.0.0/22,
    64 BR source addresses shared round-robin, plus explicit PSID rules."""
    v4net = ipaddress.ip_network('20.0.0.0/22')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    br_src = ipaddress.ip_address('cccc:bbbb::')
    for idx in range(v4net.num_addresses):
        if idx % 64 == 0:
            br_src += 1  # rotate to a fresh BR source every 64 domains
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src " + str(br_src) +
              " ea-bits-len 0 psid-offset 0 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+#
+# 1:1 Shared IPv4 address, shared BR, OTE
+#
def oteshared11():
    """OTE layout: shared /32 domains out of 2.84.63.0/24; each PSID rule
    consumes one /56 from the delegation pool and binds host 255 of its
    255th /64 (index 254)."""
    v4net = ipaddress.ip_network('2.84.63.0/24')
    delegations = list(ipaddress.ip_network('2a02:580:8c00::/40').subnets(new_prefix=56))
    psid_len = 6
    sharing = 0x1 << psid_len
    br_src = ipaddress.ip_address('2a02::')
    for idx in range(v4net.num_addresses):
        if idx % 64 == 0:
            br_src += 1
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src " + str(br_src) +
              " ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            user_pfx = list(delegations.pop(0).subnets(new_prefix=64))[255 - 1]
            print("map add rule index", idx, "psid", psid, "ip6-dst", user_pfx[idx * sharing + psid])
+
+
+#
+# 1:1 Shared IPv4 address, shared BR, Terastream
+#
def confdterastream():
    """Same topology as terastream() but emitted as confd 'softwire' CLI
    lines instead of raw 'map add' commands."""
    v4net = ipaddress.ip_network('20.0.0.0/22')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    br_src = ipaddress.ip_address('cccc:bbbb::')
    for idx in range(v4net.num_addresses):
        if idx % 64 == 0:
            br_src += 1
        print("vpp softwire softwire-instances softwire-instance", idx,
              "br-ipv6 " + str(br_src) + " ipv6-prefix ::/0" + " ipv4-prefix " + str(v4net[idx]) +
              "/32 ea-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("binding", psid, "ipv6-addr", v6pool[idx * sharing + psid])
+
def shared11br_yang():
    """YANG-style softwire config: one instance per /32 in 20.0.0.0/16,
    each followed by its 2**6 PSID bindings.  Output is ~4.2M lines."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("vpp softwire softwire-instances softwire-instance " + str(idx) + " ipv4-prefix " + str(v4net[idx]) + "/32 " +
              "ipv6-prefix ::/0 ea-len 0 psid-offset 6 tunnel-mtu 1234 psid-len", psid_len)
        for psid in range(sharing):
            print("binding", psid, "ipv6-addr", v6pool[idx * sharing + psid])
+
def shared11br_xml():
    """Emit the softwire config as a cisco-vpp XML document.

    NOTE(review): the /32 rule prefix yields a single iteration, so the
    `count` guard (1024) never triggers, and `ip6_src` is computed but
    never printed — both kept as-is to preserve behavior; confirm intent.
    """
    v4net = ipaddress.ip_network('20.0.0.0/32')
    v6pool = ipaddress.ip_network('bbbb::/32')
    ip6_src = ipaddress.ip_address('cccc:bbbb::')
    psid_len = 6
    sharing = 0x1 << psid_len
    print('<vpp xmlns="http://www.cisco.com/yang/cisco-vpp"><softwire><softwire-instances>')
    remaining = 1024
    for idx in range(v4net.num_addresses):
        if idx % 64 == 0:
            ip6_src += 1
        if remaining == 0:
            break
        remaining -= 1
        print('<softwire-instance>')
        print(' <id>{}</id>'.format(idx))
        print(' <ipv4-prefix>{}/32</ipv4-prefix>'.format(v4net[idx]))
        print(' <ipv6-prefix>::/0</ipv6-prefix>')
        print(' <ea-len>0</ea-len>')
        print(' <psid-offset>0</psid-offset>')
        print(' <psid-len>{}</psid-len>'.format(psid_len))
        for psid in range(sharing):
            print(' <binding>')
            print('  <psid> {} </psid>'.format(psid))
            print('  <ipv6-addr>{}</ipv6-addr>'.format(v6pool[idx * sharing + psid]))
            print(' </binding>')
        print('</softwire-instance>')
    print('</softwire-instances></softwire>')
    print('</vpp>')
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
def shared11br():
    # NOTE(review): dead code — an identical shared11br() is defined again
    # below and rebinds the name at import time; one copy should be removed.
    # Emits one shared-BR /32 MAP domain per address in 20.0.0.0/16, each
    # with 2**psid_len explicit PSID rules.
    ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
    ip6_dst = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    for i in range(ip4_pfx.num_addresses):
        print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(0x1 << psid_len):
            print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
def shared11br():
    """Shared-BR variant: each /32 in 20.0.0.0/16 becomes a MAP domain with
    the shared BR source cccc:bbbb::1 and 2**6 explicit PSID rules."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+
+#
+# 1:1 Shared IPv4 address
+#
def shared11():
    """Per-domain BR source variant: each /32 in 20.0.0.0/16 gets its own
    ip6-src out of cccc:bbbb::/64 plus 2**6 explicit PSID rules."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+#
+# 1:1 Shared IPv4 address small
+#
def smallshared11():
    """Small (/24) version of shared11(): 256 domains, 64 PSID rules each."""
    v4net = ipaddress.ip_network('20.0.0.0/24')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+#
+# 1:1 Full IPv4 address
+#
def full11():
    """Full-address 1:1 mapping: no port sharing (psid-len 0), each /32
    paired with a /128 from bbbb::/32 and its own ip6-src."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx " + str(v6pool[idx]) + "/128 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 0 psid-len 0")
def full11br():
    """full11() with a single shared BR source instead of per-domain sources."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    v6pool = ipaddress.ip_network('bbbb::/32')
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx " + str(v6pool[idx]) + "/128 ip6-shared-src cccc:bbbb::1",
              "ea-bits-len 0 psid-offset 0 psid-len 0")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
def algo():
    """Two purely algorithmic (EA-bits) MAP domains — no explicit rules."""
    for line in (
        "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8",
        "map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0",
    ):
        print(line)
+
+#
+# IP4 forwarding
+#
def ip4():
    """Plain IPv4 forwarding baseline: one host route per /32 in 20.0.0.0/16."""
    for host in ipaddress.ip_network('20.0.0.0/16'):
        print("ip route add " + str(host) + "/32 via 172.16.0.2")
+
+
# Dispatch to the generator selected with -t.  Without the guard a missing
# or unknown mode died with a bare KeyError/TypeError traceback.
if not args.mapmode or args.mapmode not in globals():
    sys.exit("Unknown or missing mode '{}' (use -t <generator name>)".format(args.mapmode))
globals()[args.mapmode]()
+
+
diff --git a/vnet/vnet/map/examples/map-test.py b/vnet/vnet/map/examples/map-test.py
new file mode 100755
index 00000000000..01f377fb6ee
--- /dev/null
+++ b/vnet/vnet/map/examples/map-test.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys, time
+from scapy.all import *
+
+import mapalgs
+
+
# --- Test-bench knobs -------------------------------------------------
ifname = "vpp-tap"                # tap interface name handed to VPP (net_conf)

loc_v4_mac = "aa:aa:aa:aa:aa:a4"  # host-side MAC used as IPv4 frame source
loc_v6_mac = "aa:aa:aa:aa:aa:a6"  # host-side MAC used as IPv6 frame source
vpp_mac = "aa:aa:aa:aa:00:00"     # MAC assigned to VPP's end of the tap

map_t = 1                         # truthy: configure MAP-T; falsy: MAP-E (see conf())

fragsize = 0                      # 0: send whole packets; else fragment to this size
map_mtu = 200                     # 0: omit the mtu clause from the MAP domain
+
def mac_to_vppmac(mac):
    """Convert a colon MAC (aa:bb:cc:dd:ee:ff) to VPP dotted form (aabb.ccdd.eeff)."""
    digits = "".join(mac.split(':'))
    return ".".join((digits[0:4], digits[4:8], digits[8:12]))
+
+
# BMR: bbbb::/32 over 20.0.0.0/24 with sharing ratio 256 (matches the
# domain printed by conf()).  NOTE(review): the name shadows the builtin
# map() for the rest of this module.
map = mapalgs.MapCalc( rulev6 = 'bbbb::/32',
                       rulev4 = '20.0.0.0/24',
                       ratio = 256);

# DMR used to translate IPv4 destinations into the IPv6 domain (RFC 6052 /96).
dmr = mapalgs.DmrCalc('cccc:bbbb::/96')
+
+
# ICMPv4 type -> highest code swept by sendv4icmp_errors() (codes 0..max
# inclusive are sent for each type).
ICMP_TYPES_CODES = {
    0: 0,
    3: 15,
    4: 0,
    5: 3,
    6: 0,
    8: 0,
    9: 0,
    10: 0,
    11: 1,
    12: 2,
    13: 0,
    14: 0,
    15: 0,
    16: 0,
    17: 0,
    18: 0
}
+
# ICMPv6 error type -> highest code swept by sendv6icmp_errors().
ICMP6_TYPES_CODES = {
    1: 7,
    2: 0,
    3: 1,
    4: 3,
}
+
def net_conf():
    """Return the VPP CLI lines that wire up the test tap interface,
    its neighbors and default routes (names/MACs come from module globals)."""
    lines = (
        "tap connect " + ifname + " hwaddr " + mac_to_vppmac(vpp_mac) + " \n",
        "set int state tap-0 up \n",
        "set ip6 neighbor tap-0 2001:f00d::1 " + mac_to_vppmac(loc_v6_mac) + " \n",
        "set ip arp tap-0 10.0.0.1 " + mac_to_vppmac(loc_v4_mac) + " \n",
        "ip route add ::/0 via 2001:f00d::1 tap-0 \n",
        "ip route add 0.0.0.0/0 via 10.0.0.1 tap-0 \n",
    )
    return "".join(lines)
+
def conf():
    """Return the full VPP config: network setup plus one MAP domain.

    The domain is MAP-T when the module global map_t is truthy, MAP-E
    otherwise; a non-zero map_mtu adds an mtu clause."""
    cmd = net_conf()
    cmd += "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ea-bits-len 16 psid-offset 6 psid-len 8"
    if map_mtu != 0:
        cmd += " mtu " + str(map_mtu)
    suffix = " ip6-src cccc:bbbb::/96 map-t" if map_t else " ip6-src cccc:bbbb::ffff"
    return cmd + suffix + "\n"
+
def send_packet(ip_header, ip_content):
    """Emit ip_header/ip_content on the tap, fragmenting first when the
    module global fragsize is non-zero."""
    print("Send packet")
    if fragsize == 0:
        sendp(Ether(dst=vpp_mac)/ip_header/ip_content, iface=ifname)
        return
    if ip_header.version == 4:
        for piece in fragment(ip_header/ip_content, fragsize=fragsize):
            print("Fragmented IPv4 packet")
            sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/piece, iface=ifname)
    elif ip_header.version == 6:
        for piece in fragment6(ip_header/IPv6ExtHdrFragment()/ip_content, fragsize):
            print("Fragmented IPv6 packet")
            sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/piece, iface=ifname)
+
def send_packet_frag_inner(packet, inner_header, inner_content):
    """Send an ICMP error `packet`, optionally fragmenting the embedded
    (inner) offending packet when the module global fragsize is non-zero."""
    print("Send packet with inner ICMP packet")
    if fragsize == 0:
        sendp(Ether(dst=vpp_mac)/packet/inner_header/inner_content, iface=ifname)
        return
    if packet.version == 4:
        for piece in fragment(inner_header/inner_content, fragsize=fragsize):
            print("Fragmented IPv4 inner packet")
            sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/packet/piece, iface=ifname)
    elif packet.version == 6:
        for piece in fragment6(inner_header/IPv6ExtHdrFragment()/inner_content, fragsize):
            print("Fragmented IPv6 inner packet")
            sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/packet/piece, iface=ifname)
+
+
def sendv6udp(src, dst, port):
    """Send a MAP-T IPv6 UDP packet as the CE for (src, port) toward dst."""
    psid = map.gen_psid(port)
    ce_src = str(map.get_mapce_addr(src, psid))
    dmr_dst = str(dmr.embed_6052addr(dst))
    send_packet(IPv6(src=ce_src, dst=dmr_dst), UDP(sport=port)/('X'*900))
+
def sendv6tcp(src, dst, port):
    """Send a MAP-T IPv6 TCP packet as the CE for (src, port) toward dst."""
    psid = map.gen_psid(port)
    ce_src = str(map.get_mapce_addr(src, psid))
    dmr_dst = str(dmr.embed_6052addr(dst))
    send_packet(IPv6(src=ce_src, dst=dmr_dst), TCP(sport=port)/('X'*900))
+
def sendv4udp(src, dst, port):
    """Send a plain IPv4 UDP packet toward the MAP domain."""
    send_packet(IP(src=src, dst=dst), UDP(dport=port)/('X'*900))
+
def sendv4tcp(src, dst, port):
    """Send a plain IPv4 TCP packet toward the MAP domain."""
    send_packet(IP(src=src, dst=dst), TCP(dport=port)/('X'*900))
+
def sendv6ping(src, dst, id):
    """Send an ICMPv6 echo request and reply pair from the CE for (src, id)."""
    psid = map.gen_psid(id)
    ce_src = str(map.get_mapce_addr(src, psid))
    dmr_dst = str(dmr.embed_6052addr(dst))
    for echo in (ICMPv6EchoRequest(id=id, data='A'*500),
                 ICMPv6EchoReply(id=id, data='A'*500)):
        send_packet(IPv6(src=ce_src, dst=dmr_dst), echo)
+
def sendv4ping(src, dst, id):
    """Send an ICMPv4 echo reply (type 0) and request (type 8) pair."""
    for icmp_type in (0, 8):
        send_packet(IP(src=src, dst=dst), ICMP(id=id, type=icmp_type)/('X'*500))
+
def sendv4icmperr(src, dst, type, code, port, inner_src, inner_dst, payload_length):
    """Send an IPv4 ICMP error with an embedded TCP packet.

    The inner (offending) packet goes dst->src of the original flow and
    may itself be fragmented by send_packet_frag_inner().
    Fix: the original also built an unused `inner` packet left over from a
    commented-out send_packet() variant; the dead construction is removed.
    """
    send_packet_frag_inner(IP(dst=dst, src=src)/ICMP(type=type, code=code),
                           IP(dst=inner_dst, src=inner_src),
                           TCP(sport=port, dport=8888)/('X'*payload_length))
+
def sendv6icmperr(src, dst, type, code, port, payload_length):
    """Send a MAP-T ICMPv6 error from the CE for (src, port) with the
    offending flow embedded in the reverse direction."""
    psid = map.gen_psid(port)
    ce_src = str(map.get_mapce_addr(src, psid))
    dmr_dst = str(dmr.embed_6052addr(dst))
    outer = IPv6(dst=dmr_dst, src=ce_src)/ICMPv6DestUnreach(type=type, code=code)
    inner_hdr = IPv6(dst=ce_src, src=dmr_dst)  # inner packet is the reverse flow
    inner_pay = TCP(sport=8888, dport=port)/('X'*payload_length)
    send_packet_frag_inner(outer, inner_hdr, inner_pay)
+
def sendv4icmp_errors(src, dst, port, inner_src, inner_dst, payload_length):
    """Sweep every ICMPv4 type in ICMP_TYPES_CODES with each valid code.

    Fixes: loop variables no longer shadow the builtin `type`; stale
    commented-out variants removed; comment typo corrected.
    """
    for icmp_type, max_code in ICMP_TYPES_CODES.items():
        for icmp_code in range(max_code + 1):
            sendv4icmperr(src, dst, icmp_type, icmp_code, port,
                          inner_src, inner_dst, payload_length)

    # TODO: Check wrong parameter with different pointer values
+
def sendv6icmp_errors(src, dst, port, payload_length):
    """Sweep every ICMPv6 error type in ICMP6_TYPES_CODES with each valid code."""
    for icmp_type, max_code in ICMP6_TYPES_CODES.items():
        for icmp_code in range(max_code + 1):
            sendv6icmperr(src, dst, icmp_type, icmp_code, port, payload_length)
+
+
def traffic():
    """Flood all MAP test flows forever, shrinking the inter-burst delay
    by 10% per round (i.e. the load ramps up indefinitely)."""
    delay = 2.0
    while True:
        sendv6udp("20.0.0.1", "10.0.0.1", 12001)
        sendv6tcp("20.0.0.1", "10.0.0.1", 12002)
        sendv4udp("10.0.0.1", "20.0.0.1", 12003)
        sendv4tcp("10.0.0.1", "20.0.0.1", 12004)
        sendv6ping("20.0.0.1", "10.0.0.1", 12005)
        sendv4ping("10.0.0.1", "20.0.0.1", 12006)
        sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 500)
        sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 1500)
        sendv6icmp_errors("20.0.0.1", "10.0.0.1", 12006, 500)
        time.sleep(delay)
        delay *= 0.9
+
# Command-line dispatch: "conf" prints the VPP configuration, "traffic"
# floods packets forever.  Fixes: an unrecognized argument used to fall
# through silently; bare exit() replaced with sys.exit().
if len(sys.argv) <= 1 or sys.argv[1] not in ("conf", "traffic"):
    print("Usage: conf|traffic")
    sys.exit(1)

if sys.argv[1] == "conf":
    print(conf())
else:
    traffic()
diff --git a/vnet/vnet/map/examples/mapalgs.py b/vnet/vnet/map/examples/mapalgs.py
new file mode 100644
index 00000000000..50a0ed0a3ee
--- /dev/null
+++ b/vnet/vnet/map/examples/mapalgs.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+
+# The MIT License (MIT)
+#
+# Copyright (c) 2015
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# File included from https://github.com/ejordangottlieb/pyswmap
+# Thanks to jordan ;)
+# - Pierre
+#
+
+# There is still a great deal of work required on this module. Please
+# use with caution.
+# -Jordan
+
+import sys
+from ipaddress import (
+ IPv6Address,
+ IPv6Network,
+ ip_network,
+ ip_address,
+ )
+from math import (
+ log,
+ )
+
class MapCalc(object):
    """MAP (RFC 7597) Basic Mapping Rule calculator.

    Keyword arguments describing the BMR:
      rulev4     -- rule IPv4 prefix, e.g. '20.0.0.0/24' (required)
      rulev6     -- rule IPv6 prefix, e.g. 'bbbb::/32' (required)
      psidoffset -- PSID offset bits, 0..6 (default 6)
      ealen      -- EA-bits length
      ratio      -- sharing ratio, a power of two up to 2**16

    At least one of ealen/ratio must be given; if both are given they
    must agree.  Invalid input terminates the process with a message
    (this is a CLI helper, not a library-grade API).

    Fix vs. original: ipv4_index()'s error path referenced an undefined
    name (`ipv4add`), raising NameError instead of printing the message.
    """

    def __init__(self, **bmr):
        # Kept from the original API; not consulted by any method here.
        self.portranges = False

        # Validate and set BMR and BMR-derived values.
        self._check_bmr_values(bmr)

    def _check_bmr_values(self, bmr):
        """Validate the BMR dict; derive psidbits, ealen, ratio, portbits."""
        # Assume these values have not been supplied; validate later.
        self.ealen = False
        self.ratio = False

        # PSID offset defaults to 6 when not supplied.
        if 'psidoffset' not in bmr:
            self.psidoffset = 6
        else:
            self.psidoffset = self._psid_offset(bmr['psidoffset'])

        # A rule IPv4 prefix is mandatory.
        if 'rulev4' not in bmr:
            print("The rule IPv4 prefix has not been set")
            sys.exit(1)
        else:
            self.rulev4 = self._ipv4_rule(bmr['rulev4'])

        # A rule IPv6 prefix is mandatory.
        if 'rulev6' not in bmr:
            print("The rule IPv6 prefix has not been set")
            sys.exit(1)
        else:
            self.rulev6 = self._ipv6_rule(bmr['rulev6'])

        # An explicit EA length also fixes the sharing ratio.
        if 'ealen' not in bmr:
            self.ealen = False
        else:
            self.ealen = bmr['ealen']
            self.ratio = self._calc_ratio(bmr['ealen'])

        # Sharing ratio: either supplied, or already derived from ealen.
        if 'ratio' not in bmr:
            if not (self.ratio):
                self.ratio = False
        else:
            if (self.ealen):
                # Both supplied: they must be mutually consistent.
                if (bmr['ratio'] != self.ratio):
                    eavalue = "EA value {}".format(self.ealen)
                    sharingratio = "sharing ratio {}".format(bmr['ratio'])
                    print("Supplied {} and {} are contradictory".format(
                          eavalue,
                          sharingratio)
                          )
                    sys.exit(1)
            else:
                self.ratio = bmr['ratio']
                self.ealen = self._calc_ea(bmr['ratio'])

        # EA length or sharing ratio must be set.
        if not (self.ealen or self.ratio):
            print("The BMR must include an EA length or sharing ratio")
            sys.exit(1)

        # psidbits is set by _calc_ea/_calc_ratio above, so the port-bit
        # count can now be derived.
        self.portbits = self._calc_port_bits()

    def _ipv4_rule(self, rulev4):
        """Validate the rule IPv4 prefix; record its mask and network object."""
        try:
            self.rulev4mask = ip_network(
                rulev4,
                strict=False
            ).prefixlen
        except ValueError:
            print("Invalid IPv4 prefix {}".format(rulev4))
            sys.exit(1)

        self.rulev4object = ip_network(rulev4)

        return rulev4

    def _ipv6_rule(self, rulev6):
        """Validate the rule IPv6 prefix; record its mask."""
        try:
            self.rulev6mask = IPv6Network(
                rulev6,
                strict=False
            ).prefixlen
        except ValueError:
            print("Invalid IPv6 prefix {}".format(rulev6))
            sys.exit(1)

        return rulev6

    def _psid_offset(self, psidoffset):
        """Return psidoffset if within 0..6, else exit."""
        PSIDOFFSET_MAX = 6
        if psidoffset in range(0, PSIDOFFSET_MAX + 1):
            return psidoffset
        else:
            print("Invalid PSID Offset value: {}".format(psidoffset))
            sys.exit(1)

    def _psid_range(self, x):
        """Return the list of valid power-of-two ratios [2**0 .. 2**x]."""
        rset = []
        for i in range(0, x + 1):
            rset.append(2**i)
        return rset

    def _calc_port_bits(self):
        """Contiguous port bits per range: 16 - offset - psidbits.
        Requires self.psidbits to have been derived already."""
        portbits = 16 - self.psidoffset - self.psidbits
        return portbits

    def _calc_ea(self, ratio):
        """Derive psidbits and the EA length from a sharing ratio."""
        if ratio not in (self._psid_range(16)):
            print("Invalid ratio {}".format(ratio))
            print("Ratio between 2 to the power of 0 thru 16")
            sys.exit(1)

        if (1 == ratio):
            self.psidbits = 0
        else:
            self.psidbits = int(log(ratio, 2))
        ealen = self.psidbits + (32 - self.rulev4mask)
        return ealen

    def _calc_ratio(self, ealen):
        """Derive psidbits and the sharing ratio from an EA length."""
        maskbits = 32 - self.rulev4mask
        if (ealen < maskbits):
            print("EA of {} incompatible with rule IPv4 prefix {}".format(
                  ealen,
                  self.rulev4,
                  )
                  )
            print("EA length must be at least {} bits".format(
                  maskbits,
                  )
                  )
            sys.exit(1)

        self.psidbits = ealen - (32 - self.rulev4mask)
        if (self.psidbits > 16):
            print("EA length of {} is too large".format(
                  ealen,
                  )
                  )
            print("EA should not exceed {} for rule IPv4 prefix {}".format(
                  maskbits + 16,
                  self.rulev4,
                  )
                  )
            sys.exit(1)
        ratio = 2**self.psidbits
        return ratio

    def gen_psid(self, portnum):
        """Extract the PSID from a port number (exits if the port lies in
        the excluded low range implied by the PSID offset)."""
        if (portnum < self.start_port()):
            print("port value is less than allowed by PSID Offset")
            sys.exit(1)
        psid = (portnum & ((2**self.psidbits - 1) << self.portbits))
        psid = psid >> self.portbits
        return psid

    def port_ranges(self):
        """Number of contiguous port ranges per PSID (excluded range removed)."""
        return 2**self.psidoffset - 1

    def start_port(self):
        """First usable port: 0 with no offset, else 2**(16 - offset)."""
        if self.psidoffset == 0: return 0
        return 2**(16 - self.psidoffset)

    def port_list(self, psid):
        """Return every port assigned to the given PSID."""
        startrange = psid * (2**self.portbits) + self.start_port()
        increment = (2**self.psidbits) * (2**self.portbits)
        portlist = []
        for port in range(startrange, startrange + 2**self.portbits):
            if port >= 65536: continue
            portlist.append(port)
        for x in range(1, self.port_ranges()):
            startrange += increment
            for port in range(startrange, startrange + 2**self.portbits):
                portlist.append(port)
        return portlist

    def ipv4_index(self, ipv4addr):
        """Return the offset of ipv4addr within the rule IPv4 prefix;
        also caches the address in self.ipv4addr for get_mapce_addr()."""
        if ip_address(ipv4addr) in ip_network(self.rulev4):
            x = ip_address(ipv4addr)
            y = ip_network(self.rulev4, strict=False).network_address
            self.ipv4addr = x
            return (int(x) - int(y))
        else:
            # Fixed: this used to reference the undefined name `ipv4add`.
            print("Error: IPv4 address {} not in Rule IPv4 subnet {}".format(
                  ipv4addr,
                  ip_network(self.rulev4, strict=False).network_address))
            sys.exit(1)

    def _calc_ipv6bit_pos(self):
        """Bit offsets of the IPv4-suffix and PSID fields in the MAP prefix."""
        addroffset = 128 - (self.rulev6mask + (self.ealen - self.psidbits))
        psidshift = 128 - (self.rulev6mask + self.ealen)
        return [addroffset, psidshift]

    def _append_map_eabits(self, ipv4index, addroffset, psidshift, psid):
        """OR the EA bits (IPv4 suffix + PSID) into the rule IPv6 prefix."""
        rulev6base = IPv6Network(self.rulev6, strict=False).network_address
        map_prefix = int(rulev6base) | (ipv4index << addroffset)
        map_fullprefix = map_prefix | (psid << psidshift)
        return map_fullprefix

    def get_mapce_addr(self, ipv4addr, psid):
        """Return the full MAP CE IPv6 address (prefix + v4-embedded IID)."""
        ipv4index = self.ipv4_index(ipv4addr)
        (addroffset, psidshift) = self._calc_ipv6bit_pos()
        map_fullprefix = self._append_map_eabits(ipv4index,
                                                 addroffset,
                                                 psidshift,
                                                 psid)
        # Interface identifier carries the IPv4 address and the PSID.
        mapv4iid = map_fullprefix | (int(self.ipv4addr) << 16)
        map_full_address = mapv4iid | psid
        mapce_address = "{}".format(IPv6Address(map_full_address))
        return mapce_address

    def get_mapce_prefix(self, ipv4addr, psid):
        """Return the MAP CE end-user IPv6 prefix (rule prefix + EA bits)."""
        ipv4index = self.ipv4_index(ipv4addr)
        (addroffset, psidshift) = self._calc_ipv6bit_pos()
        map_fullprefix = self._append_map_eabits(ipv4index,
                                                 addroffset,
                                                 psidshift,
                                                 psid)
        mapce_prefix = "{}/{}".format(
            IPv6Address(map_fullprefix),
            self.rulev6mask + self.ealen
        )
        return mapce_prefix

    def get_map_ipv4(self, mapce_address):
        """Recover the embedded IPv4 address from a MAP CE IPv6 address."""
        ipv4 = (int(IPv6Address(mapce_address)) & (0xffffffff << 16)) >> 16
        return ip_address(ipv4)
+
+
+
class DmrCalc(object):
    """Default Mapping Rule helper: embeds IPv4 addresses into an IPv6
    prefix per RFC 6052 section 2.2.

    Fixes vs. original:
      * embed_6052addr() silently returned None for /32, /40, /48 and /56
        prefixes even though _check_dmr_prefix() accepts them; all RFC 6052
        prefix lengths are now handled.
      * the invalid-prefix error path referenced the undefined name
        `prefix`, raising NameError instead of printing the message.
    """

    def __init__(self, dmr):
        # Validate the DMR prefix; keeps the IPv6Network in self.dmrprefix
        # and its length in self.dmrmask.
        self.dmrprefix = self._check_dmr_prefix(dmr)

    def embed_6052addr(self, ipv4addr):
        """Return the RFC 6052 IPv4-embedded IPv6 address for ipv4addr."""
        try:
            v4 = int(ip_address(ipv4addr))
        except ValueError:
            print("Invalid IPv4 address {}".format(ipv4addr))
            sys.exit(1)

        plen = self.dmrprefix.prefixlen
        base = int(self.dmrprefix.network_address)
        # RFC 6052 2.2: the IPv4 bits follow the prefix, but bits 64..71
        # (the "u" octet) must stay zero, so /40, /48 and /56 split the
        # IPv4 address around it.
        if plen == 32:
            return IPv6Address(base + (v4 << 64))
        if plen == 40:
            return IPv6Address(base + ((v4 >> 8) << 64) + ((v4 & 0xff) << 48))
        if plen == 48:
            return IPv6Address(base + ((v4 >> 16) << 64) + ((v4 & 0xffff) << 40))
        if plen == 56:
            return IPv6Address(base + ((v4 >> 24) << 64) + ((v4 & 0xffffff) << 32))
        if plen == 64:
            return IPv6Address(base + (v4 << 24))
        # _check_dmr_prefix() guarantees plen is an RFC 6052 value, so
        # this is the /96 case: IPv4 occupies the low 32 bits.
        return IPv6Address(base + v4)

    def _check_dmr_prefix(self, dmrprefix):
        """Validate the DMR prefix string and mask; exit on bad input."""
        try:
            self.dmrmask = IPv6Network(
                dmrprefix,
                strict=False
            ).prefixlen
        except ValueError:
            # Fixed: this used to reference the undefined name `prefix`.
            print("Invalid IPv6 prefix {}".format(dmrprefix))
            sys.exit(1)

        if self.dmrmask not in (32, 40, 48, 56, 64, 96):
            print("Invalid prefix mask /{}".format(self.dmrmask))
            sys.exit(1)

        return IPv6Network(dmrprefix)
+
if __name__ == "__main__":
    # Smoke test: construct a DMR and show its parsed prefix.
    demo = DmrCalc('fd80::/48')
    print(demo.dmrprefix)
diff --git a/vnet/vnet/map/examples/mt-test.py b/vnet/vnet/map/examples/mt-test.py
new file mode 100644
index 00000000000..62d269c7a13
--- /dev/null
+++ b/vnet/vnet/map/examples/mt-test.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2009-2014 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import threading
+import time
+from scapy.all import *
+from Queue import *
+
+iface = 'veth1'
+
class SnifferThread(threading.Thread) :
    """Background scapy sniffer: captures packets matching a BPF filter on
    one interface for `timeout` seconds, then puts the capture list on the
    supplied queue.  (Python 2 script: the __init__ print below emits a
    tuple, preserved as-is.)"""
    def __init__(self,q,iface,flt,timeout) :
        threading.Thread.__init__(self)
        self.q = q              # queue the capture result is delivered on
        self.iface = iface      # interface to sniff
        self.timeout = timeout  # sniff duration in seconds
        self.flt = flt          # BPF capture filter
        print("Sniffers reporting for service on ",self.iface)

    def run(self) :
        # Point scapy's default v4/v6 interfaces at ours before sniffing.
        conf.iface=self.iface
        conf.iface6=self.iface

        # Each captured packet is also summarised to stdout via prn.
        r = sniff(filter=self.flt,iface=self.iface,timeout=self.timeout,prn=lambda x: x.summary())
        self.q.put(r)
+
+
+
+# New "SR" function
+# Fire off thread with filter and expected answer packet(s).
+# Fire off sniffer thread, main thread sends packet
+# Returns true if found
+
def sr2(answer, *args, **kwargs):
    """Send a packet and sniff for an expected reply.

    Starts a SnifferThread (filter 'ip proto 41', 1s timeout) on veth1,
    sends the packet(s) given in *args/**kwargs, then compares layer 1 of
    every sniffed packet against `answer`.  Returns True on a match.

    Fix: the original used Python 2 print statements under a plain
    `python` shebang, breaking on Python 3; prints are now function-style
    with identical output, and the deprecated setDaemon() call is replaced
    by the daemon attribute.
    """
    q = Queue()
    print("Creating SnifferThreadWorkerThread")
    flt = 'ip proto 41'
    iface = 'veth1'
    sniffer = SnifferThread(q, iface, flt, 1)
    sniffer.daemon = True  # don't block interpreter exit
    sniffer.start()

    print("Sending packet:")
    send(*args, **kwargs)
    sniffer.join()
    ps = q.get()

    print("Number of packets sniffed: %d" % len(ps))

    for p in ps:
        ip = p.getlayer(1)
        print("Comparing %s and %s" % (ip.summary(), answer.summary()))
        if ip == answer:
            print("We have a match!!")
            return True
    return False
+
# Expected reply: the ICMPv6 echo 6rd-encapsulated in IPv4 (ttl already
# decremented to 63 by the router under test).
aip6 = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest()
answer= IP(src="10.0.0.100",dst="10.10.10.10",ttl=63)/aip6
packet = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest()

# From IPv6
sr2(answer, packet,iface='veth1')

#From IPv4
packet = IP(src='10.10.10.10',dst='10.0.0.100')/IPv6(src='2002:0a0a:0a0a::12',dst='1::2')/ICMPv6EchoRequest()
sr2(answer, packet,iface='veth1')
diff --git a/vnet/vnet/map/gen-rules.py b/vnet/vnet/map/gen-rules.py
new file mode 100755
index 00000000000..533a8e237f7
--- /dev/null
+++ b/vnet/vnet/map/gen-rules.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2015 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import ipaddress
+import argparse
+import sys
+
+# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
+# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
+
+parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
+parser.add_argument('-t', action="store", dest="mapmode")
+args = parser.parse_args()
+
+#
+# 1:1 Shared IPv4 address, shared BR
+#
def shared11br():
    """Shared-BR variant: each /32 in 20.0.0.0/16 becomes a MAP domain with
    the shared BR source cccc:bbbb::1 and 2**6 explicit PSID rules."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+
+#
+# 1:1 Shared IPv4 address
+#
def shared11():
    """Per-domain BR source variant: each /32 in 20.0.0.0/16 gets its own
    ip6-src out of cccc:bbbb::/64 plus 2**6 explicit PSID rules."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+#
+# 1:1 Shared IPv4 address small
+#
def smallshared11():
    """Small (/24) version of shared11(): 256 domains, 64 PSID rules each."""
    v4net = ipaddress.ip_network('20.0.0.0/24')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    psid_len = 6
    sharing = 0x1 << psid_len
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx ::/0 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
        for psid in range(sharing):
            print("map add rule index", idx, "psid", psid, "ip6-dst", v6pool[idx * sharing + psid])
+
+#
+# 1:1 Full IPv4 address
+#
def full11():
    """Full-address 1:1 mapping: no port sharing (psid-len 0), each /32
    paired with a /128 from bbbb::/32 and its own ip6-src."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    srcs = ipaddress.ip_network('cccc:bbbb::/64')
    v6pool = ipaddress.ip_network('bbbb::/32')
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx " + str(v6pool[idx]) + "/128 ip6-src", srcs[idx],
              "ea-bits-len 0 psid-offset 0 psid-len 0")
def full11br():
    """full11() with a single shared BR source instead of per-domain sources."""
    v4net = ipaddress.ip_network('20.0.0.0/16')
    v6pool = ipaddress.ip_network('bbbb::/32')
    for idx in range(v4net.num_addresses):
        print("map add domain ip4-pfx " + str(v4net[idx]) + "/32 ip6-pfx " + str(v6pool[idx]) + "/128 ip6-shared-src cccc:bbbb::1",
              "ea-bits-len 0 psid-offset 0 psid-len 0")
+
+#
+# Algorithmic mapping Shared IPv4 address
+#
def algo():
    """Two purely algorithmic (EA-bits) MAP domains — no explicit rules."""
    for line in (
        "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8",
        "map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0",
    ):
        print(line)
+
+#
+# IP4 forwarding
+#
def ip4():
    """Plain IPv4 forwarding baseline: one host route per /32 in 20.0.0.0/16."""
    for host in ipaddress.ip_network('20.0.0.0/16'):
        print("ip route add " + str(host) + "/32 via 172.16.0.2")
+
+
# Dispatch to the generator selected with -t.  Without the guard a missing
# or unknown mode died with a bare KeyError/TypeError traceback.
if not args.mapmode or args.mapmode not in globals():
    sys.exit("Unknown or missing mode '{}' (use -t <generator name>)".format(args.mapmode))
globals()[args.mapmode]()
+
+
diff --git a/vnet/vnet/map/ip4_map.c b/vnet/vnet/map/ip4_map.c
new file mode 100644
index 00000000000..cf53ef4918c
--- /dev/null
+++ b/vnet/vnet/map/ip4_map.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Defines used for testing various optimisation schemes
+ */
+#define MAP_ENCAP_DUAL 0
+
+#include "map.h"
+#include "../ip/ip_frag.h"
+
+/* Forward declaration: registration lives at the bottom of this file. */
+vlib_node_registration_t ip4_map_reass_node;
+
+/* Next-node indices for the ip4-map node. */
+enum ip4_map_next_e {
+  IP4_MAP_NEXT_IP6_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+  IP4_MAP_NEXT_IP6_REWRITE,
+#endif
+  IP4_MAP_NEXT_FRAGMENT,
+  IP4_MAP_NEXT_REASS,
+  IP4_MAP_NEXT_DROP,
+  IP4_MAP_N_NEXT,
+};
+
+/* Next-node indices for the ip4-map-reass node. */
+enum ip4_map_reass_next_t {
+  IP4_MAP_REASS_NEXT_IP6_LOOKUP,
+  IP4_MAP_REASS_NEXT_IP4_FRAGMENT,
+  IP4_MAP_REASS_NEXT_DROP,
+  IP4_MAP_REASS_N_NEXT,
+};
+
+/* Packet trace record for ip4-map-reass. */
+typedef struct {
+  u32 map_domain_index; /* MAP domain the packet matched */
+  u16 port;             /* L4 port recovered for the fragmented flow */
+  u8 cached;            /* 1 = fragment queued awaiting the port, 0 = forwarded */
+} map_ip4_map_reass_trace_t;
+
+/*
+ * format_ip4_map_reass_trace
+ *
+ * vlib trace formatter for map_ip4_map_reass_trace_t records.
+ */
+u8 *
+format_ip4_map_reass_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+  map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *);
+  return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index,
+                t->port, t->cached?"cached":"forwarded");
+}
+
+/*
+ * ip4_map_get_port
+ *
+ * Extract the relevant L4 port (network byte order) from an IPv4 packet.
+ * TCP/UDP: src or dst port depending on direction. ICMP echo: the echo
+ * identifier. ICMP errors: the port of the embedded (inner) packet, with
+ * direction reversed. Returns 0 when no port can be determined.
+ *
+ * NOTE(review): assumes a 20-byte outer IP header (no options) when
+ * stepping to the L4/inner headers — callers appear to pre-check 0x45.
+ */
+u16
+ip4_map_get_port (ip4_header_t *ip, map_dir_e dir)
+{
+  /* Find port information */
+  if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) ||
+                   (ip->protocol == IP_PROTOCOL_UDP))) {
+    udp_header_t *udp = (void *)(ip + 1);
+    return (dir == MAP_SENDER ? udp->src_port : udp->dst_port);
+  } else if (ip->protocol == IP_PROTOCOL_ICMP) {
+    /*
+     * 1) ICMP Echo request or Echo reply
+     * 2) ICMP Error with inner packet being UDP or TCP
+     * 3) ICMP Error with inner packet being ICMP Echo request or Echo reply
+     */
+    icmp46_header_t *icmp = (void *)(ip + 1);
+    if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) {
+      /* Echo identifier immediately follows the ICMP header */
+      return *((u16 *)(icmp + 1));
+    } else if (clib_net_to_host_u16(ip->length) >= 64) { // IP + ICMP + IP + L4 header
+      ip4_header_t *icmp_ip = (ip4_header_t *)(icmp + 2);
+      if (PREDICT_TRUE((icmp_ip->protocol == IP_PROTOCOL_TCP) ||
+                       (icmp_ip->protocol == IP_PROTOCOL_UDP))) {
+        udp_header_t *udp = (void *)(icmp_ip + 1);
+        /* Inner packet runs in the opposite direction to the error */
+        return (dir == MAP_SENDER ? udp->dst_port : udp->src_port);
+      } else if (icmp_ip->protocol == IP_PROTOCOL_ICMP) {
+        icmp46_header_t *inner_icmp = (void *)(icmp_ip + 1);
+        if (inner_icmp->type == ICMP4_echo_request || inner_icmp->type == ICMP4_echo_reply)
+          return (*((u16 *)(inner_icmp + 1)));
+      }
+    }
+  }
+  return (0);
+}
+
+/*
+ * ip4_map_port_and_security_check
+ *
+ * For shared-address domains (psid_length > 0): extract the destination
+ * port and reject ports below the range excluded by psid_offset.
+ * Fragments are diverted to ip4-map-reass via *next (only the first
+ * fragment carries the L4 header). Returns the port in network order,
+ * or 0 with *error possibly set.
+ */
+static_always_inline u16
+ip4_map_port_and_security_check (map_domain_t *d, ip4_header_t *ip, u32 *next, u8 *error)
+{
+  u16 port = 0;
+
+  if (d->psid_length > 0) {
+    if (!ip4_is_fragment(ip)) {
+      /* Require a plain 20-byte header and room for an L4 header (28 = 20 + 8) */
+      if (PREDICT_FALSE((ip->ip_version_and_header_length != 0x45) || clib_host_to_net_u16(ip->length) < 28)) {
+        return 0;
+      }
+      port = ip4_map_get_port(ip, MAP_RECEIVER);
+      if (port) {
+        /* Verify that port is not among the well-known ports */
+        if ((d->psid_offset > 0) && (clib_net_to_host_u16(port) < (0x1 << (16 - d->psid_offset)))) {
+          *error = MAP_ERROR_ENCAP_SEC_CHECK;
+        } else {
+          return (port);
+        }
+      } else {
+        *error = MAP_ERROR_BAD_PROTOCOL;
+      }
+    } else {
+      *next = IP4_MAP_NEXT_REASS;
+    }
+  }
+  return (0);
+}
+
+/*
+ * ip4_map_vtcfl
+ *
+ * Build the IPv6 version/traffic-class/flow-label word for the encap
+ * header. Traffic class is copied from the IPv4 TOS when tc_copy is set,
+ * otherwise taken from the configured tc; the flow label is filled from
+ * the buffer's precomputed flow hash. Returns the word in network order.
+ */
+static_always_inline u32
+ip4_map_vtcfl (ip4_header_t *ip4, vlib_buffer_t *p)
+{
+  map_main_t *mm = &map_main;
+  u8 tc = mm->tc_copy ? ip4->tos : mm->tc;
+  u32 vtcfl = 0x6 << 28;
+  vtcfl |= tc << 20;
+  /* BUG FIX: was '&&' (logical AND), which collapsed the flow hash to
+   * 0 or 1 instead of masking it into the 20-bit flow label field. */
+  vtcfl |= vnet_buffer(p)->ip.flow_hash & 0x000fffff;
+
+  return (clib_host_to_net_u32(vtcfl));
+}
+
+/*
+ * ip4_map_ip6_lookup_bypass
+ *
+ * If MAP_SKIP_IP6_LOOKUP is compiled in and a pre-resolved IPv6
+ * adjacency is configured (mm->adj6_index > 0), store it in the buffer
+ * so the packet can be sent straight to ip6-rewrite, spreading over
+ * multipath members with the IPv4 flow hash. Returns true when the
+ * bypass adjacency was set.
+ */
+static_always_inline bool
+ip4_map_ip6_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+  map_main_t *mm = &map_main;
+  u32 adj_index0 = mm->adj6_index;
+  if (adj_index0 > 0) {
+    ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+    ip_adjacency_t *adj = ip_get_adjacency(lm6, mm->adj6_index);
+    if (adj->n_adj > 1) {
+      /* NOTE(review): masking assumes n_adj is a power of two — confirm */
+      u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT);
+      adj_index0 += (hash_c0 & (adj->n_adj - 1));
+    }
+    vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0;
+    return (true);
+  }
+#endif
+  return (false);
+}
+
+/*
+ * ip4_map
+ *
+ * IPv4 -> IPv6 MAP-E encapsulation node. For each packet: optionally
+ * extract/check the destination port (shared-address domains), compute
+ * the MAP IPv6 destination from the IPv4 destination + port, prepend an
+ * IPv6 header, then dispatch to ip6-lookup (or ip6-rewrite bypass),
+ * ip4-frag when over the domain MTU, ip4-map-reass for fragments, or
+ * error-drop. Standard vlib dual-loop + single-loop structure.
+ */
+static uword
+ip4_map (vlib_main_t *vm,
+         vlib_node_runtime_t *node,
+         vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_node.index);
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  map_main_t *mm = &map_main;
+  vlib_combined_counter_main_t *cm = mm->domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Dual loop */
+    while (n_left_from > 4 && n_left_to_next > 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      map_domain_t *d0, *d1;
+      u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE;
+      ip4_header_t *ip40, *ip41;
+      u16 port0 = 0, port1 = 0;
+      ip6_header_t *ip6h0, *ip6h1;
+      u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+      u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = IP4_MAP_NEXT_IP6_LOOKUP;
+
+      /* Prefetch next iteration. */
+      {
+        vlib_buffer_t *p2, *p3;
+
+        p2 = vlib_get_buffer(vm, from[2]);
+        p3 = vlib_get_buffer(vm, from[3]);
+
+        vlib_prefetch_buffer_header(p2, STORE);
+        vlib_prefetch_buffer_header(p3, STORE);
+        /* IPv4 + 8 = 28. possibly plus -40 */
+        CLIB_PREFETCH (p2->data-40, 68, STORE);
+        CLIB_PREFETCH (p3->data-40, 68, STORE);
+      }
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next +=2;
+      n_left_to_next -= 2;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip40 = vlib_buffer_get_current(p0);
+      ip41 = vlib_buffer_get_current(p1);
+      /* Trust the IP total-length field for the buffer length
+       * (presumably trims L2 trailing padding — TODO confirm) */
+      p0->current_length = clib_net_to_host_u16(ip40->length);
+      p1->current_length = clib_net_to_host_u16(ip41->length);
+      /* Domain index was stored as the adjacency by the lookup feature */
+      d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0);
+      d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], &map_domain_index1);
+      ASSERT(d0);
+      ASSERT(d1);
+
+      /*
+       * Shared IPv4 address
+       */
+      port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0);
+      port1 = ip4_map_port_and_security_check(d1, ip41, &next1, &error1);
+
+      /* MAP calc: derive the IPv6 destination from IPv4 dst + port */
+      u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32);
+      u32 da41 = clib_net_to_host_u32(ip41->dst_address.as_u32);
+      u16 dp40 = clib_net_to_host_u16(port0);
+      u16 dp41 = clib_net_to_host_u16(port1);
+      u64 dal60 = map_get_pfx(d0, da40, dp40);
+      u64 dal61 = map_get_pfx(d1, da41, dp41);
+      u64 dar60 = map_get_sfx(d0, da40, dp40);
+      u64 dar61 = map_get_sfx(d1, da41, dp41);
+      if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_UNKNOWN;
+      if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_UNKNOWN;
+
+      /* construct ipv6 header (prepended in front of the IPv4 packet) */
+      vlib_buffer_advance(p0, - sizeof(ip6_header_t));
+      vlib_buffer_advance(p1, - sizeof(ip6_header_t));
+      ip6h0 = vlib_buffer_get_current(p0);
+      ip6h1 = vlib_buffer_get_current(p1);
+      vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0;
+      vnet_buffer(p1)->sw_if_index[VLIB_TX] = (u32)~0;
+
+      ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0);
+      ip6h1->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip41, p1);
+      /* IPv6 payload length == full IPv4 packet length (already net order) */
+      ip6h0->payload_length = ip40->length;
+      ip6h1->payload_length = ip41->length;
+      ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+      ip6h1->protocol = IP_PROTOCOL_IP_IN_IP;
+      ip6h0->hop_limit = 0x40;
+      ip6h1->hop_limit = 0x40;
+      ip6h0->src_address = d0->ip6_src;
+      ip6h1->src_address = d1->ip6_src;
+      ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60);
+      ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60);
+      ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64(dal61);
+      ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64(dar61);
+
+      /*
+       * Determine next node. Can be one of:
+       * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+       */
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
+        if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) {
+          vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0);
+          vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+          vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+          vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+          next0 = IP4_MAP_NEXT_FRAGMENT;
+        } else {
+          next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0;
+          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1,
+                                          clib_net_to_host_u16(ip6h0->payload_length) + 40);
+        }
+      } else {
+        next0 = IP4_MAP_NEXT_DROP;
+      }
+
+      /*
+       * Determine next node. Can be one of:
+       * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+       */
+      if (PREDICT_TRUE(error1 == MAP_ERROR_NONE)) {
+        if (PREDICT_FALSE(d1->mtu && (clib_net_to_host_u16(ip6h1->payload_length) + sizeof(*ip6h1) > d1->mtu))) {
+          vnet_buffer(p1)->ip_frag.header_offset = sizeof(*ip6h1);
+          vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+          vnet_buffer(p1)->ip_frag.mtu = d1->mtu;
+          vnet_buffer(p1)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+          next1 = IP4_MAP_NEXT_FRAGMENT;
+        } else {
+          next1 = ip4_map_ip6_lookup_bypass(p1, ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1;
+          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index1, 1,
+                                          clib_net_to_host_u16(ip6h1->payload_length) + 40);
+        }
+      } else {
+        next1 = IP4_MAP_NEXT_DROP;
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->map_domain_index = map_domain_index0;
+        tr->port = port0;
+      }
+      if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr));
+        tr->map_domain_index = map_domain_index1;
+        tr->port = port1;
+      }
+
+      p0->error = error_node->errors[error0];
+      p1->error = error_node->errors[error1];
+
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1);
+    }
+
+    /* Single loop: same processing as above, one packet at a time */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      map_domain_t *d0;
+      u8 error0 = MAP_ERROR_NONE;
+      ip4_header_t *ip40;
+      u16 port0 = 0;
+      ip6_header_t *ip6h0;
+      u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
+      u32 map_domain_index0 = ~0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip40 = vlib_buffer_get_current(p0);
+      p0->current_length = clib_net_to_host_u16(ip40->length);
+      d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0);
+      ASSERT(d0);
+
+      /*
+       * Shared IPv4 address
+       */
+      port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0);
+
+      /* MAP calc */
+      u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32);
+      u16 dp40 = clib_net_to_host_u16(port0);
+      u64 dal60 = map_get_pfx(d0, da40, dp40);
+      u64 dar60 = map_get_sfx(d0, da40, dp40);
+      if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_UNKNOWN;
+
+      /* construct ipv6 header */
+      vlib_buffer_advance(p0, - (sizeof(ip6_header_t)));
+      ip6h0 = vlib_buffer_get_current(p0);
+      vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0;
+
+      ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0);
+      ip6h0->payload_length = ip40->length;
+      ip6h0->protocol = IP_PROTOCOL_IP_IN_IP;
+      ip6h0->hop_limit = 0x40;
+      ip6h0->src_address = d0->ip6_src;
+      ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60);
+      ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60);
+
+      /*
+       * Determine next node. Can be one of:
+       * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop
+       */
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
+        if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) {
+          vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0);
+          vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+          vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+          vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+          next0 = IP4_MAP_NEXT_FRAGMENT;
+        } else {
+          next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0;
+          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1,
+                                          clib_net_to_host_u16(ip6h0->payload_length) + 40);
+        }
+      } else {
+        next0 = IP4_MAP_NEXT_DROP;
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->map_domain_index = map_domain_index0;
+        tr->port = port0;
+      }
+
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  return frame->n_vectors;
+}
+
+/*
+ * ip4_map_reass
+ *
+ * Virtual-reassembly node for fragmented IPv4 packets entering MAP-E.
+ * Fragments arriving before the port is known are cached; once the first
+ * fragment reveals the port, cached fragments are looped back through
+ * this node and forwarded with the correct MAP IPv6 destination.
+ * The packets arrive here already encapsulated by ip4_map (ip6 header
+ * in front); only the IPv6 destination still needs fixing.
+ */
+static uword
+ip4_map_reass (vlib_main_t *vm,
+               vlib_node_runtime_t *node,
+               vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_reass_node.index);
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  map_main_t *mm = &map_main;
+  vlib_combined_counter_main_t *cm = mm->domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+  u32 *fragments_to_drop = NULL;      /* buffer indices to drop at the end */
+  u32 *fragments_to_loopback = NULL;  /* cached fragments to re-inject */
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      map_domain_t *d0;
+      u8 error0 = MAP_ERROR_NONE;
+      ip4_header_t *ip40;
+      i32 port0 = 0;
+      ip6_header_t *ip60;
+      u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP;
+      u32 map_domain_index0;
+      u8 cached = 0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      /* Inner IPv4 header sits right after the prepended IPv6 header */
+      ip60 = vlib_buffer_get_current(p0);
+      ip40 = (ip4_header_t *)(ip60 + 1);
+      d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0);
+
+      map_ip4_reass_lock();
+      map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32,
+                                             ip40->fragment_id, ip40->protocol, &fragments_to_drop);
+      if (PREDICT_FALSE(!r)) {
+        // Could not create a caching entry
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) {
+        /* Non-first fragment: no L4 header to read */
+        if (r->port >= 0) {
+          // We know the port already
+          port0 = r->port;
+        } else if (map_ip4_reass_add_fragment(r, pi0)) {
+          // Not enough space for caching
+          error0 = MAP_ERROR_FRAGMENT_MEMORY;
+          map_ip4_reass_free(r, &fragments_to_drop);
+        } else {
+          cached = 1;
+        }
+      } else if ((port0 = ip4_get_port(ip40, MAP_RECEIVER, p0->current_length)) < 0) {
+        // Could not find port. We'll free the reassembly.
+        error0 = MAP_ERROR_BAD_PROTOCOL;
+        port0 = 0;
+        map_ip4_reass_free(r, &fragments_to_drop);
+      } else {
+        /* First fragment: record the port and release cached fragments */
+        r->port = port0;
+        map_ip4_reass_get_fragments(r, &fragments_to_loopback);
+      }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+      /* Free the entry early once all expected bytes were forwarded.
+       * NOTE(review): r may already have been freed in the error paths
+       * above — confirm map_ip4_reass_free semantics make this safe. */
+      if (!cached && r) {
+        r->forwarded += clib_host_to_net_u16(ip40->length) - 20;
+        if (!ip4_get_fragment_more(ip40))
+          r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20;
+        if(r->forwarded >= r->expected_total)
+          map_ip4_reass_free(r, &fragments_to_drop);
+      }
+#endif
+
+      map_ip4_reass_unlock();
+
+      // NOTE: Most operations have already been performed by ip4_map
+      // All we need is the right destination address
+      ip60->dst_address.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, port0);
+      ip60->dst_address.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, port0);
+
+      if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip60->payload_length) + sizeof(*ip60) > d0->mtu))) {
+        vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip60);
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
+        vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+        vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
+        next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT;
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_ip4_map_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->map_domain_index = map_domain_index0;
+        tr->port = port0;
+        tr->cached = cached;
+      }
+
+      if(cached) {
+        //Dequeue the packet (it stays in the reassembly cache)
+        n_left_to_next++;
+        to_next--;
+      } else {
+        if (error0 == MAP_ERROR_NONE)
+          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1,
+                                          clib_net_to_host_u16(ip60->payload_length) + 40);
+        next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP4_MAP_REASS_NEXT_DROP;
+        p0->error = error_node->errors[error0];
+        vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+      }
+
+      //Loopback when we reach the end of the input vector
+      if(n_left_from == 0 && vec_len(fragments_to_loopback)) {
+        from = vlib_frame_vector_args(frame);
+        u32 len = vec_len(fragments_to_loopback);
+        if(len <= VLIB_FRAME_SIZE) {
+          memcpy(from, fragments_to_loopback, sizeof(u32)*len);
+          n_left_from = len;
+          vec_reset_length(fragments_to_loopback);
+        } else {
+          memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE);
+          n_left_from = VLIB_FRAME_SIZE;
+          _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+        }
+      }
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  map_send_all_to_node(vm, fragments_to_drop, node,
+                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+                       IP4_MAP_REASS_NEXT_DROP);
+
+  vec_free(fragments_to_drop);
+  vec_free(fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/* Error strings generated from the foreach_map_error X-macro (map.h) */
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+  foreach_map_error
+#undef _
+};
+
+/* Node registration: IPv4 -> IPv6 MAP-E encapsulation */
+VLIB_REGISTER_NODE(ip4_map_node) = {
+  .function = ip4_map,
+  .name = "ip4-map",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+
+  .n_next_nodes = IP4_MAP_N_NEXT,
+  .next_nodes = {
+    [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+    [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite",
+#endif
+    [IP4_MAP_NEXT_FRAGMENT] = "ip4-frag",
+    [IP4_MAP_NEXT_REASS] = "ip4-map-reass",
+    [IP4_MAP_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* Node registration: virtual reassembly for fragmented MAP-E traffic */
+VLIB_REGISTER_NODE(ip4_map_reass_node) = {
+  .function = ip4_map_reass,
+  .name = "ip4-map-reass",
+  .vector_size = sizeof(u32),
+  .format_trace = format_ip4_map_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+
+  .n_next_nodes = IP4_MAP_REASS_N_NEXT,
+  .next_nodes = {
+    [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+    [IP4_MAP_REASS_NEXT_DROP] = "error-drop",
+  },
+};
diff --git a/vnet/vnet/map/ip4_map_t.c b/vnet/vnet/map/ip4_map_t.c
new file mode 100644
index 00000000000..07f5b19c257
--- /dev/null
+++ b/vnet/vnet/map/ip4_map_t.c
@@ -0,0 +1,1092 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+#define IP4_MAP_T_DUAL_LOOP 1
+
+/* Next-node indices for the ip4-map-t classifier node. */
+typedef enum {
+  IP4_MAPT_NEXT_MAPT_TCP_UDP,
+  IP4_MAPT_NEXT_MAPT_ICMP,
+  IP4_MAPT_NEXT_MAPT_FRAGMENTED,
+  IP4_MAPT_NEXT_DROP,
+  IP4_MAPT_N_NEXT
+} ip4_mapt_next_t;
+
+/* Next-node indices for the ip4-map-t ICMP translation node. */
+typedef enum {
+  IP4_MAPT_ICMP_NEXT_IP6_LOOKUP,
+  IP4_MAPT_ICMP_NEXT_IP6_FRAG,
+  IP4_MAPT_ICMP_NEXT_DROP,
+  IP4_MAPT_ICMP_N_NEXT
+} ip4_mapt_icmp_next_t;
+
+/* Next-node indices for the ip4-map-t TCP/UDP translation node. */
+typedef enum {
+  IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP,
+  IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG,
+  IP4_MAPT_TCP_UDP_NEXT_DROP,
+  IP4_MAPT_TCP_UDP_N_NEXT
+} ip4_mapt_tcp_udp_next_t;
+
+/* Next-node indices for the ip4-map-t fragmented-packet node. */
+typedef enum {
+  IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP,
+  IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG,
+  IP4_MAPT_FRAGMENTED_NEXT_DROP,
+  IP4_MAPT_FRAGMENTED_N_NEXT
+} ip4_mapt_fragmented_next_t;
+
+//This is used to pass information within the buffer data.
+//Buffer structure being too small to contain big structures like this.
+typedef CLIB_PACKED(struct {
+  ip6_address_t daddr;
+  ip6_address_t saddr;
+  //IPv6 header + Fragmentation header will be here
+  //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
+  u8 unused[28];
+}) ip4_mapt_pseudo_header_t;
+
+#define frag_id_4to6(id) (id)
+
+//TODO: Find the right place in memory for this.
+/* Maps an ICMPv4 parameter-problem pointer value (index) to the
+ * corresponding ICMPv6 pointer; 0xff (~0) marks untranslatable values. */
+static u8 icmp_to_icmp6_updater_pointer_table[] =
+  { 0, 1, 4, 4,~0,
+    ~0,~0,~0, 7, 6,
+    ~0,~0, 8, 8, 8,
+    8, 24, 24, 24, 24 };
+
+
+/*
+ * ip4_map_fragment_cache
+ *
+ * Record the destination port for a fragmented flow so later (non-first)
+ * fragments can be translated. ICMP flows are keyed with the ICMP6
+ * protocol number (presumably to match post-translation lookups — TODO
+ * confirm). Returns 0 on success, non-zero if no entry could be created.
+ */
+static_always_inline int
+ip4_map_fragment_cache (ip4_header_t *ip4, u16 port)
+{
+  u32 *ignore = NULL;
+  map_ip4_reass_lock();
+  map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32,
+                                         ip4->fragment_id,
+                                         (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+                                         &ignore);
+  if (r)
+    r->port = port;
+
+  map_ip4_reass_unlock();
+  return !r;
+}
+
+/*
+ * ip4_map_fragment_get_port
+ *
+ * Look up the port previously cached for this fragmented flow (same key
+ * as ip4_map_fragment_cache). Returns -1 when no port is known yet.
+ */
+static_always_inline i32
+ip4_map_fragment_get_port (ip4_header_t *ip4)
+{
+  u32 *ignore = NULL;
+  map_ip4_reass_lock();
+  map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32,
+                                         ip4->fragment_id,
+                                         (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol,
+                                         &ignore);
+  i32 ret = r?r->port:-1;
+  map_ip4_reass_unlock();
+  return ret;
+}
+
+
+/* Statelessly translates an ICMP packet into ICMPv6.
+ *
+ * Rewrites type/code in place per the v4->v6 mapping, extracts the
+ * receiver port (echo identifier, or inner packet's port for errors)
+ * into *receiver_port, and sets *inner_ip4 to the embedded IPv4 header
+ * of error messages (NULL otherwise). Returns 0 on success, -1 when the
+ * message cannot be translated (caller should drop).
+ *
+ * Warning: The checksum will need to be recomputed.
+ *
+ */
+static_always_inline int
+ip4_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len,
+                            i32 *receiver_port, ip4_header_t **inner_ip4)
+{
+  *inner_ip4 = NULL;
+  switch (icmp->type) {
+  case ICMP4_echo_reply:
+    /* Port = echo identifier (u16 at offset 4) */
+    *receiver_port = ((u16 *)icmp)[2];
+    icmp->type = ICMP6_echo_reply;
+    break;
+  case ICMP4_echo_request:
+    *receiver_port = ((u16 *)icmp)[2];
+    icmp->type = ICMP6_echo_request;
+    break;
+  case ICMP4_destination_unreachable:
+    /* Embedded original datagram starts 8 bytes into the ICMP message */
+    *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8);
+    *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8);
+
+    switch (icmp->code) {
+    case ICMP4_destination_unreachable_destination_unreachable_net: //0
+    case ICMP4_destination_unreachable_destination_unreachable_host: //1
+      icmp->type = ICMP6_destination_unreachable;
+      icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+      break;
+    case ICMP4_destination_unreachable_protocol_unreachable: //2
+      icmp->type = ICMP6_parameter_problem;
+      icmp->code = ICMP6_parameter_problem_unrecognized_next_header;
+      break;
+    case ICMP4_destination_unreachable_port_unreachable: //3
+      icmp->type = ICMP6_destination_unreachable;
+      icmp->code = ICMP6_destination_unreachable_port_unreachable;
+      break;
+    case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4
+      icmp->type = ICMP6_packet_too_big;
+      icmp->code = 0;
+      {
+        /* Adjust the advertised MTU for the extra IPv6 header (+20) */
+        u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1)));
+        if (advertised_mtu)
+          advertised_mtu += 20;
+        else
+          advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value)
+
+        //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20)
+        *((u32 *)(icmp + 1)) = clib_host_to_net_u32(advertised_mtu);
+      }
+      break;
+
+    case ICMP4_destination_unreachable_source_route_failed: //5
+    case ICMP4_destination_unreachable_destination_network_unknown: //6
+    case ICMP4_destination_unreachable_destination_host_unknown: //7
+    case ICMP4_destination_unreachable_source_host_isolated: //8
+    case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11
+    case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12
+      icmp->type = ICMP6_destination_unreachable;
+      icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+      break;
+    case ICMP4_destination_unreachable_network_administratively_prohibited: //9
+    case ICMP4_destination_unreachable_host_administratively_prohibited: //10
+    case ICMP4_destination_unreachable_communication_administratively_prohibited: //13
+    case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15
+      icmp->type = ICMP6_destination_unreachable;
+      icmp->code = ICMP6_destination_unreachable_destination_administratively_prohibited;
+      break;
+    case ICMP4_destination_unreachable_host_precedence_violation: //14
+    default:
+      return -1;
+    }
+    break;
+
+  case ICMP4_time_exceeded: //11
+    *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8);
+    *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8);
+    icmp->type = ICMP6_time_exceeded;
+    //icmp->code = icmp->code //unchanged
+    break;
+
+  case ICMP4_parameter_problem:
+    *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8);
+    *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8);
+
+    switch (icmp->code) {
+    case ICMP4_parameter_problem_pointer_indicates_error:
+    case ICMP4_parameter_problem_bad_length:
+      icmp->type = ICMP6_parameter_problem;
+      icmp->code = ICMP6_parameter_problem_erroneous_header_field;
+      {
+        /* Translate the pointer field via the static lookup table */
+        u8 ptr = icmp_to_icmp6_updater_pointer_table[*((u8 *)(icmp + 1))];
+        if (ptr == 0xff)
+          return -1;
+
+        *((u32 *)(icmp + 1)) = clib_host_to_net_u32(ptr);
+      }
+      break;
+    default:
+      //All other codes cause dropping the packet
+      return -1;
+    }
+    break;
+
+  default:
+    //All other types cause dropping the packet
+    return -1;
+    break;
+  }
+  return 0;
+}
+
+/*
+ * _ip4_map_t_icmp
+ *
+ * Translate one ICMPv4 packet to ICMPv6 for MAP-T. For error messages
+ * the embedded (inner) IPv4 header is translated too, which requires
+ * growing the packet in front (IPv6 headers are larger), and possibly
+ * inserting an IPv6 fragment header for an inner fragment. L4 and ICMP
+ * checksums are adjusted incrementally; the outer ICMPv6 checksum is
+ * recomputed from scratch. Sets *error on failure (packet untouched or
+ * partially rewritten but destined to drop).
+ */
+static_always_inline void
+_ip4_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 *error)
+{
+  ip4_header_t *ip4, *inner_ip4;
+  ip6_header_t *ip6, *inner_ip6;
+  u32 ip_len;
+  icmp46_header_t *icmp;
+  i32 recv_port;
+  ip_csum_t csum;
+  u16 *inner_L4_checksum = 0;
+  ip6_frag_hdr_t *inner_frag;
+  u32 inner_frag_id;
+  u32 inner_frag_offset;
+  u8 inner_frag_more;
+
+  ip4 = vlib_buffer_get_current(p);
+  ip_len = clib_net_to_host_u16(ip4->length);
+  ASSERT(ip_len <= p->current_length);
+
+  icmp = (icmp46_header_t *)(ip4 + 1);
+  if (ip4_icmp_to_icmp6_in_place(icmp, ip_len - sizeof(*ip4),
+                                 &recv_port, &inner_ip4)) {
+    *error = MAP_ERROR_ICMP;
+    return;
+  }
+
+  if (recv_port < 0) {
+    // In case of 1:1 mapping, we don't care about the port
+    if(d->ea_bits_len == 0 && d->rules) {
+      recv_port = 0;
+    } else {
+      *error = MAP_ERROR_ICMP;
+      return;
+    }
+  }
+
+  if (inner_ip4) {
+    //We have 2 headers to translate.
+    //We need to make some room in the middle of the packet
+
+    if (PREDICT_FALSE(ip4_is_fragment(inner_ip4))) {
+      //Here it starts getting really tricky
+      //We will add a fragmentation header in the inner packet
+
+      if (!ip4_is_first_fragment(inner_ip4)) {
+        //For now we do not handle unless it is the first fragment
+        //Ideally we should handle the case as we are in slow path already
+        *error = MAP_ERROR_FRAGMENTED;
+        return;
+      }
+
+      /* Grow in front for two v4->v6 header expansions + frag header */
+      vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4)) - sizeof(*inner_frag));
+      ip6 = vlib_buffer_get_current(p);
+      /* Move outer IPv4 header + 8 ICMP bytes to their new position */
+      memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8);
+      ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4));
+      icmp = (icmp46_header_t *) (ip4 + 1);
+
+      inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6) - sizeof(*inner_frag));
+      inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, sizeof(*inner_ip6));
+      ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4) + sizeof(*inner_frag));
+      inner_frag_id = frag_id_4to6(inner_ip4->fragment_id);
+      inner_frag_offset = ip4_get_fragment_offset(inner_ip4);
+      inner_frag_more = !!(inner_ip4->flags_and_fragment_offset & clib_net_to_host_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS));
+    } else {
+      vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4)));
+      ip6 = vlib_buffer_get_current(p);
+      memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8);
+      ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4));
+      icmp = (icmp46_header_t *) u8_ptr_add(ip4, sizeof(*ip4));
+      inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6));
+      ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4));
+      inner_frag = NULL;
+    }
+
+    /* Remove the inner IPv4 pseudo-header contribution from the inner
+     * L4 checksum; the IPv6 addresses are added back in below. */
+    if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_TCP)) {
+      inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum;
+      *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *) (&inner_ip4->src_address))));
+    } else if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_UDP)) {
+      inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum;
+      if (!*inner_L4_checksum) {
+        //The inner packet was first translated, and therefore came from IPv6.
+        //As the packet was an IPv6 packet, the UDP checksum can't be NULL
+        *error = MAP_ERROR_ICMP;
+        return;
+      }
+      *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *)(&inner_ip4->src_address))));
+    } else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) {
+      //We have an ICMP inside an ICMP
+      //It needs to be translated, but not for error ICMP messages
+      icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+      csum = inner_icmp->checksum;
+      //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by ip4_icmp_to_icmp6_in_place
+      csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp));
+      inner_icmp->type = (inner_icmp->type == ICMP4_echo_request)?
+        ICMP6_echo_request:ICMP6_echo_reply;
+      csum = ip_csum_add_even(csum, *((u16 *)inner_icmp));
+      /* ICMPv6, unlike ICMPv4, includes a pseudo-header in its checksum */
+      csum = ip_csum_add_even(csum, clib_host_to_net_u16(IP_PROTOCOL_ICMP6));
+      csum = ip_csum_add_even(csum, inner_ip4->length - sizeof(*inner_ip4));
+      inner_icmp->checksum = ip_csum_fold(csum);
+      inner_L4_checksum = &inner_icmp->checksum;
+      inner_ip4->protocol = IP_PROTOCOL_ICMP6;
+    } else {
+      ASSERT(0); // We had a port from that, so it is udp or tcp or ICMP
+    }
+
+    //FIXME: Security check with the port found in the inner packet
+
+    csum = *inner_L4_checksum; //Initial checksum of the inner L4 header
+    //FIXME: Shouldn't we remove ip addresses from there ?
+
+    inner_ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (inner_ip4->tos << 20));
+    inner_ip6->payload_length = u16_net_add(inner_ip4->length, - sizeof(*inner_ip4));
+    inner_ip6->hop_limit = inner_ip4->ttl;
+    inner_ip6->protocol = inner_ip4->protocol;
+
+    //Note that the source address is within the domain
+    //while the destination address is the one outside the domain
+    ip4_map_t_embedded_address(d, &inner_ip6->dst_address, &inner_ip4->dst_address);
+    inner_ip6->src_address.as_u64[0] = map_get_pfx_net(d, inner_ip4->src_address.as_u32, recv_port);
+    inner_ip6->src_address.as_u64[1] = map_get_sfx_net(d, inner_ip4->src_address.as_u32, recv_port);
+
+    if (PREDICT_FALSE(inner_frag != NULL)) {
+      inner_frag->next_hdr = inner_ip6->protocol;
+      inner_frag->identification = inner_frag_id;
+      inner_frag->rsv = 0;
+      inner_frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(inner_frag_offset, inner_frag_more);
+      inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      inner_ip6->payload_length = clib_host_to_net_u16(
+        clib_net_to_host_u16(inner_ip6->payload_length) + sizeof(*inner_frag));
+    }
+
+    /* Add the new IPv6 pseudo-header addresses to the inner L4 checksum */
+    csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[0]);
+    csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[1]);
+    csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[0]);
+    csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[1]);
+    *inner_L4_checksum = ip_csum_fold(csum);
+
+  } else {
+    /* No inner packet: just swap the single v4 header for a v6 one */
+    vlib_buffer_advance(p, sizeof(*ip4) - sizeof(*ip6));
+    ip6 = vlib_buffer_get_current(p);
+    ip6->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip4->length) - sizeof(*ip4));
+  }
+
+  //Translate outer IPv6
+  ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip4->tos << 20));
+
+  ip6->hop_limit = ip4->ttl;
+  ip6->protocol = IP_PROTOCOL_ICMP6;
+
+  ip4_map_t_embedded_address(d, &ip6->src_address, &ip4->src_address);
+  ip6->dst_address.as_u64[0] = map_get_pfx_net(d, ip4->dst_address.as_u32, recv_port);
+  ip6->dst_address.as_u64[1] = map_get_sfx_net(d, ip4->dst_address.as_u32, recv_port);
+
+  //Truncate when the packet exceeds the minimal IPv6 MTU
+  if (p->current_length > 1280) {
+    ip6->payload_length = clib_host_to_net_u16(1280 - sizeof(*ip6));
+    p->current_length = 1280; //Looks too simple to be correct...
+  }
+
+  //TODO: We could do an easy diff-checksum for echo requests/replies
+  //Recompute ICMP checksum
+  icmp->checksum = 0;
+  csum = ip_csum_with_carry(0, ip6->payload_length);
+  csum = ip_csum_with_carry(csum, clib_host_to_net_u16(ip6->protocol));
+  csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[0]);
+  csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[1]);
+  csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[0]);
+  csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[1]);
+  csum = ip_incremental_checksum(csum, icmp, clib_net_to_host_u16(ip6->payload_length));
+  icmp->checksum = ~ip_csum_fold (csum);
+}
+
+/*
+ * Node worker: MAP-T translation of IPv4 ICMP packets towards IPv6.
+ * Each buffer arrives with an ip4_mapt_pseudo_header_t prepended by the
+ * ip4-map-t classifier; it is skipped here and _ip4_map_t_icmp performs
+ * the actual header/ICMP translation.  Packets larger than the domain
+ * MTU are diverted to the IPv6 fragmentation node.
+ */
+static uword
+ip4_map_t_icmp (vlib_main_t *vm,
+                vlib_node_runtime_t *node,
+                vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_icmp_node.index);
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  vlib_combined_counter_main_t *cm = map_main.domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip4_mapt_icmp_next_t next0;
+      u8 error0;
+      map_domain_t *d0;
+      u16 len0;
+
+      next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP;
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+      error0 = MAP_ERROR_NONE;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      vlib_buffer_advance(p0, sizeof(ip4_mapt_pseudo_header_t)); //The pseudo-header is not used
+      //Byte count for the TX counter is taken from the IPv4 length before translation
+      len0 = clib_net_to_host_u16(((ip4_header_t *)vlib_buffer_get_current(p0))->length);
+      d0 = pool_elt_at_index(map_main.domains, vnet_buffer(p0)->map_t.map_domain_index);
+      _ip4_map_t_icmp(d0, p0, &error0);
+
+      //Divert to IPv6 fragmentation when the translated packet exceeds the domain MTU
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+        next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
+      }
+      //Only successfully translated packets are accounted in the domain TX counter
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        len0);
+      } else {
+        next0 = IP4_MAPT_ICMP_NEXT_DROP;
+      }
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/*
+ * Node worker: translates non-first IPv4 fragments to IPv6 fragments.
+ * The first fragment's destination port was cached by ip4-map-t, so the
+ * pseudo-header already carries the translated addresses; this node only
+ * rewrites the L3 header and synthesizes the IPv6 fragment header.
+ */
+static uword
+ip4_map_t_fragmented (vlib_main_t *vm,
+                      vlib_node_runtime_t *node,
+                      vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      ip6_frag_hdr_t *frag0;
+      ip4_mapt_pseudo_header_t *pheader0;
+      ip4_mapt_fragmented_next_t next0;
+
+      next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+
+      //Accessing pseudo header
+      pheader0 = vlib_buffer_get_current(p0);
+      vlib_buffer_advance(p0, sizeof(*pheader0));
+
+      //Accessing ip4 header
+      //IPv6 header + fragment header overlay the space freed by the smaller IPv4 header
+      ip40 = vlib_buffer_get_current(p0);
+      frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0));
+      ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0) - sizeof(*ip60));
+      vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+
+      //We know that the protocol was one of ICMP, TCP or UDP
+      //because the first fragment was found and cached
+      frag0->next_hdr = (ip40->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip40->protocol;
+      frag0->identification = frag_id_4to6(ip40->fragment_id);
+      frag0->rsv = 0;
+      frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(
+          ip4_get_fragment_offset(ip40),
+          clib_net_to_host_u16(ip40->flags_and_fragment_offset) & IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+      ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20));
+      //IPv6 payload = IPv4 payload + the new fragment header
+      ip60->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip40->length) - sizeof(*ip40) + sizeof(*frag0));
+      ip60->hop_limit = ip40->ttl;
+      ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+      ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+      ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+      ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+
+      //Divert to IPv6 fragmentation when the translated packet exceeds the domain MTU
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+        next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/*
+ * Node worker: MAP-T translation of IPv4 TCP/UDP packets to IPv6.
+ * The L4 checksum is updated incrementally: the IPv4 pseudo-header
+ * addresses are subtracted and the IPv6 ones added, so the payload is
+ * never re-summed.  A zero IPv4 UDP checksum (legal over IPv4, illegal
+ * over IPv6) is fully computed first.  First fragments keep their L4
+ * header and additionally receive an IPv6 fragment header.
+ */
+static uword
+ip4_map_t_tcp_udp(vlib_main_t *vm,
+                  vlib_node_runtime_t *node,
+                  vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+    while (n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      ip4_header_t *ip40, *ip41;
+      ip6_header_t *ip60, *ip61;
+      ip_csum_t csum0, csum1;
+      u16 *checksum0, *checksum1;
+      ip6_frag_hdr_t *frag0, *frag1;
+      u32 frag_id0, frag_id1;
+      ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+      ip4_mapt_tcp_udp_next_t next0, next1;
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next +=2;
+      n_left_to_next -= 2;
+
+      next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+      next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+
+      //Accessing pseudo header
+      pheader0 = vlib_buffer_get_current(p0);
+      pheader1 = vlib_buffer_get_current(p1);
+      vlib_buffer_advance(p0, sizeof(*pheader0));
+      vlib_buffer_advance(p1, sizeof(*pheader1));
+
+      //Accessing ip4 header
+      //checksum_offset was set by the classifier (TCP: 36, UDP: 26 from the IPv4 header)
+      ip40 = vlib_buffer_get_current(p0);
+      ip41 = vlib_buffer_get_current(p1);
+      checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset);
+      checksum1 = (u16 *) u8_ptr_add(ip41, vnet_buffer(p1)->map_t.checksum_offset);
+
+      //UDP checksum is optional over IPv4 but mandatory for IPv6
+      //We do not check udp->length sanity but use our safe computed value instead
+      if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) {
+        u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40);
+        udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40));
+        ip_csum_t csum;
+        csum = ip_incremental_checksum(0, udp, udp_len);
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len));
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP));
+        csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address)));
+        *checksum0 = ~ip_csum_fold(csum);
+      }
+      if (PREDICT_FALSE(!*checksum1 && ip41->protocol == IP_PROTOCOL_UDP)) {
+        u16 udp_len = clib_host_to_net_u16(ip41->length) - sizeof(*ip40);
+        udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip41, sizeof(*ip40));
+        ip_csum_t csum;
+        csum = ip_incremental_checksum(0, udp, udp_len);
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len));
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP));
+        csum = ip_csum_with_carry(csum, *((u64 *)(&ip41->src_address)));
+        *checksum1 = ~ip_csum_fold(csum);
+      }
+
+      //Remove the IPv4 pseudo-header addresses from the L4 checksum
+      csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32);
+      csum1 = ip_csum_sub_even(*checksum1, ip41->src_address.as_u32);
+      csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32);
+      csum1 = ip_csum_sub_even(csum1, ip41->dst_address.as_u32);
+
+      // Deal with fragmented packets
+      if (PREDICT_FALSE(ip40->flags_and_fragment_offset &
+                        clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) {
+        ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+        frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0));
+        frag_id0 = frag_id_4to6(ip40->fragment_id);
+        vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+      } else {
+        ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60));
+        vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60));
+        frag0 = NULL;
+      }
+
+      if (PREDICT_FALSE(ip41->flags_and_fragment_offset &
+                        clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) {
+        ip61 = (ip6_header_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+        frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*frag0));
+        frag_id1 = frag_id_4to6(ip41->fragment_id);
+        vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+      } else {
+        ip61 = (ip6_header_t *) (((u8 *)ip41) + sizeof(*ip40) - sizeof(*ip60));
+        vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60));
+        frag1 = NULL;
+      }
+
+      ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20));
+      ip61->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip41->tos << 20));
+      ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40));
+      ip61->payload_length = u16_net_add(ip41->length, - sizeof(*ip40));
+      ip60->hop_limit = ip40->ttl;
+      ip61->hop_limit = ip41->ttl;
+      ip60->protocol = ip40->protocol;
+      ip61->protocol = ip41->protocol;
+
+      if (PREDICT_FALSE(frag0 != NULL)) {
+        frag0->next_hdr = ip60->protocol;
+        frag0->identification = frag_id0;
+        frag0->rsv = 0;
+        frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1);
+        ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+        ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0));
+      }
+
+      if (PREDICT_FALSE(frag1 != NULL)) {
+        frag1->next_hdr = ip61->protocol;
+        frag1->identification = frag_id1;
+        frag1->rsv = 0;
+        frag1->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1);
+        ip61->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+        ip61->payload_length = u16_net_add(ip61->payload_length, sizeof(*frag0));
+      }
+
+      //Finally copying the address
+      ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+      ip61->dst_address.as_u64[0] = pheader1->daddr.as_u64[0];
+      ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+      ip61->dst_address.as_u64[1] = pheader1->daddr.as_u64[1];
+      ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+      ip61->src_address.as_u64[0] = pheader1->saddr.as_u64[0];
+      ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+      ip61->src_address.as_u64[1] = pheader1->saddr.as_u64[1];
+
+      //Add the IPv6 pseudo-header addresses to the L4 checksum
+      csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]);
+      csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[0]);
+      csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]);
+      csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[1]);
+      csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]);
+      csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[0]);
+      csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]);
+      csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[1]);
+      *checksum0 = ip_csum_fold(csum0);
+      *checksum1 = ip_csum_fold(csum1);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+        next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+      }
+
+      if(vnet_buffer(p1)->map_t.mtu < p1->current_length) {
+        vnet_buffer(p1)->ip_frag.header_offset = 0;
+        vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu;
+        vnet_buffer(p1)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+        next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0, pi1,
+                                      next0, next1);
+    }
+#endif
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      ip_csum_t csum0;
+      u16 *checksum0;
+      ip6_frag_hdr_t *frag0;
+      u32 frag_id0;
+      ip4_mapt_pseudo_header_t *pheader0;
+      ip4_mapt_tcp_udp_next_t next0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP;
+      p0 = vlib_get_buffer(vm, pi0);
+
+      //Accessing pseudo header
+      pheader0 = vlib_buffer_get_current(p0);
+      vlib_buffer_advance(p0, sizeof(*pheader0));
+
+      //Accessing ip4 header
+      ip40 = vlib_buffer_get_current(p0);
+      checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset);
+
+      //UDP checksum is optional over IPv4 but mandatory for IPv6
+      //We do not check udp->length sanity but use our safe computed value instead
+      if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) {
+        u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40);
+        udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40));
+        ip_csum_t csum;
+        csum = ip_incremental_checksum(0, udp, udp_len);
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len));
+        csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP));
+        csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address)));
+        *checksum0 = ~ip_csum_fold(csum);
+      }
+
+      //Remove the IPv4 pseudo-header addresses from the L4 checksum
+      csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32);
+      csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32);
+
+      // Deal with fragmented packets
+      if (PREDICT_FALSE(ip40->flags_and_fragment_offset &
+                        clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) {
+        ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+        frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0));
+        frag_id0 = frag_id_4to6(ip40->fragment_id);
+        vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));
+      } else {
+        ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60));
+        vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60));
+        frag0 = NULL;
+      }
+
+      ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20));
+      ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40));
+      ip60->hop_limit = ip40->ttl;
+      ip60->protocol = ip40->protocol;
+
+      if (PREDICT_FALSE(frag0 != NULL)) {
+        frag0->next_hdr = ip60->protocol;
+        frag0->identification = frag_id0;
+        frag0->rsv = 0;
+        frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1);
+        ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+        ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0));
+      }
+
+      //Finally copying the address
+      ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
+      ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
+      ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
+      ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];
+
+      //Add the IPv6 pseudo-header addresses to the L4 checksum
+      csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]);
+      csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]);
+      csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]);
+      csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]);
+      *checksum0 = ip_csum_fold(csum0);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
+        next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  return frame->n_vectors;
+}
+
+/*
+ * Classify an incoming IPv4 packet for MAP-T translation.
+ *
+ * Sets *next0 to the per-protocol translation node, *dst_port0 to the
+ * destination L4 port (network order, as i32; -1 when unavailable), and
+ * *error0 on malformed/unsupported packets.  Also records the L4
+ * checksum offset in the buffer metadata for the tcp-udp node.
+ */
+static_always_inline void
+ip4_map_t_classify(vlib_buffer_t *p0, map_domain_t *d0, ip4_header_t *ip40, u16 ip4_len0,
+                   i32 *dst_port0, u8 *error0, ip4_mapt_next_t *next0)
+{
+  if (PREDICT_FALSE(ip4_get_fragment_offset(ip40))) {
+    //Non-first fragment: the port comes from the fragment cache
+    *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED;
+    if(d0->ea_bits_len == 0 && d0->rules) {
+      *dst_port0 = 0;
+    } else {
+      *dst_port0 = ip4_map_fragment_get_port(ip40);
+      *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0;
+    }
+  } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_TCP)) {
+    vnet_buffer(p0)->map_t.checksum_offset = 36; //20 (IPv4) + 16 (TCP checksum offset)
+    *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+    *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0;
+    *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2));
+  } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_UDP)) {
+    vnet_buffer(p0)->map_t.checksum_offset = 26; //20 (IPv4) + 6 (UDP checksum offset)
+    *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP;
+    *error0 = ip4_len0 < 28 ? MAP_ERROR_MALFORMED : *error0;
+    *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2));
+  } else if (ip40->protocol == IP_PROTOCOL_ICMP) {
+    *next0 = IP4_MAPT_NEXT_MAPT_ICMP;
+    if(d0->ea_bits_len == 0 && d0->rules)
+      *dst_port0 = 0;
+    //Echo request/reply are identified by the ICMP *type* field (8/0, RFC 792).
+    //The original compared the code field, which is 0 (== ICMP4_echo_reply)
+    //for most ICMP messages and never 8 for echoes.
+    else if (((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->type == ICMP4_echo_reply ||
+        ((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->type == ICMP4_echo_request)
+      *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 6));
+  } else {
+    *error0 = MAP_ERROR_BAD_PROTOCOL;
+  }
+}
+
+/*
+ * Entry node for IPv4-to-IPv6 MAP-T translation.
+ * Validates the IPv4 header, classifies the packet per protocol,
+ * prepends an ip4_mapt_pseudo_header_t carrying the translated IPv6
+ * addresses, caches first-fragment ports for later fragments, and
+ * dispatches to the per-protocol translation nodes.
+ */
+static uword
+ip4_map_t (vlib_main_t *vm,
+           vlib_node_runtime_t *node,
+           vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_node.index);
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  vlib_combined_counter_main_t *cm = map_main.domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP4_MAP_T_DUAL_LOOP
+    while (n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      ip4_header_t *ip40, *ip41;
+      map_domain_t *d0, *d1;
+      ip4_mapt_next_t next0, next1;
+      u16 ip4_len0, ip4_len1;
+      u8 error0, error1;
+      i32 dst_port0, dst_port1;
+      ip4_mapt_pseudo_header_t *pheader0, *pheader1;
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next +=2;
+      n_left_to_next -= 2;
+      error0 = MAP_ERROR_NONE;
+      error1 = MAP_ERROR_NONE;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip40 = vlib_buffer_get_current(p0);
+      ip41 = vlib_buffer_get_current(p1);
+      ip4_len0 = clib_host_to_net_u16(ip40->length);
+      ip4_len1 = clib_host_to_net_u16(ip41->length);
+
+      //Only plain 20-byte IPv4 headers are supported (no options)
+      if (PREDICT_FALSE(p0->current_length < ip4_len0 ||
+                        ip40->ip_version_and_header_length != 0x45)) {
+        error0 = MAP_ERROR_UNKNOWN;
+        next0 = IP4_MAPT_NEXT_DROP;
+      }
+
+      if (PREDICT_FALSE(p1->current_length < ip4_len1 ||
+                        ip41->ip_version_and_header_length != 0x45)) {
+        error1 = MAP_ERROR_UNKNOWN;
+        next1 = IP4_MAPT_NEXT_DROP;
+      }
+
+      d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX],
+                              &vnet_buffer(p0)->map_t.map_domain_index);
+      d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX],
+                              &vnet_buffer(p1)->map_t.map_domain_index);
+
+      vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+      vnet_buffer(p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+      dst_port0 = -1;
+      dst_port1 = -1;
+
+      ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0);
+      ip4_map_t_classify(p1, d1, ip41, ip4_len1, &dst_port1, &error1, &next1);
+
+      //Add MAP-T pseudo header in front of the packet
+      vlib_buffer_advance(p0, - sizeof(*pheader0));
+      vlib_buffer_advance(p1, - sizeof(*pheader1));
+      pheader0 = vlib_buffer_get_current(p0);
+      pheader1 = vlib_buffer_get_current(p1);
+
+      //Save addresses within the packet
+      ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address);
+      ip4_map_t_embedded_address(d1, &pheader1->saddr, &ip41->src_address);
+      pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0);
+      pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0);
+      pheader1->daddr.as_u64[0] = map_get_pfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1);
+      pheader1->daddr.as_u64[1] = map_get_sfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1);
+
+      //Cache the first fragment's port so later fragments can be translated
+      if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) &&
+                        (d0->ea_bits_len != 0 || !d0->rules) &&
+                        ip4_map_fragment_cache(ip40, dst_port0))) {
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      }
+
+      if (PREDICT_FALSE(ip4_is_first_fragment(ip41) && (dst_port1 != -1) &&
+                        (d1->ea_bits_len != 0 || !d1->rules) &&
+                        ip4_map_fragment_cache(ip41, dst_port1))) {
+        error1 = MAP_ERROR_FRAGMENT_MEMORY;
+      }
+
+      //ICMP packets are counted in the icmp node after translation
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip40->length));
+      }
+
+      if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index,
+                                        vnet_buffer(p1)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip41->length));
+      }
+
+      next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+      next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1;
+      p0->error = error_node->errors[error0];
+      p1->error = error_node->errors[error1];
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                      n_left_to_next, pi0, pi1, next0, next1);
+    }
+#endif
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip4_header_t *ip40;
+      map_domain_t *d0;
+      ip4_mapt_next_t next0;
+      u16 ip4_len0;
+      u8 error0;
+      i32 dst_port0;
+      ip4_mapt_pseudo_header_t *pheader0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+      error0 = MAP_ERROR_NONE;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip40 = vlib_buffer_get_current(p0);
+      ip4_len0 = clib_host_to_net_u16(ip40->length);
+      if (PREDICT_FALSE(p0->current_length < ip4_len0 ||
+                        ip40->ip_version_and_header_length != 0x45)) {
+        error0 = MAP_ERROR_UNKNOWN;
+        next0 = IP4_MAPT_NEXT_DROP;
+      }
+
+      d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX],
+                              &vnet_buffer(p0)->map_t.map_domain_index);
+
+      vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+      dst_port0 = -1;
+      ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0);
+
+      //Add MAP-T pseudo header in front of the packet
+      vlib_buffer_advance(p0, - sizeof(*pheader0));
+      pheader0 = vlib_buffer_get_current(p0);
+
+      //Save addresses within the packet
+      ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address);
+      pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0);
+      pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0);
+
+      //It is important to cache at this stage because the result might be necessary
+      //for packets within the same vector.
+      //Actually, this approach even provides some limited out-of-order fragments support
+      if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) &&
+                        (d0->ea_bits_len != 0 || !d0->rules) &&
+                        ip4_map_fragment_cache(ip40, dst_port0))) {
+        //Use MAP_ERROR_FRAGMENT_MEMORY for consistency with the dual-loop path above
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      }
+
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip40->length));
+      }
+
+      next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0;
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                       to_next, n_left_to_next, pi0,
+                                       next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/* Error counter strings, expanded from the shared foreach_map_error list. */
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+  foreach_map_error
+#undef _
+};
+
+/* Translates non-first IPv4 fragments using the cached first-fragment port. */
+VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
+  .function = ip4_map_t_fragmented,
+  .name = "ip4-map-t-fragmented",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_t_error_strings,
+
+  .n_next_nodes = IP4_MAPT_FRAGMENTED_N_NEXT,
+  .next_nodes = {
+      [IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP] = "ip6-lookup",
+      [IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+      [IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* Translates IPv4 ICMP (including embedded packets in errors) to ICMPv6. */
+VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
+  .function = ip4_map_t_icmp,
+  .name = "ip4-map-t-icmp",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_t_error_strings,
+
+  .n_next_nodes = IP4_MAPT_ICMP_N_NEXT,
+  .next_nodes = {
+      [IP4_MAPT_ICMP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+      [IP4_MAPT_ICMP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+      [IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* Translates IPv4 TCP/UDP headers with incremental checksum update. */
+VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
+  .function = ip4_map_t_tcp_udp,
+  .name = "ip4-map-t-tcp-udp",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_t_error_strings,
+
+  .n_next_nodes = IP4_MAPT_TCP_UDP_N_NEXT,
+  .next_nodes = {
+      [IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP] = "ip6-lookup",
+      [IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG] = IP6_FRAG_NODE_NAME,
+      [IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* Entry point: classifies IPv4 packets and fans out to the nodes above. */
+VLIB_REGISTER_NODE(ip4_map_t_node) = {
+  .function = ip4_map_t,
+  .name = "ip4-map-t",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_t_error_strings,
+
+  .n_next_nodes = IP4_MAPT_N_NEXT,
+  .next_nodes = {
+      [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
+      [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
+      [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
+      [IP4_MAPT_NEXT_DROP] = "error-drop",
+  },
+};
diff --git a/vnet/vnet/map/ip4_sixrd.c b/vnet/vnet/map/ip4_sixrd.c
new file mode 100644
index 00000000000..1e83ce831e0
--- /dev/null
+++ b/vnet/vnet/map/ip4_sixrd.c
@@ -0,0 +1,127 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+#include "sixrd.h"
+
+vlib_node_registration_t ip4_sixrd_node;
+
+/* Dispositions for packets leaving the ip4-sixrd decapsulation node. */
+typedef enum {
+  IP4_SIXRD_NEXT_IP6_LOOKUP,
+  IP4_SIXRD_NEXT_DROP,
+  IP4_SIXRD_N_NEXT,
+} ip4_sixrd_next_t;
+
+/*
+ * ip4_sixrd_sec_check
+ *
+ * Inbound anti-spoofing check: the outer IPv4 source address must equal
+ * the IPv4 address embedded (per the 6rd domain mapping) in the inner
+ * IPv6 source address.  On mismatch, *error is set to SIXRD_ERROR_SEC_CHECK.
+ */
+static_always_inline void
+ip4_sixrd_sec_check (sixrd_domain_t *d, ip4_address_t sa4, ip6_address_t sa6, u8 *error)
+{
+  u32 a = sixrd_get_addr(d, sa6.as_u64[0]);
+  //NOTE(review): per-packet clib_warning in the datapath looks like a debug
+  //leftover; consider removing or gating it before production use.
+  clib_warning("Security check: %U %U", format_ip4_address, &a, format_ip4_address, &sa4);
+  //Reuse the already-computed address instead of calling sixrd_get_addr() twice
+  if (PREDICT_FALSE(a != sa4.as_u32))
+    *error = SIXRD_ERROR_SEC_CHECK;
+}
+
+/*
+ * ip4_sixrd
+ *
+ * 6rd decapsulation node: strips the outer IPv4 header from IPv6-in-IPv4
+ * packets, looks up the 6rd domain from the inner IPv6 source address,
+ * runs the inbound security check, and forwards to ip6-lookup.
+ */
+static uword
+ip4_sixrd (vlib_main_t *vm,
+           vlib_node_runtime_t *node,
+           vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_sixrd_node.index);
+  u32 decap = 0;
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Single loop */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0 = SIXRD_ERROR_NONE;
+      sixrd_domain_t *d0 = 0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      u32 sixrd_domain_index0 = ~0;
+      u32 next0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip40 = vlib_buffer_get_current(p0);
+
+      /* Throw away anything that isn't IP in IP. */
+      //60 = 20-byte IPv4 header + 40-byte minimum IPv6 header
+      if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_IPV6 && clib_net_to_host_u16(ip40->length) >= 60)) {
+        vlib_buffer_advance(p0, sizeof(ip4_header_t));
+        ip60 = vlib_buffer_get_current(p0);
+        d0 = ip4_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip6_address_t *)&ip60->src_address,
+                                  &sixrd_domain_index0, &error0);
+      } else {
+        error0 = SIXRD_ERROR_BAD_PROTOCOL;
+      }
+      if (d0) {
+        /* SIXRD inbound security check */
+        //d0 is only set on the branch above, so ip60 is valid here
+        ip4_sixrd_sec_check(d0, ip40->src_address, ip60->src_address, &error0);
+      }
+
+      next0 = error0 == SIXRD_ERROR_NONE ? IP4_SIXRD_NEXT_IP6_LOOKUP : IP4_SIXRD_NEXT_DROP;
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->sixrd_domain_index = sixrd_domain_index0;
+      }
+
+      p0->error = error_node->errors[error0];
+      if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) decap++;
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  vlib_node_increment_counter(vm, ip4_sixrd_node.index, SIXRD_ERROR_DECAPSULATED, decap);
+
+  return frame->n_vectors;
+}
+
+/* Error counter strings, expanded from the shared foreach_sixrd_error list. */
+static char *sixrd_error_strings[] = {
+#define _(sym,string) string,
+  foreach_sixrd_error
+#undef _
+};
+
+/* 6rd decapsulation node registration. */
+VLIB_REGISTER_NODE(ip4_sixrd_node) = {
+  .function = ip4_sixrd,
+  .name = "ip4-sixrd",
+  .vector_size = sizeof(u32),
+  .format_trace = format_sixrd_trace,
+  .n_errors = SIXRD_N_ERROR,
+  .error_strings = sixrd_error_strings,
+  .n_next_nodes = IP4_SIXRD_N_NEXT,
+  .next_nodes = {
+    [IP4_SIXRD_NEXT_IP6_LOOKUP] = "ip6-lookup",
+    [IP4_SIXRD_NEXT_DROP] = "error-drop",
+  },
+};
diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c
new file mode 100644
index 00000000000..e803af9007a
--- /dev/null
+++ b/vnet/vnet/map/ip6_map.c
@@ -0,0 +1,966 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+/* Next-node indices for the ip6-map node (must match the .next_nodes
+ * table in the VLIB_REGISTER_NODE(ip6_map_node) block below). */
+enum ip6_map_next_e {
+  IP6_MAP_NEXT_IP4_LOOKUP,
+#ifdef MAP_SKIP_IP6_LOOKUP
+  IP6_MAP_NEXT_IP4_REWRITE,
+#endif
+  IP6_MAP_NEXT_IP6_REASS,
+  IP6_MAP_NEXT_IP4_REASS,
+  IP6_MAP_NEXT_IP4_FRAGMENT,
+  IP6_MAP_NEXT_IP6_ICMP_RELAY,
+  IP6_MAP_NEXT_IP6_LOCAL,
+  IP6_MAP_NEXT_DROP,
+  IP6_MAP_N_NEXT,
+};
+
+/* Next-node indices for the ip6-map-ip6-reass node. */
+enum ip6_map_ip6_reass_next_e {
+  IP6_MAP_IP6_REASS_NEXT_IP6_MAP,
+  IP6_MAP_IP6_REASS_NEXT_DROP,
+  IP6_MAP_IP6_REASS_N_NEXT,
+};
+
+/* Next-node indices for the ip6-map-ip4-reass node. */
+enum ip6_map_ip4_reass_next_e {
+  IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP,
+  IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT,
+  IP6_MAP_IP4_REASS_NEXT_DROP,
+  IP6_MAP_IP4_REASS_N_NEXT,
+};
+
+/* Next-node indices for the ip6-map-icmp-relay node. */
+enum ip6_icmp_relay_next_e {
+  IP6_ICMP_RELAY_NEXT_IP4_LOOKUP,
+  IP6_ICMP_RELAY_NEXT_DROP,
+  IP6_ICMP_RELAY_N_NEXT,
+};
+
+/* Forward declarations: the reassembly nodes are referenced by the drop
+ * helpers and node functions below, before their VLIB_REGISTER_NODE blocks. */
+vlib_node_registration_t ip6_map_ip4_reass_node;
+vlib_node_registration_t ip6_map_ip6_reass_node;
+static vlib_node_registration_t ip6_map_icmp_relay_node;
+
+/* Packet trace record for the ip6-map-ip4-reass node. */
+typedef struct {
+  u32 map_domain_index;  /* MAP domain the packet matched (~0 if none) */
+  u16 port;              /* L4 port recovered for the fragment flow */
+  u8 cached;             /* 1 if the fragment was buffered, 0 if forwarded */
+} map_ip6_map_ip4_reass_trace_t;
+
+/* Trace formatter for ip6-map-ip4-reass.
+ * va_args are positional: vlib_main_t *, vlib_node_t *, then the trace record. */
+u8 *
+format_ip6_map_ip4_reass_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+  map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *);
+  return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index,
+                t->port, t->cached?"cached":"forwarded");
+}
+
+/* Packet trace record for the ip6-map-ip6-reass node. */
+typedef struct {
+  u16 offset;    /* fragment offset in bytes */
+  u16 frag_len;  /* payload length of this fragment */
+  u8 out;        /* 1 when the fragment is released, 0 when it enters the node */
+} map_ip6_map_ip6_reass_trace_t;
+
+/* Trace formatter for ip6-map-ip6-reass.
+ * va_args are positional: vlib_main_t *, vlib_node_t *, then the trace record. */
+u8 *
+format_ip6_map_ip6_reass_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+  map_ip6_map_ip6_reass_trace_t *t = va_arg (*args, map_ip6_map_ip6_reass_trace_t *);
+  return format(s, "Offset: %d Fragment length: %d Status: %s", t->offset, t->frag_len, t->out?"out":"in");
+}
+
+/*
+ * ip6_map_sec_check
+ *
+ * MAP-E inbound anti-spoof check: recompute the expected IPv6 source
+ * address (prefix + suffix) from the inner IPv4 source address and L4
+ * port via the domain's MAP rules, and compare it against the actual
+ * outer IPv6 source.  Returns true when they match.
+ */
+static_always_inline bool
+ip6_map_sec_check (map_domain_t *d, u16 port, ip4_header_t *ip4, ip6_header_t *ip6)
+{
+  u16 sp4 = clib_net_to_host_u16(port);
+  u32 sa4 = clib_net_to_host_u32(ip4->src_address.as_u32);
+  u64 sal6 = map_get_pfx(d, sa4, sp4);  /* expected high 64 bits */
+  u64 sar6 = map_get_sfx(d, sa4, sp4);  /* expected low 64 bits */
+
+  if (PREDICT_FALSE(sal6 != clib_net_to_host_u64(ip6->src_address.as_u64[0]) ||
+		    sar6 != clib_net_to_host_u64(ip6->src_address.as_u64[1])))
+    return (false);
+  return (true);
+}
+
+/*
+ * ip6_map_security_check
+ *
+ * Wrapper around ip6_map_sec_check that decides whether/how to apply it:
+ *  - only for shared-address domains (ea_bits_len or explicit rules, with
+ *    a PSID), since otherwise the port is not part of the mapping;
+ *  - non-fragments: extract the sender port and verify (when mm->sec_check
+ *    is enabled); a packet without an extractable port is a bad protocol;
+ *  - fragments: optionally diverted to virtual reassembly
+ *    (mm->sec_check_frag) so the check can run once the port is known.
+ * Updates *next and *error in place.
+ */
+static_always_inline void
+ip6_map_security_check (map_domain_t *d, ip4_header_t *ip4, ip6_header_t *ip6, u32 *next, u8 *error)
+{
+  map_main_t *mm = &map_main;
+  if (d->ea_bits_len || d->rules) {
+    if (d->psid_length > 0) {
+      if (!ip4_is_fragment(ip4)) {
+	u16 port = ip4_map_get_port(ip4, MAP_SENDER);
+	if (port) {
+	  if (mm->sec_check)
+	    *error = ip6_map_sec_check(d, port, ip4, ip6) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;
+	} else {
+	  *error = MAP_ERROR_BAD_PROTOCOL;
+	}
+      } else {
+	*next = mm->sec_check_frag ? IP6_MAP_NEXT_IP4_REASS : *next;
+      }
+    }
+  }
+}
+
+/*
+ * ip6_map_ip4_lookup_bypass
+ *
+ * Optional fast path (MAP_SKIP_IP6_LOOKUP): if a pre-resolved IPv4
+ * adjacency is configured (mm->adj4_index > 0), stamp it into the buffer
+ * and return true so the caller can skip ip4-lookup and go straight to
+ * rewrite.  Multipath adjacencies are load-balanced by flow hash.
+ * Returns false when the bypass is not configured/compiled in.
+ */
+static_always_inline bool
+ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip)
+{
+#ifdef MAP_SKIP_IP6_LOOKUP
+  map_main_t *mm = &map_main;
+  u32 adj_index0 = mm->adj4_index;
+  if (adj_index0 > 0) {
+    ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+    ip_adjacency_t *adj = ip_get_adjacency(lm4, mm->adj4_index);
+    if (adj->n_adj > 1) {
+      /* ECMP: spread flows across the adjacency block (n_adj assumed power of 2) */
+      u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT);
+      adj_index0 += (hash_c0 & (adj->n_adj - 1));
+    }
+    vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0;
+    return (true);
+  }
+#endif
+  return (false);
+}
+
+
+/*
+ * ip6_map
+ *
+ * IPv6-side input node of a MAP-E domain.  Per-packet dispatch:
+ *  - IPv4-in-IPv6 (ip-in-ip)  -> inbound security check, then ip4-lookup
+ *    (or the ip4-rewrite bypass, or ip4-frag when the inner packet
+ *    exceeds the domain MTU)
+ *  - ICMPv6 echo request/reply -> ip6-local
+ *  - other ICMPv6              -> ip6-map-icmp-relay
+ *  - IPv6 fragment             -> ip6-map-ip6-reass (virtual reassembly)
+ *  - anything else             -> drop with MAP_ERROR_BAD_PROTOCOL
+ *
+ * BUG FIX in this revision: in the dual loop, the p1 fragmentation path
+ * set vnet_buffer(p1)->ip_frag.mtu from d0->mtu (packet 0's domain)
+ * instead of d1->mtu — wrong MTU whenever the two packets in flight
+ * belong to different domains.  It now uses d1->mtu, matching the guard
+ * condition and the single-loop path.
+ */
+static uword
+ip6_map (vlib_main_t *vm,
+	 vlib_node_runtime_t *node,
+	 vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_node.index);
+  map_main_t *mm = &map_main;
+  vlib_combined_counter_main_t *cm = mm->domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Dual loop */
+    while (n_left_from > 4 && n_left_to_next > 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      u8 error0 = MAP_ERROR_NONE;
+      u8 error1 = MAP_ERROR_NONE;
+      map_domain_t *d0 = 0, *d1 = 0;
+      ip4_header_t *ip40, *ip41;
+      ip6_header_t *ip60, *ip61;
+      u16 port0 = 0, port1 = 0;  /* never assigned here; trace port is always 0 */
+      u32 map_domain_index0 = ~0, map_domain_index1 = ~0;
+      u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+      u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+      /* Prefetch next iteration. */
+      {
+	vlib_buffer_t *p2, *p3;
+
+	p2 = vlib_get_buffer(vm, from[2]);
+	p3 = vlib_get_buffer(vm, from[3]);
+
+	vlib_prefetch_buffer_header(p2, LOAD);
+	vlib_prefetch_buffer_header(p3, LOAD);
+
+	/* IPv6 + IPv4 header + 8 bytes of ULP */
+	CLIB_PREFETCH(p2->data, 68, LOAD);
+	CLIB_PREFETCH(p3->data, 68, LOAD);
+      }
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next +=2;
+      n_left_to_next -= 2;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip60 = vlib_buffer_get_current(p0);
+      ip61 = vlib_buffer_get_current(p1);
+      /* Advance past the outer IPv6 header; ip60/ip61 stay valid since
+       * only current_data moves, the data itself is untouched. */
+      vlib_buffer_advance(p0, sizeof(ip6_header_t));
+      vlib_buffer_advance(p1, sizeof(ip6_header_t));
+      ip40 = vlib_buffer_get_current(p0);
+      ip41 = vlib_buffer_get_current(p1);
+
+      /*
+       * Encapsulated IPv4 packet
+       *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+       *   - Lookup/Rewrite or Fragment node in case of packet > MTU
+       * Fragmented IPv6 packet
+       * ICMP IPv6 packet
+       *   - Error -> Pass to ICMPv6/ICMPv4 relay
+       *   - Info -> Pass to IPv6 local
+       * Anything else -> drop
+       */
+      if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
+	d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
+				&map_domain_index0, &error0);
+      } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+		 clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
+	icmp46_header_t *icmp = (void *)(ip60 + 1);
+	next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
+	  IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
+      } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
+	/* NOTE(review): unlike the single loop below, this branch does not
+	 * check that the fragment's next_hdr is ip-in-ip — confirm whether
+	 * the stricter single-loop condition should apply here too. */
+	next0 = IP6_MAP_NEXT_IP6_REASS;
+      } else {
+	error0 = MAP_ERROR_BAD_PROTOCOL;
+	next0 = IP6_MAP_NEXT_DROP;
+      }
+      if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) {
+	d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32,
+				&map_domain_index1, &error1);
+      } else if (ip61->protocol == IP_PROTOCOL_ICMP6 &&
+		 clib_net_to_host_u16(ip61->payload_length) > sizeof(icmp46_header_t)) {
+	icmp46_header_t *icmp = (void *)(ip61 + 1);
+	next1 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
+	  IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
+      } else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
+	next1 = IP6_MAP_NEXT_IP6_REASS;
+      } else {
+	error1 = MAP_ERROR_BAD_PROTOCOL;
+	next1 = IP6_MAP_NEXT_DROP;
+      }
+
+      if (d0) {
+	/* MAP inbound security check */
+	ip6_map_security_check(d0, ip40, ip60, &next0, &error0);
+
+	if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
+			 next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
+	  if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
+	    vnet_buffer(p0)->ip_frag.header_offset = 0;
+	    vnet_buffer(p0)->ip_frag.flags = 0;
+	    vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+	    vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+	    next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+	  } else {
+	    next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
+	  }
+	  vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
+					  clib_net_to_host_u16(ip40->length));
+	}
+      }
+      if (d1) {
+	/* MAP inbound security check */
+	ip6_map_security_check(d1, ip41, ip61, &next1, &error1);
+
+	if (PREDICT_TRUE(error1 == MAP_ERROR_NONE &&
+			 next1 == IP6_MAP_NEXT_IP4_LOOKUP)) {
+	  if (PREDICT_FALSE(d1->mtu && (clib_host_to_net_u16(ip41->length) > d1->mtu))) {
+	    vnet_buffer(p1)->ip_frag.header_offset = 0;
+	    vnet_buffer(p1)->ip_frag.flags = 0;
+	    vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+	    vnet_buffer(p1)->ip_frag.mtu = d1->mtu; /* BUG FIX: was d0->mtu (copy/paste from the p0 path) */
+	    next1 = IP6_MAP_NEXT_IP4_FRAGMENT;
+	  } else {
+	    next1 = ip6_map_ip4_lookup_bypass(p1, ip41) ? IP6_MAP_NEXT_IP4_REWRITE : next1;
+	  }
+	  vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index1, 1,
+					  clib_net_to_host_u16(ip41->length));
+	}
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+	map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+	tr->map_domain_index = map_domain_index0;
+	tr->port = port0;
+      }
+
+      if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) {
+	map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr));
+	tr->map_domain_index = map_domain_index1;
+	tr->port = port1;
+      }
+
+      p0->error = error_node->errors[error0];
+      p1->error = error_node->errors[error1];
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1);
+    }
+
+    /* Single loop */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0 = MAP_ERROR_NONE;
+      map_domain_t *d0 = 0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      i32 port0 = 0;
+      u32 map_domain_index0 = ~0;
+      u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      vlib_buffer_advance(p0, sizeof(ip6_header_t));
+      ip40 = vlib_buffer_get_current(p0);
+
+      /*
+       * Encapsulated IPv4 packet
+       *   - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled
+       *   - Lookup/Rewrite or Fragment node in case of packet > MTU
+       * Fragmented IPv6 packet
+       * ICMP IPv6 packet
+       *   - Error -> Pass to ICMPv6/ICMPv4 relay
+       *   - Info -> Pass to IPv6 local
+       * Anything else -> drop
+       */
+      if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) {
+	d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
+				&map_domain_index0, &error0);
+      } else if (ip60->protocol == IP_PROTOCOL_ICMP6 &&
+		 clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) {
+	icmp46_header_t *icmp = (void *)(ip60 + 1);
+	next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ?
+	  IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY;
+      } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION &&
+		 (((ip6_frag_hdr_t *)(ip60+1))->next_hdr == IP_PROTOCOL_IP_IN_IP)) {
+	next0 = IP6_MAP_NEXT_IP6_REASS;
+      } else {
+	error0 = MAP_ERROR_BAD_PROTOCOL;
+      }
+
+      if (d0) {
+	/* MAP inbound security check */
+	ip6_map_security_check(d0, ip40, ip60, &next0, &error0);
+
+	if (PREDICT_TRUE(error0 == MAP_ERROR_NONE &&
+			 next0 == IP6_MAP_NEXT_IP4_LOOKUP)) {
+	  if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) {
+	    vnet_buffer(p0)->ip_frag.header_offset = 0;
+	    vnet_buffer(p0)->ip_frag.flags = 0;
+	    vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+	    vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+	    next0 = IP6_MAP_NEXT_IP4_FRAGMENT;
+	  } else {
+	    next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? IP6_MAP_NEXT_IP4_REWRITE : next0;
+	  }
+	  vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
+					  clib_net_to_host_u16(ip40->length));
+	}
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+	map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+	tr->map_domain_index = map_domain_index0;
+	tr->port = (u16)port0;
+      }
+
+      next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP;
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  return frame->n_vectors;
+}
+
+
+/*
+ * ip6_map_ip6_reass_prepare
+ *
+ * Walk the fragments cached in reassembly state 'r' and, once the inner
+ * IPv4 header has been learned (from the first fragment), turn each
+ * releasable fragment into a standalone packet: patch in the IPv4 header,
+ * translate the IPv6 fragment header fields into IPv4 fragmentation
+ * fields, and queue the buffer on *fragments_ready for re-injection into
+ * ip6-map.  Fragments still waiting on neighbor data are left in r.
+ */
+static_always_inline void
+ip6_map_ip6_reass_prepare(vlib_main_t *vm, vlib_node_runtime_t *node, map_ip6_reass_t *r,
+                          u32 **fragments_ready, u32 **fragments_to_drop)
+{
+  ip4_header_t *ip40;
+  ip6_header_t *ip60;
+  ip6_frag_hdr_t *frag0;
+  vlib_buffer_t *p0;
+
+  /* First fragment not seen yet: inner IPv4 header is unknown, nothing to release. */
+  if(!r->ip4_header.ip_version_and_header_length)
+    return;
+
+  //The IP header is here, we need to check for packets
+  //that can be forwarded
+  int i;
+  for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) {
+    /* Skip empty slots and non-last fragments whose trailing 20 bytes
+     * (start of the next fragment) have not been captured yet. */
+    if (r->fragments[i].pi == ~0 ||
+        ((!r->fragments[i].next_data_len) && (r->fragments[i].next_data_offset != (0xffff))))
+      continue;
+
+    p0 = vlib_get_buffer(vm, r->fragments[i].pi);
+    ip60 = vlib_buffer_get_current(p0);
+    frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
+    ip40 = (ip4_header_t *)(frag0 + 1);
+
+    if (ip6_frag_hdr_offset(frag0)) {
+      //Not first fragment, add the IPv4 header
+      memcpy(ip40, &r->ip4_header, 20);
+    }
+
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+    r->forwarded += clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);
+#endif
+
+    if (ip6_frag_hdr_more(frag0)) {
+      //Not last fragment, we copy end of next
+      memcpy(u8_ptr_add(ip60, p0->current_length), r->fragments[i].next_data, 20);
+      p0->current_length += 20;
+      ip60->payload_length = u16_net_add(ip60->payload_length, 20);
+    }
+
+    /* Translate IPv6 fragment-header fields into IPv4 fragmentation fields
+     * (offsets add up when the inner IPv4 packet was itself a fragment). */
+    if (!ip4_is_fragment(ip40)) {
+      ip40->fragment_id = frag_id_6to4(frag0->identification);
+      ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip6_frag_hdr_offset(frag0));
+    } else {
+      ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip4_get_fragment_offset(ip40) + ip6_frag_hdr_offset(frag0));
+    }
+
+    if (ip6_frag_hdr_more(frag0))
+      ip40->flags_and_fragment_offset |= clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+    ip40->length = clib_host_to_net_u16(p0->current_length - sizeof(*ip60) - sizeof(*frag0));
+    ip40->checksum = ip4_header_checksum(ip40);
+
+    if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+      map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+      tr->offset = ip4_get_fragment_offset(ip40);
+      tr->frag_len = clib_net_to_host_u16(ip40->length) - sizeof(*ip40);
+      tr->out = 1;
+    }
+
+    /* Release the slot and hand the buffer to the ready list. */
+    vec_add1(*fragments_ready, r->fragments[i].pi);
+    r->fragments[i].pi = ~0;
+    r->fragments[i].next_data_len = 0;
+    r->fragments[i].next_data_offset = 0;
+    map_main.ip6_reass_buffered_counter--;
+
+    //TODO: Best solution would be that ip6_map handles extension headers
+    // and ignores atomic fragment. But in the meantime, let's just copy the header.
+
+    /* Rebuild a plain IPv6+IPv4 packet: slide the IPv6 header forward over
+     * the fragment header and restore the real payload protocol. */
+    u8 protocol = frag0->next_hdr;
+    memmove(u8_ptr_add(ip40, - sizeof(*ip60)), ip60, sizeof(*ip60));
+    ((ip6_header_t *)u8_ptr_add(ip40, - sizeof(*ip60)))->protocol = protocol;
+    vlib_buffer_advance(p0, sizeof(*frag0));
+  }
+}
+
+/* Drop a single buffer (by buffer index) through the ip6-map-ip6-reass
+ * node's error-drop next, so the drop is accounted against that node. */
+void
+map_ip6_drop_pi(u32 pi)
+{
+  vlib_main_t *vlib_main = vlib_get_main();
+  vlib_node_runtime_t *reass_runtime =
+      vlib_node_get_runtime(vlib_main, ip6_map_ip6_reass_node.index);
+  vlib_set_next_frame_buffer(vlib_main, reass_runtime,
+                             IP6_MAP_IP6_REASS_NEXT_DROP, pi);
+}
+
+/* Drop a single buffer (by buffer index) through the ip6-map-ip4-reass
+ * node's error-drop next, so the drop is accounted against that node. */
+void
+map_ip4_drop_pi(u32 pi)
+{
+  vlib_main_t *vlib_main = vlib_get_main();
+  vlib_node_runtime_t *reass_runtime =
+      vlib_node_get_runtime(vlib_main, ip6_map_ip4_reass_node.index);
+  vlib_set_next_frame_buffer(vlib_main, reass_runtime,
+                             IP6_MAP_IP4_REASS_NEXT_DROP, pi);
+}
+
+/*
+ * ip6_reass
+ *
+ * Virtual reassembly of IPv6-fragmented MAP-E packets: fragments are
+ * cached per (src, dst, id, next_hdr) flow; once the inner IPv4 header
+ * is known, releasable fragments are translated and re-injected into
+ * ip6-map via fragments_ready.  Nothing is fully reassembled — only the
+ * missing headers are reconstructed per fragment.
+ *
+ * TODO: We should count the number of successfully
+ * transmitted fragment bytes and compare that to the last fragment
+ * offset such that we can free the reassembly structure when all fragments
+ * have been forwarded.
+ */
+static uword
+ip6_map_ip6_reass (vlib_main_t *vm,
+                   vlib_node_runtime_t *node,
+                   vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index);
+  u32 *fragments_to_drop = NULL;
+  u32 *fragments_ready = NULL;
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Single loop */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0 = MAP_ERROR_NONE;
+      ip6_header_t *ip60;
+      ip6_frag_hdr_t *frag0;
+      u16 offset;
+      u16 next_offset;
+      u16 frag_len;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      frag0 = (ip6_frag_hdr_t *)(ip60 + 1);
+      /* Byte offset of this fragment; next_offset is where the following
+       * fragment should start, or 0xffff for the last fragment. */
+      offset = clib_host_to_net_u16(frag0->fragment_offset_and_more) & (~7);
+      frag_len = clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0);
+      next_offset = ip6_frag_hdr_more(frag0) ? (offset + frag_len) : (0xffff);
+
+      //FIXME: Support other extension headers, maybe
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->offset = offset;
+        tr->frag_len = frag_len;
+        tr->out = 0;
+      }
+
+      map_ip6_reass_lock();
+      map_ip6_reass_t *r = map_ip6_reass_get(&ip60->src_address, &ip60->dst_address,
+                                             frag0->identification, frag0->next_hdr, &fragments_to_drop);
+      //FIXME: Use better error codes
+      if (PREDICT_FALSE(!r)) {
+        // Could not create a caching entry
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      } else if (PREDICT_FALSE((frag_len <= 20 &&
+          (ip6_frag_hdr_more(frag0) || (!offset))))) {
+        //Very small fragment are restricted to the last one and
+        //can't be the first one
+        error0 = MAP_ERROR_FRAGMENT_MALFORMED;
+      } else if (map_ip6_reass_add_fragment(r, pi0, offset, next_offset, (u8 *)(frag0 + 1), frag_len)) {
+        map_ip6_reass_free(r, &fragments_to_drop);
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      } else {
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+        if (!ip6_frag_hdr_more(frag0))
+          r->expected_total = offset + frag_len;
+#endif
+        ip6_map_ip6_reass_prepare(vm, node, r, &fragments_ready, &fragments_to_drop);
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+        if(r->forwarded >= r->expected_total)
+          map_ip6_reass_free(r, &fragments_to_drop);
+#endif
+      }
+      map_ip6_reass_unlock();
+
+      if (error0 == MAP_ERROR_NONE) {
+        if (frag_len > 20) {
+          //Dequeue the packet
+          n_left_to_next++;
+          to_next--;
+        } else {
+          //All data from that packet was copied no need to keep it, but this is not an error
+          p0->error = error_node->errors[MAP_ERROR_NONE];
+          vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
+        }
+      } else {
+        p0->error = error_node->errors[error0];
+        vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP);
+      }
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  /* Flush accumulated buffers to their destination nodes in one pass. */
+  map_send_all_to_node(vm, fragments_ready, node,
+                       &error_node->errors[MAP_ERROR_NONE],
+                       IP6_MAP_IP6_REASS_NEXT_IP6_MAP);
+  map_send_all_to_node(vm, fragments_to_drop, node,
+                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+                       IP6_MAP_IP6_REASS_NEXT_DROP);
+
+  vec_free(fragments_to_drop);
+  vec_free(fragments_ready);
+  return frame->n_vectors;
+}
+
+/*
+ * ip6_ip4_virt_reass
+ *
+ * Virtual reassembly of inner-IPv4 fragments: the L4 port needed for the
+ * MAP security check only exists in the first fragment, so subsequent
+ * fragments are buffered until the port is learned, then looped back
+ * through this node and forwarded.
+ */
+static uword
+ip6_map_ip4_reass (vlib_main_t *vm,
+                   vlib_node_runtime_t *node,
+                   vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index);
+  map_main_t *mm = &map_main;
+  vlib_combined_counter_main_t *cm = mm->domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+  u32 *fragments_to_drop = NULL;
+  u32 *fragments_to_loopback = NULL;
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Single loop */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0 = MAP_ERROR_NONE;
+      map_domain_t *d0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      i32 port0 = 0;
+      u32 map_domain_index0;
+      u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP;
+      u8 cached = 0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip40 = vlib_buffer_get_current(p0);
+      /* ip6_map already advanced past the outer IPv6 header; it still sits
+       * immediately before the current IPv4 header. */
+      ip60 = ((ip6_header_t *)ip40) - 1;
+
+      d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32,
+                              &map_domain_index0, &error0);
+
+      map_ip4_reass_lock();
+      //This node only deals with fragmented ip4
+      map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32,
+                                             ip40->fragment_id, ip40->protocol, &fragments_to_drop);
+      if (PREDICT_FALSE(!r)) {
+        // Could not create a caching entry
+        error0 = MAP_ERROR_FRAGMENT_MEMORY;
+      } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) {
+        // This is a fragment
+        if (r->port >= 0) {
+          // We know the port already
+          port0 = r->port;
+        } else if (map_ip4_reass_add_fragment(r, pi0)) {
+          // Not enough space for caching
+          error0 = MAP_ERROR_FRAGMENT_MEMORY;
+          map_ip4_reass_free(r, &fragments_to_drop);
+        } else {
+          cached = 1;
+        }
+      } else if ((port0 = ip4_get_port(ip40, MAP_SENDER, p0->current_length)) < 0) {
+        // Could not find port from first fragment. Stop reassembling.
+        error0 = MAP_ERROR_BAD_PROTOCOL;
+        port0 = 0;
+        map_ip4_reass_free(r, &fragments_to_drop);
+      } else {
+        // Found port. Remember it and loopback saved fragments
+        r->port = port0;
+        map_ip4_reass_get_fragments(r, &fragments_to_loopback);
+      }
+
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+      if (!cached && r) {
+        r->forwarded += clib_host_to_net_u16(ip40->length) - 20;
+        if (!ip4_get_fragment_more(ip40))
+          r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20;
+        if(r->forwarded >= r->expected_total)
+          map_ip4_reass_free(r, &fragments_to_drop);
+      }
+#endif
+
+      map_ip4_reass_unlock();
+
+      if(PREDICT_TRUE(error0 == MAP_ERROR_NONE))
+        error0 = ip6_map_sec_check(d0, port0, ip40, ip60) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK;
+
+      /* NOTE(review): d0->mtu is evaluated before error0 is tested; if
+       * ip6_map_get_domain() can return NULL on lookup failure this reads
+       * through a NULL pointer — confirm against its implementation. */
+      if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu) &&
+                        error0 == MAP_ERROR_NONE && !cached)) {
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.flags = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        vnet_buffer(p0)->ip_frag.mtu = d0->mtu;
+        next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT;
+      }
+
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_ip6_map_ip4_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->map_domain_index = map_domain_index0;
+        tr->port = port0;
+        tr->cached = cached;
+      }
+
+      if (cached) {
+        //Dequeue the packet
+        n_left_to_next++;
+        to_next--;
+      } else {
+        if (error0 == MAP_ERROR_NONE)
+          vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1,
+                                          clib_net_to_host_u16(ip40->length));
+        next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP;
+        p0->error = error_node->errors[error0];
+        vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+      }
+
+      //Loopback when we reach the end of the input vector
+      if(n_left_from == 0 && vec_len(fragments_to_loopback)) {
+        from = vlib_frame_vector_args(frame);
+        u32 len = vec_len(fragments_to_loopback);
+        if(len <= VLIB_FRAME_SIZE) {
+          memcpy(from, fragments_to_loopback, sizeof(u32)*len);
+          n_left_from = len;
+          vec_reset_length(fragments_to_loopback);
+        } else {
+          memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE);
+          n_left_from = VLIB_FRAME_SIZE;
+          _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE;
+        }
+      }
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  map_send_all_to_node(vm, fragments_to_drop, node,
+                       &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED],
+                       IP6_MAP_IP4_REASS_NEXT_DROP);
+
+  vec_free(fragments_to_drop);
+  vec_free(fragments_to_loopback);
+  return frame->n_vectors;
+}
+
+/*
+ * ip6_icmp_relay
+ *
+ * Translate ICMPv6 errors about encapsulated packets into ICMPv4 errors
+ * addressed to the original IPv4 sender (RFC 2473, section 8.3).  The
+ * outer IPv6+ICMPv6+IPv6 headers are rewritten in place into a new
+ * IPv4+ICMPv4 header in front of the original (inner) IPv4 packet.
+ */
+static uword
+ip6_map_icmp_relay (vlib_main_t *vm,
+                    vlib_node_runtime_t *node,
+                    vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_icmp_relay_node.index);
+  map_main_t *mm = &map_main;
+  u32 cpu_index = os_get_cpu_number();
+  u16 *fragment_ids, *fid;
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  /* Get random fragment IDs for replies. */
+  fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, n_left_from * sizeof (fragment_ids[0]));
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+    /* Single loop */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0 = MAP_ERROR_NONE;
+      ip6_header_t *ip60;
+      u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP;
+      u32 mtu;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      u16 tlen = clib_net_to_host_u16(ip60->payload_length);
+
+      /*
+       * In:
+       *  IPv6 header (40)
+       *  ICMPv6 header (8)
+       *  IPv6 header (40)
+       *  Original IPv4 header / packet
+       * Out:
+       *  New IPv4 header
+       *  New ICMP header
+       *  Original IPv4 header / packet
+       */
+
+      /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */
+      if (tlen < 76) {
+        error0 = MAP_ERROR_ICMP_RELAY;
+        goto error;
+      }
+
+      /* icmp60 + 2 skips the full 8-byte ICMPv6 header (icmp46_header_t is 4 bytes). */
+      icmp46_header_t *icmp60 = (icmp46_header_t *)(ip60 + 1);
+      ip6_header_t *inner_ip60 = (ip6_header_t *)(icmp60 + 2);
+
+      if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) {
+        error0 = MAP_ERROR_ICMP_RELAY;
+        goto error;
+      }
+
+      ip4_header_t *inner_ip40 = (ip4_header_t *)(inner_ip60 + 1);
+      vlib_buffer_advance(p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */
+      ip4_header_t *new_ip40 = vlib_buffer_get_current(p0);
+      icmp46_header_t *new_icmp40 = (icmp46_header_t *)(new_ip40 + 1);
+
+      /*
+       * Relay according to RFC2473, section 8.3
+       */
+      switch (icmp60->type) {
+      case ICMP6_destination_unreachable:
+      case ICMP6_time_exceeded:
+      case ICMP6_parameter_problem:
+        /* Type 3 - destination unreachable, Code 1 - host unreachable */
+        new_icmp40->type = ICMP4_destination_unreachable;
+        new_icmp40->code = ICMP4_destination_unreachable_destination_unreachable_host;
+        break;
+
+      case ICMP6_packet_too_big:
+        /* Type 3 - destination unreachable, Code 4 - packet too big */
+        /* Potential TODO: Adjust domain tunnel MTU based on the value received here */
+        mtu = clib_net_to_host_u32(*((u32 *)(icmp60 + 1)));
+
+        /* Check DF flag */
+        /* NOTE(review): packet-too-big is only relayed when the inner IPv4
+         * packet had DF set; non-DF senders get no PMTUD feedback — confirm
+         * this is the intended policy. */
+        if (!(inner_ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT))) {
+          error0 = MAP_ERROR_ICMP_RELAY;
+          goto error;
+        }
+
+        new_icmp40->type = ICMP4_destination_unreachable;
+        new_icmp40->code = ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set;
+        *((u32 *)(new_icmp40 + 1)) = clib_host_to_net_u32(mtu < 1280 ? 1280 : mtu);
+        break;
+
+      default:
+        error0 = MAP_ERROR_ICMP_RELAY;
+        break;
+      }
+
+      /*
+       * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812)
+       */
+      new_ip40->ip_version_and_header_length = 0x45;
+      new_ip40->tos = 0;
+      u16 nlen = (tlen - 20) > 576 ? 576 : tlen - 20;
+      new_ip40->length = clib_host_to_net_u16(nlen);
+      new_ip40->fragment_id = fid[0]; fid++;
+      new_ip40->ttl = 64;
+      new_ip40->protocol = IP_PROTOCOL_ICMP;
+      new_ip40->src_address = mm->icmp_src_address;
+      new_ip40->dst_address = inner_ip40->src_address;
+      new_ip40->checksum = ip4_header_checksum(new_ip40);
+
+      new_icmp40->checksum = 0;
+      ip_csum_t sum = ip_incremental_checksum(0, new_icmp40, nlen - 20);
+      new_icmp40->checksum = ~ip_csum_fold(sum);
+
+      vlib_increment_simple_counter(&mm->icmp_relayed, cpu_index, 0, 1);
+
+    error:
+      if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+        map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+        tr->map_domain_index = 0;
+        tr->port = 0;
+      }
+
+      next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP;
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+
+  return frame->n_vectors;
+
+}
+
+/* Human-readable counter strings, one per MAP_ERROR_* symbol,
+ * expanded from the foreach_map_error macro list. */
+static char *map_error_strings[] = {
+#define _(sym,string) string,
+  foreach_map_error
+#undef _
+};
+
+/* Main IPv6-side MAP-E node; next-node order must match enum ip6_map_next_e. */
+VLIB_REGISTER_NODE(ip6_map_node) = {
+  .function = ip6_map,
+  .name = "ip6-map",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+
+  .n_next_nodes = IP6_MAP_N_NEXT,
+  .next_nodes = {
+    [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+#ifdef MAP_SKIP_IP6_LOOKUP
+    [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit",
+#endif
+    [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass",
+    [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass",
+    [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag",
+    [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay",
+    [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local",
+    [IP6_MAP_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* IPv6 virtual-reassembly node; released fragments loop back into ip6-map. */
+VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = {
+  .function = ip6_map_ip6_reass,
+  .name = "ip6-map-ip6-reass",
+  .vector_size = sizeof(u32),
+  .format_trace = format_ip6_map_ip6_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+  .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT,
+  .next_nodes = {
+    [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map",
+    [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* Inner-IPv4 virtual-reassembly node used for the per-fragment security check. */
+VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = {
+  .function = ip6_map_ip4_reass,
+  .name = "ip6-map-ip4-reass",
+  .vector_size = sizeof(u32),
+  .format_trace = format_ip6_map_ip4_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+  .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT,
+  .next_nodes = {
+    [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag",
+    [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop",
+  },
+};
+
+/* ICMPv6 -> ICMPv4 error relay node (RFC 2473 section 8.3). */
+VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
+  .function = ip6_map_icmp_relay,
+  .name = "ip6-map-icmp-relay",
+  .vector_size = sizeof(u32),
+  .format_trace = format_map_trace, //FIXME
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = MAP_N_ERROR,
+  .error_strings = map_error_strings,
+  .n_next_nodes = IP6_ICMP_RELAY_N_NEXT,
+  .next_nodes = {
+    [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
+  },
+};
diff --git a/vnet/vnet/map/ip6_map_t.c b/vnet/vnet/map/ip6_map_t.c
new file mode 100644
index 00000000000..7720e06fba4
--- /dev/null
+++ b/vnet/vnet/map/ip6_map_t.c
@@ -0,0 +1,1141 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "map.h"
+
+#include "../ip/ip_frag.h"
+
+#define IP6_MAP_T_DUAL_LOOP
+
+/* Next-node dispositions chosen by the ip6-map-t classifier node:
+ * TCP/UDP, ICMP and non-first-fragment packets each get their own
+ * specialized translation node. */
+typedef enum {
+  IP6_MAPT_NEXT_MAPT_TCP_UDP,
+  IP6_MAPT_NEXT_MAPT_ICMP,
+  IP6_MAPT_NEXT_MAPT_FRAGMENTED,
+  IP6_MAPT_NEXT_DROP,
+  IP6_MAPT_N_NEXT
+} ip6_mapt_next_t;
+
+/* Next-node dispositions for the ICMP translation node
+ * (see ip6_map_t_icmp). */
+typedef enum {
+  IP6_MAPT_ICMP_NEXT_IP4_LOOKUP,
+  IP6_MAPT_ICMP_NEXT_IP4_FRAG,
+  IP6_MAPT_ICMP_NEXT_DROP,
+  IP6_MAPT_ICMP_N_NEXT
+} ip6_mapt_icmp_next_t;
+
+/* Next-node dispositions for the TCP/UDP translation node
+ * (see ip6_map_t_tcp_udp). */
+typedef enum {
+  IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP,
+  IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG,
+  IP6_MAPT_TCP_UDP_NEXT_DROP,
+  IP6_MAPT_TCP_UDP_N_NEXT
+} ip6_mapt_tcp_udp_next_t;
+
+/* Next-node dispositions for the fragment translation node
+ * (see ip6_map_t_fragmented). */
+typedef enum {
+  IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP,
+  IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG,
+  IP6_MAPT_FRAGMENTED_NEXT_DROP,
+  IP6_MAPT_FRAGMENTED_N_NEXT
+} ip6_mapt_fragmented_next_t;
+
+/* Record the L4 port carried by a first fragment in the reassembly
+ * cache, keyed by (ip4 src, embedded ip4 dst, fragment id, protocol),
+ * so that later fragments of the same datagram can reuse it.
+ * Returns 0 on success, 1 when no cache entry could be obtained. */
+static_always_inline int
+ip6_map_fragment_cache (ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d, u16 port)
+{
+  map_ip4_reass_t *entry;
+  u32 *unused = NULL;
+  //ICMPv6 is cached under the translated (ICMPv4) protocol number
+  u8 lookup_proto = (ip6->protocol == IP_PROTOCOL_ICMP6) ?
+    IP_PROTOCOL_ICMP : ip6->protocol;
+
+  map_ip4_reass_lock();
+  entry = map_ip4_reass_get(map_get_ip4(&ip6->src_address),
+                            ip6_map_t_embedded_address(d, &ip6->dst_address),
+                            frag_id_6to4(frag->identification),
+                            lookup_proto, &unused);
+  if (entry)
+    entry->port = port;
+  map_ip4_reass_unlock();
+
+  return entry ? 0 : 1;
+}
+
+/* Returns the associated port or -1 */
+static_always_inline i32
+ip6_map_fragment_get(ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d)
+{
+ u32 *ignore = NULL;
+ map_ip4_reass_lock();
+ map_ip4_reass_t *r = map_ip4_reass_get(map_get_ip4(&ip6->src_address), ip6_map_t_embedded_address(d, &ip6->dst_address),
+ frag_id_6to4(frag->identification),
+ (ip6->protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol,
+ &ignore);
+ i32 ret = r?r->port:-1;
+ map_ip4_reass_unlock();
+ return ret;
+}
+
+/* Derive the IPv4 TOS byte from the IPv6 traffic-class bits
+ * (bits 20..27 of the version/TC/flow-label word), unless a
+ * compile-time override is configured. */
+static_always_inline u8
+ip6_translate_tos(const ip6_header_t *ip6)
+{
+#ifdef IP6_MAP_T_OVERRIDE_TOS
+  return IP6_MAP_T_OVERRIDE_TOS;
+#else
+  u32 vtcfl = clib_net_to_host_u32(ip6->ip_version_traffic_class_and_flow_label);
+  return (vtcfl >> 20) & 0xff;
+#endif
+}
+
+//TODO: Find right place in memory for that
+static u8 icmp6_to_icmp_updater_pointer_table[] =
+ { 0, 1,~0,~0,
+ 2, 2, 9, 8,
+ 12,12,12,12,
+ 12,12,12,12,
+ 12,12,12,12,
+ 12,12,12,12,
+ 24,24,24,24,
+ 24,24,24,24,
+ 24,24,24,24,
+ 24,24,24,24
+ };
+
+/*
+ * Translate an ICMPv6 header into an ICMPv4 header, in place.
+ * NOTE(review): despite the "icmp_to_icmp6" name, the direction here is
+ * 6-to-4: ICMP6_* types/codes are rewritten to their ICMP4_* equivalents.
+ *
+ * @param icmp        the ICMP header; type, code and (for some messages)
+ *                    the 4 bytes following the header are modified.
+ * @param icmp_len    remaining length of the ICMP message (bounds the
+ *                    embedded-packet port lookup).
+ * @param sender_port for echo messages, set to the identifier field;
+ *                    for error messages, set to the port extracted from
+ *                    the embedded packet via ip6_get_port().
+ * @param inner_ip6   set to the embedded IPv6 header for error messages,
+ *                    left NULL for echo request/reply.
+ * @return 0 on success, -1 for untranslatable types/codes.
+ */
+static_always_inline int
+ip6_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len,
+                            i32 *sender_port, ip6_header_t **inner_ip6)
+{
+  *inner_ip6 = NULL;
+  switch (icmp->type) {
+  case ICMP6_echo_request:
+    //Identifier field (bytes 4-5) doubles as the MAP "port"
+    *sender_port = ((u16 *)icmp)[2];
+    icmp->type = ICMP4_echo_request;
+    break;
+  case ICMP6_echo_reply:
+    *sender_port = ((u16 *)icmp)[2];
+    icmp->type = ICMP4_echo_reply;
+    break;
+  case ICMP6_destination_unreachable:
+    //Error messages embed the offending packet 8 bytes into the ICMP message
+    *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8);
+    *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len);
+
+    switch (icmp->code) {
+    case ICMP6_destination_unreachable_no_route_to_destination: //0
+    case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2
+    case ICMP6_destination_unreachable_address_unreachable: //3
+      icmp->type = ICMP4_destination_unreachable;
+      icmp->code = ICMP4_destination_unreachable_destination_unreachable_host;
+      break;
+    case ICMP6_destination_unreachable_destination_administratively_prohibited: //1
+      icmp->type = ICMP4_destination_unreachable;
+      icmp->code = ICMP4_destination_unreachable_communication_administratively_prohibited;
+      break;
+    case ICMP6_destination_unreachable_port_unreachable:
+      icmp->type = ICMP4_destination_unreachable;
+      icmp->code = ICMP4_destination_unreachable_port_unreachable;
+      break;
+    default:
+      //No IPv4 equivalent: caller drops the packet
+      return -1;
+    }
+    break;
+  case ICMP6_packet_too_big:
+    *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8);
+    *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len);
+
+    icmp->type = ICMP4_destination_unreachable;
+    icmp->code = 4; //"fragmentation needed and DF set"
+    {
+      //Shrink the advertised MTU by the IPv6/IPv4 header size difference
+      //and store it in the (formerly unused) MTU field, bytes 6-7
+      u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1)));
+      advertised_mtu -= 20;
+      //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20)
+      ((u16 *)(icmp))[3] = clib_host_to_net_u16(advertised_mtu);
+    }
+    break;
+
+  case ICMP6_time_exceeded:
+    *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8);
+    *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len);
+
+    //Code is carried over unchanged
+    icmp->type = ICMP4_time_exceeded;
+    break;
+
+  case ICMP6_parameter_problem:
+    *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8);
+    *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len);
+
+    switch (icmp->code) {
+    case ICMP6_parameter_problem_erroneous_header_field:
+      icmp->type = ICMP4_parameter_problem;
+      icmp->code = ICMP4_parameter_problem_pointer_indicates_error;
+      //Remap the pointer into the translated IPv4 header
+      u32 pointer = clib_net_to_host_u32(*((u32*)(icmp + 1)));
+      if (pointer >= 40)
+        return -1;
+
+      ((u8*)(icmp + 1))[0] = icmp6_to_icmp_updater_pointer_table[pointer];
+      break;
+    case ICMP6_parameter_problem_unrecognized_next_header:
+      icmp->type = ICMP4_destination_unreachable;
+      icmp->code = ICMP4_destination_unreachable_port_unreachable;
+      break;
+    case ICMP6_parameter_problem_unrecognized_option:
+    default:
+      return -1;
+    }
+    break;
+  default:
+    return -1;
+    break;
+  }
+  return 0;
+}
+
+/*
+ * Translate one ICMPv6 packet (and, for error messages, the embedded
+ * IPv6 packet) to ICMPv4, rewriting the buffer in place and advancing
+ * the buffer head to the new IPv4 header.
+ *
+ * @param d     the MAP domain this packet was matched to.
+ * @param p     the buffer; current data must point at the outer ip6 header.
+ * @param error set to a MAP_ERROR_* code on failure, untouched on success.
+ */
+static_always_inline void
+_ip6_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 *error)
+{
+  ip6_header_t *ip6, *inner_ip6;
+  ip4_header_t *ip4, *inner_ip4;
+  u32 ip6_pay_len;
+  icmp46_header_t *icmp;
+  i32 sender_port;
+  ip_csum_t csum;
+  u32 ip4_sadr, inner_ip4_dadr;
+
+  ip6 = vlib_buffer_get_current(p);
+  ip6_pay_len = clib_net_to_host_u16(ip6->payload_length);
+  icmp = (icmp46_header_t *)(ip6 + 1);
+  ASSERT(ip6_pay_len + sizeof(*ip6) <= p->current_length);
+
+  if (ip6->protocol != IP_PROTOCOL_ICMP6) {
+    //No extensions headers allowed here
+    //TODO: SR header
+    *error = MAP_ERROR_MALFORMED;
+    return;
+  }
+
+  //There are no fragmented ICMP messages, so no extension header for now
+
+  //Rewrites the ICMP header 6->4 and locates the embedded packet, if any
+  if (ip6_icmp_to_icmp6_in_place(icmp, ip6_pay_len, &sender_port, &inner_ip6)) {
+    //TODO: In case of 1:1 mapping it is not necessary to have the sender port
+    *error = MAP_ERROR_ICMP;
+    return;
+  }
+
+  if (sender_port < 0) {
+    // In case of 1:1 mapping, we don't care about the port
+    if(d->ea_bits_len == 0 && d->rules) {
+      sender_port = 0;
+    } else {
+      *error = MAP_ERROR_ICMP;
+      return;
+    }
+  }
+
+  //Security check
+  //Note that this prevents an intermediate IPv6 router from answering the request
+  ip4_sadr = map_get_ip4(&ip6->src_address);
+  if (ip6->src_address.as_u64[0] != map_get_pfx_net(d, ip4_sadr, sender_port) ||
+      ip6->src_address.as_u64[1] != map_get_sfx_net(d, ip4_sadr, sender_port)) {
+    *error = MAP_ERROR_SEC_CHECK;
+    return;
+  }
+
+  if (inner_ip6) {
+    u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, inner_frag_id;
+    u8 *inner_l4, inner_protocol;
+
+    //We have two headers to translate
+    //  FROM
+    //  [   IPv6   ]<- ext ->[IC][   IPv6   ]<- ext ->[L4 header ...
+    //  Handled cases:
+    //  [   IPv6   ][IC][   IPv6   ][L4 header ...
+    //  [   IPv6   ][IC][   IPv6   ][Fr][L4 header ...
+    //  TO
+    //  [ IPv4][IC][ IPv4][L4 header ...
+
+    //TODO: This was already done deep in ip6_icmp_to_icmp6_in_place
+    //We shouldn't have to do it again
+    if (ip6_parse(inner_ip6, ip6_pay_len - 8,
+                  &inner_protocol, &inner_l4_offset, &inner_frag_offset)) {
+      *error = MAP_ERROR_MALFORMED;
+      return;
+    }
+
+    inner_l4 = u8_ptr_add(inner_ip6, inner_l4_offset);
+    //The translated inner IPv4 header sits immediately before the inner L4
+    inner_ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - sizeof(*inner_ip4));
+    if (inner_frag_offset) {
+      ip6_frag_hdr_t *inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, inner_frag_offset);
+      inner_frag_id = frag_id_6to4(inner_frag->identification);
+    } else {
+      inner_frag_id = 0;
+    }
+
+    //Do the translation of the inner packet
+    if (inner_protocol == IP_PROTOCOL_TCP) {
+      //TCP checksum lives at offset 16 of the TCP header
+      inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 16);
+    } else if (inner_protocol == IP_PROTOCOL_UDP) {
+      //UDP checksum lives at offset 6 of the UDP header
+      inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 6);
+    } else if (inner_protocol == IP_PROTOCOL_ICMP6) {
+      icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+      csum = inner_icmp->checksum;
+      csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp));
+      //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded
+      inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ?
+        ICMP4_echo_request : ICMP4_echo_reply;
+      csum = ip_csum_add_even(csum, *((u16 *)inner_icmp));
+      inner_icmp->checksum = ip_csum_fold(csum);
+      inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later
+      inner_L4_checksum = &inner_icmp->checksum;
+    } else {
+      *error = MAP_ERROR_BAD_PROTOCOL;
+      return;
+    }
+
+    //Strip the IPv6 pseudo-header addresses out of the inner L4 checksum
+    csum = *inner_L4_checksum;
+    csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[0]);
+    csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[1]);
+    csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[0]);
+    csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[1]);
+
+    //Sanity check of the outer destination address
+    //NOTE(review): '&&' only fails when BOTH 64-bit halves differ; an
+    //equality check would normally use '||' here — confirm intent.
+    if (ip6->dst_address.as_u64[0] != inner_ip6->src_address.as_u64[0] &&
+        ip6->dst_address.as_u64[1] != inner_ip6->src_address.as_u64[1]) {
+      *error = MAP_ERROR_SEC_CHECK;
+      return;
+    }
+
+    //Security check of inner packet
+    inner_ip4_dadr = map_get_ip4(&inner_ip6->dst_address);
+    if (inner_ip6->dst_address.as_u64[0] != map_get_pfx_net(d, inner_ip4_dadr, sender_port) ||
+        inner_ip6->dst_address.as_u64[1] != map_get_sfx_net(d, inner_ip4_dadr, sender_port)) {
+      *error = MAP_ERROR_SEC_CHECK;
+      return;
+    }
+
+    //Build the translated inner IPv4 header
+    inner_ip4->dst_address.as_u32 = inner_ip4_dadr;
+    inner_ip4->src_address.as_u32 = ip6_map_t_embedded_address(d, &inner_ip6->src_address);
+    inner_ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+    inner_ip4->tos = ip6_translate_tos(inner_ip6);
+    inner_ip4->length = u16_net_add(inner_ip6->payload_length, sizeof(*ip4) + sizeof(*ip6) -
+                                    inner_l4_offset);
+    inner_ip4->fragment_id = inner_frag_id;
+    //NOTE(review): MORE_FRAGMENTS is set unconditionally, even when the
+    //inner packet carried no fragment header — confirm this is intended.
+    inner_ip4->flags_and_fragment_offset = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
+    inner_ip4->ttl = inner_ip6->hop_limit;
+    inner_ip4->protocol = inner_protocol;
+    inner_ip4->checksum = ip4_header_checksum(inner_ip4);
+
+    if (inner_ip4->protocol == IP_PROTOCOL_ICMP) {
+      //Remove remainings of the pseudo-header in the csum
+      csum = ip_csum_sub_even(csum, clib_host_to_net_u16(IP_PROTOCOL_ICMP6));
+      csum = ip_csum_sub_even(csum, inner_ip4->length - sizeof(*inner_ip4));
+    } else {
+      //Update to new pseudo-header
+      csum = ip_csum_add_even(csum, inner_ip4->src_address.as_u32);
+      csum = ip_csum_add_even(csum, inner_ip4->dst_address.as_u32);
+    }
+    *inner_L4_checksum = ip_csum_fold(csum);
+
+    //Move up icmp header
+    ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - 2 * sizeof(*ip4) - 8);
+    memcpy(u8_ptr_add(inner_l4, - sizeof(*ip4) - 8), icmp, 8);
+    icmp = (icmp46_header_t *) u8_ptr_add(inner_l4, - sizeof(*ip4) - 8);
+  } else {
+    //Only one header to translate
+    ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4));
+  }
+  //Advance past the shrunken headers so ip4 is the new packet start
+  vlib_buffer_advance(p, (u32) (((u8 *)ip4) - ((u8 *)ip6)));
+
+  ip4->dst_address.as_u32 = ip6_map_t_embedded_address(d, &ip6->dst_address);
+  ip4->src_address.as_u32 = ip4_sadr;
+  ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos(ip6);
+  ip4->fragment_id = 0;
+  ip4->flags_and_fragment_offset = 0;
+  ip4->ttl = ip6->hop_limit;
+  ip4->protocol = IP_PROTOCOL_ICMP;
+  //TODO fix the length depending on offset length
+  ip4->length = u16_net_add(ip6->payload_length,
+                            (inner_ip6 == NULL)?sizeof(*ip4):(2*sizeof(*ip4) - sizeof(*ip6)));
+  ip4->checksum = ip4_header_checksum(ip4);
+
+  //TODO: We could do an easy diff-checksum for echo requests/replies
+  //Recompute ICMP checksum
+  icmp->checksum = 0;
+  csum = ip_incremental_checksum(0, icmp, clib_net_to_host_u16(ip4->length) - sizeof(*ip4));
+  icmp->checksum = ~ip_csum_fold (csum);
+}
+
+/*
+ * Graph node function: translate ICMPv6 packets to ICMPv4 via
+ * _ip6_map_t_icmp(), update per-domain RX counters, and divert results
+ * larger than the domain MTU to the ip4-frag node.
+ */
+static uword
+ip6_map_t_icmp (vlib_main_t *vm,
+                vlib_node_runtime_t *node,
+                vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index);
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  vlib_combined_counter_main_t *cm = map_main.domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      u8 error0;
+      ip6_mapt_icmp_next_t next0;
+      map_domain_t *d0;
+      u16 len0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+      error0 = MAP_ERROR_NONE;
+      next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      //Byte count for the RX counter, read before the buffer is rewritten
+      len0 = clib_net_to_host_u16(((ip6_header_t *)vlib_buffer_get_current(p0))->payload_length);
+      d0 = pool_elt_at_index(map_main.domains, vnet_buffer(p0)->map_t.map_domain_index);
+      _ip6_map_t_icmp(d0, p0, &error0);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
+      }
+
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        len0);
+      } else {
+        next0 = IP6_MAPT_ICMP_NEXT_DROP;
+      }
+
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                       to_next, n_left_to_next, pi0,
+                                       next0);
+    }
+    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/*
+ * Graph node function: translate IPv6 fragments to IPv4 fragments.
+ * The classifier (ip6-map-t) already stored the translated IPv4
+ * source/destination, the L4 offset, the fragment-header offset and the
+ * domain MTU in vnet_buffer()->map_t, so this node only rebuilds the
+ * IPv4 header in place in front of the payload.
+ *
+ * Fix: next0/next1 were initialized with IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP,
+ * an enumerator from the TCP/UDP node's next enum.  It shares the numeric
+ * value 0 with IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP, so behavior is
+ * unchanged, but this node must use its own enum for consistency.
+ */
+static uword
+ip6_map_t_fragmented (vlib_main_t *vm,
+                      vlib_node_runtime_t *node,
+                      vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+    //Dual loop: process buffers two at a time while at least four remain
+    while(n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      ip6_header_t *ip60, *ip61;
+      ip6_frag_hdr_t *frag0, *frag1;
+      ip4_header_t *ip40, *ip41;
+      u16 frag_id0, frag_offset0,
+          frag_id1, frag_offset1;
+      u8 frag_more0, frag_more1;
+      ip6_mapt_fragmented_next_t next0, next1;
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next += 2;
+      n_left_to_next -= 2;
+
+      next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP; //was IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP (same value)
+      next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP;
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip60 = vlib_buffer_get_current(p0);
+      ip61 = vlib_buffer_get_current(p1);
+      frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+      frag1 = (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset);
+      //The IPv4 header is built immediately before the L4 payload
+      ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      ip41 = (ip4_header_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p1, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40));
+
+      frag_id0 = frag_id_6to4(frag0->identification);
+      frag_id1 = frag_id_6to4(frag1->identification);
+      frag_more0 = ip6_frag_hdr_more(frag0);
+      frag_more1 = ip6_frag_hdr_more(frag1);
+      frag_offset0 = ip6_frag_hdr_offset(frag0);
+      frag_offset1 = ip6_frag_hdr_offset(frag1);
+
+      ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr;
+      ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr;
+      ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr;
+      ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr;
+      ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip40->tos = ip6_translate_tos(ip60);
+      ip41->tos = ip6_translate_tos(ip61);
+      ip40->length = u16_net_add(ip60->payload_length,
+                                 sizeof(*ip40) - vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60));
+      ip41->length = u16_net_add(ip61->payload_length,
+                                 sizeof(*ip40) - vnet_buffer(p1)->map_t.v6.l4_offset + sizeof(*ip60));
+      ip40->fragment_id = frag_id0;
+      ip41->fragment_id = frag_id1;
+      ip40->flags_and_fragment_offset =
+        clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0));
+      ip41->flags_and_fragment_offset =
+        clib_host_to_net_u16(frag_offset1 | (frag_more1?IP4_HEADER_FLAG_MORE_FRAGMENTS:0));
+      ip40->ttl = ip60->hop_limit;
+      ip41->ttl = ip61->hop_limit;
+      ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)?
+        IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol;
+      ip41->protocol = (vnet_buffer(p1)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)?
+        IP_PROTOCOL_ICMP:vnet_buffer(p1)->map_t.v6.l4_protocol;
+      ip40->checksum = ip4_header_checksum(ip40);
+      ip41->checksum = ip4_header_checksum(ip41);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+      }
+
+      if(vnet_buffer(p1)->map_t.mtu < p1->current_length) {
+        vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu;
+        vnet_buffer(p1)->ip_frag.header_offset = 0;
+        vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0, pi1,
+                                      next0, next1);
+    }
+#endif
+
+    //Single loop: remaining buffers one at a time
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip6_header_t *ip60;
+      ip6_frag_hdr_t *frag0;
+      ip4_header_t *ip40;
+      u16 frag_id0;
+      u8 frag_more0;
+      u16 frag_offset0;
+      ip6_mapt_fragmented_next_t next0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+
+      next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP; //was IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP (same value)
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+      ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+
+      frag_id0 = frag_id_6to4(frag0->identification);
+      frag_more0 = ip6_frag_hdr_more(frag0);
+      frag_offset0 = ip6_frag_hdr_offset(frag0);
+
+      ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr;
+      ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr;
+      ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip40->tos = ip6_translate_tos(ip60);
+      ip40->length = u16_net_add(ip60->payload_length,
+                                 sizeof(*ip40) - vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60));
+      ip40->fragment_id = frag_id0;
+      ip40->flags_and_fragment_offset =
+        clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0));
+      ip40->ttl = ip60->hop_limit;
+      ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)?
+        IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol;
+      ip40->checksum = ip4_header_checksum(ip40);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/*
+ * Graph node function: translate IPv6 TCP/UDP packets to IPv4, adjusting
+ * the L4 checksum incrementally for the pseudo-header address change and
+ * rebuilding the IPv4 header in place in front of the L4 header.
+ *
+ * BUGFIX (dual loop): the second packet's checksum chain read from csum0
+ * instead of csum1 when subtracting ip61->dst_address.as_u64[0], which
+ * corrupted the L4 checksums of BOTH packets in the pair.
+ */
+static uword
+ip6_map_t_tcp_udp (vlib_main_t *vm,
+                   vlib_node_runtime_t *node,
+                   vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+    //Dual loop: process buffers two at a time while at least four remain
+    while(n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      ip6_header_t *ip60, *ip61;
+      ip_csum_t csum0, csum1;
+      ip4_header_t *ip40, *ip41;
+      u16 fragment_id0, flags0, *checksum0,
+          fragment_id1, flags1, *checksum1;
+      ip6_mapt_tcp_udp_next_t next0, next1;
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next += 2;
+      n_left_to_next -= 2;
+      next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+      next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip60 = vlib_buffer_get_current(p0);
+      ip61 = vlib_buffer_get_current(p1);
+      //The IPv4 header is built immediately before the L4 header
+      ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      ip41 = (ip4_header_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p1, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40));
+      checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset);
+      checksum1 = (u16 *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.checksum_offset);
+
+      //Incrementally swap IPv6 pseudo-header addresses for IPv4 ones
+      csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]);
+      csum1 = ip_csum_sub_even(*checksum1, ip61->src_address.as_u64[0]);
+      csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]);
+      csum1 = ip_csum_sub_even(csum1, ip61->src_address.as_u64[1]);
+      csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]);
+      csum1 = ip_csum_sub_even(csum1, ip61->dst_address.as_u64[0]); //BUGFIX: was csum0
+      csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]);
+      csum1 = ip_csum_sub_even(csum1, ip61->dst_address.as_u64[1]);
+      csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.daddr);
+      csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.daddr);
+      csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr);
+      csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.saddr);
+      *checksum0 = ip_csum_fold(csum0);
+      *checksum1 = ip_csum_fold(csum1);
+
+      if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) {
+        //First fragment of a fragmented datagram: carry id and MF over
+        ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+        fragment_id0 = frag_id_6to4(hdr->identification);
+        flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
+      } else {
+        fragment_id0 = 0;
+        flags0 = 0;
+      }
+
+      if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset)) {
+        ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset);
+        fragment_id1 = frag_id_6to4(hdr->identification);
+        flags1 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
+      } else {
+        fragment_id1 = 0;
+        flags1 = 0;
+      }
+
+      ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr;
+      ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr;
+      ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr;
+      ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr;
+      ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip40->tos = ip6_translate_tos(ip60);
+      ip41->tos = ip6_translate_tos(ip61);
+      ip40->length = u16_net_add(ip60->payload_length,
+                                 sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset);
+      ip41->length = u16_net_add(ip61->payload_length,
+                                 sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset);
+      ip40->fragment_id = fragment_id0;
+      ip41->fragment_id = fragment_id1;
+      ip40->flags_and_fragment_offset = flags0;
+      ip41->flags_and_fragment_offset = flags1;
+      ip40->ttl = ip60->hop_limit;
+      ip41->ttl = ip61->hop_limit;
+      ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol;
+      ip41->protocol = vnet_buffer(p1)->map_t.v6.l4_protocol;
+      ip40->checksum = ip4_header_checksum(ip40);
+      ip41->checksum = ip4_header_checksum(ip41);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+      }
+
+      if(vnet_buffer(p1)->map_t.mtu < p1->current_length) {
+        vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu;
+        vnet_buffer(p1)->ip_frag.header_offset = 0;
+        vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+                                      n_left_to_next, pi0, pi1, next0, next1);
+    }
+#endif
+
+    //Single loop: remaining buffers one at a time
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip6_header_t *ip60;
+      u16 *checksum0;
+      ip_csum_t csum0;
+      ip4_header_t *ip40;
+      u16 fragment_id0;
+      u16 flags0;
+      ip6_mapt_tcp_udp_next_t next0;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next +=1;
+      n_left_to_next -= 1;
+      next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40));
+      checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset);
+
+      //TODO: This can probably be optimized
+      csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]);
+      csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]);
+      csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]);
+      csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]);
+      csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.daddr);
+      csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr);
+      *checksum0 = ip_csum_fold(csum0);
+
+      if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) {
+        //Only the first fragment
+        ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+        fragment_id0 = frag_id_6to4(hdr->identification);
+        flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS);
+      } else {
+        fragment_id0 = 0;
+        flags0 = 0;
+      }
+
+      ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr;
+      ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr;
+      ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+      ip40->tos = ip6_translate_tos(ip60);
+      ip40->length = u16_net_add(ip60->payload_length,
+                                 sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset);
+      ip40->fragment_id = fragment_id0;
+      ip40->flags_and_fragment_offset = flags0;
+      ip40->ttl = ip60->hop_limit;
+      ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol;
+      ip40->checksum = ip4_header_checksum(ip40);
+
+      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
+        //Send to fragmentation node if necessary
+        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
+        vnet_buffer(p0)->ip_frag.header_offset = 0;
+        vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
+        next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
+      }
+
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/*
+ * Classify an IPv6 packet for MAP-T: select the translation node
+ * (*next0), extract the source L4 port or ICMP identifier used by the
+ * MAP security check (*src_port0, -1 when unknown), and record the L4
+ * checksum offset in vnet_buffer()->map_t.
+ *
+ * BUGFIX: echo request/reply detection compared ICMP6_echo_request/reply
+ * against the ->code field; those constants are ICMP *types* (the code
+ * of an echo message is 0), so the branch never matched and echo packets
+ * always skipped the port extraction.  Compare against ->type instead.
+ */
+static_always_inline void
+ip6_map_t_classify(vlib_buffer_t *p0, ip6_header_t *ip60,
+                   map_domain_t *d0, i32 *src_port0,
+                   u8 *error0, ip6_mapt_next_t *next0,
+                   u32 l4_len0, ip6_frag_hdr_t *frag0)
+{
+  if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset &&
+                    ip6_frag_hdr_offset(frag0))) {
+    //Non-first fragment: the port must come from the reassembly cache
+    *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+    if(d0->ea_bits_len == 0 && d0->rules) {
+      *src_port0 = 0;
+    } else {
+      *src_port0 = ip6_map_fragment_get(ip60, frag0, d0);
+      *error0 = (*src_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED;
+    }
+  } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) {
+    *error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+    //TCP checksum lives at offset 16 of the TCP header
+    vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16;
+    *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+    *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset));
+  } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) {
+    *error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : *error0;
+    //UDP checksum lives at offset 6 of the UDP header
+    vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6;
+    *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+    *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset));
+  } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) {
+    *error0 = l4_len0 < sizeof(icmp46_header_t) ? MAP_ERROR_MALFORMED : *error0;
+    *next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+    if(d0->ea_bits_len == 0 && d0->rules) {
+      *src_port0 = 0;
+    } else if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->type == ICMP6_echo_reply ||
+               ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->type == ICMP6_echo_request) {
+      //NOTE(review): offset +6 reads the echo sequence-number field;
+      //ip6_icmp_to_icmp6_in_place uses the identifier at offset +4 as the
+      //port — confirm which field the MAP PSID is expected to live in.
+      *src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6));
+    }
+  } else {
+    //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+    *error0 = MAP_ERROR_BAD_PROTOCOL;
+  }
+}
+
+/*
+ * ip6_map_t: IPv6-side entry node of the MAP-T translator (RFC 7599).
+ *
+ * For every packet this node:
+ *  - resolves the MAP domain from the adjacency stored by the IPv6 lookup,
+ *  - recovers the embedded IPv4 source address and the translated IPv4
+ *    destination address,
+ *  - parses the IPv6 header chain (L4 protocol, L4 offset, fragment header),
+ *  - classifies the packet and dispatches it to the protocol-specific
+ *    translation node (tcp-udp, icmp, fragmented, or drop),
+ *  - enforces the MAP security check on the source address/port, and
+ *  - caches the source port of first fragments so that subsequent fragments
+ *    (which carry no L4 header) can be mapped too.
+ *
+ * BUG FIX versus the original: the single-packet loop's ICMPv6 branch
+ * compared the ICMP *code* field against ICMP6_echo_request/reply.  Those
+ * constants are ICMPv6 *type* values (128/129) and the code of an echo
+ * message is always 0, so the comparison never matched: the session port
+ * was never extracted for echo packets and the security check for them
+ * was silently skipped.  The comparison now uses the 'type' field.
+ *
+ * Returns the number of vectors processed, per vlib node convention.
+ */
+static uword
+ip6_map_t (vlib_main_t *vm,
+           vlib_node_runtime_t *node,
+           vlib_frame_t *frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_t_node.index);
+  vlib_combined_counter_main_t *cm = map_main.domain_counters;
+  u32 cpu_index = os_get_cpu_number();
+
+  from = vlib_frame_vector_args(frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  while (n_left_from > 0) {
+    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+#ifdef IP6_MAP_T_DUAL_LOOP
+    /* Dual loop: handle two packets per iteration. */
+    while (n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 pi0, pi1;
+      vlib_buffer_t *p0, *p1;
+      ip6_header_t *ip60, *ip61;
+      u8 error0, error1;
+      ip6_mapt_next_t next0, next1;
+      u32 l4_len0, l4_len1;
+      i32 src_port0, src_port1;
+      map_domain_t *d0, *d1;
+      ip6_frag_hdr_t *frag0, *frag1;
+      u32 saddr0, saddr1;
+      next0 = next1 = 0; //Because compiler whines
+
+      pi0 = to_next[0] = from[0];
+      pi1 = to_next[1] = from[1];
+      from += 2;
+      n_left_from -= 2;
+      to_next += 2;
+      n_left_to_next -= 2;
+
+      error0 = MAP_ERROR_NONE;
+      error1 = MAP_ERROR_NONE;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      p1 = vlib_get_buffer(vm, pi1);
+      ip60 = vlib_buffer_get_current(p0);
+      ip61 = vlib_buffer_get_current(p1);
+
+      /* Save embedded v4 source in a separate variable so the
+         adjacency index is not clobbered before the domain lookup. */
+      saddr0 = map_get_ip4(&ip60->src_address);
+      saddr1 = map_get_ip4(&ip61->src_address);
+      d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX],
+                              (ip4_address_t *)&saddr0,
+                              &vnet_buffer(p0)->map_t.map_domain_index, &error0);
+      d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX],
+                              (ip4_address_t *)&saddr1,
+                              &vnet_buffer(p1)->map_t.map_domain_index, &error1);
+
+      vnet_buffer(p0)->map_t.v6.saddr = saddr0;
+      vnet_buffer(p1)->map_t.v6.saddr = saddr1;
+      vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address);
+      vnet_buffer(p1)->map_t.v6.daddr = ip6_map_t_embedded_address(d1, &ip61->dst_address);
+      vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+      vnet_buffer(p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0;
+
+      if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length,
+                                  &(vnet_buffer(p0)->map_t.v6.l4_protocol),
+                                  &(vnet_buffer(p0)->map_t.v6.l4_offset),
+                                  &(vnet_buffer(p0)->map_t.v6.frag_offset)))) {
+        error0 = MAP_ERROR_MALFORMED;
+        next0 = IP6_MAPT_NEXT_DROP;
+      }
+
+      if (PREDICT_FALSE(ip6_parse(ip61, p1->current_length,
+                                  &(vnet_buffer(p1)->map_t.v6.l4_protocol),
+                                  &(vnet_buffer(p1)->map_t.v6.l4_offset),
+                                  &(vnet_buffer(p1)->map_t.v6.frag_offset)))) {
+        error1 = MAP_ERROR_MALFORMED;
+        next1 = IP6_MAPT_NEXT_DROP;
+      }
+
+      src_port0 = src_port1 = -1;
+      l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) +
+        sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset;
+      l4_len1 = (u32)clib_net_to_host_u16(ip61->payload_length) +
+        sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset;
+      frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+      frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset);
+
+      ip6_map_t_classify(p0, ip60, d0, &src_port0, &error0, &next0, l4_len0, frag0);
+      ip6_map_t_classify(p1, ip61, d1, &src_port1, &error1, &next1, l4_len1, frag1);
+
+      /* Security check: the v6 source must match the address the domain
+         derives from the embedded v4 source and port. */
+      if (PREDICT_FALSE((src_port0 != -1) && (
+          ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) ||
+          ip60->src_address.as_u64[1] != map_get_sfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) {
+        error0 = MAP_ERROR_SEC_CHECK;
+      }
+
+      if (PREDICT_FALSE((src_port1 != -1) && (
+          ip61->src_address.as_u64[0] != map_get_pfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1) ||
+          ip61->src_address.as_u64[1] != map_get_sfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1)))) {
+        error1 = MAP_ERROR_SEC_CHECK;
+      }
+
+      /* First fragments carrying a valid port are cached so later
+         fragments of the same datagram can be mapped. */
+      if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset &&
+                        !ip6_frag_hdr_offset((ip6_frag_hdr_t *)
+                                             u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) &&
+          (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) {
+        ip6_map_fragment_cache(ip60,
+                               (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset),
+                               d0, src_port0);
+      }
+
+      if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset &&
+                        !ip6_frag_hdr_offset((ip6_frag_hdr_t *)
+                                             u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset))) &&
+          (src_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules) && (error1 == MAP_ERROR_NONE)) {
+        ip6_map_fragment_cache(ip61,
+                               (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset),
+                               d1, src_port1);
+      }
+
+      /* RX accounting; ICMP is counted by the icmp node instead. */
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip60->payload_length));
+      }
+
+      if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index,
+                                        vnet_buffer(p1)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip61->payload_length));
+      }
+
+      next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+      next1 = (error1 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next1;
+      p0->error = error_node->errors[error0];
+      p1->error = error_node->errors[error1];
+      vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1);
+    }
+#endif
+
+    /* Single-packet loop. */
+    while (n_left_from > 0 && n_left_to_next > 0) {
+      u32 pi0;
+      vlib_buffer_t *p0;
+      ip6_header_t *ip60;
+      u8 error0;
+      u32 l4_len0;
+      i32 src_port0;
+      map_domain_t *d0;
+      ip6_frag_hdr_t *frag0;
+      ip6_mapt_next_t next0 = 0;
+      u32 saddr;
+
+      pi0 = to_next[0] = from[0];
+      from += 1;
+      n_left_from -= 1;
+      to_next += 1;
+      n_left_to_next -= 1;
+      error0 = MAP_ERROR_NONE;
+
+      p0 = vlib_get_buffer(vm, pi0);
+      ip60 = vlib_buffer_get_current(p0);
+      //Save saddr in a different variable to not overwrite ip.adj_index
+      saddr = map_get_ip4(&ip60->src_address);
+      d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX],
+                              (ip4_address_t *)&saddr,
+                              &vnet_buffer(p0)->map_t.map_domain_index, &error0);
+
+      //FIXME: What if d0 is null
+      vnet_buffer(p0)->map_t.v6.saddr = saddr;
+      vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address);
+      vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0;
+
+      if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length,
+                                  &(vnet_buffer(p0)->map_t.v6.l4_protocol),
+                                  &(vnet_buffer(p0)->map_t.v6.l4_offset),
+                                  &(vnet_buffer(p0)->map_t.v6.frag_offset)))) {
+        error0 = MAP_ERROR_MALFORMED;
+        next0 = IP6_MAPT_NEXT_DROP;
+      }
+
+      src_port0 = -1;
+      l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) +
+        sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset;
+      frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset);
+
+      /* Classify: non-first fragment, TCP, UDP, ICMPv6, or unsupported. */
+      if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset &&
+                        ip6_frag_hdr_offset(frag0))) {
+        /* Non-first fragment: port must come from the fragment cache. */
+        src_port0 = ip6_map_fragment_get(ip60, frag0, d0);
+        error0 = (src_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY;
+        next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED;
+      } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) {
+        error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : error0;
+        vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16;
+        next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+        src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset));
+      } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) {
+        error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : error0;
+        vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6;
+        next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP;
+        src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset));
+      } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) {
+        error0 = l4_len0 < sizeof(icmp46_header_t) ? MAP_ERROR_MALFORMED : error0;
+        next0 = IP6_MAPT_NEXT_MAPT_ICMP;
+        /* BUG FIX: match on the ICMP 'type' field, not 'code'.
+           ICMP6_echo_request/reply are type values (128/129); the code
+           of an echo message is always 0, so the original comparison
+           never matched and echo packets bypassed the security check. */
+        if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->type == ICMP6_echo_reply ||
+            ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->type == ICMP6_echo_request)
+          src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6));
+      } else {
+        //TODO: In case of 1:1 mapping, it might be possible to do something with those packets.
+        error0 = MAP_ERROR_BAD_PROTOCOL;
+      }
+
+      //Security check
+      if (PREDICT_FALSE((src_port0 != -1) && (
+          ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) ||
+          ip60->src_address.as_u64[1] != map_get_sfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) {
+        //Security check when src_port0 is not zero (non-first fragment, UDP or TCP)
+        error0 = MAP_ERROR_SEC_CHECK;
+      }
+
+      //Fragmented first packet needs to be cached for following packets
+      if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset &&
+                        !ip6_frag_hdr_offset((ip6_frag_hdr_t *)
+                                             u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) &&
+          (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) {
+        ip6_map_fragment_cache(ip60,
+                               (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset),
+                               d0, src_port0);
+      }
+
+      /* RX accounting; ICMP is counted by the icmp node instead. */
+      if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) {
+        vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index,
+                                        vnet_buffer(p0)->map_t.map_domain_index, 1,
+                                        clib_net_to_host_u16(ip60->payload_length));
+      }
+
+      next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0;
+      p0->error = error_node->errors[error0];
+      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
+                                      to_next, n_left_to_next, pi0,
+                                      next0);
+    }
+    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+  }
+  return frame->n_vectors;
+}
+
+/* Error counter strings shared by all ip6-map-t nodes; one string per
+ MAP_ERROR_* symbol, expanded from the foreach_map_error list. */
+static char *map_t_error_strings[] = {
+#define _(sym,string) string,
+ foreach_map_error
+#undef _
+};
+
+/* Node translating non-first IPv6 fragments to IPv4; next nodes are
+ ip4-lookup, the ip4 fragmentation node, or drop. */
+VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
+ .function = ip6_map_t_fragmented,
+ .name = "ip6-map-t-fragmented",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Node translating ICMPv6 packets to ICMPv4. */
+VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
+ .function = ip6_map_t_icmp,
+ .name = "ip6-map-t-icmp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_ICMP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Node translating TCP/UDP packets to IPv4. */
+VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
+ .function = ip6_map_t_tcp_udp,
+ .name = "ip6-map-t-tcp-udp",
+ .vector_size = sizeof (u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME,
+ [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
+ },
+};
+
+/* Entry node of the IPv6->IPv4 MAP-T path; classifies packets and
+ dispatches them to the protocol-specific translation nodes above. */
+VLIB_REGISTER_NODE(ip6_map_t_node) = {
+ .function = ip6_map_t,
+ .name = "ip6-map-t",
+ .vector_size = sizeof(u32),
+ .format_trace = format_map_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = MAP_N_ERROR,
+ .error_strings = map_t_error_strings,
+
+ .n_next_nodes = IP6_MAPT_N_NEXT,
+ .next_nodes = {
+ [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp",
+ [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp",
+ [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented",
+ [IP6_MAPT_NEXT_DROP] = "error-drop",
+ },
+};
diff --git a/vnet/vnet/map/ip6_sixrd.c b/vnet/vnet/map/ip6_sixrd.c
new file mode 100644
index 00000000000..0bd0cf3a303
--- /dev/null
+++ b/vnet/vnet/map/ip6_sixrd.c
@@ -0,0 +1,129 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * Defines used for testing various optimisation schemes
+ */
+#define SIXRD_ENCAP_DUAL 0
+
+#include "sixrd.h"
+
+/* Forward declaration so the node function can reference its own
+ registration (node index for error counters). */
+vlib_node_registration_t ip6_sixrd_node;
+
+/* Next-node indices for the ip6-sixrd node. */
+typedef enum {
+ IP6_SIXRD_NEXT_IP4_LOOKUP,
+ IP6_SIXRD_NEXT_DROP,
+ IP6_SIXRD_N_NEXT,
+} ip6_sixrd_next_t;
+
+/*
+ * ip6_sixrd: 6RD encapsulation node (IPv6 over IPv4).
+ *
+ * For each packet: looks up the 6RD domain from the adjacency, derives
+ * the IPv4 tunnel destination from the IPv6 destination address,
+ * prepends an IPv4 header (protocol 41, IPv6-in-IPv4) and hands the
+ * packet to ip4-lookup.  Packets whose destination cannot be derived
+ * are dropped with SIXRD_ERROR_UNKNOWN.
+ */
+static uword
+ip6_sixrd (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_sixrd_node.index);
+ u32 encap = 0; /* number of successfully encapsulated packets */
+ from = vlib_frame_vector_args(frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ sixrd_domain_t *d0;
+ u8 error0 = SIXRD_ERROR_NONE;
+ ip6_header_t *ip60;
+ ip4_header_t *ip4h0;
+ u32 next0 = IP6_SIXRD_NEXT_IP4_LOOKUP;
+ u32 sixrd_domain_index0 = ~0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next +=1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer(vm, pi0);
+ ip60 = vlib_buffer_get_current(p0);
+ // p0->current_length = clib_net_to_host_u16(ip40->length);
+ d0 = ip6_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &sixrd_domain_index0);
+ ASSERT(d0);
+
+ /* SIXRD calc: extract the v4 tunnel endpoint from the upper 64
+ bits of the v6 destination; 60 = 40 (v6 header kept inside)
+ + 20 (new v4 header). */
+ u64 dal60 = clib_net_to_host_u64(ip60->dst_address.as_u64[0]);
+ u32 da40 = sixrd_get_addr(d0, dal60);
+ u16 len = clib_net_to_host_u16(ip60->payload_length) + 60;
+ if (da40 == 0) error0 = SIXRD_ERROR_UNKNOWN;
+
+ /* construct ipv4 header in front of the ipv6 packet */
+ vlib_buffer_advance(p0, - (sizeof(ip4_header_t)));
+ ip4h0 = vlib_buffer_get_current(p0);
+ vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0;
+ ip4h0->ip_version_and_header_length = 0x45;
+ ip4h0->tos = 0;
+ ip4h0->length = clib_host_to_net_u16(len);
+ ip4h0->fragment_id = 0;
+ ip4h0->flags_and_fragment_offset = 0;
+ ip4h0->ttl = 0x40;
+ ip4h0->protocol = IP_PROTOCOL_IPV6;
+ ip4h0->src_address = d0->ip4_src;
+ ip4h0->dst_address.as_u32 = clib_host_to_net_u32(da40);
+ ip4h0->checksum = ip4_header_checksum(ip4h0);
+
+ next0 = error0 == SIXRD_ERROR_NONE ? IP6_SIXRD_NEXT_IP4_LOOKUP : IP6_SIXRD_NEXT_DROP;
+
+ if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) {
+ sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr));
+ tr->sixrd_domain_index = sixrd_domain_index0;
+ }
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) encap++;
+
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+ vlib_node_increment_counter(vm, ip6_sixrd_node.index, SIXRD_ERROR_ENCAPSULATED, encap);
+
+ return frame->n_vectors;
+}
+
+/* Error counter strings for the sixrd nodes; one string per
+ SIXRD_ERROR_* symbol, expanded from the foreach_sixrd_error list. */
+static char *sixrd_error_strings[] = {
+#define _(sym,string) string,
+ foreach_sixrd_error
+#undef _
+};
+
+/* 6RD encapsulation node registration: packets leave via ip4-lookup
+ or are dropped. */
+VLIB_REGISTER_NODE(ip6_sixrd_node) = {
+ .function = ip6_sixrd,
+ .name = "ip6-sixrd",
+ .vector_size = sizeof(u32),
+ .format_trace = format_sixrd_trace,
+ .n_errors = SIXRD_N_ERROR,
+ .error_strings = sixrd_error_strings,
+ .n_next_nodes = IP6_SIXRD_N_NEXT,
+ .next_nodes = {
+ [IP6_SIXRD_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_SIXRD_NEXT_DROP] = "error-drop",
+ },
+};
diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c
new file mode 100644
index 00000000000..b0cab660876
--- /dev/null
+++ b/vnet/vnet/map/map.c
@@ -0,0 +1,1634 @@
+/*
+ * map.c : MAP support
+ *
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "map.h"
+
+/*
+ * This code supports the following MAP modes:
+ *
+ * Algorithmic Shared IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix > 32
+ * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic Full IPv4 address (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix = 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ * Algorithmic IPv4 prefix (ea_bits_len > 0):
+ * ea_bits_len + ip4_prefix < 32
+ * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32
+ *
+ * Independent Shared IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length > 0
+ * Rule IPv6 address = 128, Rule PSID Set
+ * Independent Full IPv4 address (ea_bits_len = 0):
+ * ip4_prefix = 32
+ * psid_length = 0, ip6_prefix = 128
+ * Independent IPv4 prefix (ea_bits_len = 0):
+ * ip4_prefix < 32
+ * psid_length = 0, ip6_prefix = 128
+ *
+ */
+
+/*
+ * This code supports MAP-T:
+ *
+ * With DMR prefix length equal to 96.
+ *
+ */
+
+
+/*
+ * Extract the "MAP port" from an IPv4 packet.
+ *
+ * For TCP/UDP: the source port (MAP_SENDER) or destination port
+ * (MAP_RECEIVER).  For ICMP echo: the identifier field.  For other
+ * ICMP messages (errors), if the packet is long enough, the embedded
+ * packet is inspected instead, with the port direction reversed (an
+ * error quotes the offending packet, whose ports are swapped).
+ *
+ * Returns the 16-bit port as read from the wire (network byte order,
+ * no byte-swap applied), or -1 when no port can be extracted
+ * (non-standard header length, fragment, or unsupported protocol).
+ */
+i32
+ip4_get_port (ip4_header_t *ip, map_dir_e dir, u16 buffer_len)
+{
+ //TODO: use buffer length
+ if (ip->ip_version_and_header_length != 0x45 ||
+ ip4_get_fragment_offset(ip))
+ return -1;
+
+ if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP))) {
+ udp_header_t *udp = (void *)(ip + 1);
+ return (dir == MAP_SENDER) ? udp->src_port : udp->dst_port;
+ } else if (ip->protocol == IP_PROTOCOL_ICMP) {
+ icmp46_header_t *icmp = (void *)(ip + 1);
+ if (icmp->type == ICMP4_echo_request ||
+ icmp->type == ICMP4_echo_reply) {
+ /* Echo: the identifier immediately follows the 4-byte header. */
+ return *((u16 *)(icmp + 1));
+ } else if (clib_net_to_host_u16(ip->length) >= 64) {
+ /* ICMP error: look at the quoted inner packet (after the
+ 8-byte ICMP header); note the reversed port direction. */
+ ip = (ip4_header_t *)(icmp + 2);
+ if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) ||
+ (ip->protocol == IP_PROTOCOL_UDP))) {
+ udp_header_t *udp = (void *)(ip + 1);
+ return (dir == MAP_SENDER) ? udp->dst_port : udp->src_port;
+ } else if (ip->protocol == IP_PROTOCOL_ICMP) {
+ icmp46_header_t *icmp = (void *)(ip + 1);
+ if (icmp->type == ICMP4_echo_request ||
+ icmp->type == ICMP4_echo_reply) {
+ return *((u16 *)(icmp + 1));
+ }
+ }
+ }
+ }
+ return -1;
+}
+
+/*
+ * Extract the "MAP port" from an IPv6 packet.
+ *
+ * Parses the extension-header chain with ip6_parse(); then for TCP/UDP
+ * returns the source port (MAP_SENDER) or destination port
+ * (MAP_RECEIVER); for ICMPv6 echo request/reply returns the identifier
+ * (words[2] of the ICMP message) in the matching direction only.
+ *
+ * Returns the 16-bit value as read from the wire (network byte order),
+ * or -1 when no port can be extracted (parse failure, non-first
+ * fragment, or unsupported protocol).
+ */
+i32
+ip6_get_port (ip6_header_t *ip6, map_dir_e dir, u16 buffer_len)
+{
+ u8 l4_protocol;
+ u16 l4_offset;
+ u16 frag_offset;
+ u8 *l4;
+
+ if (ip6_parse(ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
+ return -1;
+
+ //TODO: Use buffer length
+
+ if (frag_offset &&
+ ip6_frag_hdr_offset(((ip6_frag_hdr_t *)u8_ptr_add(ip6, frag_offset))))
+ return -1; //Can't deal with non-first fragment for now
+
+ l4 = u8_ptr_add(ip6, l4_offset);
+ if (l4_protocol == IP_PROTOCOL_TCP ||
+ l4_protocol == IP_PROTOCOL_UDP) {
+ return (dir == MAP_SENDER) ? ((udp_header_t *)(l4))->src_port : ((udp_header_t *)(l4))->dst_port;
+ } else if (l4_protocol == IP_PROTOCOL_ICMP6) {
+ icmp46_header_t *icmp = (icmp46_header_t *)(l4);
+ if (icmp->type == ICMP6_echo_request) {
+ return (dir == MAP_SENDER) ? ((u16*)(icmp))[2] : -1;
+ } else if (icmp->type == ICMP6_echo_reply) {
+ return (dir == MAP_SENDER) ? -1 : ((u16*)(icmp))[2];
+ }
+ }
+ return -1;
+}
+
+
+/*
+ * Create a MAP (or MAP-T) domain.
+ *
+ * Validates the parameter combination, allocates the domain from the
+ * pool, precomputes the suffix/PSID masks and shifts used by the data
+ * plane, installs the IPv4 route for the domain prefix and the IPv6
+ * route for the BR source address (shared between domains via a
+ * refcount stored in the adjacency rewrite data), and initializes the
+ * per-domain counters.
+ *
+ * On success returns 0 and writes the new domain's pool index to
+ * *map_domain_index; returns -1 on any validation or routing failure
+ * (the partially-created domain is released).
+ */
+int
+map_create_domain (ip4_address_t *ip4_prefix,
+ u8 ip4_prefix_len,
+ ip6_address_t *ip6_prefix,
+ u8 ip6_prefix_len,
+ ip6_address_t *ip6_src,
+ u8 ip6_src_len,
+ u8 ea_bits_len,
+ u8 psid_offset,
+ u8 psid_length,
+ u32 *map_domain_index,
+ u16 mtu,
+ u8 flags)
+{
+ map_main_t *mm = &map_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ map_domain_t *d;
+ ip_adjacency_t adj;
+ ip4_add_del_route_args_t args4;
+ ip6_add_del_route_args_t args6;
+ u8 suffix_len;
+ uword *p;
+
+ /* EA bits must be within the first 64 bits */
+ if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64)
+ return -1;
+
+ /* Sanity check on the src prefix length */
+ if (flags & MAP_DOMAIN_TRANSLATION) {
+ if (ip6_src_len != 96) {
+ clib_warning("MAP-T only supports ip6_src_len = 96 for now.");
+ return -1;
+ }
+ } else {
+ if (ip6_src_len != 128) {
+ clib_warning("MAP-E requires a BR address, not a prefix (ip6_src_len should be 128).");
+ return -1;
+ }
+ }
+
+ /* Get domain index */
+ pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES);
+ memset(d, 0, sizeof (*d));
+ *map_domain_index = d - mm->domains;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip6_src = *ip6_src;
+ d->ip6_src_len = ip6_src_len;
+ d->ea_bits_len = ea_bits_len;
+ d->psid_offset = psid_offset;
+ d->psid_length = psid_length;
+ d->mtu = mtu;
+ d->flags = flags;
+
+ /* How many, and which bits to grab from the IPv4 DA */
+ if (ip4_prefix_len + ea_bits_len < 32) {
+ d->flags |= MAP_DOMAIN_PREFIX;
+ suffix_len = d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len;
+ } else {
+ d->suffix_shift = 0;
+ suffix_len = 32 - ip4_prefix_len;
+ }
+ /* NOTE(review): with ip4_prefix_len == 0 and ea_bits_len == 0,
+ suffix_len is 32 and (1<<32) is undefined behaviour on 32-bit
+ int — confirm callers cannot reach this combination. */
+ d->suffix_mask = (1<<suffix_len) - 1;
+
+ /* NOTE(review): psid_length + psid_offset > 16 would make this
+ negative and wrap in the (presumably unsigned) field — verify
+ inputs are validated upstream. */
+ d->psid_shift = 16 - psid_length - psid_offset;
+ d->psid_mask = (1 << d->psid_length) - 1;
+ d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length;
+
+ /* Init IP adjacency; the domain index is smuggled to the data
+ plane through the adjacency rewrite data. */
+ memset(&adj, 0, sizeof(adj));
+ adj.explicit_fib_index = ~0;
+ adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP;
+ p = (uword *)&adj.rewrite_data[0];
+ *p = (uword) (*map_domain_index);
+
+ if (ip4_get_route(im4, 0, 0, (u8 *)ip4_prefix, ip4_prefix_len)) {
+ clib_warning("IPv4 route already defined: %U/%d", format_ip4_address, ip4_prefix, ip4_prefix_len);
+ pool_put(mm->domains, d);
+ return -1;
+ }
+
+ /* Create ip4 adjacency */
+ memset(&args4, 0, sizeof(args4));
+ args4.table_index_or_table_id = 0;
+ args4.flags = IP4_ROUTE_FLAG_ADD;
+ args4.dst_address.as_u32 = ip4_prefix->as_u32;
+ args4.dst_address_length = ip4_prefix_len;
+
+ args4.adj_index = ~0;
+ args4.add_adj = &adj;
+ args4.n_add_adj = 1;
+ ip4_add_del_route(im4, &args4);
+
+ /* Multiple MAP domains may share same source IPv6 TEP */
+ u32 ai = ip6_get_route(im6, 0, 0, ip6_src, ip6_src_len);
+ if (ai > 0) {
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai);
+ if (adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP &&
+ adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP_T) {
+ clib_warning("BR source address already assigned: %U", format_ip6_address, ip6_src);
+ pool_put(mm->domains, d);
+ return -1;
+ }
+ /* Shared source */
+ p = (uword *)&adj6->rewrite_data[0];
+ p[0] = ~0;
+
+ /* Add refcount, so we don't accidentally delete the route underneath someone */
+ p[1]++;
+ } else {
+ /* Create ip6 adjacency. */
+ memset(&args6, 0, sizeof(args6));
+ args6.table_index_or_table_id = 0;
+ args6.flags = IP6_ROUTE_FLAG_ADD;
+ args6.dst_address.as_u64[0] = ip6_src->as_u64[0];
+ args6.dst_address.as_u64[1] = ip6_src->as_u64[1];
+ args6.dst_address_length = ip6_src_len;
+ args6.adj_index = ~0;
+ args6.add_adj = &adj;
+ args6.n_add_adj = 1;
+ ip6_add_del_route(im6, &args6);
+ }
+
+ /* Validate packet/byte counters */
+ map_domain_counter_lock(mm);
+ int i;
+ for (i = 0; i < vec_len(mm->simple_domain_counters); i++) {
+ vlib_validate_simple_counter(&mm->simple_domain_counters[i], *map_domain_index);
+ vlib_zero_simple_counter(&mm->simple_domain_counters[i], *map_domain_index);
+ }
+ for (i = 0; i < vec_len(mm->domain_counters); i++) {
+ vlib_validate_combined_counter(&mm->domain_counters[i], *map_domain_index);
+ vlib_zero_combined_counter(&mm->domain_counters[i], *map_domain_index);
+ }
+ map_domain_counter_unlock(mm);
+
+ return 0;
+}
+
+/*
+ * map_delete_domain: delete a MAP domain by pool index.
+ *
+ * Removes the domain's IPv4 route, decrements the share refcount on
+ * the IPv6 BR-source route (deleting the route only when no other
+ * domain uses it), frees the per-PSID rule table if any, and returns
+ * the domain to the pool.  Returns 0 on success, -1 if the index does
+ * not refer to a live domain.
+ */
+int
+map_delete_domain (u32 map_domain_index)
+{
+ map_main_t *mm = &map_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ map_domain_t *d;
+ ip_adjacency_t adj;
+ ip4_add_del_route_args_t args4;
+ ip6_add_del_route_args_t args6;
+
+ if (pool_is_free_index(mm->domains, map_domain_index)) {
+ clib_warning("MAP domain delete: domain does not exist: %d", map_domain_index);
+ return -1;
+ }
+
+ d = pool_elt_at_index(mm->domains, map_domain_index);
+
+ memset(&adj, 0, sizeof(adj));
+ adj.explicit_fib_index = ~0;
+ adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP;
+
+ /* Delete ip4 adjacency */
+ memset(&args4, 0, sizeof(args4));
+ args4.table_index_or_table_id = 0;
+ args4.flags = IP4_ROUTE_FLAG_DEL;
+ args4.dst_address.as_u32 = d->ip4_prefix.as_u32;
+ args4.dst_address_length = d->ip4_prefix_len;
+ args4.adj_index = 0;
+ args4.add_adj = &adj;
+ args4.n_add_adj = 0;
+ ip4_add_del_route(im4, &args4);
+
+ /* Delete ip6 adjacency; rewrite_data[1] holds the share refcount
+ (see map_create_domain). */
+ u32 ai = ip6_get_route(im6, 0, 0, &d->ip6_src, d->ip6_src_len);
+ if (ai > 0) {
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai);
+
+ uword *p = (uword *)&adj6->rewrite_data[0];
+ /* Delete route when no other domains use this source */
+ if (p[1] == 0) {
+ memset(&args6, 0, sizeof (args6));
+ args6.table_index_or_table_id = 0;
+ args6.flags = IP6_ROUTE_FLAG_DEL;
+ args6.dst_address.as_u64[0] = d->ip6_src.as_u64[0];
+ args6.dst_address.as_u64[1] = d->ip6_src.as_u64[1];
+ args6.dst_address_length = d->ip6_src_len;
+ args6.adj_index = 0;
+ args6.add_adj = &adj;
+ args6.n_add_adj = 0;
+ ip6_add_del_route(im6, &args6);
+ }
+ p[1]--;
+ }
+ /* Deleting rules */
+ if (d->rules)
+ clib_mem_free(d->rules);
+
+ pool_put(mm->domains, d);
+
+ return 0;
+}
+
+/*
+ * Add or remove a per-PSID IPv6 tunnel endpoint rule for a MAP domain.
+ *
+ * Rules only apply to 1:1 independent domains (ea_bits_len == 0).  The
+ * rule table is lazily allocated on first use, sized to one ip6
+ * address per possible PSID value.  Removal simply zeroes the entry.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+int
+map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t *tep,
+                  u8 is_add)
+{
+  map_main_t *mm = &map_main;
+  map_domain_t *domain;
+
+  if (pool_is_free_index(mm->domains, map_domain_index)) {
+    clib_warning("MAP rule: domain does not exist: %d", map_domain_index);
+    return -1;
+  }
+  domain = pool_elt_at_index(mm->domains, map_domain_index);
+
+  /* Rules are only used in 1:1 independent case */
+  if (domain->ea_bits_len > 0)
+    return -1;
+
+  /* Lazily allocate and zero the rule table on first use. */
+  if (domain->rules == NULL) {
+    u32 table_bytes = (0x1 << domain->psid_length) * sizeof(ip6_address_t);
+    domain->rules = clib_mem_alloc_aligned(table_bytes, CLIB_CACHE_LINE_BYTES);
+    if (domain->rules == NULL)
+      return -1;
+    memset(domain->rules, 0, table_bytes);
+  }
+
+  if (psid >= (0x1 << domain->psid_length)) {
+    clib_warning("MAP rule: PSID outside bounds: %d [%d]", psid, 0x1 << domain->psid_length);
+    return -1;
+  }
+
+  if (is_add)
+    domain->rules[psid] = *tep;
+  else
+    memset(&domain->rules[psid], 0, sizeof(ip6_address_t));
+
+  return 0;
+}
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+/* Pre-resolve the given next-hops and cache the resulting FIB
+ adjacency indices in map_main, letting the MAP data plane skip the
+ per-packet FIB lookup. A zero address leaves the corresponding
+ cached index untouched. */
+static void
+map_pre_resolve (ip4_address_t *ip4, ip6_address_t *ip6)
+{
+ map_main_t *mm = &map_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) {
+ mm->adj6_index = ip6_fib_lookup_with_table(im6, 0, ip6);
+ clib_warning("FIB lookup results in: %u", mm->adj6_index);
+ }
+ if (ip4->as_u32 != 0) {
+ mm->adj4_index = ip4_fib_lookup_with_table(im4, 0, ip4, 0);
+ clib_warning("FIB lookup results in: %u", mm->adj4_index);
+ }
+}
+#endif
+
+/*
+ * CLI handler toggling the MAP security check ("on" / "off").
+ */
+static clib_error_t *
+map_security_check_command_fn (vlib_main_t *vm,
+                               unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  map_main_t *mm = &map_main;
+  unformat_input_t line_input_storage;
+  unformat_input_t *li = &line_input_storage;
+
+  /* Consume the rest of the line; nothing to do when it is empty. */
+  if (!unformat_user(input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input(li) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(li, "off")) {
+      mm->sec_check = false;
+    } else if (unformat(li, "on")) {
+      mm->sec_check = true;
+    } else {
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+    }
+  }
+  unformat_free(li);
+  return 0;
+}
+
+/*
+ * CLI handler toggling the MAP security check for fragments
+ * ("on" / "off").
+ */
+static clib_error_t *
+map_security_check_frag_command_fn (vlib_main_t *vm,
+                                    unformat_input_t *input,
+                                    vlib_cli_command_t *cmd)
+{
+  map_main_t *mm = &map_main;
+  unformat_input_t line_input_storage;
+  unformat_input_t *li = &line_input_storage;
+
+  /* Consume the rest of the line; nothing to do when it is empty. */
+  if (!unformat_user(input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input(li) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(li, "off")) {
+      mm->sec_check_frag = false;
+    } else if (unformat(li, "on")) {
+      mm->sec_check_frag = true;
+    } else {
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+    }
+  }
+  unformat_free(li);
+  return 0;
+}
+
+/*
+ * CLI handler for "map add domain ip4-pfx ... ip6-pfx ... ip6-src ...
+ * [ea-bits-len n] [psid-offset n] [psid-len n] [mtu n] [map-t]".
+ *
+ * Parses the domain parameters and calls map_create_domain().
+ *
+ * BUG FIX versus the original: ea_bits_len was declared without an
+ * initializer, so when "ea-bits-len" was not given on the command
+ * line an uninitialized (garbage) value was passed to
+ * map_create_domain().  It now defaults to 0 like the other optional
+ * arguments.
+ */
+static clib_error_t *
+map_add_domain_command_fn (vlib_main_t *vm,
+                           unformat_input_t *input,
+                           vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  ip4_address_t ip4_prefix;
+  ip6_address_t ip6_prefix;
+  ip6_address_t ip6_src;
+  u32 ip6_prefix_len, ip4_prefix_len, map_domain_index, ip6_src_len;
+  u32 num_m_args = 0;
+  /* Optional arguments, all defaulting to 0 */
+  u32 ea_bits_len = 0, psid_offset = 0, psid_length = 0;
+  u32 mtu = 0;
+  u8 flags = 0;
+  ip6_src_len = 128;
+
+  /* Get a line of input. */
+  if (!unformat_user(input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len))
+      num_m_args++;
+    else if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len))
+      num_m_args++;
+    else if (unformat(line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, &ip6_src_len))
+      num_m_args++;
+    else if (unformat(line_input, "ip6-src %U", unformat_ip6_address, &ip6_src))
+      num_m_args++;
+    else if (unformat(line_input, "ea-bits-len %d", &ea_bits_len))
+      num_m_args++;
+    else if (unformat(line_input, "psid-offset %d", &psid_offset))
+      num_m_args++;
+    else if (unformat(line_input, "psid-len %d", &psid_length))
+      num_m_args++;
+    else if (unformat(line_input, "mtu %d", &mtu))
+      num_m_args++;
+    else if (unformat(line_input, "map-t"))
+      flags |= MAP_DOMAIN_TRANSLATION;
+    else
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+  }
+  unformat_free(line_input);
+
+  /* ip4-pfx, ip6-pfx and ip6-src are required; the counter does not
+     distinguish which three were given. */
+  if (num_m_args < 3)
+    return clib_error_return(0, "mandatory argument(s) missing");
+
+  map_create_domain(&ip4_prefix, ip4_prefix_len,
+                    &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len,
+                    ea_bits_len, psid_offset, psid_length, &map_domain_index,
+                    mtu, flags);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "map del domain index <n>".
+ * Deletes the MAP domain with the given pool index.
+ */
+static clib_error_t *
+map_del_domain_command_fn (vlib_main_t *vm,
+                           unformat_input_t *input,
+                           vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_storage;
+  unformat_input_t *li = &line_input_storage;
+  u32 map_domain_index;
+  u32 parsed = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user(input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input(li) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(li, "index %d", &map_domain_index))
+      parsed++;
+    else
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+  }
+  unformat_free(li);
+
+  /* Exactly one "index" argument is required. */
+  if (parsed != 1)
+    return clib_error_return(0, "mandatory argument(s) missing");
+
+  map_delete_domain(map_domain_index);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "map add rule index <n> psid <p> ip6-dst <addr>".
+ * Installs a per-PSID tunnel endpoint rule in the given domain.
+ */
+static clib_error_t *
+map_add_rule_command_fn (vlib_main_t *vm,
+                         unformat_input_t *input,
+                         vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_storage;
+  unformat_input_t *li = &line_input_storage;
+  ip6_address_t tep;
+  u32 psid, map_domain_index;
+  u32 parsed = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user(input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input(li) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(li, "index %d", &map_domain_index))
+      parsed++;
+    else if (unformat(li, "psid %d", &psid))
+      parsed++;
+    else if (unformat(li, "ip6-dst %U", unformat_ip6_address, &tep))
+      parsed++;
+    else
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+  }
+  unformat_free(li);
+
+  /* All three arguments are required. */
+  if (parsed != 3)
+    return clib_error_return(0, "mandatory argument(s) missing");
+
+  if (map_add_del_psid(map_domain_index, psid, &tep, 1) != 0) {
+    return clib_error_return(0, "Failing to add Mapping Rule");
+  }
+  return 0;
+}
+
+#if MAP_SKIP_IP6_LOOKUP
+/* CLI handler for "map params pre-resolve [ip4-nh <a>] [ip6-nh <a>]":
+ records the next-hop addresses in map_main and resolves them to
+ cached adjacency indices via map_pre_resolve(). */
+static clib_error_t *
+map_pre_resolve_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4nh;
+ ip6_address_t ip6nh;
+ map_main_t *mm = &map_main;
+
+ memset(&ip4nh, 0, sizeof(ip4nh));
+ memset(&ip6nh, 0, sizeof(ip6nh));
+
+ /* Get a line of input. */
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh))
+ mm->preresolve_ip4 = ip4nh;
+ else if (unformat(line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh))
+ mm->preresolve_ip6 = ip6nh;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free(line_input);
+
+ map_pre_resolve(&ip4nh, &ip6nh);
+
+ return 0;
+}
+#endif
+
+/*
+ * CLI handler setting the IPv4 source address used for relayed ICMP
+ * error messages.
+ */
+static clib_error_t *
+map_icmp_relay_source_address_command_fn (vlib_main_t *vm,
+                                          unformat_input_t *input,
+                                          vlib_cli_command_t *cmd)
+{
+  map_main_t *mm = &map_main;
+  unformat_input_t line_input_storage;
+  unformat_input_t *li = &line_input_storage;
+  ip4_address_t icmp_src_address;
+
+  memset(&icmp_src_address, 0, sizeof(icmp_src_address));
+
+  /* Get a line of input. */
+  if (!unformat_user(input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input(li) != UNFORMAT_END_OF_INPUT) {
+    if (unformat(li, "%U", unformat_ip4_address, &icmp_src_address))
+      mm->icmp_src_address = icmp_src_address;
+    else
+      return clib_error_return(0, "unknown input `%U'",
+                               format_unformat_error, input);
+  }
+  unformat_free(li);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "map params traffic-class".
+ * Accepts either "copy" (copy the inner traffic class) or a hex value
+ * to set a fixed traffic class (only the low 8 bits are kept).
+ */
+static clib_error_t *
+map_traffic_class_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ u32 tc = 0;
+
+ /* Reset unconditionally: "copy" must be re-specified on every invocation. */
+ mm->tc_copy = false;
+
+ /* Get a line of input. */
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "copy"))
+ mm->tc_copy = true;
+ else if (unformat(line_input, "%x", &tc))
+ mm->tc = tc & 0xff;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free(line_input);
+
+ return 0;
+}
+
+/*
+ * Format one MAP domain for CLI output.
+ * va_args: (map_domain_t *d, int counters).
+ * When 'counters' is set, appends the per-domain TX/RX combined counters;
+ * when the domain carries explicit rules, each non-zero rule is listed
+ * one per line and the ip6 prefix column is shown as :: instead.
+ */
+static u8 *
+format_map_domain (u8 *s, va_list *args)
+{
+ map_domain_t *d = va_arg(*args, map_domain_t *);
+ bool counters = va_arg(*args, int); // bool promoted to int through varargs
+ map_main_t *mm = &map_main;
+ ip6_address_t ip6_prefix;
+
+ /* Rule-based domains have per-PSID destinations; blank the shared prefix. */
+ if (d->rules)
+ memset(&ip6_prefix, 0, sizeof(ip6_prefix));
+ else
+ ip6_prefix = d->ip6_prefix;
+
+ s = format(s,
+ "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s",
+ d - mm->domains,
+ format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len,
+ format_ip6_address, &ip6_prefix, d->ip6_prefix_len,
+ format_ip6_address, &d->ip6_src, d->ip6_src_len,
+ d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu,
+ (d->flags & MAP_DOMAIN_TRANSLATION) ? "map-t" : "");
+
+ if (counters) {
+ map_domain_counter_lock(mm);
+ vlib_counter_t v;
+ vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_TX], d - mm->domains, &v);
+ s = format(s, " TX: %d/%d", v.packets, v.bytes);
+ vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_RX], d - mm->domains, &v);
+ s = format(s, " RX: %d/%d", v.packets, v.bytes);
+ map_domain_counter_unlock(mm);
+ }
+
+ if (d->rules) {
+ int i;
+ ip6_address_t dst;
+ /* One rule slot per PSID value; all-zero entries are unset. */
+ for (i = 0; i < (0x1 << d->psid_length); i++) {
+ dst = d->rules[i];
+ if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0 )
+ continue;
+ s = format(s,
+ " rule psid: %d ip6-dst %U\n", i, format_ip6_address, &dst);
+ }
+ }
+ return s;
+}
+
+/*
+ * Format one IPv4 virtual-reassembly entry for CLI output.
+ * va_args: (map_ip4_reass_t *r).
+ * 'lifetime' shows the remaining time before the entry expires,
+ * or -1 if it has already expired; port is -1 until the
+ * first fragment has provided it.
+ */
+static u8 *
+format_map_ip4_reass (u8 *s, va_list *args)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *r = va_arg(*args, map_ip4_reass_t *);
+ map_ip4_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now(mm->vlib_main);
+ f64 lifetime = (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000); // ms -> s
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format(s,
+ "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n",
+ format_ip4_address, &k->src.as_u8, format_ip4_address, &k->dst.as_u8,
+ k->protocol, clib_net_to_host_u16(k->fragment_id), (r->port >= 0)?clib_net_to_host_u16(r->port):-1, dt);
+ return s;
+}
+
+/*
+ * Format one IPv6 virtual-reassembly entry for CLI output.
+ * va_args: (map_ip6_reass_t *r).
+ * 'lifetime' shows the remaining time before the entry expires,
+ * or -1 if it has already expired.
+ */
+static u8 *
+format_map_ip6_reass (u8 *s, va_list *args)
+{
+ map_main_t *mm = &map_main;
+ map_ip6_reass_t *r = va_arg(*args, map_ip6_reass_t *);
+ map_ip6_reass_key_t *k = &r->key;
+ f64 now = vlib_time_now(mm->vlib_main);
+ f64 lifetime = (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000); // ms -> s
+ f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1;
+ s = format(s,
+ "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n",
+ format_ip6_address, &k->src.as_u8, format_ip6_address, &k->dst.as_u8,
+ k->protocol, clib_net_to_host_u32(k->fragment_id), dt);
+ return s;
+}
+
+/*
+ * CLI handler for "show map domain".
+ * Optional arguments: "counters" (include per-domain TX/RX counters)
+ * and "index <n>" (show a single domain instead of all).
+ * Fix: user-facing error message read "does not exists".
+ */
+static clib_error_t *
+show_map_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ bool counters = false;
+ u32 map_domain_index = ~0; // ~0 == "all domains"
+
+ /* Get a line of input. */
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "counters"))
+ counters = true;
+ else if (unformat(line_input, "index %d", &map_domain_index))
+ ;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free(line_input);
+
+ if (pool_elts(mm->domains) == 0)
+ vlib_cli_output(vm, "No MAP domains are configured...");
+
+ if (map_domain_index == ~0) {
+ pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);}));
+ } else {
+ if (pool_is_free_index(mm->domains, map_domain_index)) {
+ return clib_error_return(0, "MAP domain does not exist %d", map_domain_index);
+ }
+
+ d = pool_elt_at_index(mm->domains, map_domain_index);
+ vlib_cli_output(vm, "%U", format_map_domain, d, counters);
+ }
+
+ return 0;
+}
+
+/*
+ * CLI handler for "show map fragments".
+ * Dumps every active IPv4 and IPv6 virtual-reassembly entry.
+ */
+static clib_error_t *
+show_map_fragments_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_t *f4;
+ map_ip6_reass_t *f6;
+
+ pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);}));
+ pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);}));
+ return (0);
+}
+
+/*
+ * Return the current value of one error counter of a graph node.
+ * Translates the node-local error enum value into an index into the
+ * global vlib error-counter heap.
+ */
+u64
+map_error_counter_get (u32 node_index, map_error_t map_error)
+{
+ vlib_main_t *vm = vlib_get_main();
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_error_t e = error_node->errors[map_error];
+ vlib_node_t *n = vlib_get_node(vm, node_index);
+ u32 ci;
+
+ ci = vlib_error_get_code(e);
+ ASSERT (ci < n->n_errors);
+ ci += n->error_heap_index; // offset into the global counter heap
+
+ return (em->counters[ci]);
+}
+
+/*
+ * CLI handler for "show map stats".
+ * Prints memory usage of domains/rules, current parameter settings,
+ * and aggregated per-direction packet/byte counters.
+ * Fix: stray ')' in the "Total: ... bytes" output line.
+ */
+static clib_error_t *
+show_map_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ map_main_t *mm = &map_main;
+ map_domain_t *d;
+ int domains = 0, rules = 0, domaincount = 0, rulecount = 0;
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output(vm, "No MAP domains are configured...");
+
+ pool_foreach(d, mm->domains, ({
+ if (d->rules) {
+ /* One ip6_address_t rule slot per possible PSID value. */
+ rulecount+= 0x1 << d->psid_length;
+ rules += sizeof(ip6_address_t) * (0x1 << d->psid_length);
+ }
+ domains += sizeof(*d);
+ domaincount++;
+ }));
+
+ vlib_cli_output(vm, "MAP domains structure: %d\n", sizeof (map_domain_t));
+ vlib_cli_output(vm, "MAP domains: %d (%d bytes)\n", domaincount, domains);
+ vlib_cli_output(vm, "MAP rules: %d (%d bytes)\n", rulecount, rules);
+ vlib_cli_output(vm, "Total: %d bytes\n", rules + domains);
+
+#if MAP_SKIP_IP6_LOOKUP
+ vlib_cli_output(vm, "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n",
+ format_ip6_address, &mm->preresolve_ip6, mm->adj6_index,
+ format_ip4_address, &mm->preresolve_ip4, mm->adj4_index);
+#endif
+
+ if (mm->tc_copy)
+ vlib_cli_output(vm, "MAP traffic-class: copy");
+ else
+ vlib_cli_output(vm, "MAP traffic-class: %x", mm->tc);
+
+ vlib_cli_output(vm, "MAP IPv6 inbound security check: %s Fragments: %s", mm->sec_check ? "enabled" : "disabled",
+ mm->sec_check_frag ? "enabled" : "disabled");
+
+
+ /*
+ * Counters: sum each combined counter vector across all domains.
+ */
+ vlib_combined_counter_main_t *cm = mm->domain_counters;
+ u64 total_pkts[MAP_N_DOMAIN_COUNTER];
+ u64 total_bytes[MAP_N_DOMAIN_COUNTER];
+ int which, i;
+ vlib_counter_t v;
+
+ memset (total_pkts, 0, sizeof (total_pkts));
+ memset (total_bytes, 0, sizeof (total_bytes));
+
+ map_domain_counter_lock (mm);
+ vec_foreach (cm, mm->domain_counters) {
+ which = cm - mm->domain_counters; // RX or TX
+
+ for (i = 0; i < vec_len (cm->maxi); i++) {
+ vlib_get_combined_counter (cm, i, &v);
+ total_pkts[which] += v.packets;
+ total_bytes[which] += v.bytes;
+ }
+ }
+ map_domain_counter_unlock (mm);
+
+ vlib_cli_output(vm, "Encapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_TX],
+ total_bytes[MAP_DOMAIN_COUNTER_TX]);
+ vlib_cli_output(vm, "Decapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_RX],
+ total_bytes[MAP_DOMAIN_COUNTER_RX]);
+
+ vlib_cli_output(vm, "ICMP relayed packets: %d\n", vlib_get_simple_counter(&mm->icmp_relayed, 0));
+
+ return 0;
+}
+
+/*
+ * CLI handler for "map params reassembly".
+ * Configures the IPv4/IPv6 virtual-reassembly subsystems: lifetime,
+ * hash-table ratio, pool size and buffer budget. At least one of
+ * "ip4" / "ip6" must be given; parameters not mentioned keep their
+ * sentinel value and are left unchanged. All values are validated
+ * up-front for both families before anything is applied.
+ */
+static clib_error_t *
+map_params_reass_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 lifetime = ~0; // sentinel: not supplied
+ /* Sentinel is one past the IP4 maximum; the same sentinel is reused for
+ * the ip6 checks below since it only marks "was ht-ratio supplied". */
+ f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1);
+ u32 pool_size = ~0; // sentinel: not supplied
+ u64 buffers = ~(0ull); // sentinel: not supplied
+ u8 ip4 = 0, ip6 = 0;
+
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ /* The (unformat(...) && (flag = 1)) pattern sets the flag only when
+ * the keyword matched. */
+ if (!unformat(line_input, "lifetime %u", &lifetime) &&
+ !unformat(line_input, "ht-ratio %lf", &ht_ratio) &&
+ !unformat(line_input, "pool-size %u", &pool_size) &&
+ !unformat(line_input, "buffers %llu", &buffers) &&
+ !((unformat(line_input, "ip4")) && (ip4 = 1)) &&
+ !((unformat(line_input, "ip6")) && (ip6 = 1))) {
+ unformat_free(line_input);
+ return clib_error_return(0, "invalid input");
+ }
+ }
+ unformat_free(line_input);
+
+ if (!ip4 && !ip6)
+ return clib_error_return(0, "must specify ip4 and/or ip6");
+
+ /* Validation pass: reject out-of-range values before applying anything. */
+ if (ip4) {
+ if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return(0, "invalid ip4-reass pool-size ( > %d)", MAP_IP4_REASS_CONF_POOL_SIZE_MAX);
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return(0, "invalid ip4-reass ht-ratio ( > %d)", MAP_IP4_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return(0, "invalid ip4-reass lifetime ( > %d)", MAP_IP4_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return(0, "invalid ip4-reass buffers ( > %ld)", MAP_IP4_REASS_CONF_BUFFERS_MAX);
+ }
+
+ if (ip6) {
+ if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return clib_error_return(0, "invalid ip6-reass pool-size ( > %d)", MAP_IP6_REASS_CONF_POOL_SIZE_MAX);
+ /* NOTE(review): the sentinel here is intentionally the IP4 one (see
+ * the ht_ratio initializer); the limit checked is the IP6 maximum.
+ * The message says "ht-log2len" though the option is "ht-ratio" —
+ * presumably because the ratio derives the table log2 length;
+ * confirm intended wording. */
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return clib_error_return(0, "invalid ip6-reass ht-log2len ( > %d)", MAP_IP6_REASS_CONF_HT_RATIO_MAX);
+ if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX)
+ return clib_error_return(0, "invalid ip6-reass lifetime ( > %d)", MAP_IP6_REASS_CONF_LIFETIME_MAX);
+ if (buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX)
+ return clib_error_return(0, "invalid ip6-reass buffers ( > %ld)", MAP_IP6_REASS_CONF_BUFFERS_MAX);
+ }
+
+ /* Apply pass: each setter may destroy in-flight reassemblies and drop
+ * buffered fragments; the counts are reported to the operator. */
+ if (ip4) {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0) {
+ if (map_ip4_reass_conf_pool_size(pool_size, &reass, &packets)) {
+ vlib_cli_output(vm, "Could not set ip4-reass pool-size");
+ } else {
+ vlib_cli_output(vm, "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) {
+ if (map_ip4_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) {
+ vlib_cli_output(vm, "Could not set ip4-reass ht-log2len");
+ } else {
+ vlib_cli_output(vm, "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets);
+ }
+ }
+ if (lifetime != ~0) {
+ if (map_ip4_reass_conf_lifetime(lifetime))
+ vlib_cli_output(vm, "Could not set ip4-reass lifetime");
+ else
+ vlib_cli_output(vm, "Setting ip4-reass lifetime");
+ }
+ if (buffers != ~(0ull)) {
+ if (map_ip4_reass_conf_buffers(buffers))
+ vlib_cli_output(vm, "Could not set ip4-reass buffers");
+ else
+ vlib_cli_output(vm, "Setting ip4-reass buffers");
+ }
+
+ /* Warn when the buffer budget can never be fully used. */
+ if (map_main.ip4_reass_conf_buffers >
+ map_main.ip4_reass_conf_pool_size * MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) {
+ vlib_cli_output(vm, "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ if (ip6) {
+ u32 reass = 0, packets = 0;
+ if (pool_size != ~0) {
+ if (map_ip6_reass_conf_pool_size(pool_size, &reass, &packets)) {
+ vlib_cli_output(vm, "Could not set ip6-reass pool-size");
+ } else {
+ vlib_cli_output(vm, "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets);
+ }
+ }
+ if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) {
+ if (map_ip6_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) {
+ vlib_cli_output(vm, "Could not set ip6-reass ht-log2len");
+ } else {
+ vlib_cli_output(vm, "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets);
+ }
+ }
+ if (lifetime != ~0) {
+ if (map_ip6_reass_conf_lifetime(lifetime))
+ vlib_cli_output(vm, "Could not set ip6-reass lifetime");
+ else
+ vlib_cli_output(vm, "Setting ip6-reass lifetime");
+ }
+ if (buffers != ~(0ull)) {
+ if (map_ip6_reass_conf_buffers(buffers))
+ vlib_cli_output(vm, "Could not set ip6-reass buffers");
+ else
+ vlib_cli_output(vm, "Setting ip6-reass buffers");
+ }
+
+ /* Warn when the buffer budget can never be fully used. */
+ if (map_main.ip6_reass_conf_buffers >
+ map_main.ip6_reass_conf_pool_size * MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) {
+ vlib_cli_output(vm, "Note: 'ip6-reass buffers' > pool-size * max-fragments-per-reassembly.");
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Packet trace format function.
+ * Renders a map_trace_t as "MAP domain index: <n> L4 port: <p>",
+ * converting the recorded port from network to host byte order.
+ */
+u8 *
+format_map_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+ map_trace_t *t = va_arg (*args, map_trace_t *);
+
+ return format(s, "MAP domain index: %d L4 port: %u",
+ t->map_domain_index, clib_net_to_host_u16(t->port));
+}
+
+/*
+ * Look up a non-expired IPv4 reassembly entry by key in the given
+ * hash bucket. Returns NULL when no live entry matches.
+ */
+static_always_inline map_ip4_reass_t *
+map_ip4_reass_lookup(map_ip4_reass_key_t *k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip4_reass_hash_table[bucket];
+ while(ri != MAP_REASS_INDEX_NONE) {
+ map_ip4_reass_t * r = pool_elt_at_index(mm->ip4_reass_pool, ri);
+ /* Key match (two u64 compares) plus a lifetime check; expired
+ * entries are skipped, not removed (GC happens in map_ip4_reass_get). */
+ if (r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ now < r->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000)) {
+ return r;
+ }
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+/* Pool index of an IPv4 reassembly entry (pointer arithmetic). */
+#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool)
+
+/*
+ * Destroy an IPv4 reassembly entry: collect its buffered fragment
+ * indices into *pi_to_drop (caller drops them), unlink the entry from
+ * its hash bucket and from the expiry FIFO, and return it to the pool.
+ */
+void
+map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ map_ip4_reass_get_fragments(r, pi_to_drop);
+
+ // Unlink in hash bucket
+ map_ip4_reass_t *r2 = NULL;
+ u32 r2i = mm->ip4_reass_hash_table[r->bucket];
+ /* Walk the singly-linked bucket chain to find r's predecessor. */
+ while (r2i != map_ip4_reass_pool_index(r)) {
+ ASSERT(r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index(mm->ip4_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2) {
+ r2->bucket_next = r->bucket_next;
+ } else {
+ /* r was the bucket head. */
+ mm->ip4_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip4_reass_pool_index(r)) {
+ /* Single element: FIFO becomes empty. */
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ } else {
+ if(mm->ip4_reass_fifo_last == map_ip4_reass_pool_index(r))
+ mm->ip4_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next;
+ pool_elt_at_index(mm->ip4_reass_pool, r->fifo_next)->fifo_prev = r->fifo_prev;
+ }
+
+ pool_put(mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated--;
+}
+
+/*
+ * Find or create the IPv4 reassembly entry for (src, dst, fragment_id,
+ * protocol). Expired entries at the FIFO tail are garbage-collected
+ * first (their fragment buffer indices are appended to *pi_to_drop).
+ * Returns NULL when the pool is exhausted.
+ */
+map_ip4_reass_t *
+map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 **pi_to_drop)
+{
+ map_ip4_reass_t * r;
+ map_main_t *mm = &map_main;
+ map_ip4_reass_key_t k = {.src.data_u32 = src,
+ .dst.data_u32 = dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol };
+
+ /* CRC over the four key words, truncated to the table's log2 length. */
+ u32 h = 0;
+ h = crc_u32(k.as_u32[0], h);
+ h = crc_u32(k.as_u32[1], h);
+ h = crc_u32(k.as_u32[2], h);
+ h = crc_u32(k.as_u32[3], h);
+ h = h >> (32 - mm->ip4_reass_ht_log2len);
+
+ f64 now = vlib_time_now(mm->vlib_main);
+
+ //Cache garbage collection
+ /* The FIFO is ordered by creation time, so stop at the first live entry. */
+ while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ map_ip4_reass_t *last = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last);
+ if (last->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip4_reass_free(last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip4_reass_lookup(&k, h, now)))
+ return r;
+
+ if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size)
+ return NULL;
+
+ pool_get(mm->ip4_reass_pool, r);
+ mm->ip4_reass_allocated++;
+ int i;
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ r->fragments[i] = ~0; // ~0 marks an empty fragment slot
+
+ u32 ri = map_ip4_reass_pool_index(r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip4_reass_hash_table[h];
+ mm->ip4_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if(mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ r->fifo_next = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip4_reass_fifo_last;
+ pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index(mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri;
+ } else {
+ /* First element: circular list of one. */
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip4_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->port = -1; // port unknown until the first fragment arrives
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+
+ return r;
+}
+
+/*
+ * Buffer one fragment (vlib buffer index pi) on a reassembly entry.
+ * Returns 0 on success, -1 when the global buffer budget is exhausted
+ * or all fragment slots of this entry are in use.
+ */
+int
+map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi)
+{
+ if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers)
+ return -1;
+
+ int i;
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if(r->fragments[i] == ~0) { // first free slot
+ r->fragments[i] = pi;
+ map_main.ip4_reass_buffered_counter++;
+ return 0;
+ }
+ return -1;
+}
+
+/*
+ * Look up a non-expired IPv6 reassembly entry by key in the given
+ * hash bucket. Returns NULL when no live entry matches.
+ */
+static_always_inline map_ip6_reass_t *
+map_ip6_reass_lookup(map_ip6_reass_key_t *k, u32 bucket, f64 now)
+{
+ map_main_t *mm = &map_main;
+ u32 ri = mm->ip6_reass_hash_table[bucket];
+ while(ri != MAP_REASS_INDEX_NONE) {
+ map_ip6_reass_t * r = pool_elt_at_index(mm->ip6_reass_pool, ri);
+ /* Lifetime check first, then the five u64 words of the key. */
+ if(now < r->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) &&
+ r->key.as_u64[0] == k->as_u64[0] &&
+ r->key.as_u64[1] == k->as_u64[1] &&
+ r->key.as_u64[2] == k->as_u64[2] &&
+ r->key.as_u64[3] == k->as_u64[3] &&
+ r->key.as_u64[4] == k->as_u64[4])
+ return r;
+ ri = r->bucket_next;
+ }
+ return NULL;
+}
+
+/* Pool index of an IPv6 reassembly entry (pointer arithmetic). */
+#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool)
+
+/*
+ * Destroy an IPv6 reassembly entry: queue its buffered fragments for
+ * dropping (via *pi_to_drop), unlink the entry from its hash bucket
+ * and from the expiry FIFO, and return it to the pool.
+ */
+void
+map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop)
+{
+ map_main_t *mm = &map_main;
+ int i;
+ /* Release every buffered fragment and clear its slot. */
+ for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if(r->fragments[i].pi != ~0) {
+ vec_add1(*pi_to_drop, r->fragments[i].pi);
+ r->fragments[i].pi = ~0;
+ map_main.ip6_reass_buffered_counter--;
+ }
+
+ // Unlink in hash bucket
+ map_ip6_reass_t *r2 = NULL;
+ u32 r2i = mm->ip6_reass_hash_table[r->bucket];
+ /* Walk the singly-linked bucket chain to find r's predecessor. */
+ while (r2i != map_ip6_reass_pool_index(r)) {
+ ASSERT(r2i != MAP_REASS_INDEX_NONE);
+ r2 = pool_elt_at_index(mm->ip6_reass_pool, r2i);
+ r2i = r2->bucket_next;
+ }
+ if (r2) {
+ r2->bucket_next = r->bucket_next;
+ } else {
+ /* r was the bucket head. */
+ mm->ip6_reass_hash_table[r->bucket] = r->bucket_next;
+ }
+
+ // Unlink in list
+ if (r->fifo_next == map_ip6_reass_pool_index(r)) {
+ //Single element in the list, list is now empty
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ } else {
+ if (mm->ip6_reass_fifo_last == map_ip6_reass_pool_index(r)) //First element
+ mm->ip6_reass_fifo_last = r->fifo_prev;
+ pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next;
+ pool_elt_at_index(mm->ip6_reass_pool, r->fifo_next)->fifo_prev = r->fifo_prev;
+ }
+
+ // Free from pool if necessary
+ pool_put(mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated--;
+}
+
+/*
+ * Find or create the IPv6 reassembly entry for (src, dst, fragment_id,
+ * protocol). Expired entries at the FIFO tail are garbage-collected
+ * first (dropped fragments appended to *pi_to_drop). Returns NULL
+ * when the pool is exhausted.
+ */
+map_ip6_reass_t *
+map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id,
+ u8 protocol, u32 **pi_to_drop)
+{
+ map_ip6_reass_t * r;
+ map_main_t *mm = &map_main;
+ map_ip6_reass_key_t k = {
+ .src = *src,
+ .dst = *dst,
+ .fragment_id = fragment_id,
+ .protocol = protocol };
+
+ /* CRC over the ten u32 words of the key, truncated to table length. */
+ u32 h = 0;
+ int i;
+ for (i=0; i<10; i++)
+ h = crc_u32(k.as_u32[i], h);
+ h = h >> (32 - mm->ip6_reass_ht_log2len);
+
+ f64 now = vlib_time_now(mm->vlib_main);
+
+ //Cache garbage collection
+ /* The FIFO is ordered by creation time, so stop at the first live entry. */
+ while (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ map_ip6_reass_t *last = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last);
+ if (last->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) < now)
+ map_ip6_reass_free(last, pi_to_drop);
+ else
+ break;
+ }
+
+ if ((r = map_ip6_reass_lookup(&k, h, now)))
+ return r;
+
+ if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size)
+ return NULL;
+
+ pool_get(mm->ip6_reass_pool, r);
+ mm->ip6_reass_allocated++;
+ for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) {
+ r->fragments[i].pi = ~0; // ~0 marks an empty fragment slot
+ r->fragments[i].next_data_len = 0;
+ r->fragments[i].next_data_offset = 0;
+ }
+
+ u32 ri = map_ip6_reass_pool_index(r);
+
+ //Link in new bucket
+ r->bucket = h;
+ r->bucket_next = mm->ip6_reass_hash_table[h];
+ mm->ip6_reass_hash_table[h] = ri;
+
+ //Link in fifo
+ if(mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ r->fifo_next = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last)->fifo_next;
+ r->fifo_prev = mm->ip6_reass_fifo_last;
+ pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri;
+ pool_elt_at_index(mm->ip6_reass_pool, r->fifo_next)->fifo_prev = ri;
+ } else {
+ /* First element: circular list of one. */
+ r->fifo_next = r->fifo_prev = ri;
+ mm->ip6_reass_fifo_last = ri;
+ }
+
+ //Set other fields
+ r->ts = now;
+ r->key = k;
+ r->ip4_header.ip_version_and_header_length = 0; // 0 == header not yet cached
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ r->expected_total = 0xffff;
+ r->forwarded = 0;
+#endif
+ return r;
+}
+
+/*
+ * Record one IPv6 fragment on a reassembly entry.
+ * data_offset/next_data_offset delimit this fragment's payload range;
+ * up to 20 bytes of the fragment's leading data are cached on the
+ * PREVIOUS fragment's slot so that fragment can later be patched
+ * (presumably for the translated IPv4 header chain — confirm against
+ * the MAP-T fragment path). The first fragment (data_offset == 0)
+ * must carry a plain 20-byte IPv4 header (0x45), which is cached on
+ * the entry. Returns 0 on success, -1 on any failure (buffer budget
+ * exhausted, no free slot, duplicate, or malformed first fragment).
+ */
+int
+map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 *data_start, u16 data_len)
+{
+ map_ip6_fragment_t *f = NULL, *prev_f = NULL;
+ u16 copied_len = (data_len > 20) ? 20 : data_len;
+
+ if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers)
+ return -1;
+
+ //Lookup for fragments for the current buffer
+ //and the one before that
+ int i;
+ for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) {
+ if (data_offset && r->fragments[i].next_data_offset == data_offset) {
+ prev_f = &r->fragments[i]; // This is buffer for previous packet
+ } else if (r->fragments[i].next_data_offset == next_data_offset) {
+ f = &r->fragments[i]; // This is a buffer for the current packet
+ } else if (r->fragments[i].next_data_offset == 0) { //Available
+ if (f == NULL)
+ f = &r->fragments[i];
+ else if (prev_f == NULL)
+ prev_f = &r->fragments[i];
+ }
+ }
+
+ /* No slot, or this exact fragment range is already buffered. */
+ if (!f || f->pi != ~0)
+ return -1;
+
+ if (data_offset) {
+ if (!prev_f)
+ return -1;
+
+ /* Cache this fragment's first bytes on the previous fragment's slot. */
+ memcpy(prev_f->next_data, data_start, copied_len);
+ prev_f->next_data_len = copied_len;
+ prev_f->next_data_offset = data_offset;
+ } else {
+ /* First fragment: require a no-options IPv4 header (version 4, IHL 5). */
+ if (((ip4_header_t *)data_start)->ip_version_and_header_length != 0x45)
+ return -1;
+
+ if (r->ip4_header.ip_version_and_header_length == 0)
+ memcpy(&r->ip4_header, data_start, sizeof(ip4_header_t));
+ }
+
+ /* Only fragments longer than the 20-byte cache are buffered. */
+ if(data_len > 20) {
+ f->next_data_offset = next_data_offset;
+ f->pi = pi;
+ map_main.ip6_reass_buffered_counter++;
+ }
+ return 0;
+}
+
+/*
+ * Reinitialize the IPv4 virtual-reassembly subsystem after a
+ * configuration change: drop every buffered fragment, free every
+ * entry, and rebuild the hash table and pool at their configured
+ * sizes. Optionally reports the number of destroyed reassemblies
+ * (*trashed_reass) and dropped fragments (*dropped_packets).
+ */
+void map_ip4_reass_reinit(u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ int i;
+
+ if(dropped_packets)
+ *dropped_packets = mm->ip4_reass_buffered_counter;
+ if(trashed_reass)
+ *trashed_reass = mm->ip4_reass_allocated;
+ if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ /* NOTE(review): ri is u16 here while reassembly indices are u32
+ * elsewhere — assumes the pool never exceeds 64K entries; confirm
+ * against MAP_IP4_REASS_CONF_POOL_SIZE_MAX. */
+ u16 ri = mm->ip4_reass_fifo_last;
+ do {
+ /* Walk the circular FIFO once, dropping buffered fragments. */
+ map_ip4_reass_t *r = pool_elt_at_index(mm->ip4_reass_pool, ri);
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i] != ~0)
+ map_ip4_drop_pi(r->fragments[i]);
+
+ ri = r->fifo_next;
+ pool_put(mm->ip4_reass_pool, r);
+ } while (ri != mm->ip4_reass_fifo_last);
+ }
+
+ /* Rebuild the hash table and pool at the configured sizes. */
+ vec_free(mm->ip4_reass_hash_table);
+ vec_resize(mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len);
+ for (i=0; i<(1 << mm->ip4_reass_ht_log2len); i++)
+ mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free(mm->ip4_reass_pool);
+ pool_alloc(mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size);
+
+ mm->ip4_reass_allocated = 0;
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ mm->ip4_reass_buffered_counter = 0;
+}
+
+/*
+ * Return the log2 of the smallest power-of-two hash-table length that
+ * is >= pool_size * ht_ratio (minimum 1). The trailing "return 4" is
+ * a fallback for sizes beyond 2^30, unreachable for valid configs.
+ */
+u8 map_get_ht_log2len(f32 ht_ratio, u16 pool_size)
+{
+ u32 target = (u32)(pool_size * ht_ratio);
+ u8 log2len = 1;
+ while (log2len < 31) {
+ if ((1 << log2len) >= target)
+ return log2len;
+ log2len++;
+ }
+ return 4;
+}
+
+/*
+ * Set the IPv4 reassembly hash-table/pool size ratio and rebuild the
+ * subsystem. Returns -1 if the ratio exceeds the configured maximum;
+ * destroyed/dropped counts are reported through the out parameters.
+ */
+int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip4_reass_lock();
+ mm->ip4_reass_conf_ht_ratio = ht_ratio;
+ mm->ip4_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip4_reass_conf_pool_size);
+ map_ip4_reass_reinit(trashed_reass, dropped_packets);
+ map_ip4_reass_unlock();
+ return 0;
+}
+
+/*
+ * Set the IPv4 reassembly pool size and rebuild the subsystem.
+ * Returns -1 if the size exceeds the configured maximum;
+ * destroyed/dropped counts are reported through the out parameters.
+ */
+int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip4_reass_lock();
+ mm->ip4_reass_conf_pool_size = pool_size;
+ map_ip4_reass_reinit(trashed_reass, dropped_packets);
+ map_ip4_reass_unlock();
+ return 0;
+}
+
+/* Set the IPv4 reassembly lifetime (milliseconds); takes effect immediately,
+ * no rebuild needed. Always succeeds. */
+int map_ip4_reass_conf_lifetime(u16 lifetime_ms)
+{
+ map_main.ip4_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+/* Set the global IPv4 reassembly buffered-fragment budget; takes effect
+ * immediately, no rebuild needed. Always succeeds. */
+int map_ip4_reass_conf_buffers(u32 buffers)
+{
+ map_main.ip4_reass_conf_buffers = buffers;
+ return 0;
+}
+
+/*
+ * Reinitialize the IPv6 virtual-reassembly subsystem after a
+ * configuration change: drop every buffered fragment, free every
+ * entry, and rebuild the hash table and pool at their configured
+ * sizes. Optionally reports the number of destroyed reassemblies
+ * (*trashed_reass) and dropped fragments (*dropped_packets).
+ * Fix: the pool was re-allocated with the IPv4 pool size
+ * (ip4_reass_conf_pool_size) instead of the IPv6 one — a copy-paste
+ * bug from map_ip4_reass_reinit().
+ */
+void map_ip6_reass_reinit(u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if(dropped_packets)
+ *dropped_packets = mm->ip6_reass_buffered_counter;
+ if(trashed_reass)
+ *trashed_reass = mm->ip6_reass_allocated;
+ int i;
+ if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) {
+ u16 ri = mm->ip6_reass_fifo_last;
+ do {
+ /* Walk the circular FIFO once, dropping buffered fragments. */
+ map_ip6_reass_t *r = pool_elt_at_index(mm->ip6_reass_pool, ri);
+ for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if (r->fragments[i].pi != ~0)
+ map_ip6_drop_pi(r->fragments[i].pi);
+
+ ri = r->fifo_next;
+ pool_put(mm->ip6_reass_pool, r);
+ } while (ri != mm->ip6_reass_fifo_last);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ }
+
+ /* Rebuild the hash table and pool at the configured sizes. */
+ vec_free(mm->ip6_reass_hash_table);
+ vec_resize(mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len);
+ for(i=0; i<(1 << mm->ip6_reass_ht_log2len); i++)
+ mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE;
+ pool_free(mm->ip6_reass_pool);
+ pool_alloc(mm->ip6_reass_pool, mm->ip6_reass_conf_pool_size);
+
+ mm->ip6_reass_allocated = 0;
+ mm->ip6_reass_buffered_counter = 0;
+}
+
+/*
+ * Set the IPv6 reassembly hash-table/pool size ratio and rebuild the
+ * subsystem. Returns -1 if the ratio exceeds the configured maximum;
+ * destroyed/dropped counts are reported through the out parameters.
+ */
+int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX)
+ return -1;
+
+ map_ip6_reass_lock();
+ mm->ip6_reass_conf_ht_ratio = ht_ratio;
+ mm->ip6_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip6_reass_conf_pool_size);
+ map_ip6_reass_reinit(trashed_reass, dropped_packets);
+ map_ip6_reass_unlock();
+ return 0;
+}
+
+/*
+ * Set the IPv6 reassembly pool size and rebuild the subsystem.
+ * Returns -1 if the size exceeds the configured maximum;
+ * destroyed/dropped counts are reported through the out parameters.
+ */
+int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets)
+{
+ map_main_t *mm = &map_main;
+ if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX)
+ return -1;
+
+ map_ip6_reass_lock();
+ mm->ip6_reass_conf_pool_size = pool_size;
+ map_ip6_reass_reinit(trashed_reass, dropped_packets);
+ map_ip6_reass_unlock();
+ return 0;
+}
+
+/* Set the IPv6 reassembly lifetime (milliseconds); takes effect immediately,
+ * no rebuild needed. Always succeeds. */
+int map_ip6_reass_conf_lifetime(u16 lifetime_ms)
+{
+ map_main.ip6_reass_conf_lifetime_ms = lifetime_ms;
+ return 0;
+}
+
+/* Set the global IPv6 reassembly buffered-fragment budget; takes effect
+ * immediately, no rebuild needed. Always succeeds. */
+int map_ip6_reass_conf_buffers(u32 buffers)
+{
+ map_main.ip6_reass_conf_buffers = buffers;
+ return 0;
+}
+
+/*
+ * CLI command registrations for the MAP feature: parameter tuning
+ * ("map params ..."), domain/rule configuration ("map add/del ..."),
+ * and the "show map ..." inspection commands.
+ */
+VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = {
+ .path = "map params reassembly",
+ .short_help = "[ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]",
+ .function = map_params_reass_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_traffic_class_command, static) = {
+ .path = "map params traffic-class",
+ .short_help =
+ "traffic-class {0x0-0xff | copy}",
+ .function = map_traffic_class_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_pre_resolve_command, static) = {
+ .path = "map params pre-resolve",
+ .short_help =
+ "pre-resolve {ip4-nh <address>} | {ip6-nh <address>}",
+ .function = map_pre_resolve_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_security_check_command, static) = {
+ .path = "map params security-check",
+ .short_help =
+ "security-check on|off",
+ .function = map_security_check_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = {
+ .path = "map params icmp-source-address",
+ .short_help =
+ "icmp-source-address <ip4-address>",
+ .function = map_icmp_relay_source_address_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_security_check_frag_command, static) = {
+ .path = "map params security-check fragments",
+ .short_help =
+ "fragments on|off",
+ .function = map_security_check_frag_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_add_domain_command, static) = {
+ .path = "map add domain",
+ .short_help =
+ "map add domain ip4-pfx <ip4-pfx> ip6-pfx <ip6-pfx> ip6-src <ip6-pfx> "
+ "ea-bits-len <n> psid-offset <n> psid-len <n> [map-t] [mtu <mtu>]",
+ .function = map_add_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_add_rule_command, static) = {
+ .path = "map add rule",
+ .short_help =
+ "map add rule index <domain> psid <psid> ip6-dst <ip6-addr>",
+ .function = map_add_rule_command_fn,
+};
+
+VLIB_CLI_COMMAND(map_del_command, static) = {
+ .path = "map del domain",
+ .short_help =
+ "map del domain index <domain>",
+ .function = map_del_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_map_domain_command, static) = {
+ .path = "show map domain",
+ .function = show_map_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_map_stats_command, static) = {
+ .path = "show map stats",
+ .function = show_map_stats_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_map_fragments_command, static) = {
+ .path = "show map fragments",
+ .function = show_map_fragments_command_fn,
+};
+
+/*
+ * map_init
+ */
+clib_error_t *map_init (vlib_main_t *vm)
+{
+ map_main_t *mm = &map_main;
+ mm->vnet_main = vnet_get_main();
+ mm->vlib_main = vm;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+ memset(&mm->preresolve_ip4, 0, sizeof(mm->preresolve_ip4));
+ memset(&mm->preresolve_ip6, 0, sizeof(mm->preresolve_ip6));
+ mm->adj4_index = 0;
+ mm->adj6_index = 0;
+#endif
+
+ /* traffic class */
+ mm->tc = 0;
+ mm->tc_copy = true;
+
+ /* Inbound security check */
+ mm->sec_check = true;
+ mm->sec_check_frag = false;
+
+ vec_validate(mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1);
+ mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx";
+ mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx";
+
+ vlib_validate_simple_counter(&mm->icmp_relayed, 0);
+ vlib_zero_simple_counter(&mm->icmp_relayed, 0);
+
+ /* IP4 virtual reassembly */
+ mm->ip4_reass_hash_table = 0;
+ mm->ip4_reass_pool = 0;
+ mm->ip4_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT;
+ mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT;
+ mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT;
+ mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT;
+ mm->ip4_reass_ht_log2len = map_get_ht_log2len(mm->ip4_reass_conf_ht_ratio, mm->ip4_reass_conf_pool_size);
+ mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip4_reass_reinit(NULL, NULL);
+
+ /* IP6 virtual reassembly */
+ mm->ip6_reass_hash_table = 0;
+ mm->ip6_reass_pool = 0;
+ mm->ip6_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT;
+ mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT;
+ mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT;
+ mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT;
+ mm->ip6_reass_ht_log2len = map_get_ht_log2len(mm->ip6_reass_conf_ht_ratio, mm->ip6_reass_conf_pool_size);
+ mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE;
+ map_ip6_reass_reinit(NULL, NULL);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(map_init);
diff --git a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h
new file mode 100644
index 00000000000..ae58cdb9120
--- /dev/null
+++ b/vnet/vnet/map/map.h
@@ -0,0 +1,556 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <stdbool.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+
+#define MAP_SKIP_IP6_LOOKUP 1
+
+typedef enum {
+ MAP_SENDER,
+ MAP_RECEIVER
+} map_dir_e;
+
+int map_create_domain(ip4_address_t *ip4_prefix, u8 ip4_prefix_len,
+ ip6_address_t *ip6_prefix, u8 ip6_prefix_len,
+ ip6_address_t *ip6_src, u8 ip6_src_len,
+ u8 ea_bits_len, u8 psid_offset, u8 psid_length,
+ u32 *map_domain_index, u16 mtu, u8 flags);
+int map_delete_domain(u32 map_domain_index);
+int map_add_del_psid(u32 map_domain_index, u16 psid, ip6_address_t *tep, u8 is_add);
+u8 *format_map_trace(u8 *s, va_list *args);
+i32 ip4_get_port(ip4_header_t *ip, map_dir_e dir, u16 buffer_len);
+i32 ip6_get_port(ip6_header_t *ip6, map_dir_e dir, u16 buffer_len);
+u16 ip4_map_get_port (ip4_header_t *ip, map_dir_e dir);
+
+typedef enum __attribute__ ((__packed__)) {
+ MAP_DOMAIN_PREFIX = 1 << 0,
+ MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T
+} map_domain_flags_e;
+
+/**
+ * IP4 reassembly logic:
+ * One virtually reassembled flow requires a map_ip4_reass_t structure in order
+ * to keep the first-fragment port number and, optionally, cache out of sequence
+ * packets.
+ * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures.
+ * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets.
+ * When a new structure needs to be used, it is allocated from available ones.
+ * If there is no structure available, the oldest in use is selected and used if and
+ * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago.
+ * In case no structure can be allocated, the fragment is dropped.
+ */
+
+#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly
+
+#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */
+#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0)
+#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures
+#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048
+
+#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5
+
+#define MAP_IP6_REASS_COUNT_BYTES
+#define MAP_IP4_REASS_COUNT_BYTES
+
+//#define IP6_MAP_T_OVERRIDE_TOS 0
+
+/*
+ * This structure _MUST_ be no larger than a single cache line (64 bytes).
+ * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive.
+ */
+typedef struct {
+ ip6_address_t ip6_src;
+ ip6_address_t ip6_prefix;
+ ip6_address_t *rules;
+ u32 suffix_mask;
+ ip4_address_t ip4_prefix;
+ u16 psid_mask;
+ u16 mtu;
+ map_domain_flags_e flags;
+ u8 ip6_prefix_len;
+ u8 ip6_src_len;
+ u8 ea_bits_len;
+ u8 psid_offset;
+ u8 psid_length;
+
+ /* helpers */
+ u8 psid_shift;
+ u8 suffix_shift;
+ u8 ea_shift;
+
+ /* not used by forwarding */
+ u8 ip4_prefix_len;
+} map_domain_t;
+
+#define MAP_REASS_INDEX_NONE ((u16)0xffff)
+
+/*
+ * Hash key, padded out to 16 bytes for fast compare
+ */
+typedef union {
+ CLIB_PACKED (struct {
+ ip4_address_t src;
+ ip4_address_t dst;
+ u16 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[2];
+ u32 as_u32[4];
+} map_ip4_reass_key_t;
+
+typedef struct {
+ map_ip4_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP4_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ i32 port;
+ u16 bucket;
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip4_reass_t;
+
+/*
+ * MAP domain counters
+ */
+typedef enum {
+ /* Simple counters */
+ MAP_DOMAIN_IPV4_FRAGMENT = 0,
+ /* Combined counters */
+ MAP_DOMAIN_COUNTER_RX = 0,
+ MAP_DOMAIN_COUNTER_TX,
+ MAP_N_DOMAIN_COUNTER
+} map_domain_counter_t;
+
+/*
+ * main_main_t
+ */
+typedef union {
+ CLIB_PACKED (struct {
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 fragment_id;
+ u8 protocol;
+ });
+ u64 as_u64[5];
+ u32 as_u32[10];
+} map_ip6_reass_key_t;
+
+typedef struct {
+ u32 pi; //Cached packet or ~0
+ u16 next_data_offset; //The data offset of the additional 20 bytes or ~0
+ u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment)
+ u8 next_data[20]; //The 20 additional bytes
+} map_ip6_fragment_t;
+
+typedef struct {
+ map_ip6_reass_key_t key;
+ f64 ts;
+#ifdef MAP_IP6_REASS_COUNT_BYTES
+ u16 expected_total;
+ u16 forwarded;
+#endif
+ u16 bucket; //What hash bucket this element is linked in
+ u16 bucket_next;
+ u16 fifo_prev;
+ u16 fifo_next;
+ ip4_header_t ip4_header;
+ map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY];
+} map_ip6_reass_t;
+
+typedef struct {
+ /* pool of MAP domains */
+ map_domain_t *domains;
+
+ /* MAP Domain packet/byte counters indexed by map domain index */
+ vlib_simple_counter_main_t *simple_domain_counters;
+ vlib_combined_counter_main_t *domain_counters;
+ volatile u32 *counter_lock;
+
+ /* Global counters */
+ vlib_simple_counter_main_t icmp_relayed;
+
+#ifdef MAP_SKIP_IP6_LOOKUP
+  /* pre-resolve */
+ u32 adj6_index, adj4_index;
+ ip4_address_t preresolve_ip4;
+ ip6_address_t preresolve_ip6;
+#endif
+
+ /* Traffic class: zero, copy (~0) or fixed value */
+ u8 tc;
+ bool tc_copy;
+ bool sec_check;
+ bool sec_check_frag;
+
+ /* ICMPv6 -> ICMPv4 relay parameters */
+ ip4_address_t icmp_src_address;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /*
+ * IPv4 encap and decap reassembly
+ */
+ //Conf
+ f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly
+
+ //Runtime
+ map_ip4_reass_t *ip4_reass_pool;
+ u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip4_reass_allocated;
+ u16 *ip4_reass_hash_table;
+ u16 ip4_reass_fifo_last;
+ volatile u32 *ip4_reass_lock;
+
+ //Counters
+ u32 ip4_reass_buffered_counter;
+
+ /*
+ * IPv6 decap reassembly
+ */
+ //Conf
+ f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size))
+ u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures
+ u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms
+ u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly
+
+ //Runtime
+ map_ip6_reass_t *ip6_reass_pool;
+ u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len
+ u16 ip6_reass_allocated;
+ u16 *ip6_reass_hash_table;
+ u16 ip6_reass_fifo_last;
+ volatile u32 *ip6_reass_lock;
+
+ //Counters
+ u32 ip6_reass_buffered_counter;
+
+} map_main_t;
+
+/*
+ * TODO: Remove SEC_CHECK / TRANSLATED_4TO6 / TRANSLATED_6TO4
+ */
+#define foreach_map_error \
+ /* Must be first. */ \
+ _(NONE, "valid MAP packets") \
+ _(BAD_PROTOCOL, "bad protocol") \
+ _(WRONG_ICMP_TYPE, "wrong icmp type") \
+ _(SEC_CHECK, "security check failed") \
+ _(ENCAP_SEC_CHECK, "encap security check failed") \
+ _(DECAP_SEC_CHECK, "decap security check failed") \
+ _(ICMP, "unable to translate ICMP") \
+ _(ICMP_RELAY, "unable to relay ICMP") \
+ _(UNKNOWN, "unknown") \
+ _(NO_DOMAIN, "no domain") \
+ _(FRAGMENTED, "packet is a fragment") \
+ _(FRAGMENT_MEMORY, "could not cache fragment") \
+ _(FRAGMENT_MALFORMED, "fragment has unexpected format")\
+ _(FRAGMENT_DROPPED, "dropped cached fragment") \
+ _(MALFORMED, "malformed packet")
+
+typedef enum {
+#define _(sym,str) MAP_ERROR_##sym,
+ foreach_map_error
+#undef _
+ MAP_N_ERROR,
+ } map_error_t;
+
+u64 map_error_counter_get(u32 node_index, map_error_t map_error);
+
+typedef struct {
+ u32 map_domain_index;
+ u16 port;
+} map_trace_t;
+
+map_main_t map_main;
+
+vlib_node_registration_t ip4_map_node;
+vlib_node_registration_t ip6_map_node;
+
+vlib_node_registration_t ip4_map_t_node;
+vlib_node_registration_t ip4_map_t_fragmented_node;
+vlib_node_registration_t ip4_map_t_tcp_udp_node;
+vlib_node_registration_t ip4_map_t_icmp_node;
+
+vlib_node_registration_t ip6_map_t_node;
+vlib_node_registration_t ip6_map_t_fragmented_node;
+vlib_node_registration_t ip6_map_t_tcp_udp_node;
+vlib_node_registration_t ip6_map_t_icmp_node;
+
+/*
+ * map_get_pfx
+ */
+static_always_inline u64
+map_get_pfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[0]);
+
+ u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask;
+ u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid;
+
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift;
+}
+
+static_always_inline u64
+map_get_pfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+/*
+ * map_get_sfx
+ */
+static_always_inline u64
+map_get_sfx (map_domain_t *d, u32 addr, u16 port)
+{
+ u16 psid = (port >> d->psid_shift) & d->psid_mask;
+
+ /* Shared 1:1 mode. */
+ if (d->ea_bits_len == 0 && d->rules)
+ return clib_net_to_host_u64(d->rules[psid].as_u64[1]);
+ if (d->ip6_prefix_len == 128)
+ return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]);
+
+ /* IPv4 prefix */
+ if (d->flags & MAP_DOMAIN_PREFIX)
+ return (u64) (addr & ~d->suffix_mask) << 16;
+
+ /* Shared or full IPv4 address */
+ return ((u64) addr << 16) | psid;
+}
+
+static_always_inline u64
+map_get_sfx_net (map_domain_t *d, u32 addr, u16 port)
+{
+ return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr),
+ clib_net_to_host_u16(port)));
+}
+
+static_always_inline u32
+map_get_ip4 (ip6_address_t *addr)
+{
+ return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16);
+}
+
+/*
+ * Get the MAP domain from an IPv4 lookup adjacency.
+ */
+static_always_inline map_domain_t *
+ip4_map_get_domain (u32 adj_index, u32 *map_domain_index)
+{
+ map_main_t *mm = &map_main;
+ ip_lookup_main_t *lm = &ip4_main.lookup_main;
+ ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index);
+ ASSERT(adj);
+ uword *p = (uword *)adj->rewrite_data;
+ ASSERT(p);
+ *map_domain_index = p[0];
+ return pool_elt_at_index(mm->domains, p[0]);
+}
+
+/*
+ * Get the MAP domain from an IPv6 lookup adjacency.
+ * If the IPv6 address or prefix is not shared, no lookup is required.
+ * The IPv4 address is used otherwise.
+ */
+static_always_inline map_domain_t *
+ip6_map_get_domain (u32 adj_index, ip4_address_t *addr,
+ u32 *map_domain_index, u8 *error)
+{
+ map_main_t *mm = &map_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip_adjacency_t *adj = ip_get_adjacency(lm6, adj_index);
+ ASSERT(adj);
+ uword *p = (uword *)adj->rewrite_data;
+ ASSERT(p);
+ *map_domain_index = p[0];
+ if (p[0] != ~0)
+ return pool_elt_at_index(mm->domains, p[0]);
+
+ u32 ai = ip4_fib_lookup_with_table(im4, 0, addr, 0);
+ ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai);
+ if (PREDICT_TRUE(adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP ||
+ adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP_T)) {
+ uword *p = (uword *)adj4->rewrite_data;
+ *map_domain_index = p[0];
+ return pool_elt_at_index(mm->domains, *map_domain_index);
+ }
+ *error = MAP_ERROR_NO_DOMAIN;
+ return NULL;
+}
+
+map_ip4_reass_t *
+map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {}
+#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0)
+
+static_always_inline void
+map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi)
+{
+ int i;
+ for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++)
+ if(r->fragments[i] != ~0) {
+ vec_add1(*pi, r->fragments[i]);
+ r->fragments[i] = ~0;
+ map_main.ip4_reass_buffered_counter--;
+ }
+}
+
+int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi);
+
+map_ip6_reass_t *
+map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id,
+ u8 protocol, u32 **pi_to_drop);
+void
+map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop);
+
+#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {}
+#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0)
+
+int
+map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi,
+ u16 data_offset, u16 next_data_offset,
+ u8 *data_start, u16 data_len);
+
+void map_ip4_drop_pi(u32 pi);
+
+int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100
+int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip4_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip4_reass_conf_buffers(u32 buffers);
+#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+void map_ip6_drop_pi(u32 pi);
+
+
+int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100
+int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets);
+#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff)
+int map_ip6_reass_conf_lifetime(u16 lifetime_ms);
+#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff
+int map_ip6_reass_conf_buffers(u32 buffers);
+#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff)
+
+static_always_inline
+int ip6_parse(const ip6_header_t *ip6, u32 buff_len,
+ u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset)
+{
+ if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) {
+ *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr;
+ *frag_hdr_offset = sizeof(*ip6);
+ *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t);
+ } else {
+ *l4_protocol = ip6->protocol;
+ *frag_hdr_offset = 0;
+ *l4_offset = sizeof(*ip6);
+ }
+
+ return (buff_len < (*l4_offset + 4)) ||
+ (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6)));
+}
+
+
+#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index)
+#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val))
+
+#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
+
+static_always_inline void
+ip4_map_t_embedded_address (map_domain_t *d,
+ ip6_address_t *ip6, const ip4_address_t *ip4)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ ip6->as_u64[0] = d->ip6_src.as_u64[0];
+ ip6->as_u32[2] = d->ip6_src.as_u32[2];
+ ip6->as_u32[3] = ip4->as_u32;
+}
+
+static_always_inline u32
+ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr)
+{
+ ASSERT(d->ip6_src_len == 96); //No support for other lengths for now
+ return addr->as_u32[3];
+}
+
+static inline void
+map_domain_counter_lock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ while (__sync_lock_test_and_set(mm->counter_lock, 1))
+ /* zzzz */ ;
+}
+static inline void
+map_domain_counter_unlock (map_main_t *mm)
+{
+ if (mm->counter_lock)
+ *mm->counter_lock = 0;
+}
+
+
+static_always_inline void
+map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector,
+ vlib_node_runtime_t *node, vlib_error_t *error,
+ u32 next)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ //Deal with fragments that are ready
+ from = pi_vector;
+ n_left_from = vec_len(pi_vector);
+ next_index = node->cached_next_index;
+ while (n_left_from > 0) {
+ vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from > 0 && n_left_to_next > 0) {
+ u32 pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0);
+ p0->error = *error;
+ vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next);
+ }
+ vlib_put_next_frame(vm, node, next_index, n_left_to_next);
+ }
+}
diff --git a/vnet/vnet/map/map_doc.md b/vnet/vnet/map/map_doc.md
new file mode 100644
index 00000000000..230c52dfafd
--- /dev/null
+++ b/vnet/vnet/map/map_doc.md
@@ -0,0 +1,69 @@
+# VPP MAP and Lw4o6 implementation
+
+This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations.
+Everything that is not directly obvious should come here.
+
+
+
+## MAP-E Virtual Reassembly
+
+The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments.
+
+Fragment caching and handling is not always necessary. It is performed when:
+* An IPv4 fragment is received and the destination IPv4 address is shared.
+* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on.
+* An IPv6 fragment is received.
+
+There are 3 dedicated nodes:
+* ip4-map-reass
+* ip6-map-ip4-reass
+* ip6-map-ip6-reass
+
+ip4-map sends all fragments to ip4-map-reass.
+ip6-map sends all inner-fragments to ip6-map-ip4-reass.
+ip6-map sends all outer-fragments to ip6-map-ip6-reass.
+
+IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes.
+
+An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received.
+
+#### Virtual Reassembly configuration
+
+IPv4 and IPv6 virtual reassembly support the following configuration:
+ map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
+
+lifetime:
+  The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable reassembly is, but the more likely it is to exhaust the pool of reassembly structures. The IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases.
+
+buffers:
+ The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool.
+
+pool-size:
+ The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total.
+
+ht-ratio:
+ The amount of buckets in the hash-table is pool-size * ht-ratio.
+
+
+Any time pool-size and ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost.
+
+
+##### Additional considerations
+
+Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that are far apart.
+
+Let:
+R be the packet rate at which fragments are received.
+F be the number of fragments per packet.
+
+Assuming the first fragment is always received last, we should have:
+buffers > lifetime * R / F * (F - 1)
+pool-size > lifetime * R/F
+
+This is a worst case. Receiving the first fragment earlier helps reducing the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reducing 'pool-size'.
+
+But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, a 5Mpps packet rate would require buffering up to 2.5 million fragments.
+
+If you want to do that, be prepared to configure a large number of buffers.
+
+
diff --git a/vnet/vnet/map/sixrd.c b/vnet/vnet/map/sixrd.c
new file mode 100644
index 00000000000..26b4eea9a86
--- /dev/null
+++ b/vnet/vnet/map/sixrd.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sixrd.h"
+
+/*
+ * This code supports the following sixrd modes:
+ *
+ * 32 EA bits (Complete IPv4 address is embedded):
+ * ea_bits_len = 32
+ * IPv4 suffix is embedded:
+ * ea_bits_len = < 32
+ * No embedded address bits (1:1 mode):
+ * ea_bits_len = 0
+ */
+
+int
+sixrd_create_domain (ip6_address_t *ip6_prefix,
+ u8 ip6_prefix_len,
+ ip4_address_t *ip4_prefix,
+ u8 ip4_prefix_len,
+ ip4_address_t *ip4_src,
+ u32 *sixrd_domain_index,
+ u16 mtu)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ sixrd_domain_t *d;
+ ip_adjacency_t adj;
+ ip4_add_del_route_args_t args4;
+ ip6_add_del_route_args_t args6;
+ u32 *p;
+
+ /* Get domain index */
+ pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES);
+ memset(d, 0, sizeof (*d));
+ *sixrd_domain_index = d - mm->domains;
+
+ /* Init domain struct */
+ d->ip4_prefix.as_u32 = ip4_prefix->as_u32;
+ d->ip4_prefix_len = ip4_prefix_len;
+ d->ip6_prefix = *ip6_prefix;
+ d->ip6_prefix_len = ip6_prefix_len;
+ d->ip4_src = *ip4_src;
+ d->mtu = mtu;
+
+ if (ip4_prefix_len < 32)
+ d->shift = 64 - ip6_prefix_len + (32 - ip4_prefix_len);
+
+ /* Init IP adjacency */
+ memset(&adj, 0, sizeof(adj));
+ adj.explicit_fib_index = ~0;
+ adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD;
+ p = (u32 *)&adj.rewrite_data[0];
+ *p = (u32) (*sixrd_domain_index);
+
+ /* Create ip6 adjacency */
+ memset(&args6, 0, sizeof(args6));
+ args6.table_index_or_table_id = 0;
+ args6.flags = IP6_ROUTE_FLAG_ADD;
+ args6.dst_address.as_u64[0] = ip6_prefix->as_u64[0];
+ args6.dst_address.as_u64[1] = ip6_prefix->as_u64[1];
+ args6.dst_address_length = ip6_prefix_len;
+ args6.adj_index = ~0;
+ args6.add_adj = &adj;
+ args6.n_add_adj = 1;
+ ip6_add_del_route(im6, &args6);
+
+ /* Multiple SIXRD domains may share same source IPv4 TEP */
+ uword *q = ip4_get_route(im4, 0, 0, (u8 *)ip4_src, 32);
+ if (q) {
+ u32 ai = q[0];
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip_adjacency_t *adj4 = ip_get_adjacency(lm4, ai);
+ if (adj4->lookup_next_index != IP_LOOKUP_NEXT_SIXRD) {
+ clib_warning("BR source address already assigned: %U", format_ip4_address, ip4_src);
+ pool_put(mm->domains, d);
+ return -1;
+ }
+ /* Shared source */
+ p = (u32 *)&adj4->rewrite_data[0];
+ p[0] = ~0;
+
+ /* Add refcount, so we don't accidentially delete the route underneath someone */
+ p[1]++;
+ } else {
+ /* Create ip4 adjacency. */
+ memset(&args4, 0, sizeof(args4));
+ args4.table_index_or_table_id = 0;
+ args4.flags = IP4_ROUTE_FLAG_ADD;
+ args4.dst_address.as_u32 = ip4_src->as_u32;
+ args4.dst_address_length = 32;
+ args4.adj_index = ~0;
+ args4.add_adj = &adj;
+ args4.n_add_adj = 1;
+ ip4_add_del_route(im4, &args4);
+ }
+
+ return 0;
+}
+
+/*
+ * sixrd_delete_domain
+ */
+int
+sixrd_delete_domain (u32 sixrd_domain_index)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ sixrd_domain_t *d;
+ ip_adjacency_t adj;
+ ip4_add_del_route_args_t args4;
+ ip6_add_del_route_args_t args6;
+
+ if (pool_is_free_index(mm->domains, sixrd_domain_index)) {
+ clib_warning("SIXRD domain delete: domain does not exist: %d", sixrd_domain_index);
+ return -1;
+ }
+
+ d = pool_elt_at_index(mm->domains, sixrd_domain_index);
+
+ memset(&adj, 0, sizeof(adj));
+ adj.explicit_fib_index = ~0;
+ adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD;
+
+ /* Delete ip6 adjacency */
+ memset(&args6, 0, sizeof (args6));
+ args6.table_index_or_table_id = 0;
+ args6.flags = IP6_ROUTE_FLAG_DEL;
+ args6.dst_address.as_u64[0] = d->ip6_prefix.as_u64[0];
+ args6.dst_address.as_u64[1] = d->ip6_prefix.as_u64[1];
+ args6.dst_address_length = d->ip6_prefix_len;
+ args6.adj_index = 0;
+ args6.add_adj = &adj;
+ args6.n_add_adj = 0;
+ ip6_add_del_route(im6, &args6);
+
+ /* Delete ip4 adjacency */
+ uword *q = ip4_get_route(im4, 0, 0, (u8 *)&d->ip4_src, 32);
+ if (q) {
+ u32 ai = q[0];
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip_adjacency_t *adj4 = ip_get_adjacency(lm4, ai);
+
+ u32 *p = (u32 *)&adj4->rewrite_data[0];
+ /* Delete route when no other domains use this source */
+ if (p[1] == 0) {
+ memset(&args4, 0, sizeof(args4));
+ args4.table_index_or_table_id = 0;
+ args4.flags = IP4_ROUTE_FLAG_DEL;
+ args4.dst_address.as_u32 = d->ip4_prefix.as_u32;
+ args4.dst_address_length = d->ip4_prefix_len;
+ args4.adj_index = 0;
+ args4.add_adj = &adj;
+ args4.n_add_adj = 0;
+ ip4_add_del_route(im4, &args4);
+ }
+ p[1]--;
+ }
+
+ pool_put(mm->domains, d);
+
+ return 0;
+}
+
+static clib_error_t *
+sixrd_add_domain_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip4_address_t ip4_prefix;
+ ip6_address_t ip6_prefix;
+ ip4_address_t ip4_src;
+ u32 ip6_prefix_len, ip4_prefix_len, sixrd_domain_index;
+ u32 num_m_args = 0;
+ /* Optional arguments */
+ u32 mtu = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user(input, unformat_line_input, line_input))
+ return 0;
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len))
+ num_m_args++;
+ else if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len))
+ num_m_args++;
+ else if (unformat(line_input, "ip4-src %U", unformat_ip4_address, &ip4_src))
+ num_m_args++;
+ else if (unformat(line_input, "mtu %d", &mtu))
+ num_m_args++;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free(line_input);
+
+ if (num_m_args < 3)
+ return clib_error_return(0, "mandatory argument(s) missing");
+
+ sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len,
+ &ip4_src, &sixrd_domain_index, mtu);
+
+ return 0;
+}
+
+static clib_error_t *
+sixrd_del_domain_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 num_m_args = 0;
+ u32 sixrd_domain_index;
+
+ /* Get a line of input. */
+ if (! unformat_user(input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat(line_input, "index %d", &sixrd_domain_index))
+ num_m_args++;
+ else
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free(line_input);
+
+ if (num_m_args != 1)
+ return clib_error_return(0, "mandatory argument(s) missing");
+
+ sixrd_delete_domain(sixrd_domain_index);
+
+ return 0;
+}
+
+static u8 *
+format_sixrd_domain (u8 *s, va_list *args)
+{
+ sixrd_domain_t *d = va_arg(*args, sixrd_domain_t *);
+ sixrd_main_t *mm = &sixrd_main;
+
+ s = format(s,
+ "[%d] ip6-pfx %U/%d ip4-pfx %U/%d ip4-src %U mtu %d",
+ d - mm->domains,
+ format_ip6_address, &d->ip6_prefix, d->ip6_prefix_len,
+ format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len,
+ format_ip4_address, &d->ip4_src, d->mtu);
+
+ return s;
+}
+
+static clib_error_t *
+show_sixrd_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_domain_t *d;
+
+ if (pool_elts(mm->domains) == 0)
+ vlib_cli_output(vm, "No SIXRD domains are configured...");
+
+ pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_sixrd_domain, d);}));
+
+ return 0;
+
+}
+
+static clib_error_t *
+show_sixrd_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ sixrd_main_t *mm = &sixrd_main;
+ sixrd_domain_t *d;
+ int domains = 0, domaincount = 0;
+ if (pool_elts (mm->domains) == 0)
+ vlib_cli_output (vm, "No SIXRD domains are configured...");
+
+ pool_foreach(d, mm->domains, ({
+ domains += sizeof(*d);
+ domaincount++;
+ }));
+
+ vlib_cli_output(vm, "SIXRD domains structure: %d\n", sizeof (sixrd_domain_t));
+ vlib_cli_output(vm, "SIXRD domains: %d (%d bytes)\n", domaincount, domains);
+
+ return 0;
+}
+
+/*
+ * packet trace format function
+ */
+u8 *
+format_sixrd_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *);
+ sixrd_trace_t *t = va_arg (*args, sixrd_trace_t *);
+ u32 sixrd_domain_index = t->sixrd_domain_index;
+
+ s = format(s, "SIXRD domain index: %d", sixrd_domain_index);
+
+ return s;
+}
+
+VLIB_CLI_COMMAND(sixrd_add_domain_command, static) = {
+ .path = "sixrd add domain",
+ .short_help =
+ "sixrd add domain ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> ip4-src <ip4-addr>",
+ .function = sixrd_add_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(sixrd_del_command, static) = {
+ .path = "sixrd del domain",
+ .short_help =
+ "sixrd del domain index <domain>",
+ .function = sixrd_del_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_sixrd_domain_command, static) = {
+ .path = "show sixrd domain",
+ .function = show_sixrd_domain_command_fn,
+};
+
+VLIB_CLI_COMMAND(show_sixrd_stats_command, static) = {
+ .path = "show sixrd stats",
+ .function = show_sixrd_stats_command_fn,
+};
+
+/*
+ * sixrd_init
+ */
+clib_error_t *sixrd_init (vlib_main_t *vm)
+{
+ sixrd_main_t *mm = &sixrd_main;
+
+ mm->vnet_main = vnet_get_main();
+ mm->vlib_main = vm;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(sixrd_init);
diff --git a/vnet/vnet/map/sixrd.h b/vnet/vnet/map/sixrd.h
new file mode 100644
index 00000000000..d741cb278b5
--- /dev/null
+++ b/vnet/vnet/map/sixrd.h
@@ -0,0 +1,144 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+#include <stdbool.h>
+#include <vppinfra/error.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+/* Graph-node registrations for the 6RD data path.
+ * NOTE(review): these globals (and sixrd_main below) appear in a header
+ * without 'extern', relying on legacy common-symbol linkage (-fcommon);
+ * this breaks under -fno-common — confirm and consider extern
+ * declarations here plus a single definition in sixrd.c. */
+vlib_node_registration_t ip6_sixrd_node;
+vlib_node_registration_t ip4_sixrd_node;
+
+/* Control-plane API: create/delete a 6RD domain (implemented in sixrd.c). */
+int sixrd_create_domain(ip6_address_t *ip6_prefix, u8 ip6_prefix_len,
+			ip4_address_t *ip4_prefix, u8 ip4_prefix_len,
+			ip4_address_t *ip4_src, u32 *sixrd_domain_index, u16 mtu);
+int sixrd_delete_domain(u32 sixrd_domain_index);
+/* Packet trace formatter (sixrd.c). */
+u8 *format_sixrd_trace(u8 *s, va_list *args);
+
+/* Per-domain configuration for one 6RD deployment. */
+typedef struct {
+  ip6_address_t ip6_prefix;   /* delegated 6RD IPv6 prefix */
+  ip4_address_t ip4_prefix;   /* common IPv4 prefix of the domain */
+  ip4_address_t ip4_src;      /* IPv4 source address of the border relay */
+  u8 ip6_prefix_len;          /* length of ip6_prefix in bits */
+  u8 ip4_prefix_len;          /* length of ip4_prefix in bits */
+
+  /* helpers */
+  u8 shift;                   /* right-shift used by sixrd_get_addr() to
+                                 extract the embedded IPv4 suffix bits */
+
+  u16 mtu;                    /* tunnel MTU for this domain */
+} sixrd_domain_t;
+
+/* Global state of the 6RD feature. */
+typedef struct {
+  /* pool of SIXRD domains */
+  sixrd_domain_t *domains;
+
+  /* convenience */
+  vlib_main_t *vlib_main;   /* cached in sixrd_init() */
+  vnet_main_t *vnet_main;   /* cached in sixrd_init() */
+} sixrd_main_t;
+
+/* X-macro list of per-node error/counter definitions: _(SYMBOL, string).
+ * Expanded below into the sixrd_error_t enum and by the node
+ * registrations into counter name strings. */
+#define foreach_sixrd_error				\
+  /* Must be first. */					\
+ _(NONE, "valid SIXRD packets")			\
+ _(BAD_PROTOCOL, "bad protocol")			\
+ _(WRONG_ICMP_TYPE, "wrong icmp type")			\
+ _(SEC_CHECK, "security check failed")			\
+ _(ICMP, "unable to translate ICMP")			\
+ _(UNKNOWN, "unknown")					\
+ _(NO_DOMAIN, "no domain")				\
+ _(ENCAPSULATED, "encapsulated")			\
+ _(DECAPSULATED, "decapsulated")			\
+ _(TRANSLATED_4TO6, "translated 4 to 6")		\
+ _(TRANSLATED_6TO4, "translated 6 to 4")		\
+ _(FRAGMENT, "fragment handling error")		\
+ _(FRAGMENT_QUEUED, "dropped, missing first fragment")	\
+ _(FRAGMENTED, "packets requiring fragmentation")	\
+ _(FRAGMENT_PARTS, "fragment parts")			\
+ _(MALFORMED, "malformed packet")
+
+/* Error codes generated from the X-macro list above; SIXRD_N_ERROR is
+ * the count used when registering node counters. */
+typedef enum {
+#define _(sym,str) SIXRD_ERROR_##sym,
+   foreach_sixrd_error
+#undef _
+   SIXRD_N_ERROR,
+ } sixrd_error_t;
+
+/* Per-packet trace record; printed by format_sixrd_trace(). */
+typedef struct {
+  u32 sixrd_domain_index;   /* pool index of the domain that handled the packet */
+} sixrd_trace_t;
+
+/* Single global instance of the feature state.
+ * NOTE(review): defined (not 'extern') in a header — relies on legacy
+ * common-symbol linkage; verify this links under -fno-common. */
+sixrd_main_t sixrd_main;
+
+/*
+ * sixrd_get_addr
+ *
+ * Derive the IPv4 tunnel-endpoint address for a domain from the bits
+ * carried in the IPv6 destination (dal = low 64 bits of the address).
+ */
+static_always_inline u32
+sixrd_get_addr (sixrd_domain_t *d, u64 dal)
+{
+  u32 prefix = d->ip4_prefix.as_u32;
+
+  /* 1:1 mode: a full /32 IPv4 prefix leaves nothing to extract. */
+  if (d->ip4_prefix_len == 32)
+    return prefix;
+
+  /* Grab 32 - ip4_prefix_len bits out of IPv6 address from offset ip6_prefix_len */
+  return prefix | (u32) (dal >> d->shift);
+}
+
+/*
+ * Get the SIXRD domain from an IPv6 lookup adjacency.  The domain's pool
+ * index was stashed in the adjacency's rewrite area at configure time.
+ */
+static_always_inline sixrd_domain_t *
+ip6_sixrd_get_domain (u32 adj_index, u32 *sixrd_domain_index)
+{
+  sixrd_main_t *mm = &sixrd_main;
+  ip_adjacency_t *adj = ip_get_adjacency (&ip6_main.lookup_main, adj_index);
+
+  ASSERT (adj);
+  uword *slot = (uword *) adj->rewrite_data;
+  ASSERT (slot);
+
+  *sixrd_domain_index = slot[0];
+  return pool_elt_at_index (mm->domains, slot[0]);
+}
+
+/*
+ * Get the SIXRD domain from an IPv4 lookup adjacency.
+ * If the IPv4 address is not shared, no lookup is required.
+ * The IPv6 address is used otherwise.
+ */
+static_always_inline sixrd_domain_t *
+ip4_sixrd_get_domain (u32 adj_index, ip6_address_t *addr,
+		      u32 *sixrd_domain_index, u8 *error)
+{
+  sixrd_main_t *mm = &sixrd_main;
+  ip6_main_t *im6 = &ip6_main;
+  ip_adjacency_t *adj4 = ip_get_adjacency (&ip4_main.lookup_main, adj_index);
+
+  ASSERT (adj4);
+  uword *slot4 = (uword *) adj4->rewrite_data;
+  ASSERT (slot4);
+  *sixrd_domain_index = slot4[0];
+
+  /* Fast path: the IPv4 adjacency carries the domain index directly. */
+  if (slot4[0] != ~0)
+    return pool_elt_at_index (mm->domains, slot4[0]);
+
+  /* Shared IPv4 address: resolve the domain via an IPv6 FIB lookup on
+   * the inner destination address instead. */
+  u32 ai = ip6_fib_lookup_with_table (im6, 0, addr);
+  ip_adjacency_t *adj6 = ip_get_adjacency (&im6->lookup_main, ai);
+  if (PREDICT_TRUE (adj6->lookup_next_index == IP_LOOKUP_NEXT_SIXRD))
+    {
+      uword *slot6 = (uword *) adj6->rewrite_data;
+      *sixrd_domain_index = slot6[0];
+      return pool_elt_at_index (mm->domains, *sixrd_domain_index);
+    }
+
+  *error = SIXRD_ERROR_NO_DOMAIN;
+  return NULL;
+}