diff options
author | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:45:58 -0700 |
---|---|---|
committer | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:47:27 -0700 |
commit | cb9cadad578297ffd78fa8a33670bdf1ab669e7e (patch) | |
tree | 6ac2be912482cc7849a26f0ab845561c3d7f4e26 /vnet/vnet/map | |
parent | fb0815d4ae4bb0fe27bd9313f34b45c8593b907e (diff) |
Initial commit of vpp code.v1.0.0
Change-Id: Ib246f1fbfce93274020ee93ce461e3d8bd8b9f17
Signed-off-by: Ed Warnicke <eaw@cisco.com>
Diffstat (limited to 'vnet/vnet/map')
-rwxr-xr-x | vnet/vnet/map/examples/gen-rules.py | 213 | ||||
-rwxr-xr-x | vnet/vnet/map/examples/map-test.py | 214 | ||||
-rw-r--r-- | vnet/vnet/map/examples/mapalgs.py | 327 | ||||
-rw-r--r-- | vnet/vnet/map/examples/mt-test.py | 80 | ||||
-rwxr-xr-x | vnet/vnet/map/gen-rules.py | 107 | ||||
-rw-r--r-- | vnet/vnet/map/ip4_map.c | 591 | ||||
-rw-r--r-- | vnet/vnet/map/ip4_map_t.c | 1092 | ||||
-rw-r--r-- | vnet/vnet/map/ip4_sixrd.c | 127 | ||||
-rw-r--r-- | vnet/vnet/map/ip6_map.c | 966 | ||||
-rw-r--r-- | vnet/vnet/map/ip6_map_t.c | 1141 | ||||
-rw-r--r-- | vnet/vnet/map/ip6_sixrd.c | 129 | ||||
-rw-r--r-- | vnet/vnet/map/map.c | 1634 | ||||
-rw-r--r-- | vnet/vnet/map/map.h | 556 | ||||
-rw-r--r-- | vnet/vnet/map/map_doc.md | 69 | ||||
-rw-r--r-- | vnet/vnet/map/sixrd.c | 355 | ||||
-rw-r--r-- | vnet/vnet/map/sixrd.h | 144 |
16 files changed, 7745 insertions, 0 deletions
diff --git a/vnet/vnet/map/examples/gen-rules.py b/vnet/vnet/map/examples/gen-rules.py new file mode 100755 index 00000000000..d6746f79af4 --- /dev/null +++ b/vnet/vnet/map/examples/gen-rules.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3.4 + +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid <psid> ip6-dst <ip6-dst> + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +args = parser.parse_args() + +# +# 1:1 Shared IPv4 address, shared BR, Terastream +# +def terastream(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/22') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + ip6_src = ipaddress.ip_address('cccc:bbbb::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + + " ea-bits-len 0 psid-offset 0 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address, shared BR, OTE +# +def oteshared11(): + ip4_pfx = ipaddress.ip_network('2.84.63.0/24') + dst = 
list(ipaddress.ip_network('2a02:580:8c00::/40').subnets(new_prefix=56)) + psid_len = 6 + ip6_src = ipaddress.ip_address('2a02::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + + " ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + enduserprefix = list(dst.pop(0).subnets(new_prefix=64))[255-1] + print("map add rule index", i, "psid", psid, "ip6-dst", enduserprefix[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address, shared BR, Terastream +# +def confdterastream(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/22') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + ip6_src = ipaddress.ip_address('cccc:bbbb::') + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + str(ip6_src) + " ipv6-prefix ::/0" + " ipv4-prefix " + str(ip4_pfx[i]) + + "/32 ea-len 0 psid-offset 6 psid-len", psid_len) +# print("vpp softwire softwire-instances softwire-instance", i, "ipv4-pfx " + str(ip4_pfx[i]) + "/32 ipv6-pfx ::/0 br-ipv6 " + str(ip6_src) + +# " ea-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("binding", psid, "ipv6-addr", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +def shared11br_yang(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("vpp softwire softwire-instances softwire-instance " + str(i) + " ipv4-prefix " + str(ip4_pfx[i]) + "/32 " + + "ipv6-prefix ::/0 ea-len 0 psid-offset 6 tunnel-mtu 1234 psid-len", psid_len) + #print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + # "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + # print("map add rule 
index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + print("binding", psid, "ipv6-addr", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +def shared11br_xml(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/32') + ip6_dst = ipaddress.ip_network('bbbb::/32') + ip6_src = ipaddress.ip_address('cccc:bbbb::') + psid_len = 6 + print('<vpp xmlns="http://www.cisco.com/yang/cisco-vpp"><softwire><softwire-instances>'); + count = 1024; + for i in range(ip4_pfx.num_addresses): + if not i % 64: + ip6_src = ip6_src + 1 + if count == 0: + break; + count = count - 1; + print('<softwire-instance>') + print(' <id>'+ str(i)+ '</id>') + print(' <ipv4-prefix>'+ str(ip4_pfx[i])+ '/32</ipv4-prefix>') + print(' <ipv6-prefix>::/0</ipv6-prefix>') + print(' <ea-len>0</ea-len>') + print(' <psid-offset>0</psid-offset>') + print(' <psid-len>'+ str(psid_len) + '</psid-len>') + for psid in range(0x1 << psid_len): + print(' <binding>') + print(' <psid>', psid, '</psid>') + print(' <ipv6-addr>'+ str(ip6_dst[(i * (0x1<<psid_len)) + psid]) + '</ipv6-addr>') + print(' </binding>') + print('</softwire-instance>') + print('</softwire-instances></softwire>') + print('</vpp>') + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + 
"ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address +# +def shared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 address small +# +def smallshared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/24') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Full IPv4 address +# +def full11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 0 psid-len 0") +def full11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 0 
psid-len 0") + +# +# Algorithmic mapping Shared IPv4 address +# +def algo(): + print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8") + print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0") + +# +# IP4 forwarding +# +def ip4(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + for i in range(ip4_pfx.num_addresses): + print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") + + +globals()[args.mapmode]() + + diff --git a/vnet/vnet/map/examples/map-test.py b/vnet/vnet/map/examples/map-test.py new file mode 100755 index 00000000000..01f377fb6ee --- /dev/null +++ b/vnet/vnet/map/examples/map-test.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Copyright (c) 2015 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys, time +from scapy.all import * + +import mapalgs + + +ifname = "vpp-tap" + +loc_v4_mac = "aa:aa:aa:aa:aa:a4" +loc_v6_mac = "aa:aa:aa:aa:aa:a6" +vpp_mac = "aa:aa:aa:aa:00:00" + +map_t = 1 + +fragsize = 0 +map_mtu = 200 + +def mac_to_vppmac(mac): + mac = mac.replace(':', '') + return mac[0:4]+"."+mac[4:8]+"."+mac[8:12] + + +map = mapalgs.MapCalc( rulev6 = 'bbbb::/32', + rulev4 = '20.0.0.0/24', + ratio = 256); + +dmr = mapalgs.DmrCalc('cccc:bbbb::/96') + + +ICMP_TYPES_CODES = { + 0: 0, + 3: 15, + 4: 0, + 5: 3, + 6: 0, + 8: 0, + 9: 0, + 10: 0, + 11: 1, + 12: 2, + 13: 0, + 14: 0, + 15: 0, + 16: 0, + 17: 0, + 18: 0 +} + +ICMP6_TYPES_CODES = { + 1: 7, + 2: 0, + 3: 1, + 4: 3, +} + +def net_conf(): + c = "" + c += "tap connect "+ifname+" hwaddr "+mac_to_vppmac(vpp_mac)+" \n" + c += "set int state tap-0 up \n" + c += "set ip6 neighbor tap-0 2001:f00d::1 "+mac_to_vppmac(loc_v6_mac)+" \n" + c += "set ip arp tap-0 10.0.0.1 "+mac_to_vppmac(loc_v4_mac)+" \n" + c += "ip route add ::/0 via 2001:f00d::1 tap-0 \n" + c += "ip route add 0.0.0.0/0 via 10.0.0.1 tap-0 \n" + return c + +def conf(): + c = net_conf() + c += "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ea-bits-len 16 psid-offset 6 psid-len 8" + if map_mtu != 0: + c += " mtu "+str(map_mtu) + if map_t: + c += " ip6-src cccc:bbbb::/96 map-t" + else: + c += " ip6-src cccc:bbbb::ffff" + + c += "\n" + return c + +def send_packet(ip_header, ip_content): + print("Send packet") + if fragsize != 0: + if ip_header.version == 4: + frags = fragment(ip_header/ip_content, fragsize=fragsize) + for f in frags: + print("Fragmented IPv4 packet") + sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/f, iface=ifname) + elif ip_header.version == 6: + frags = fragment6(ip_header/IPv6ExtHdrFragment()/ip_content, fragsize) + for f in frags: + print("Fragmented IPv6 packet") + sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/f, iface=ifname) + else: + sendp(Ether(dst=vpp_mac)/ip_header/ip_content, iface=ifname) + +def 
send_packet_frag_inner(packet, inner_header, inner_content): + print("Send packet with inner ICMP packet") + if fragsize != 0: + if packet.version == 4: + frags = fragment(inner_header/inner_content, fragsize=fragsize) + for f in frags: + print("Fragmented IPv4 inner packet") + sendp(Ether(dst=vpp_mac, src=loc_v4_mac)/packet/f, iface=ifname) + elif packet.version == 6: + frags = fragment6(inner_header/IPv6ExtHdrFragment()/inner_content, fragsize) + for f in frags: + print("Fragmented IPv6 inner packet") + sendp(Ether(dst=vpp_mac, src=loc_v6_mac)/packet/f, iface=ifname) + else: + sendp(Ether(dst=vpp_mac)/packet/inner_header/inner_content, iface=ifname) + + +def sendv6udp(src, dst, port): + psid = map.gen_psid(port) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), UDP(sport=port)/('X'*900)) + +def sendv6tcp(src, dst, port): + psid = map.gen_psid(port) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), TCP(sport=port)/('X'*900)) + +def sendv4udp(src, dst, port): + send_packet(IP(dst=dst, src=src), UDP(dport=port)/('X'*900)) + +def sendv4tcp(src, dst, port): + send_packet(IP(dst=dst, src=src), TCP(dport=port)/('X'*900)) + +def sendv6ping(src, dst, id): + psid = map.gen_psid(id) + ceaddr = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + send_packet(IPv6(dst=dst, src=ceaddr), ICMPv6EchoRequest(id=id, data='A'*500)) + send_packet(IPv6(dst=dst, src=ceaddr), ICMPv6EchoReply(id=id, data='A'*500)) + +def sendv4ping(src, dst, id): + send_packet(IP(dst=dst, src=src), ICMP(id=id, type=0)/('X'*500)) + send_packet(IP(dst=dst, src=src), ICMP(id=id, type=8)/('X'*500)) + +def sendv4icmperr(src, dst, type, code, port, inner_src, inner_dst, payload_length): + inner = IP(dst=inner_dst, src=inner_src)/TCP(sport=port, dport=8888)/('X'*payload_length) + send_packet_frag_inner(IP(dst=dst, src=src)/ICMP(type=type, 
code=code), IP(dst=inner_dst, src=inner_src), TCP(sport=port, dport=8888)/('X'*payload_length)) + #send_packet(IP(dst=dst, src=src)/ICMP(type=type, code=code)/inner) + +def sendv6icmperr(src, dst, type, code, port, payload_length): + psid = map.gen_psid(port) + src = str(map.get_mapce_addr(src, psid)) + dst = str(dmr.embed_6052addr(dst)) + inner_header = IPv6(dst=src, src=dst) + inner_content = TCP(sport=8888, dport=port)/('X'*payload_length) + send_packet_frag_inner(IPv6(dst=dst, src=src)/ICMPv6DestUnreach(type=type, code=code), inner_header, inner_content) + #send_packet(IPv6(dst=dst, src=src)/ICMPv6DestUnreach(type=type, code=code)/inner) + +def sendv4icmp_errors(src, dst, port, inner_src, inner_dst, payload_length): + for type in ICMP_TYPES_CODES: + for code in range(0, ICMP_TYPES_CODES[type] + 1): + sendv4icmperr(src, dst, type, code, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, type, ICMP_TYPES_CODES[type] + 2, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, type, 255, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 1, 0, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 2, 10, port, inner_src, inner_dst, payload_length) + #sendv4icmperr(src, dst, 255, 255, port, inner_src, inner_dst, payload_length) + + #TODO: Check wrong paramater with different pointer values + +def sendv6icmp_errors(src, dst, port, payload_length): + for type in ICMP6_TYPES_CODES: + for code in range(0, ICMP6_TYPES_CODES[type] + 1): + sendv6icmperr(src, dst, type, code, port, payload_length) + #sendv6icmperr(src, dst, type, ICMP6_TYPES_CODES[type] + 2, port, payload_length) + #sendv6icmperr(src, dst, type, 255, port, payload_length) + + +def traffic(): + delay = 2.0 + while 1: + #sendp(Ether(dst="bb:bb:bb:bb:bb:b4")/IP(dst="20.0.0.1")/UDP(chksum=0)/('X'*900), iface="vpp-tapv4") + #sendp(Ether(dst="bb:bb:bb:bb:bb:b6")/IPv6(dst="cccc:bbbb::a000:0001")/ICMPv6EchoRequest()/('X'*900), 
iface="vpp-tapv6") + #sendp(Ether(dst="bb:bb:bb:bb:bb:b6")/IPv6(dst="cccc:bbbb::a000:0001")/UDP()/('X'*900), iface="vpp-tapv6") + sendv6udp("20.0.0.1", "10.0.0.1", 12001) + sendv6tcp("20.0.0.1", "10.0.0.1", 12002) + sendv4udp("10.0.0.1", "20.0.0.1", 12003) + sendv4tcp("10.0.0.1", "20.0.0.1", 12004) + sendv6ping("20.0.0.1", "10.0.0.1", 12005) + sendv4ping("10.0.0.1", "20.0.0.1", 12006) + sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 500) + sendv4icmp_errors("10.0.0.1", "20.0.0.1", 12006, "20.0.0.1", "10.0.0.1", 1500) + sendv6icmp_errors("20.0.0.1", "10.0.0.1", 12006, 500) + time.sleep(delay) + delay *= 0.9 + +if len(sys.argv) <= 1: + print("Usage: conf|traffic") + exit(1) + +if sys.argv[1] == "conf": + print(conf()) +elif sys.argv[1] == "traffic": + traffic()
\ No newline at end of file diff --git a/vnet/vnet/map/examples/mapalgs.py b/vnet/vnet/map/examples/mapalgs.py new file mode 100644 index 00000000000..50a0ed0a3ee --- /dev/null +++ b/vnet/vnet/map/examples/mapalgs.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 + +# The MIT License (MIT) +# +# Copyright (c) 2015 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# File included from https://github.com/ejordangottlieb/pyswmap +# Thanks to jordan ;) +# - Pierre +# + +# There is still a great deal of work required on this module. Please +# use with caution. +# -Jordan + +import sys +from ipaddress import ( + IPv6Address, + IPv6Network, + ip_network, + ip_address, + ) +from math import ( + log, + ) + +class MapCalc(object): + + def __init__(self,**bmr): + #rulev6,rulev4): + self.portranges = False + + # Validate and set BMR and BMR derived values + self._check_bmr_values(bmr) + + def _check_bmr_values(self,bmr): + # Assume these values have not been supplied. 
Validate later. + self.ealen = False + self.ratio = False + + # Validate that a proper PSID Offset has been set + if 'psidoffset' not in bmr: + # Set Default PSID Offset of 6 if it is not set + self.psidoffset = 6 + else: + self.psidoffset = self._psid_offset(bmr['psidoffset']) + + # Validate that a proper IPv4 rule prefix is defined + if 'rulev4' not in bmr: + print("The rule IPv4 prefix has not been set") + sys.exit(1) + else: + self.rulev4 = self._ipv4_rule(bmr['rulev4']) + + # Validate that a proper IPv6 rule prefix is defined + if 'rulev6' not in bmr: + print("The rule IPv6 prefix has not been set") + sys.exit(1) + else: + self.rulev6 = self._ipv6_rule(bmr['rulev6']) + + # Check if EA length was passed + if 'ealen' not in bmr: + self.ealen = False + else: + self.ealen = bmr['ealen'] + self.ratio = self._calc_ratio(bmr['ealen']) + + # Check if sharing ratio was passed or calculated by _calc_ratio + if 'ratio' not in bmr: + # Skip if we have already calculated ratio + if not (self.ratio): + self.ratio = False + else: + if (self.ealen): + # Check to see if supplied EA length contradicts supplied ratio + if ( bmr['ratio'] != self.ratio ): + eavalue = "EA value {}".format(self.ealen) + sharingratio = "sharing ratio {}".format(bmr['ratio']) + print("Supplied {} and {} are contradictory".format( + eavalue, + sharingratio) + ) + sys.exit(1) + else: + self.ratio = bmr['ratio'] + self.ealen = self._calc_ea(bmr['ratio']) + + # EA length or sharing ratio must be set + if not ( self.ealen or self.ratio): + print("The BMR must include an EA length or sharing ratio") + sys.exit(1) + + # Since we have not hit an exception we can calculate the port bits + self.portbits = self._calc_port_bits() + + def _ipv4_rule(self,rulev4): + try: + self.rulev4mask = ip_network( + rulev4, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv4 prefix {}".format(rulev4)) + sys.exit(1) + + self.rulev4object = ip_network(rulev4) + + return rulev4 + + def _ipv6_rule(self,rulev6): 
+ try: + self.rulev6mask = IPv6Network( + rulev6, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv6 prefix {}".format(rulev6)) + sys.exit(1) + + return rulev6 + + def _psid_offset(self,psidoffset): + PSIDOFFSET_MAX = 6 + if psidoffset in range(0,PSIDOFFSET_MAX+1): + return psidoffset + else: + print("Invalid PSID Offset value: {}".format(psidoffset)) + sys.exit(1) + + def _psid_range(self,x): + rset = [] + for i in range(0,x+1): + rset.append(2**i) + return rset + + def _calc_port_bits(self): + portbits = 16 - self.psidoffset - self.psidbits + return portbits + + def _calc_ea(self,ratio): + if ratio not in ( self._psid_range(16) ): + print("Invalid ratio {}".format(ratio)) + print("Ratio between 2 to the power of 0 thru 16") + sys.exit(1) + + if ( 1 == ratio): + self.psidbits = 0 + else: + self.psidbits = int(log(ratio,2)) + ealen = self.psidbits + ( 32 - self.rulev4mask ) + return ealen + + def _calc_ratio(self,ealen): + maskbits = 32 - self.rulev4mask + if ( ealen < maskbits ): + print("EA of {} incompatible with rule IPv4 prefix {}".format( + ealen, + self.rulev4, + ) + ) + print("EA length must be at least {} bits".format( + maskbits, + ) + ) + sys.exit(1) + + self.psidbits = ealen - ( 32 - self.rulev4mask ) + if ( self.psidbits > 16): + print("EA length of {} is too large".format( + ealen, + ) + ) + print("EA should not exceed {} for rule IPv4 prefix {}".format( + maskbits + 16, + self.rulev4, + ) + ) + sys.exit(1) + ratio = 2**self.psidbits + return ratio + + def gen_psid(self,portnum): + if ( portnum < self.start_port() ): + print("port value is less than allowed by PSID Offset") + sys.exit(1) + psid = (portnum & ((2**self.psidbits - 1) << self.portbits)) + psid = psid >> self.portbits + return psid + + def port_ranges(self): + return 2**self.psidoffset - 1 + + def start_port(self): + if self.psidoffset == 0: return 0 + return 2**(16 - self.psidoffset) + + def port_list(self,psid): + startrange = psid * (2**self.portbits) + 
self.start_port() + increment = (2**self.psidbits) * (2**self.portbits) + portlist = [ ] + for port in range(startrange,startrange + 2**self.portbits): + if port >= 65536: continue + portlist.append(port) + for x in range(1,self.port_ranges()): + startrange += increment + for port in range(startrange,startrange + 2**self.portbits): + portlist.append(port) + return portlist + + def ipv4_index(self,ipv4addr): + if ip_address(ipv4addr) in ip_network(self.rulev4): + x = ip_address(ipv4addr) + y = ip_network(self.rulev4,strict=False).network_address + self.ipv4addr = x + return ( int(x) - int(y) ) + else: + print("Error: IPv4 address {} not in Rule IPv4 subnet {}".format( + ipv4add, + ip_network(self.rulev4,strict=False).network_address)) + sys.exit(1) + + def _calc_ipv6bit_pos(self): + addroffset = 128 - (self.rulev6mask + ( self.ealen - self.psidbits)) + psidshift = 128 - ( self.rulev6mask + self.ealen ) + return [addroffset,psidshift] + + def _append_map_eabits(self,ipv4index,addroffset,psidshift,psid): + rulev6base = IPv6Network(self.rulev6,strict=False).network_address + map_prefix = int(rulev6base) | ( ipv4index << addroffset ) + map_fullprefix = map_prefix | ( psid << psidshift) + return map_fullprefix + + + def get_mapce_addr(self,ipv4addr,psid): + ipv4index = self.ipv4_index(ipv4addr) + (addroffset,psidshift) = self._calc_ipv6bit_pos() + map_fullprefix = self._append_map_eabits(ipv4index, + addroffset, + psidshift, + psid) + mapv4iid = map_fullprefix | ( int(self.ipv4addr) << 16 ) + map_full_address = mapv4iid | psid + mapce_address = "{}".format(IPv6Address(map_full_address)) + return mapce_address + + def get_mapce_prefix(self,ipv4addr,psid): + ipv4index = self.ipv4_index(ipv4addr) + (addroffset,psidshift) = self._calc_ipv6bit_pos() + map_fullprefix = self._append_map_eabits(ipv4index, + addroffset, + psidshift, + psid) + mapce_prefix = "{}/{}".format( + IPv6Address(map_fullprefix), + self.rulev6mask + self.ealen + ) + return mapce_prefix + + def 
get_map_ipv4(self,mapce_address): + ipv4 = (int(IPv6Address(mapce_address)) & ( 0xffffffff << 16 )) >> 16 + return ip_address(ipv4) + + + +class DmrCalc(object): + + def __init__(self,dmr): + + # Validate and set BMR and BMR derived values + self.dmrprefix = self._check_dmr_prefix(dmr) + + def embed_6052addr(self,ipv4addr): + + try: + ipv4addrint = int(ip_address(ipv4addr)) + except ValueError: + print("Invalid IPv4 address {}".format(ipv4addr)) + sys.exit(1) + + if ( self.dmrprefix.prefixlen == 64 ): + ipv6int = ipv4addrint << 24 + ipv6int += int(self.dmrprefix.network_address) + return IPv6Address(ipv6int) + + if ( self.dmrprefix.prefixlen == 96 ): + ipv6int = ipv4addrint + ipv6int += int(self.dmrprefix.network_address) + return IPv6Address(ipv6int) + + def _check_dmr_prefix(self,dmrprefix): + try: + self.dmrmask = IPv6Network( + dmrprefix, + strict=False + ).prefixlen + except ValueError: + print("Invalid IPv6 prefix {}".format(prefix)) + sys.exit(1) + + if self.dmrmask not in (32,40,48,56,64,96): + print("Invalid prefix mask /{}".format(self.dmrmask)) + sys.exit(1) + + return IPv6Network(dmrprefix) + +if __name__ == "__main__": + m = DmrCalc('fd80::/48') + print(m.dmrprefix) diff --git a/vnet/vnet/map/examples/mt-test.py b/vnet/vnet/map/examples/mt-test.py new file mode 100644 index 00000000000..62d269c7a13 --- /dev/null +++ b/vnet/vnet/map/examples/mt-test.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +# Copyright (c) 2009-2014 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import threading +import time +from scapy.all import * +from Queue import * + +iface = 'veth1' + +class SnifferThread(threading.Thread) : + def __init__(self,q,iface,flt,timeout) : + threading.Thread.__init__(self) + self.q = q + self.iface = iface + self.timeout = timeout + self.flt = flt + print("Sniffers reporting for service on ",self.iface) + + def run(self) : + conf.iface=self.iface + conf.iface6=self.iface + + r = sniff(filter=self.flt,iface=self.iface,timeout=self.timeout,prn=lambda x: x.summary()) + self.q.put(r) + + + +# New "SR" function +# Fire off thread with filter and expected answer packet(s). +# Fire off sniffer thread, main thread sends packet +# Returns true if found + +def sr2(answer, *args, **kwargs): + q = Queue() + print("Creating SnifferThreadWorkerThread") + flt='ip proto 41' + iface='veth1' + sniffer = SnifferThread(q,iface,flt,1) + sniffer.setDaemon(True) + sniffer.start() + + print "Sending packet:" + send(*args, **kwargs) + sniffer.join() + ps = q.get() + +# ps.summary() + print "Number of packets sniffed:", len(ps) + + for p in ps: + ip = p.getlayer(1) + print "Comparing", ip.summary(), "and", answer.summary() + if ip == answer: + print "We have a match!!" + return True + return False + +aip6 = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest() +answer= IP(src="10.0.0.100",dst="10.10.10.10",ttl=63)/aip6 +packet = IPv6(dst='2002:0a0a:0a0a::12')/ICMPv6EchoRequest() + +# From IPv6 +sr2(answer, packet,iface='veth1') + +#From IPv4 +packet = IP(src='10.10.10.10',dst='10.0.0.100')/IPv6(src='2002:0a0a:0a0a::12',dst='1::2')/ICMPv6EchoRequest() +sr2(answer, packet,iface='veth1') diff --git a/vnet/vnet/map/gen-rules.py b/vnet/vnet/map/gen-rules.py new file mode 100755 index 00000000000..533a8e237f7 --- /dev/null +++ b/vnet/vnet/map/gen-rules.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 Cisco and/or its affiliates. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import ipaddress +import argparse +import sys + +# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6 +# map add rule index <0> psid <psid> ip6-dst <ip6-dst> + +parser = argparse.ArgumentParser(description='MAP VPP configuration generator') +parser.add_argument('-t', action="store", dest="mapmode") +args = parser.parse_args() + +# +# 1:1 Shared IPv4 address, shared BR +# +def shared11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + + +# +# 1:1 Shared IPv4 address +# +def shared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Shared IPv4 
address small +# +def smallshared11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/24') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 6 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 6 psid-len", psid_len) + for psid in range(0x1 << psid_len): + print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + +# +# 1:1 Full IPv4 address +# +def full11(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_src = ipaddress.ip_network('cccc:bbbb::/64') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i], + "ea-bits-len 0 psid-offset 0 psid-len 0") +def full11br(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + ip6_dst = ipaddress.ip_network('bbbb::/32') + psid_len = 0 + for i in range(ip4_pfx.num_addresses): + print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1", + "ea-bits-len 0 psid-offset 0 psid-len 0") + +# +# Algorithmic mapping Shared IPv4 address +# +def algo(): + print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8") + print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0") + +# +# IP4 forwarding +# +def ip4(): + ip4_pfx = ipaddress.ip_network('20.0.0.0/16') + for i in range(ip4_pfx.num_addresses): + print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2") + + +globals()[args.mapmode]() + + diff --git a/vnet/vnet/map/ip4_map.c b/vnet/vnet/map/ip4_map.c new file mode 100644 index 00000000000..cf53ef4918c --- /dev/null +++ b/vnet/vnet/map/ip4_map.c @@ -0,0 +1,591 @@ +/* + * 
Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * Defines used for testing various optimisation schemes + */ +#define MAP_ENCAP_DUAL 0 + +#include "map.h" +#include "../ip/ip_frag.h" + +vlib_node_registration_t ip4_map_reass_node; + +enum ip4_map_next_e { + IP4_MAP_NEXT_IP6_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP4_MAP_NEXT_IP6_REWRITE, +#endif + IP4_MAP_NEXT_FRAGMENT, + IP4_MAP_NEXT_REASS, + IP4_MAP_NEXT_DROP, + IP4_MAP_N_NEXT, +}; + +enum ip4_map_reass_next_t { + IP4_MAP_REASS_NEXT_IP6_LOOKUP, + IP4_MAP_REASS_NEXT_IP4_FRAGMENT, + IP4_MAP_REASS_NEXT_DROP, + IP4_MAP_REASS_N_NEXT, +}; + +typedef struct { + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip4_map_reass_trace_t; + +u8 * +format_ip4_map_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip4_map_reass_trace_t *t = va_arg (*args, map_ip4_map_reass_trace_t *); + return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index, + t->port, t->cached?"cached":"forwarded"); +} + +/* + * ip4_map_get_port + */ +u16 +ip4_map_get_port (ip4_header_t *ip, map_dir_e dir) +{ + /* Find port information */ + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER ? 
udp->src_port : udp->dst_port); + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + /* + * 1) ICMP Echo request or Echo reply + * 2) ICMP Error with inner packet being UDP or TCP + * 3) ICMP Error with inner packet being ICMP Echo request or Echo reply + */ + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } else if (clib_net_to_host_u16(ip->length) >= 64) { // IP + ICMP + IP + L4 header + ip4_header_t *icmp_ip = (ip4_header_t *)(icmp + 2); + if (PREDICT_TRUE((icmp_ip->protocol == IP_PROTOCOL_TCP) || + (icmp_ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(icmp_ip + 1); + return (dir == MAP_SENDER ? udp->dst_port : udp->src_port); + } else if (icmp_ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *inner_icmp = (void *)(icmp_ip + 1); + if (inner_icmp->type == ICMP4_echo_request || inner_icmp->type == ICMP4_echo_reply) + return (*((u16 *)(inner_icmp + 1))); + } + } + } + return (0); +} + +static_always_inline u16 +ip4_map_port_and_security_check (map_domain_t *d, ip4_header_t *ip, u32 *next, u8 *error) +{ + u16 port = 0; + + if (d->psid_length > 0) { + if (!ip4_is_fragment(ip)) { + if (PREDICT_FALSE((ip->ip_version_and_header_length != 0x45) || clib_host_to_net_u16(ip->length) < 28)) { + return 0; + } + port = ip4_map_get_port(ip, MAP_RECEIVER); + if (port) { + /* Verify that port is not among the well-known ports */ + if ((d->psid_offset > 0) && (clib_net_to_host_u16(port) < (0x1 << (16 - d->psid_offset)))) { + *error = MAP_ERROR_ENCAP_SEC_CHECK; + } else { + return (port); + } + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } else { + *next = IP4_MAP_NEXT_REASS; + } + } + return (0); +} + +/* + * ip4_map_vtcfl + */ +static_always_inline u32 +ip4_map_vtcfl (ip4_header_t *ip4, vlib_buffer_t *p) +{ + map_main_t *mm = &map_main; + u8 tc = mm->tc_copy ? 
ip4->tos : mm->tc; + u32 vtcfl = 0x6 << 28; + vtcfl |= tc << 20; + /* Flow label is the low 20 bits of the flow hash: mask bitwise (a logical && would reduce it to 0 or 1) */ + vtcfl |= vnet_buffer(p)->ip.flow_hash & 0x000fffff; + + return (clib_host_to_net_u32(vtcfl)); +} + +static_always_inline bool +ip4_map_ip6_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + map_main_t *mm = &map_main; + u32 adj_index0 = mm->adj6_index; + if (adj_index0 > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm6, mm->adj6_index); + if (adj->n_adj > 1) { + u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT); + adj_index0 += (hash_c0 & (adj->n_adj - 1)); + } + vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0; + return (true); + } +#endif + return (false); +} + +/* + * ip4_map + */ +static uword +ip4_map (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from > 4 && n_left_to_next > 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + map_domain_t *d0, *d1; + u8 error0 = MAP_ERROR_NONE, error1 = MAP_ERROR_NONE; + ip4_header_t *ip40, *ip41; + u16 port0 = 0, port1 = 0; + ip6_header_t *ip6h0, *ip6h1; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP, next1 = IP4_MAP_NEXT_IP6_LOOKUP; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer(vm, from[2]); + p3 = vlib_get_buffer(vm, from[3]); + + vlib_prefetch_buffer_header(p2, STORE); + vlib_prefetch_buffer_header(p3, STORE); + /* IPv4 + 8 = 28. 
possibly plus -40 */ + CLIB_PREFETCH (p2->data-40, 68, STORE); + CLIB_PREFETCH (p3->data-40, 68, STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + p0->current_length = clib_net_to_host_u16(ip40->length); + p1->current_length = clib_net_to_host_u16(ip41->length); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], &map_domain_index1); + ASSERT(d0); + ASSERT(d1); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0); + port1 = ip4_map_port_and_security_check(d1, ip41, &next1, &error1); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); + u32 da41 = clib_net_to_host_u32(ip41->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16(port0); + u16 dp41 = clib_net_to_host_u16(port1); + u64 dal60 = map_get_pfx(d0, da40, dp40); + u64 dal61 = map_get_pfx(d1, da41, dp41); + u64 dar60 = map_get_sfx(d0, da40, dp40); + u64 dar61 = map_get_sfx(d1, da41, dp41); + if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_UNKNOWN; + if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_UNKNOWN; + + /* construct ipv6 header */ + vlib_buffer_advance(p0, - sizeof(ip6_header_t)); + vlib_buffer_advance(p1, - sizeof(ip6_header_t)); + ip6h0 = vlib_buffer_get_current(p0); + ip6h1 = vlib_buffer_get_current(p1); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + vnet_buffer(p1)->sw_if_index[VLIB_TX] = (u32)~0; + + ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0); + ip6h1->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip41, p1); + ip6h0->payload_length = ip40->length; + ip6h1->payload_length = ip41->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + 
ip6h1->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h1->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h1->src_address = d1->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60); + ip6h1->dst_address.as_u64[0] = clib_host_to_net_u64(dal61); + ip6h1->dst_address.as_u64[1] = clib_host_to_net_u64(dar61); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_NEXT_FRAGMENT; + } else { + next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip6h0->payload_length) + 40); + } + } else { + next0 = IP4_MAP_NEXT_DROP; + } + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d1->mtu && (clib_net_to_host_u16(ip6h1->payload_length) + sizeof(*ip6h1) > d1->mtu))) { + vnet_buffer(p1)->ip_frag.header_offset = sizeof(*ip6h1); + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p1)->ip_frag.mtu = d1->mtu; + vnet_buffer(p1)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next1 = IP4_MAP_NEXT_FRAGMENT; + } else { + next1 = ip4_map_ip6_lookup_bypass(p1, ip41) ? 
IP4_MAP_NEXT_IP6_REWRITE : next1; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index1, 1, + clib_net_to_host_u16(ip6h1->payload_length) + 40); + } + } else { + next1 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + u16 port0 = 0; + ip6_header_t *ip6h0; + u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP; + u32 map_domain_index0 = ~0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + p0->current_length = clib_net_to_host_u16(ip40->length); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + ASSERT(d0); + + /* + * Shared IPv4 address + */ + port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0); + + /* MAP calc */ + u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); + u16 dp40 = clib_net_to_host_u16(port0); + u64 dal60 = map_get_pfx(d0, da40, dp40); + u64 dar60 = map_get_sfx(d0, da40, dp40); + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_UNKNOWN; + + /* construct ipv6 header */ + vlib_buffer_advance(p0, - (sizeof(ip6_header_t))); + ip6h0 = vlib_buffer_get_current(p0); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + + 
ip6h0->ip_version_traffic_class_and_flow_label = ip4_map_vtcfl(ip40, p0); + ip6h0->payload_length = ip40->length; + ip6h0->protocol = IP_PROTOCOL_IP_IN_IP; + ip6h0->hop_limit = 0x40; + ip6h0->src_address = d0->ip6_src; + ip6h0->dst_address.as_u64[0] = clib_host_to_net_u64(dal60); + ip6h0->dst_address.as_u64[1] = clib_host_to_net_u64(dar60); + + /* + * Determine next node. Can be one of: + * ip6-lookup, ip6-rewrite, ip4-fragment, ip4-virtreass, error-drop + */ + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_NEXT_FRAGMENT; + } else { + next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip6h0->payload_length) + 40); + } + } else { + next0 = IP4_MAP_NEXT_DROP; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* + * ip4_map_reass + */ +static uword +ip4_map_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_reass_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + 
map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + map_domain_t *d0; + u8 error0 = MAP_ERROR_NONE; + ip4_header_t *ip40; + i32 port0 = 0; + ip6_header_t *ip60; + u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; + u32 map_domain_index0; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + ip40 = (ip4_header_t *)(ip60 + 1); + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &map_domain_index0); + + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32, + ip40->fragment_id, ip40->protocol, &fragments_to_drop); + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) { + if (r->port >= 0) { + // We know the port already + port0 = r->port; + } else if (map_ip4_reass_add_fragment(r, pi0)) { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + cached = 1; + } + } else if ((port0 = ip4_get_port(ip40, MAP_RECEIVER, p0->current_length)) < 0) { + // Could not find port. We'll free the reassembly. 
+ error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + r->port = port0; + map_ip4_reass_get_fragments(r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) { + r->forwarded += clib_host_to_net_u16(ip40->length) - 20; + if (!ip4_get_fragment_more(ip40)) + r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20; + if(r->forwarded >= r->expected_total) + map_ip4_reass_free(r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock(); + + // NOTE: Most operations have already been performed by ip4_map + // All we need is the right destination address + ip60->dst_address.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, port0); + ip60->dst_address.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, port0); + + if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip60->payload_length) + sizeof(*ip60) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip60); + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; + next0 = IP4_MAP_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip4_map_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if(cached) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip60->payload_length) + 40); + next0 = (error0 == MAP_ERROR_NONE) ? 
next0 : IP4_MAP_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the inpu vector + if(n_left_from == 0 && vec_len(fragments_to_loopback)) { + from = vlib_frame_vector_args(frame); + u32 len = vec_len(fragments_to_loopback); + if(len <= VLIB_FRAME_SIZE) { + memcpy(from, fragments_to_loopback, sizeof(u32)*len); + n_left_from = len; + vec_reset_length(fragments_to_loopback); + } else { + memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP4_MAP_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_to_loopback); + return frame->n_vectors; +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip4_map_node) = { + .function = ip4_map, + .name = "ip4-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP4_MAP_N_NEXT, + .next_nodes = { + [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite", +#endif + [IP4_MAP_NEXT_FRAGMENT] = "ip4-frag", + [IP4_MAP_NEXT_REASS] = "ip4-map-reass", + [IP4_MAP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip4_map_reass_node) = { + .function = ip4_map_reass, + .name = "ip4-map-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip4_map_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = 
IP4_MAP_REASS_N_NEXT, + .next_nodes = { + [IP4_MAP_REASS_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_MAP_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP4_MAP_REASS_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip4_map_t.c b/vnet/vnet/map/ip4_map_t.c new file mode 100644 index 00000000000..07f5b19c257 --- /dev/null +++ b/vnet/vnet/map/ip4_map_t.c @@ -0,0 +1,1092 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include "../ip/ip_frag.h" + +#define IP4_MAP_T_DUAL_LOOP 1 + +typedef enum { + IP4_MAPT_NEXT_MAPT_TCP_UDP, + IP4_MAPT_NEXT_MAPT_ICMP, + IP4_MAPT_NEXT_MAPT_FRAGMENTED, + IP4_MAPT_NEXT_DROP, + IP4_MAPT_N_NEXT +} ip4_mapt_next_t; + +typedef enum { + IP4_MAPT_ICMP_NEXT_IP6_LOOKUP, + IP4_MAPT_ICMP_NEXT_IP6_FRAG, + IP4_MAPT_ICMP_NEXT_DROP, + IP4_MAPT_ICMP_N_NEXT +} ip4_mapt_icmp_next_t; + +typedef enum { + IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP, + IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG, + IP4_MAPT_TCP_UDP_NEXT_DROP, + IP4_MAPT_TCP_UDP_N_NEXT +} ip4_mapt_tcp_udp_next_t; + +typedef enum { + IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP, + IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG, + IP4_MAPT_FRAGMENTED_NEXT_DROP, + IP4_MAPT_FRAGMENTED_N_NEXT +} ip4_mapt_fragmented_next_t; + +//This is used to pass information within the buffer data. +//Buffer structure being too small to contain big structures like this. 
+typedef CLIB_PACKED(struct { + ip6_address_t daddr; + ip6_address_t saddr; + //IPv6 header + Fragmentation header will be here + //sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4) + u8 unused[28]; +}) ip4_mapt_pseudo_header_t; + +#define frag_id_4to6(id) (id) + +//TODO: Find the right place in memory for this. +static u8 icmp_to_icmp6_updater_pointer_table[] = + { 0, 1, 4, 4,~0, + ~0,~0,~0, 7, 6, + ~0,~0, 8, 8, 8, + 8, 24, 24, 24, 24 }; + + +static_always_inline int +ip4_map_fragment_cache (ip4_header_t *ip4, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock(); + return !r; +} + +static_always_inline i32 +ip4_map_fragment_get_port (ip4_header_t *ip4) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(ip4->src_address.as_u32, ip4->dst_address.as_u32, + ip4->fragment_id, + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol, + &ignore); + i32 ret = r?r->port:-1; + map_ip4_reass_unlock(); + return ret; +} + + +/* Statelessly translates an ICMP packet into ICMPv6. + * + * Warning: The checksum will need to be recomputed. 
+ * + */ +static_always_inline int +ip4_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len, + i32 *receiver_port, ip4_header_t **inner_ip4) +{ + *inner_ip4 = NULL; + switch (icmp->type) { + case ICMP4_echo_reply: + *receiver_port = ((u16 *)icmp)[2]; + icmp->type = ICMP6_echo_reply; + break; + case ICMP4_echo_request: + *receiver_port = ((u16 *)icmp)[2]; + icmp->type = ICMP6_echo_request; + break; + case ICMP4_destination_unreachable: + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + + switch (icmp->code) { + case ICMP4_destination_unreachable_destination_unreachable_net: //0 + case ICMP4_destination_unreachable_destination_unreachable_host: //1 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_protocol_unreachable: //2 + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_unrecognized_next_header; + break; + case ICMP4_destination_unreachable_port_unreachable: //3 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_port_unreachable; + break; + case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4 + icmp->type = ICMP6_packet_too_big; + icmp->code = 0; + { + u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1))); + if (advertised_mtu) + advertised_mtu += 20; + else + advertised_mtu = 1000; //FIXME ! 
(RFC 1191 - plateau value) + + //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20) + *((u32 *)(icmp + 1)) = clib_host_to_net_u32(advertised_mtu); + } + break; + + case ICMP4_destination_unreachable_source_route_failed: //5 + case ICMP4_destination_unreachable_destination_network_unknown: //6 + case ICMP4_destination_unreachable_destination_host_unknown: //7 + case ICMP4_destination_unreachable_source_host_isolated: //8 + case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11 + case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_network_administratively_prohibited: //9 + case ICMP4_destination_unreachable_host_administratively_prohibited: //10 + case ICMP4_destination_unreachable_communication_administratively_prohibited: //13 + case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_destination_administratively_prohibited; + break; + case ICMP4_destination_unreachable_host_precedence_violation: //14 + default: + return -1; + } + break; + + case ICMP4_time_exceeded: //11 + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + icmp->type = ICMP6_time_exceeded; + //icmp->code = icmp->code //unchanged + break; + + case ICMP4_parameter_problem: + *inner_ip4 = (ip4_header_t *)(((u8 *) icmp) + 8); + *receiver_port = ip4_get_port(*inner_ip4, MAP_SENDER, icmp_len - 8); + + switch (icmp->code) { + case ICMP4_parameter_problem_pointer_indicates_error: + case ICMP4_parameter_problem_bad_length: + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_erroneous_header_field; + { + //The IPv4 pointer byte comes from the packet: drop values beyond the translation table instead of reading out of bounds + if (*((u8 *)(icmp + 1)) >= sizeof(icmp_to_icmp6_updater_pointer_table)) + return -1; + u8 ptr = icmp_to_icmp6_updater_pointer_table[*((u8 *)(icmp + 
1))]; + if (ptr == 0xff) + return -1; + + *((u32 *)(icmp + 1)) = clib_host_to_net_u32(ptr); + } + break; + default: + //All other codes cause dropping the packet + return -1; + } + break; + + default: + //All other types cause dropping the packet + return -1; + break; + } + return 0; +} + +static_always_inline void +_ip4_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 *error) +{ + ip4_header_t *ip4, *inner_ip4; + ip6_header_t *ip6, *inner_ip6; + u32 ip_len; + icmp46_header_t *icmp; + i32 recv_port; + ip_csum_t csum; + u16 *inner_L4_checksum = 0; + ip6_frag_hdr_t *inner_frag; + u32 inner_frag_id; + u32 inner_frag_offset; + u8 inner_frag_more; + + ip4 = vlib_buffer_get_current(p); + ip_len = clib_net_to_host_u16(ip4->length); + ASSERT(ip_len <= p->current_length); + + icmp = (icmp46_header_t *)(ip4 + 1); + if (ip4_icmp_to_icmp6_in_place(icmp, ip_len - sizeof(*ip4), + &recv_port, &inner_ip4)) { + *error = MAP_ERROR_ICMP; + return; + } + + if (recv_port < 0) { + // In case of 1:1 mapping, we don't care about the port + if(d->ea_bits_len == 0 && d->rules) { + recv_port = 0; + } else { + *error = MAP_ERROR_ICMP; + return; + } + } + + if (inner_ip4) { + //We have 2 headers to translate. 
+ //We need to make some room in the middle of the packet + + if (PREDICT_FALSE(ip4_is_fragment(inner_ip4))) { + //Here it starts getting really tricky + //We will add a fragmentation header in the inner packet + + if (!ip4_is_first_fragment(inner_ip4)) { + //For now we do not handle unless it is the first fragment + //Ideally we should handle the case as we are in slow path already + *error = MAP_ERROR_FRAGMENTED; + return; + } + + vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4)) - sizeof(*inner_frag)); + ip6 = vlib_buffer_get_current(p); + memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8); + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + icmp = (icmp46_header_t *) (ip4 + 1); + + inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6) - sizeof(*inner_frag)); + inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, sizeof(*inner_ip6)); + ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4) + sizeof(*inner_frag)); + inner_frag_id = frag_id_4to6(inner_ip4->fragment_id); + inner_frag_offset = ip4_get_fragment_offset(inner_ip4); + inner_frag_more = !!(inner_ip4->flags_and_fragment_offset & clib_net_to_host_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } else { + vlib_buffer_advance(p, - 2*(sizeof(*ip6) - sizeof(*ip4))); + ip6 = vlib_buffer_get_current(p); + memcpy(u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)), ip4, 20 + 8); + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + icmp = (icmp46_header_t *) u8_ptr_add(ip4, sizeof(*ip4)); + inner_ip6 = (ip6_header_t *) u8_ptr_add(inner_ip4, sizeof(*ip4) - sizeof(*ip6)); + ip6->payload_length = u16_net_add(ip4->length, sizeof(*ip6) - 2*sizeof(*ip4)); + inner_frag = NULL; + } + + if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_TCP)) { + inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum; + *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *) (&inner_ip4->src_address)))); + } 
else if (PREDICT_TRUE(inner_ip4->protocol == IP_PROTOCOL_UDP)) { + inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum; + if (!*inner_L4_checksum) { + //The inner packet was first translated, and therefore came from IPv6. + //As the packet was an IPv6 packet, the UDP checksum can't be NULL + *error = MAP_ERROR_ICMP; + return; + } + *inner_L4_checksum = ip_csum_fold(ip_csum_sub_even(*inner_L4_checksum, *((u64 *)(&inner_ip4->src_address)))); + } else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) { + //We have an ICMP inside an ICMP + //It needs to be translated, but not for error ICMP messages + icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); + csum = inner_icmp->checksum; + //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by ip4_icmp_to_icmp6_in_place + csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp)); + inner_icmp->type = (inner_icmp->type == ICMP4_echo_request)? + ICMP6_echo_request:ICMP6_echo_reply; + csum = ip_csum_add_even(csum, *((u16 *)inner_icmp)); + csum = ip_csum_add_even(csum, clib_host_to_net_u16(IP_PROTOCOL_ICMP6)); + csum = ip_csum_add_even(csum, inner_ip4->length - sizeof(*inner_ip4)); + inner_icmp->checksum = ip_csum_fold(csum); + inner_L4_checksum = &inner_icmp->checksum; + inner_ip4->protocol = IP_PROTOCOL_ICMP6; + } else { + ASSERT(0); // We had a port from that, so it is udp or tcp or ICMP + } + + //FIXME: Security check with the port found in the inner packet + + csum = *inner_L4_checksum; //Initial checksum of the inner L4 header + //FIXME: Shouldn't we remove ip addresses from there ? 
+ + inner_ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (inner_ip4->tos << 20)); + inner_ip6->payload_length = u16_net_add(inner_ip4->length, - sizeof(*inner_ip4)); + inner_ip6->hop_limit = inner_ip4->ttl; + inner_ip6->protocol = inner_ip4->protocol; + + //Note that the source address is within the domain + //while the destination address is the one outside the domain + ip4_map_t_embedded_address(d, &inner_ip6->dst_address, &inner_ip4->dst_address); + inner_ip6->src_address.as_u64[0] = map_get_pfx_net(d, inner_ip4->src_address.as_u32, recv_port); + inner_ip6->src_address.as_u64[1] = map_get_sfx_net(d, inner_ip4->src_address.as_u32, recv_port); + + if (PREDICT_FALSE(inner_frag != NULL)) { + inner_frag->next_hdr = inner_ip6->protocol; + inner_frag->identification = inner_frag_id; + inner_frag->rsv = 0; + inner_frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(inner_frag_offset, inner_frag_more); + inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + inner_ip6->payload_length = clib_host_to_net_u16( + clib_net_to_host_u16(inner_ip6->payload_length) + sizeof(*inner_frag)); + } + + csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_add_even(csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even(csum, inner_ip6->dst_address.as_u64[1]); + *inner_L4_checksum = ip_csum_fold(csum); + + } else { + vlib_buffer_advance(p, sizeof(*ip4) - sizeof(*ip6)); + ip6 = vlib_buffer_get_current(p); + ip6->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip4->length) - sizeof(*ip4)); + } + + //Translate outer IPv6 + ip6->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip4->tos << 20)); + + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_ICMP6; + + ip4_map_t_embedded_address(d, &ip6->src_address, &ip4->src_address); + ip6->dst_address.as_u64[0] = map_get_pfx_net(d, ip4->dst_address.as_u32, 
recv_port); + ip6->dst_address.as_u64[1] = map_get_sfx_net(d, ip4->dst_address.as_u32, recv_port); + + //Truncate when the packet exceeds the minimal IPv6 MTU + if (p->current_length > 1280) { + ip6->payload_length = clib_host_to_net_u16(1280 - sizeof(*ip6)); + p->current_length = 1280; //Looks too simple to be correct... + } + + //TODO: We could do an easy diff-checksum for echo requests/replies + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_csum_with_carry(0, ip6->payload_length); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(ip6->protocol)); + csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry(csum, ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry(csum, ip6->dst_address.as_u64[1]); + csum = ip_incremental_checksum(csum, icmp, clib_net_to_host_u16(ip6->payload_length)); + icmp->checksum = ~ip_csum_fold (csum); +} + +static uword +ip4_map_t_icmp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_icmp_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_mapt_icmp_next_t next0; + u8 error0; + map_domain_t *d0; + u16 len0; + + next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP; + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + vlib_buffer_advance(p0, sizeof(ip4_mapt_pseudo_header_t)); //The pseudo-header is not used + len0 
//Graph node function for non-first IPv4 fragments handed over with a MAP-T
//pseudo header in front of the IPv4 header.
//For every buffer in the frame: strip the pseudo header, rewrite the IPv4
//header in place into an IPv6 header followed by an IPv6 fragment header,
//copy the IPv6 addresses saved in the pseudo header, and enqueue the buffer
//to ip6-lookup (or to the IPv6 fragmentation node when the translated packet
//is larger than the domain MTU stored in the buffer metadata).
//Returns the number of buffers in the input frame.
static uword
ip4_map_t_fragmented (vlib_main_t *vm,
                      vlib_node_runtime_t *node,
                      vlib_frame_t *frame)
{
  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
  from = vlib_frame_vector_args(frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0) {
    vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);

    while (n_left_from > 0 && n_left_to_next > 0) {
      u32 pi0;
      vlib_buffer_t *p0;
      ip4_header_t *ip40;
      ip6_header_t *ip60;
      ip6_frag_hdr_t *frag0;
      ip4_mapt_pseudo_header_t *pheader0;
      ip4_mapt_fragmented_next_t next0;

      //Speculatively enqueue to the current next frame; next0 may be
      //changed below and is fixed up by vlib_validate_buffer_enqueue_x1
      next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_LOOKUP;
      pi0 = to_next[0] = from[0];
      from += 1;
      n_left_from -= 1;
      to_next +=1;
      n_left_to_next -= 1;

      p0 = vlib_get_buffer(vm, pi0);

      //Accessing pseudo header
      pheader0 = vlib_buffer_get_current(p0);
      vlib_buffer_advance(p0, sizeof(*pheader0));

      //Accessing ip4 header
      //The fragment header is placed so that it ends exactly where the IPv4
      //header ends, and the IPv6 header is placed immediately before it.
      //The buffer start is then moved back to the new IPv6 header.
      //NOTE(review): with the usual 20/40/8-byte header sizes the new headers
      //overlap the IPv4 header that is still being read below, so the order
      //of the stores appears to matter - confirm before reordering.
      ip40 = vlib_buffer_get_current(p0);
      frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0));
      ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0) - sizeof(*ip60));
      vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0));

      //We know that the protocol was one of ICMP, TCP or UDP
      //because the first fragment was found and cached
      frag0->next_hdr = (ip40->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip40->protocol;
      frag0->identification = frag_id_4to6(ip40->fragment_id);
      frag0->rsv = 0;
      //Carry over the IPv4 fragment offset and the "more fragments" flag
      frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(
          ip4_get_fragment_offset(ip40),
          clib_net_to_host_u16(ip40->flags_and_fragment_offset) & IP4_HEADER_FLAG_MORE_FRAGMENTS);

      ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20));
      //IPv6 payload = IPv4 total length minus the IPv4 header, plus the
      //fragment header that now precedes the payload
      ip60->payload_length = clib_host_to_net_u16(clib_net_to_host_u16(ip40->length) - sizeof(*ip40) + sizeof(*frag0));
      ip60->hop_limit = ip40->ttl;
      ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
      //Addresses were computed earlier and stored in the pseudo header
      ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0];
      ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1];
      ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0];
      ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1];

      //Send to the IPv6 fragmentation node when the packet exceeds the MTU
      if(vnet_buffer(p0)->map_t.mtu < p0->current_length) {
        vnet_buffer(p0)->ip_frag.header_offset = 0;
        vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu;
        vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
        next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG;
      }

      vlib_validate_buffer_enqueue_x1(vm, node, next_index,
                                      to_next, n_left_to_next, pi0,
                                      next0);
    }
    vlib_put_next_frame(vm, node, next_index, n_left_to_next);
  }
  return frame->n_vectors;
}
vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + ip_csum_t csum0, csum1; + u16 *checksum0, *checksum1; + ip6_frag_hdr_t *frag0, *frag1; + u32 frag_id0, frag_id1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + ip4_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current(p0); + pheader1 = vlib_buffer_get_current(p1); + vlib_buffer_advance(p0, sizeof(*pheader0)); + vlib_buffer_advance(p1, sizeof(*pheader1)); + + //Accessing ip4 header + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset); + checksum1 = (u16 *) u8_ptr_add(ip41, vnet_buffer(p1)->map_t.checksum_offset); + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address))); + *checksum0 = ~ip_csum_fold(csum); + } + if (PREDICT_FALSE(!*checksum1 && ip41->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = 
clib_host_to_net_u16(ip41->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip41, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip41->src_address))); + *checksum1 = ~ip_csum_fold(csum); + } + + csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32); + csum1 = ip_csum_sub_even(*checksum1, ip41->src_address.as_u32); + csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32); + csum1 = ip_csum_sub_even(csum1, ip41->dst_address.as_u32); + + // Deal with fragmented packets + if (PREDICT_FALSE(ip40->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) { + ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0)); + frag_id0 = frag_id_4to6(ip40->fragment_id); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60)); + frag0 = NULL; + } + + if (PREDICT_FALSE(ip41->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) { + ip61 = (ip6_header_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip41, sizeof(*ip40) - sizeof(*frag0)); + frag_id1 = frag_id_4to6(ip41->fragment_id); + vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip61 = (ip6_header_t *) (((u8 *)ip41) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p1, sizeof(*ip40) - sizeof(*ip60)); + frag1 = NULL; + } + + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20)); + 
ip61->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip41->tos << 20)); + ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40)); + ip61->payload_length = u16_net_add(ip41->length, - sizeof(*ip40)); + ip60->hop_limit = ip40->ttl; + ip61->hop_limit = ip41->ttl; + ip60->protocol = ip40->protocol; + ip61->protocol = ip41->protocol; + + if (PREDICT_FALSE(frag0 != NULL)) { + frag0->next_hdr = ip60->protocol; + frag0->identification = frag_id0; + frag0->rsv = 0; + frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0)); + } + + if (PREDICT_FALSE(frag1 != NULL)) { + frag1->next_hdr = ip61->protocol; + frag1->identification = frag_id1; + frag1->rsv = 0; + frag1->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip61->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip61->payload_length = u16_net_add(ip61->payload_length, sizeof(*frag0)); + } + + //Finally copying the address + ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; + ip61->dst_address.as_u64[0] = pheader1->daddr.as_u64[0]; + ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; + ip61->dst_address.as_u64[1] = pheader1->daddr.as_u64[1]; + ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; + ip61->src_address.as_u64[0] = pheader1->saddr.as_u64[0]; + ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; + ip61->src_address.as_u64[1] = pheader1->saddr.as_u64[1]; + + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]); + csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]); + csum1 = ip_csum_add_even(csum1, ip61->src_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]); + csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]); + 
csum1 = ip_csum_add_even(csum1, ip61->dst_address.as_u64[1]); + *checksum0 = ip_csum_fold(csum0); + *checksum1 = ip_csum_fold(csum1); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + ip6_header_t *ip60; + ip_csum_t csum0; + u16 *checksum0; + ip6_frag_hdr_t *frag0; + u32 frag_id0; + ip4_mapt_pseudo_header_t *pheader0; + ip4_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + + //Accessing pseudo header + pheader0 = vlib_buffer_get_current(p0); + vlib_buffer_advance(p0, sizeof(*pheader0)); + + //Accessing ip4 header + ip40 = vlib_buffer_get_current(p0); + checksum0 = (u16 *) u8_ptr_add(ip40, vnet_buffer(p0)->map_t.checksum_offset); + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE(!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) { + u16 udp_len = clib_host_to_net_u16(ip40->length) - sizeof(*ip40); + udp_header_t *udp = (udp_header_t *) u8_ptr_add(ip40, sizeof(*ip40)); + ip_csum_t csum; + csum = ip_incremental_checksum(0, udp, udp_len); + csum = ip_csum_with_carry(csum, 
clib_host_to_net_u16(udp_len)); + csum = ip_csum_with_carry(csum, clib_host_to_net_u16(IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry(csum, *((u64 *)(&ip40->src_address))); + *checksum0 = ~ip_csum_fold(csum); + } + + csum0 = ip_csum_sub_even(*checksum0, ip40->src_address.as_u32); + csum0 = ip_csum_sub_even(csum0, ip40->dst_address.as_u32); + + // Deal with fragmented packets + if (PREDICT_FALSE(ip40->flags_and_fragment_offset & + clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS))) { + ip60 = (ip6_header_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip40, sizeof(*ip40) - sizeof(*frag0)); + frag_id0 = frag_id_4to6(ip40->fragment_id); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60) - sizeof(*frag0)); + } else { + ip60 = (ip6_header_t *) (((u8 *)ip40) + sizeof(*ip40) - sizeof(*ip60)); + vlib_buffer_advance(p0, sizeof(*ip40) - sizeof(*ip60)); + frag0 = NULL; + } + + ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32((6 << 28) + (ip40->tos << 20)); + ip60->payload_length = u16_net_add(ip40->length, - sizeof(*ip40)); + ip60->hop_limit = ip40->ttl; + ip60->protocol = ip40->protocol; + + if (PREDICT_FALSE(frag0 != NULL)) { + frag0->next_hdr = ip60->protocol; + frag0->identification = frag_id0; + frag0->rsv = 0; + frag0->fragment_offset_and_more = ip6_frag_hdr_offset_and_more(0, 1); + ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip60->payload_length = u16_net_add(ip60->payload_length, sizeof(*frag0)); + } + + //Finally copying the address + ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; + ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; + ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; + ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; + + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[0]); + csum0 = ip_csum_add_even(csum0, ip60->src_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, ip60->dst_address.as_u64[0]); + csum0 = 
ip_csum_add_even(csum0, ip60->dst_address.as_u64[1]); + *checksum0 = ip_csum_fold(csum0); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static_always_inline void +ip4_map_t_classify(vlib_buffer_t *p0, map_domain_t *d0, ip4_header_t *ip40, u16 ip4_len0, + i32 *dst_port0, u8 *error0, ip4_mapt_next_t *next0) +{ + if (PREDICT_FALSE(ip4_get_fragment_offset(ip40))) { + *next0 = IP4_MAPT_NEXT_MAPT_FRAGMENTED; + if(d0->ea_bits_len == 0 && d0->rules) { + *dst_port0 = 0; + } else { + *dst_port0 = ip4_map_fragment_get_port(ip40); + *error0 = (*dst_port0 == -1) ? MAP_ERROR_FRAGMENT_MEMORY : *error0; + } + } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_TCP)) { + vnet_buffer(p0)->map_t.checksum_offset = 36; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 40 ? MAP_ERROR_MALFORMED : *error0; + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2)); + } else if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_UDP)) { + vnet_buffer(p0)->map_t.checksum_offset = 26; + *next0 = IP4_MAPT_NEXT_MAPT_TCP_UDP; + *error0 = ip4_len0 < 28 ? 
MAP_ERROR_MALFORMED : *error0; + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 2)); + } else if (ip40->protocol == IP_PROTOCOL_ICMP) { + *next0 = IP4_MAPT_NEXT_MAPT_ICMP; + if(d0->ea_bits_len == 0 && d0->rules) + *dst_port0 = 0; + else if (((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->code == ICMP4_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip40, sizeof(*ip40)))->code == ICMP4_echo_request) + *dst_port0 = (i32) *((u16 *)u8_ptr_add(ip40, sizeof(*ip40) + 6)); + } else { + *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip4_map_t (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_map_t_node.index); + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP4_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip40, *ip41; + map_domain_t *d0, *d1; + ip4_mapt_next_t next0, next1; + u16 ip4_len0, ip4_len1; + u8 error0, error1; + i32 dst_port0, dst_port1; + ip4_mapt_pseudo_header_t *pheader0, *pheader1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + ip4_len0 = clib_host_to_net_u16(ip40->length); + ip4_len1 = clib_host_to_net_u16(ip41->length); + + if (PREDICT_FALSE(p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) { + error0 = 
MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE(p1->current_length < ip4_len1 || + ip41->ip_version_and_header_length != 0x45)) { + error1 = MAP_ERROR_UNKNOWN; + next1 = IP4_MAPT_NEXT_DROP; + } + + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + &vnet_buffer(p0)->map_t.map_domain_index); + d1 = ip4_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], + &vnet_buffer(p1)->map_t.map_domain_index); + + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + vnet_buffer(p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; + + dst_port0 = -1; + dst_port1 = -1; + + ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0); + ip4_map_t_classify(p1, d1, ip41, ip4_len1, &dst_port1, &error1, &next1); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance(p0, - sizeof(*pheader0)); + vlib_buffer_advance(p1, - sizeof(*pheader1)); + pheader0 = vlib_buffer_get_current(p0); + pheader1 = vlib_buffer_get_current(p1); + + //Save addresses within the packet + ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address); + ip4_map_t_embedded_address(d1, &pheader1->saddr, &ip41->src_address); + pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader1->daddr.as_u64[0] = map_get_pfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1); + pheader1->daddr.as_u64[1] = map_get_sfx_net(d1, ip41->dst_address.as_u32, (u16)dst_port1); + + if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) && + (d0->ea_bits_len != 0 || !d0->rules) && + ip4_map_fragment_cache(ip40, dst_port0))) { + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_FALSE(ip4_is_first_fragment(ip41) && (dst_port1 != -1) && + (d1->ea_bits_len != 0 || !d1->rules) && + ip4_map_fragment_cache(ip41, dst_port1))) { + error1 = MAP_ERROR_FRAGMENT_MEMORY; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 
!= IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip40->length)); + } + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p1)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip41->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip40; + map_domain_t *d0; + ip4_mapt_next_t next0; + u16 ip4_len0; + u8 error0; + i32 dst_port0; + ip4_mapt_pseudo_header_t *pheader0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + ip4_len0 = clib_host_to_net_u16(ip40->length); + if (PREDICT_FALSE(p0->current_length < ip4_len0 || + ip40->ip_version_and_header_length != 0x45)) { + error0 = MAP_ERROR_UNKNOWN; + next0 = IP4_MAPT_NEXT_DROP; + } + + d0 = ip4_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + &vnet_buffer(p0)->map_t.map_domain_index); + + vnet_buffer(p0)->map_t.mtu = d0->mtu ? 
d0->mtu : ~0; + + dst_port0 = -1; + ip4_map_t_classify(p0, d0, ip40, ip4_len0, &dst_port0, &error0, &next0); + + //Add MAP-T pseudo header in front of the packet + vlib_buffer_advance(p0, - sizeof(*pheader0)); + pheader0 = vlib_buffer_get_current(p0); + + //Save addresses within the packet + ip4_map_t_embedded_address(d0, &pheader0->saddr, &ip40->src_address); + pheader0->daddr.as_u64[0] = map_get_pfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + pheader0->daddr.as_u64[1] = map_get_sfx_net(d0, ip40->dst_address.as_u32, (u16)dst_port0); + + //It is important to cache at this stage because the result might be necessary + //for packets within the same vector. + //Actually, this approach even provides some limited out-of-order fragments support + if (PREDICT_FALSE(ip4_is_first_fragment(ip40) && (dst_port0 != -1) && + (d0->ea_bits_len != 0 || !d0->rules) && + ip4_map_fragment_cache(ip40, dst_port0))) { + error0 = MAP_ERROR_UNKNOWN; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP4_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip40->length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? 
//Registration of the MAP-T IPv4 entry node.
//ip4_map_t() classifies each packet and dispatches it to one of the
//protocol specific translation nodes listed in next_nodes, or to error-drop.
VLIB_REGISTER_NODE(ip4_map_t_node) = {
  .function = ip4_map_t,
  .name = "ip4-map-t",
  .vector_size = sizeof(u32),
  .format_trace = format_map_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,

  //Error counters shared by all MAP-T nodes registered with this table
  .n_errors = MAP_N_ERROR,
  .error_strings = map_t_error_strings,

  //Next-node table, indexed by ip4_mapt_next_t
  .n_next_nodes = IP4_MAPT_N_NEXT,
  .next_nodes = {
      [IP4_MAPT_NEXT_MAPT_TCP_UDP] = "ip4-map-t-tcp-udp",
      [IP4_MAPT_NEXT_MAPT_ICMP] = "ip4-map-t-icmp",
      [IP4_MAPT_NEXT_MAPT_FRAGMENTED] = "ip4-map-t-fragmented",
      [IP4_MAPT_NEXT_DROP] = "error-drop",
  },
};
+ *--------------------------------------------------------------------------- + */ +#include "sixrd.h" + +vlib_node_registration_t ip4_sixrd_node; + +typedef enum { + IP4_SIXRD_NEXT_IP6_LOOKUP, + IP4_SIXRD_NEXT_DROP, + IP4_SIXRD_N_NEXT, +} ip4_sixrd_next_t; + +/* + * ip4_sixrd_sec_check + */ +static_always_inline void +ip4_sixrd_sec_check (sixrd_domain_t *d, ip4_address_t sa4, ip6_address_t sa6, u8 *error) +{ + u32 a = sixrd_get_addr(d, sa6.as_u64[0]); + clib_warning("Security check: %U %U", format_ip4_address, &a, format_ip4_address, &sa4); + if (PREDICT_FALSE(sixrd_get_addr(d, sa6.as_u64[0]) != sa4.as_u32)) + *error = SIXRD_ERROR_SEC_CHECK; +} + +/* + * ip4_sixrd + */ +static uword +ip4_sixrd (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip4_sixrd_node.index); + u32 decap = 0; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = SIXRD_ERROR_NONE; + sixrd_domain_t *d0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + u32 sixrd_domain_index0 = ~0; + u32 next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + + /* Throw away anything that isn't IP in IP. 
*/ + if (PREDICT_TRUE(ip40->protocol == IP_PROTOCOL_IPV6 && clib_net_to_host_u16(ip40->length) >= 60)) { + vlib_buffer_advance(p0, sizeof(ip4_header_t)); + ip60 = vlib_buffer_get_current(p0); + d0 = ip4_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip6_address_t *)&ip60->src_address, + &sixrd_domain_index0, &error0); + } else { + error0 = SIXRD_ERROR_BAD_PROTOCOL; + } + if (d0) { + /* SIXRD inbound security check */ + ip4_sixrd_sec_check(d0, ip40->src_address, ip60->src_address, &error0); + } + + next0 = error0 == SIXRD_ERROR_NONE ? IP4_SIXRD_NEXT_IP6_LOOKUP : IP4_SIXRD_NEXT_DROP; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->sixrd_domain_index = sixrd_domain_index0; + } + + p0->error = error_node->errors[error0]; + if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) decap++; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter(vm, ip4_sixrd_node.index, SIXRD_ERROR_DECAPSULATED, decap); + + return frame->n_vectors; +} + +static char *sixrd_error_strings[] = { +#define _(sym,string) string, + foreach_sixrd_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip4_sixrd_node) = { + .function = ip4_sixrd, + .name = "ip4-sixrd", + .vector_size = sizeof(u32), + .format_trace = format_sixrd_trace, + .n_errors = SIXRD_N_ERROR, + .error_strings = sixrd_error_strings, + .n_next_nodes = IP4_SIXRD_N_NEXT, + .next_nodes = { + [IP4_SIXRD_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_SIXRD_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c new file mode 100644 index 00000000000..e803af9007a --- /dev/null +++ b/vnet/vnet/map/ip6_map.c @@ -0,0 +1,966 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include "../ip/ip_frag.h" + +enum ip6_map_next_e { + IP6_MAP_NEXT_IP4_LOOKUP, +#ifdef MAP_SKIP_IP6_LOOKUP + IP6_MAP_NEXT_IP4_REWRITE, +#endif + IP6_MAP_NEXT_IP6_REASS, + IP6_MAP_NEXT_IP4_REASS, + IP6_MAP_NEXT_IP4_FRAGMENT, + IP6_MAP_NEXT_IP6_ICMP_RELAY, + IP6_MAP_NEXT_IP6_LOCAL, + IP6_MAP_NEXT_DROP, + IP6_MAP_N_NEXT, +}; + +enum ip6_map_ip6_reass_next_e { + IP6_MAP_IP6_REASS_NEXT_IP6_MAP, + IP6_MAP_IP6_REASS_NEXT_DROP, + IP6_MAP_IP6_REASS_N_NEXT, +}; + +enum ip6_map_ip4_reass_next_e { + IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP, + IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT, + IP6_MAP_IP4_REASS_NEXT_DROP, + IP6_MAP_IP4_REASS_N_NEXT, +}; + +enum ip6_icmp_relay_next_e { + IP6_ICMP_RELAY_NEXT_IP4_LOOKUP, + IP6_ICMP_RELAY_NEXT_DROP, + IP6_ICMP_RELAY_N_NEXT, +}; + +vlib_node_registration_t ip6_map_ip4_reass_node; +vlib_node_registration_t ip6_map_ip6_reass_node; +static vlib_node_registration_t ip6_map_icmp_relay_node; + +typedef struct { + u32 map_domain_index; + u16 port; + u8 cached; +} map_ip6_map_ip4_reass_trace_t; + +u8 * +format_ip6_map_ip4_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip4_reass_trace_t *t = va_arg (*args, map_ip6_map_ip4_reass_trace_t *); + return format(s, "MAP domain index: %d L4 port: %u Status: %s", t->map_domain_index, + t->port, 
t->cached?"cached":"forwarded"); +} + +typedef struct { + u16 offset; + u16 frag_len; + u8 out; +} map_ip6_map_ip6_reass_trace_t; + +u8 * +format_ip6_map_ip6_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_ip6_map_ip6_reass_trace_t *t = va_arg (*args, map_ip6_map_ip6_reass_trace_t *); + return format(s, "Offset: %d Fragment length: %d Status: %s", t->offset, t->frag_len, t->out?"out":"in"); +} + +/* + * ip6_map_sec_check + */ +static_always_inline bool +ip6_map_sec_check (map_domain_t *d, u16 port, ip4_header_t *ip4, ip6_header_t *ip6) +{ + u16 sp4 = clib_net_to_host_u16(port); + u32 sa4 = clib_net_to_host_u32(ip4->src_address.as_u32); + u64 sal6 = map_get_pfx(d, sa4, sp4); + u64 sar6 = map_get_sfx(d, sa4, sp4); + + if (PREDICT_FALSE(sal6 != clib_net_to_host_u64(ip6->src_address.as_u64[0]) || + sar6 != clib_net_to_host_u64(ip6->src_address.as_u64[1]))) + return (false); + return (true); +} + +static_always_inline void +ip6_map_security_check (map_domain_t *d, ip4_header_t *ip4, ip6_header_t *ip6, u32 *next, u8 *error) +{ + map_main_t *mm = &map_main; + if (d->ea_bits_len || d->rules) { + if (d->psid_length > 0) { + if (!ip4_is_fragment(ip4)) { + u16 port = ip4_map_get_port(ip4, MAP_SENDER); + if (port) { + if (mm->sec_check) + *error = ip6_map_sec_check(d, port, ip4, ip6) ? MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK; + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + } + } else { + *next = mm->sec_check_frag ? 
IP6_MAP_NEXT_IP4_REASS : *next; + } + } + } +} + +static_always_inline bool +ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip) +{ +#ifdef MAP_SKIP_IP6_LOOKUP + map_main_t *mm = &map_main; + u32 adj_index0 = mm->adj4_index; + if (adj_index0 > 0) { + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm4, mm->adj4_index); + if (adj->n_adj > 1) { + u32 hash_c0 = ip4_compute_flow_hash(ip, IP_FLOW_HASH_DEFAULT); + adj_index0 += (hash_c0 & (adj->n_adj - 1)); + } + vnet_buffer(p0)->ip.adj_index[VLIB_TX] = adj_index0; + return (true); + } +#endif + return (false); +} + + +/* + * ip6_map + */ +static uword +ip6_map (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop */ + while (n_left_from > 4 && n_left_to_next > 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + u8 error0 = MAP_ERROR_NONE; + u8 error1 = MAP_ERROR_NONE; + map_domain_t *d0 = 0, *d1 = 0; + ip4_header_t *ip40, *ip41; + ip6_header_t *ip60, *ip61; + u16 port0 = 0, port1 = 0; + u32 map_domain_index0 = ~0, map_domain_index1 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + u32 next1 = IP6_MAP_NEXT_IP4_LOOKUP; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer(vm, from[2]); + p3 = vlib_get_buffer(vm, from[3]); + + vlib_prefetch_buffer_header(p2, LOAD); + vlib_prefetch_buffer_header(p3, LOAD); + + /* IPv6 + IPv4 header + 8 bytes of ULP */ + CLIB_PREFETCH(p2->data, 68, LOAD); + CLIB_PREFETCH(p3->data, 68, LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next +=2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + vlib_buffer_advance(p0, sizeof(ip6_header_t)); + vlib_buffer_advance(p1, sizeof(ip6_header_t)); + ip40 = vlib_buffer_get_current(p0); + ip41 = vlib_buffer_get_current(p1); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) { + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? 
+ IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + next0 = IP6_MAP_NEXT_IP6_REASS; + } else { + error0 = MAP_ERROR_BAD_PROTOCOL; + next0 = IP6_MAP_NEXT_DROP; + } + if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) { + d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32, + &map_domain_index1, &error1); + } else if (ip61->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip61->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip61 + 1); + next1 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? + IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip61->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + next1 = IP6_MAP_NEXT_IP6_REASS; + } else { + error1 = MAP_ERROR_BAD_PROTOCOL; + next1 = IP6_MAP_NEXT_DROP; + } + + if (d0) { + /* MAP inbound security check */ + ip6_map_security_check(d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? 
IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + } + } + if (d1) { + /* MAP inbound security check */ + ip6_map_security_check(d1, ip41, ip61, &next1, &error1); + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && + next1 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d1->mtu && (clib_host_to_net_u16(ip41->length) > d1->mtu))) { + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.flags = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p1)->ip_frag.mtu = d0->mtu; + next1 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next1 = ip6_map_ip4_lookup_bypass(p1, ip41) ? IP6_MAP_NEXT_IP4_REWRITE : next1; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index1, 1, + clib_net_to_host_u16(ip41->length)); + } + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + } + + if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p1, sizeof(*tr)); + tr->map_domain_index = map_domain_index1; + tr->port = port1; + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0 = 0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0 = ~0; + u32 next0 = IP6_MAP_NEXT_IP4_LOOKUP; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + vlib_buffer_advance(p0, 
sizeof(ip6_header_t)); + ip40 = vlib_buffer_get_current(p0); + + /* + * Encapsulated IPv4 packet + * - IPv4 fragmented -> Pass to virtual reassembly unless security check disabled + * - Lookup/Rewrite or Fragment node in case of packet > MTU + * Fragmented IPv6 packet + * ICMP IPv6 packet + * - Error -> Pass to ICMPv6/ICMPv4 relay + * - Info -> Pass to IPv6 local + * Anything else -> drop + */ + if (PREDICT_TRUE(ip60->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip60->payload_length) > 20)) { + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && + clib_net_to_host_u16(ip60->payload_length) > sizeof(icmp46_header_t)) { + icmp46_header_t *icmp = (void *)(ip60 + 1); + next0 = (icmp->type == ICMP6_echo_request || icmp->type == ICMP6_echo_reply) ? + IP6_MAP_NEXT_IP6_LOCAL : IP6_MAP_NEXT_IP6_ICMP_RELAY; + } else if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION && + (((ip6_frag_hdr_t *)(ip60+1))->next_hdr == IP_PROTOCOL_IP_IN_IP)) { + next0 = IP6_MAP_NEXT_IP6_REASS; + } else { + error0 = MAP_ERROR_BAD_PROTOCOL; + } + + if (d0) { + /* MAP inbound security check */ + ip6_map_security_check(d0, ip40, ip60, &next0, &error0); + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && + next0 == IP6_MAP_NEXT_IP4_LOOKUP)) { + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu))) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_NEXT_IP4_FRAGMENT; + } else { + next0 = ip6_map_ip4_lookup_bypass(p0, ip40) ? 
IP6_MAP_NEXT_IP4_REWRITE : next0; + } + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + } + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = (u16)port0; + } + + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + + +static_always_inline void +ip6_map_ip6_reass_prepare(vlib_main_t *vm, vlib_node_runtime_t *node, map_ip6_reass_t *r, + u32 **fragments_ready, u32 **fragments_to_drop) +{ + ip4_header_t *ip40; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + vlib_buffer_t *p0; + + if(!r->ip4_header.ip_version_and_header_length) + return; + + //The IP header is here, we need to check for packets + //that can be forwarded + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + if (r->fragments[i].pi == ~0 || + ((!r->fragments[i].next_data_len) && (r->fragments[i].next_data_offset != (0xffff)))) + continue; + + p0 = vlib_get_buffer(vm, r->fragments[i].pi); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)(ip60 + 1); + ip40 = (ip4_header_t *)(frag0 + 1); + + if (ip6_frag_hdr_offset(frag0)) { + //Not first fragment, add the IPv4 header + memcpy(ip40, &r->ip4_header, 20); + } + +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->forwarded += clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0); +#endif + + if (ip6_frag_hdr_more(frag0)) { + //Not last fragment, we copy end of next + memcpy(u8_ptr_add(ip60, p0->current_length), r->fragments[i].next_data, 20); + p0->current_length += 20; + ip60->payload_length = u16_net_add(ip60->payload_length, 20); + } + + if (!ip4_is_fragment(ip40)) { 
+ ip40->fragment_id = frag_id_6to4(frag0->identification); + ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip6_frag_hdr_offset(frag0)); + } else { + ip40->flags_and_fragment_offset = clib_host_to_net_u16(ip4_get_fragment_offset(ip40) + ip6_frag_hdr_offset(frag0)); + } + + if (ip6_frag_hdr_more(frag0)) + ip40->flags_and_fragment_offset |= clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip40->length = clib_host_to_net_u16(p0->current_length - sizeof(*ip60) - sizeof(*frag0)); + ip40->checksum = ip4_header_checksum(ip40); + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->offset = ip4_get_fragment_offset(ip40); + tr->frag_len = clib_net_to_host_u16(ip40->length) - sizeof(*ip40); + tr->out = 1; + } + + vec_add1(*fragments_ready, r->fragments[i].pi); + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + map_main.ip6_reass_buffered_counter--; + + //TODO: Best solution would be that ip6_map handles extension headers + // and ignores atomic fragment. But in the meantime, let's just copy the header. 
+ + u8 protocol = frag0->next_hdr; + memmove(u8_ptr_add(ip40, - sizeof(*ip60)), ip60, sizeof(*ip60)); + ((ip6_header_t *)u8_ptr_add(ip40, - sizeof(*ip60)))->protocol = protocol; + vlib_buffer_advance(p0, sizeof(*frag0)); + } +} + +void +map_ip6_drop_pi(u32 pi) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index); + vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP6_REASS_NEXT_DROP, pi); +} + +void +map_ip4_drop_pi(u32 pi) +{ + vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *n = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index); + vlib_set_next_frame_buffer(vm, n, IP6_MAP_IP4_REASS_NEXT_DROP, pi); +} + +/* + * ip6_reass + * TODO: We should count the number of successfully + * transmitted fragment bytes and compare that to the last fragment + * offset such that we can free the reassembly structure when all fragments + * have been forwarded. + */ +static uword +ip6_map_ip6_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip6_reass_node.index); + u32 *fragments_to_drop = NULL; + u32 *fragments_ready = NULL; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + u16 offset; + u16 next_offset; + u16 frag_len; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)(ip60 + 1); + offset = clib_host_to_net_u16(frag0->fragment_offset_and_more) & (~7); + 
frag_len = clib_net_to_host_u16(ip60->payload_length) - sizeof(*frag0); + next_offset = ip6_frag_hdr_more(frag0) ? (offset + frag_len) : (0xffff); + + //FIXME: Support other extension headers, maybe + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip6_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->offset = offset; + tr->frag_len = frag_len; + tr->out = 0; + } + + map_ip6_reass_lock(); + map_ip6_reass_t *r = map_ip6_reass_get(&ip60->src_address, &ip60->dst_address, + frag0->identification, frag0->next_hdr, &fragments_to_drop); + //FIXME: Use better error codes + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_FALSE((frag_len <= 20 && + (ip6_frag_hdr_more(frag0) || (!offset))))) { + //Very small fragment are restricted to the last one and + //can't be the first one + error0 = MAP_ERROR_FRAGMENT_MALFORMED; + } else if (map_ip6_reass_add_fragment(r, pi0, offset, next_offset, (u8 *)(frag0 + 1), frag_len)) { + map_ip6_reass_free(r, &fragments_to_drop); + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else { +#ifdef MAP_IP6_REASS_COUNT_BYTES + if (!ip6_frag_hdr_more(frag0)) + r->expected_total = offset + frag_len; +#endif + ip6_map_ip6_reass_prepare(vm, node, r, &fragments_ready, &fragments_to_drop); +#ifdef MAP_IP6_REASS_COUNT_BYTES + if(r->forwarded >= r->expected_total) + map_ip6_reass_free(r, &fragments_to_drop); +#endif + } + map_ip6_reass_unlock(); + + if (error0 == MAP_ERROR_NONE) { + if (frag_len > 20) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + //All data from that packet was copied no need to keep it, but this is not an error + p0->error = error_node->errors[MAP_ERROR_NONE]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, IP6_MAP_IP6_REASS_NEXT_DROP); + } + } else { + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, 
pi0, IP6_MAP_IP6_REASS_NEXT_DROP); + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + map_send_all_to_node(vm, fragments_ready, node, + &error_node->errors[MAP_ERROR_NONE], + IP6_MAP_IP6_REASS_NEXT_IP6_MAP); + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP6_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_ready); + return frame->n_vectors; +} + +/* + * ip6_ip4_virt_reass + */ +static uword +ip6_map_ip4_reass (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_ip4_reass_node.index); + map_main_t *mm = &map_main; + vlib_combined_counter_main_t *cm = mm->domain_counters; + u32 cpu_index = os_get_cpu_number(); + u32 *fragments_to_drop = NULL; + u32 *fragments_to_loopback = NULL; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + map_domain_t *d0; + ip4_header_t *ip40; + ip6_header_t *ip60; + i32 port0 = 0; + u32 map_domain_index0; + u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; + u8 cached = 0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip40 = vlib_buffer_get_current(p0); + ip60 = ((ip6_header_t *)ip40) - 1; + + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip40->src_address.as_u32, + &map_domain_index0, &error0); + + map_ip4_reass_lock(); + //This node only deals with fragmented ip4 + map_ip4_reass_t *r = map_ip4_reass_get(ip40->src_address.as_u32, ip40->dst_address.as_u32, + 
ip40->fragment_id, ip40->protocol, &fragments_to_drop); + if (PREDICT_FALSE(!r)) { + // Could not create a caching entry + error0 = MAP_ERROR_FRAGMENT_MEMORY; + } else if (PREDICT_TRUE(ip4_get_fragment_offset(ip40))) { + // This is a fragment + if (r->port >= 0) { + // We know the port already + port0 = r->port; + } else if (map_ip4_reass_add_fragment(r, pi0)) { + // Not enough space for caching + error0 = MAP_ERROR_FRAGMENT_MEMORY; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + cached = 1; + } + } else if ((port0 = ip4_get_port(ip40, MAP_SENDER, p0->current_length)) < 0) { + // Could not find port from first fragment. Stop reassembling. + error0 = MAP_ERROR_BAD_PROTOCOL; + port0 = 0; + map_ip4_reass_free(r, &fragments_to_drop); + } else { + // Found port. Remember it and loopback saved fragments + r->port = port0; + map_ip4_reass_get_fragments(r, &fragments_to_loopback); + } + +#ifdef MAP_IP4_REASS_COUNT_BYTES + if (!cached && r) { + r->forwarded += clib_host_to_net_u16(ip40->length) - 20; + if (!ip4_get_fragment_more(ip40)) + r->expected_total = ip4_get_fragment_offset(ip40) * 8 + clib_host_to_net_u16(ip40->length) - 20; + if(r->forwarded >= r->expected_total) + map_ip4_reass_free(r, &fragments_to_drop); + } +#endif + + map_ip4_reass_unlock(); + + if(PREDICT_TRUE(error0 == MAP_ERROR_NONE)) + error0 = ip6_map_sec_check(d0, port0, ip40, ip60) ? 
MAP_ERROR_NONE : MAP_ERROR_DECAP_SEC_CHECK; + + if (PREDICT_FALSE(d0->mtu && (clib_host_to_net_u16(ip40->length) > d0->mtu) && + error0 == MAP_ERROR_NONE && !cached)) { + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.flags = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + vnet_buffer(p0)->ip_frag.mtu = d0->mtu; + next0 = IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT; + } + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_ip6_map_ip4_reass_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = map_domain_index0; + tr->port = port0; + tr->cached = cached; + } + + if (cached) { + //Dequeue the packet + n_left_to_next++; + to_next--; + } else { + if (error0 == MAP_ERROR_NONE) + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, map_domain_index0, 1, + clib_net_to_host_u16(ip40->length)); + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_IP4_REASS_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + + //Loopback when we reach the end of the inpu vector + if(n_left_from == 0 && vec_len(fragments_to_loopback)) { + from = vlib_frame_vector_args(frame); + u32 len = vec_len(fragments_to_loopback); + if(len <= VLIB_FRAME_SIZE) { + memcpy(from, fragments_to_loopback, sizeof(u32)*len); + n_left_from = len; + vec_reset_length(fragments_to_loopback); + } else { + memcpy(from, fragments_to_loopback + (len - VLIB_FRAME_SIZE), sizeof(u32)*VLIB_FRAME_SIZE); + n_left_from = VLIB_FRAME_SIZE; + _vec_len(fragments_to_loopback) = len - VLIB_FRAME_SIZE; + } + } + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + map_send_all_to_node(vm, fragments_to_drop, node, + &error_node->errors[MAP_ERROR_FRAGMENT_DROPPED], + IP6_MAP_IP4_REASS_NEXT_DROP); + + vec_free(fragments_to_drop); + vec_free(fragments_to_loopback); + return frame->n_vectors; +} + +/* + * ip6_icmp_relay + */ 
+static uword +ip6_map_icmp_relay (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_icmp_relay_node.index); + map_main_t *mm = &map_main; + u32 cpu_index = os_get_cpu_number(); + u16 *fragment_ids, *fid; + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + /* Get random fragment IDs for replies. */ + fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer, n_left_from * sizeof (fragment_ids[0])); + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + /* Single loop */ + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0 = MAP_ERROR_NONE; + ip6_header_t *ip60; + u32 next0 = IP6_ICMP_RELAY_NEXT_IP4_LOOKUP; + u32 mtu; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + u16 tlen = clib_net_to_host_u16(ip60->payload_length); + + /* + * In: + * IPv6 header (40) + * ICMPv6 header (8) + * IPv6 header (40) + * Original IPv4 header / packet + * Out: + * New IPv4 header + * New ICMP header + * Original IPv4 header / packet + */ + + /* Need at least ICMP(8) + IPv6(40) + IPv4(20) + L4 header(8) */ + if (tlen < 76) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + icmp46_header_t *icmp60 = (icmp46_header_t *)(ip60 + 1); + ip6_header_t *inner_ip60 = (ip6_header_t *)(icmp60 + 2); + + if (inner_ip60->protocol != IP_PROTOCOL_IP_IN_IP) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + ip4_header_t *inner_ip40 = (ip4_header_t *)(inner_ip60 + 1); + vlib_buffer_advance(p0, 60); /* sizeof ( IPv6 + ICMP + IPv6 - IPv4 - ICMP ) */ + ip4_header_t *new_ip40 = vlib_buffer_get_current(p0); + icmp46_header_t *new_icmp40 = 
(icmp46_header_t *)(new_ip40 + 1); + + /* + * Relay according to RFC2473, section 8.3 + */ + switch (icmp60->type) { + case ICMP6_destination_unreachable: + case ICMP6_time_exceeded: + case ICMP6_parameter_problem: + /* Type 3 - destination unreachable, Code 1 - host unreachable */ + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = ICMP4_destination_unreachable_destination_unreachable_host; + break; + + case ICMP6_packet_too_big: + /* Type 3 - destination unreachable, Code 4 - packet too big */ + /* Potential TODO: Adjust domain tunnel MTU based on the value received here */ + mtu = clib_net_to_host_u32(*((u32 *)(icmp60 + 1))); + + /* Check DF flag */ + if (!(inner_ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT))) { + error0 = MAP_ERROR_ICMP_RELAY; + goto error; + } + + new_icmp40->type = ICMP4_destination_unreachable; + new_icmp40->code = ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set; + *((u32 *)(new_icmp40 + 1)) = clib_host_to_net_u32(mtu < 1280 ? 1280 : mtu); + break; + + default: + error0 = MAP_ERROR_ICMP_RELAY; + break; + } + + /* + * Ensure the total ICMP packet is no longer than 576 bytes (RFC1812) + */ + new_ip40->ip_version_and_header_length = 0x45; + new_ip40->tos = 0; + u16 nlen = (tlen - 20) > 576 ? 
576 : tlen - 20; + new_ip40->length = clib_host_to_net_u16(nlen); + new_ip40->fragment_id = fid[0]; fid++; + new_ip40->ttl = 64; + new_ip40->protocol = IP_PROTOCOL_ICMP; + new_ip40->src_address = mm->icmp_src_address; + new_ip40->dst_address = inner_ip40->src_address; + new_ip40->checksum = ip4_header_checksum(new_ip40); + + new_icmp40->checksum = 0; + ip_csum_t sum = ip_incremental_checksum(0, new_icmp40, nlen - 20); + new_icmp40->checksum = ~ip_csum_fold(sum); + + vlib_increment_simple_counter(&mm->icmp_relayed, cpu_index, 0, 1); + + error: + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + map_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->map_domain_index = 0; + tr->port = 0; + } + + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_ICMP_RELAY_NEXT_DROP; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; + +} + +static char *map_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_map_node) = { + .function = ip6_map, + .name = "ip6-map", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + + .n_next_nodes = IP6_MAP_N_NEXT, + .next_nodes = { + [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", +#ifdef MAP_SKIP_IP6_LOOKUP + [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite-transit", +#endif + [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", + [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", + [IP6_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", + [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", + [IP6_MAP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_ip6_reass_node) = { + .function = ip6_map_ip6_reass, + .name = "ip6-map-ip6-reass", + .vector_size = 
sizeof(u32), + .format_trace = format_ip6_map_ip6_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP6_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP6_REASS_NEXT_IP6_MAP] = "ip6-map", + [IP6_MAP_IP6_REASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_ip4_reass_node) = { + .function = ip6_map_ip4_reass, + .name = "ip6-map-ip4-reass", + .vector_size = sizeof(u32), + .format_trace = format_ip6_map_ip4_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_MAP_IP4_REASS_N_NEXT, + .next_nodes = { + [IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAP_IP4_REASS_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP6_MAP_IP4_REASS_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = { + .function = ip6_map_icmp_relay, + .name = "ip6-map-icmp-relay", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, //FIXME + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = MAP_N_ERROR, + .error_strings = map_error_strings, + .n_next_nodes = IP6_ICMP_RELAY_N_NEXT, + .next_nodes = { + [IP6_ICMP_RELAY_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_ICMP_RELAY_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_map_t.c b/vnet/vnet/map/ip6_map_t.c new file mode 100644 index 00000000000..7720e06fba4 --- /dev/null +++ b/vnet/vnet/map/ip6_map_t.c @@ -0,0 +1,1141 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "map.h" + +#include "../ip/ip_frag.h" + +#define IP6_MAP_T_DUAL_LOOP + +typedef enum { + IP6_MAPT_NEXT_MAPT_TCP_UDP, + IP6_MAPT_NEXT_MAPT_ICMP, + IP6_MAPT_NEXT_MAPT_FRAGMENTED, + IP6_MAPT_NEXT_DROP, + IP6_MAPT_N_NEXT +} ip6_mapt_next_t; + +typedef enum { + IP6_MAPT_ICMP_NEXT_IP4_LOOKUP, + IP6_MAPT_ICMP_NEXT_IP4_FRAG, + IP6_MAPT_ICMP_NEXT_DROP, + IP6_MAPT_ICMP_N_NEXT +} ip6_mapt_icmp_next_t; + +typedef enum { + IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP, + IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG, + IP6_MAPT_TCP_UDP_NEXT_DROP, + IP6_MAPT_TCP_UDP_N_NEXT +} ip6_mapt_tcp_udp_next_t; + +typedef enum { + IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP, + IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG, + IP6_MAPT_FRAGMENTED_NEXT_DROP, + IP6_MAPT_FRAGMENTED_N_NEXT +} ip6_mapt_fragmented_next_t; + +static_always_inline int +ip6_map_fragment_cache (ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d, u16 port) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(map_get_ip4(&ip6->src_address), ip6_map_t_embedded_address(d, &ip6->dst_address), + frag_id_6to4(frag->identification), + (ip6->protocol == IP_PROTOCOL_ICMP6) ? 
IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + if (r) + r->port = port; + + map_ip4_reass_unlock(); + return !r; +} + +/* Returns the associated port or -1 */ +static_always_inline i32 +ip6_map_fragment_get(ip6_header_t *ip6, ip6_frag_hdr_t *frag, map_domain_t *d) +{ + u32 *ignore = NULL; + map_ip4_reass_lock(); + map_ip4_reass_t *r = map_ip4_reass_get(map_get_ip4(&ip6->src_address), ip6_map_t_embedded_address(d, &ip6->dst_address), + frag_id_6to4(frag->identification), + (ip6->protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); + i32 ret = r?r->port:-1; + map_ip4_reass_unlock(); + return ret; +} + +static_always_inline u8 +ip6_translate_tos(const ip6_header_t *ip6) +{ +#ifdef IP6_MAP_T_OVERRIDE_TOS + return IP6_MAP_T_OVERRIDE_TOS; +#else + return (clib_net_to_host_u32(ip6->ip_version_traffic_class_and_flow_label) & 0x0ff00000) >> 20; +#endif +} + +//TODO: Find right place in memory for that +static u8 icmp6_to_icmp_updater_pointer_table[] = + { 0, 1,~0,~0, + 2, 2, 9, 8, + 12,12,12,12, + 12,12,12,12, + 12,12,12,12, + 12,12,12,12, + 24,24,24,24, + 24,24,24,24, + 24,24,24,24, + 24,24,24,24 + }; + +static_always_inline int +ip6_icmp_to_icmp6_in_place (icmp46_header_t *icmp, u32 icmp_len, + i32 *sender_port, ip6_header_t **inner_ip6) +{ + *inner_ip6 = NULL; + switch (icmp->type) { + case ICMP6_echo_request: + *sender_port = ((u16 *)icmp)[2]; + icmp->type = ICMP4_echo_request; + break; + case ICMP6_echo_reply: + *sender_port = ((u16 *)icmp)[2]; + icmp->type = ICMP4_echo_reply; + break; + case ICMP6_destination_unreachable: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + switch (icmp->code) { + case ICMP6_destination_unreachable_no_route_to_destination: //0 + case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2 + case ICMP6_destination_unreachable_address_unreachable: //3 + icmp->type = ICMP4_destination_unreachable; + icmp->code = 
ICMP4_destination_unreachable_destination_unreachable_host; + break; + case ICMP6_destination_unreachable_destination_administratively_prohibited: //1 + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_communication_administratively_prohibited; + break; + case ICMP6_destination_unreachable_port_unreachable: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + default: + return -1; + } + break; + case ICMP6_packet_too_big: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + icmp->type = ICMP4_destination_unreachable; + icmp->code = 4; + { + u32 advertised_mtu = clib_net_to_host_u32(*((u32 *)(icmp + 1))); + advertised_mtu -= 20; + //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20) + ((u16 *)(icmp))[3] = clib_host_to_net_u16(advertised_mtu); + } + break; + + case ICMP6_time_exceeded: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + icmp->type = ICMP4_time_exceeded; + break; + + case ICMP6_parameter_problem: + *inner_ip6 = (ip6_header_t *) u8_ptr_add(icmp, 8); + *sender_port = ip6_get_port(*inner_ip6, MAP_RECEIVER, icmp_len); + + switch (icmp->code) { + case ICMP6_parameter_problem_erroneous_header_field: + icmp->type = ICMP4_parameter_problem; + icmp->code = ICMP4_parameter_problem_pointer_indicates_error; + u32 pointer = clib_net_to_host_u32(*((u32*)(icmp + 1))); + if (pointer >= 40) + return -1; + + ((u8*)(icmp + 1))[0] = icmp6_to_icmp_updater_pointer_table[pointer]; + break; + case ICMP6_parameter_problem_unrecognized_next_header: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + case ICMP6_parameter_problem_unrecognized_option: + default: + return -1; + } + break; + default: + return -1; + break; + } + return 0; 
+} + +static_always_inline void +_ip6_map_t_icmp (map_domain_t *d, vlib_buffer_t *p, u8 *error) +{ + ip6_header_t *ip6, *inner_ip6; + ip4_header_t *ip4, *inner_ip4; + u32 ip6_pay_len; + icmp46_header_t *icmp; + i32 sender_port; + ip_csum_t csum; + u32 ip4_sadr, inner_ip4_dadr; + + ip6 = vlib_buffer_get_current(p); + ip6_pay_len = clib_net_to_host_u16(ip6->payload_length); + icmp = (icmp46_header_t *)(ip6 + 1); + ASSERT(ip6_pay_len + sizeof(*ip6) <= p->current_length); + + if (ip6->protocol != IP_PROTOCOL_ICMP6) { + //No extensions headers allowed here + //TODO: SR header + *error = MAP_ERROR_MALFORMED; + return; + } + + //There are no fragmented ICMP messages, so no extension header for now + + if (ip6_icmp_to_icmp6_in_place(icmp, ip6_pay_len, &sender_port, &inner_ip6)) { + //TODO: In case of 1:1 mapping it is not necessary to have the sender port + *error = MAP_ERROR_ICMP; + return; + } + + if (sender_port < 0) { + // In case of 1:1 mapping, we don't care about the port + if(d->ea_bits_len == 0 && d->rules) { + sender_port = 0; + } else { + *error = MAP_ERROR_ICMP; + return; + } + } + + //Security check + //Note that this prevents an intermediate IPv6 router from answering the request + ip4_sadr = map_get_ip4(&ip6->src_address); + if (ip6->src_address.as_u64[0] != map_get_pfx_net(d, ip4_sadr, sender_port) || + ip6->src_address.as_u64[1] != map_get_sfx_net(d, ip4_sadr, sender_port)) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + if (inner_ip6) { + u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, inner_frag_id; + u8 *inner_l4, inner_protocol; + + //We have two headers to translate + // FROM + // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ... + // Handled cases: + // [ IPv6 ][IC][ IPv6 ][L4 header ... + // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ... + // TO + // [ IPv4][IC][ IPv4][L4 header ... 
+ + //TODO: This was already done deep in ip6_icmp_to_icmp6_in_place + //We shouldn't have to do it again + if (ip6_parse(inner_ip6, ip6_pay_len - 8, + &inner_protocol, &inner_l4_offset, &inner_frag_offset)) { + *error = MAP_ERROR_MALFORMED; + return; + } + + inner_l4 = u8_ptr_add(inner_ip6, inner_l4_offset); + inner_ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - sizeof(*inner_ip4)); + if (inner_frag_offset) { + ip6_frag_hdr_t *inner_frag = (ip6_frag_hdr_t *) u8_ptr_add(inner_ip6, inner_frag_offset); + inner_frag_id = frag_id_6to4(inner_frag->identification); + } else { + inner_frag_id = 0; + } + + //Do the translation of the inner packet + if (inner_protocol == IP_PROTOCOL_TCP) { + inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 16); + } else if (inner_protocol == IP_PROTOCOL_UDP) { + inner_L4_checksum = (u16 *) u8_ptr_add(inner_l4, 6); + } else if (inner_protocol == IP_PROTOCOL_ICMP6) { + icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; + csum = inner_icmp->checksum; + csum = ip_csum_sub_even(csum, *((u16 *)inner_icmp)); + //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded + inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ? 
+ ICMP4_echo_request : ICMP4_echo_reply; + csum = ip_csum_add_even(csum, *((u16 *)inner_icmp)); + inner_icmp->checksum = ip_csum_fold(csum); + inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later + inner_L4_checksum = &inner_icmp->checksum; + } else { + *error = MAP_ERROR_BAD_PROTOCOL; + return; + } + + csum = *inner_L4_checksum; + csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even(csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even(csum, inner_ip6->dst_address.as_u64[1]); + + //Sanity check of the outer destination address + if (ip6->dst_address.as_u64[0] != inner_ip6->src_address.as_u64[0] && + ip6->dst_address.as_u64[1] != inner_ip6->src_address.as_u64[1]) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + //Security check of inner packet + inner_ip4_dadr = map_get_ip4(&inner_ip6->dst_address); + if (inner_ip6->dst_address.as_u64[0] != map_get_pfx_net(d, inner_ip4_dadr, sender_port) || + inner_ip6->dst_address.as_u64[1] != map_get_sfx_net(d, inner_ip4_dadr, sender_port)) { + *error = MAP_ERROR_SEC_CHECK; + return; + } + + inner_ip4->dst_address.as_u32 = inner_ip4_dadr; + inner_ip4->src_address.as_u32 = ip6_map_t_embedded_address(d, &inner_ip6->src_address); + inner_ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + inner_ip4->tos = ip6_translate_tos(inner_ip6); + inner_ip4->length = u16_net_add(inner_ip6->payload_length, sizeof(*ip4) + sizeof(*ip6) - + inner_l4_offset); + inner_ip4->fragment_id = inner_frag_id; + inner_ip4->flags_and_fragment_offset = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + inner_ip4->ttl = inner_ip6->hop_limit; + inner_ip4->protocol = inner_protocol; + inner_ip4->checksum = ip4_header_checksum(inner_ip4); + + if (inner_ip4->protocol == IP_PROTOCOL_ICMP) { + //Remove remainings of the pseudo-header in the csum + csum = ip_csum_sub_even(csum, 
clib_host_to_net_u16(IP_PROTOCOL_ICMP6)); + csum = ip_csum_sub_even(csum, inner_ip4->length - sizeof(*inner_ip4)); + } else { + //Update to new pseudo-header + csum = ip_csum_add_even(csum, inner_ip4->src_address.as_u32); + csum = ip_csum_add_even(csum, inner_ip4->dst_address.as_u32); + } + *inner_L4_checksum = ip_csum_fold(csum); + + //Move up icmp header + ip4 = (ip4_header_t *) u8_ptr_add(inner_l4, - 2 * sizeof(*ip4) - 8); + memcpy(u8_ptr_add(inner_l4, - sizeof(*ip4) - 8), icmp, 8); + icmp = (icmp46_header_t *) u8_ptr_add(inner_l4, - sizeof(*ip4) - 8); + } else { + //Only one header to translate + ip4 = (ip4_header_t *) u8_ptr_add(ip6, sizeof(*ip6) - sizeof(*ip4)); + } + vlib_buffer_advance(p, (u32) (((u8 *)ip4) - ((u8 *)ip6))); + + ip4->dst_address.as_u32 = ip6_map_t_embedded_address(d, &ip6->dst_address); + ip4->src_address.as_u32 = ip4_sadr; + ip4->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos(ip6); + ip4->fragment_id = 0; + ip4->flags_and_fragment_offset = 0; + ip4->ttl = ip6->hop_limit; + ip4->protocol = IP_PROTOCOL_ICMP; + //TODO fix the length depending on offset length + ip4->length = u16_net_add(ip6->payload_length, + (inner_ip6 == NULL)?sizeof(*ip4):(2*sizeof(*ip4) - sizeof(*ip6))); + ip4->checksum = ip4_header_checksum(ip4); + + //TODO: We could do an easy diff-checksum for echo requests/replies + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_incremental_checksum(0, icmp, clib_net_to_host_u16(ip4->length) - sizeof(*ip4)); + icmp->checksum = ~ip_csum_fold (csum); +} + +static uword +ip6_map_t_icmp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip6_map_t_icmp_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + vlib_combined_counter_main_t *cm = 
map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + while (n_left_from > 0) { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + u8 error0; + ip6_mapt_icmp_next_t next0; + map_domain_t *d0; + u16 len0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, pi0); + len0 = clib_net_to_host_u16(((ip6_header_t *)vlib_buffer_get_current(p0))->payload_length); + d0 = pool_elt_at_index(map_main.domains, vnet_buffer(p0)->map_t.map_domain_index); + _ip6_map_t_icmp(d0, p0, &error0); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + len0); + } else { + next0 = IP6_MAPT_ICMP_NEXT_DROP; + } + + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip6_map_t_fragmented (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while(n_left_from >= 4 && n_left_to_next >= 
2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + ip6_frag_hdr_t *frag0, *frag1; + ip4_header_t *ip40, *ip41; + u16 frag_id0, frag_offset0, + frag_id1, frag_offset1; + u8 frag_more0, frag_more1; + ip6_mapt_fragmented_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + frag1 = (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + ip41 = (ip4_header_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p1, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + + frag_id0 = frag_id_6to4(frag0->identification); + frag_id1 = frag_id_6to4(frag1->identification); + frag_more0 = ip6_frag_hdr_more(frag0); + frag_more1 = ip6_frag_hdr_more(frag1); + frag_offset0 = ip6_frag_hdr_offset(frag0); + frag_offset1 = ip6_frag_hdr_offset(frag1); + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip41->tos = ip6_translate_tos(ip61); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) - 
vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60)); + ip41->length = u16_net_add(ip61->payload_length, + sizeof(*ip40) - vnet_buffer(p1)->map_t.v6.l4_offset + sizeof(*ip60)); + ip40->fragment_id = frag_id0; + ip41->fragment_id = frag_id1; + ip40->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip41->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset1 | (frag_more1?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip40->ttl = ip60->hop_limit; + ip41->ttl = ip61->hop_limit; + ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? + IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol; + ip41->protocol = (vnet_buffer(p1)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? + IP_PROTOCOL_ICMP:vnet_buffer(p1)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + ip41->checksum = ip4_header_checksum(ip41); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, + to_next, n_left_to_next, pi0, pi1, + next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + ip6_frag_hdr_t *frag0; + ip4_header_t *ip40; + u16 frag_id0; + u8 frag_more0; + u16 frag_offset0; + ip6_mapt_fragmented_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + p0 = 
vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + frag0 = (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + ip40 = (ip4_header_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + + frag_id0 = frag_id_6to4(frag0->identification); + frag_more0 = ip6_frag_hdr_more(frag0); + frag_offset0 = ip6_frag_hdr_offset(frag0); + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) - vnet_buffer(p0)->map_t.v6.l4_offset + sizeof(*ip60)); + ip40->fragment_id = frag_id0; + ip40->flags_and_fragment_offset = + clib_host_to_net_u16(frag_offset0 | (frag_more0?IP4_HEADER_FLAG_MORE_FRAGMENTS:0)); + ip40->ttl = ip60->hop_limit; + ip40->protocol = (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6)? 
+ IP_PROTOCOL_ICMP:vnet_buffer(p0)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static uword +ip6_map_t_tcp_udp (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while(n_left_from >= 4 && n_left_to_next >= 2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + ip_csum_t csum0, csum1; + ip4_header_t *ip40, *ip41; + u16 fragment_id0, flags0, *checksum0, + fragment_id1, flags1, *checksum1; + ip6_mapt_tcp_udp_next_t next0, next1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + ip41 = (ip4_header_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p1, 
vnet_buffer(p1)->map_t.v6.l4_offset - sizeof(*ip40)); + checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset); + checksum1 = (u16 *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.checksum_offset); + + csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]); + csum1 = ip_csum_sub_even(*checksum1, ip61->src_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]); + csum1 = ip_csum_sub_even(csum1, ip61->src_address.as_u64[1]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]); + csum1 = ip_csum_sub_even(csum0, ip61->dst_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]); + csum1 = ip_csum_sub_even(csum1, ip61->dst_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.daddr); + csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.daddr); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr); + csum1 = ip_csum_add_even(csum1, vnet_buffer(p1)->map_t.v6.saddr); + *checksum0 = ip_csum_fold(csum0); + *checksum1 = ip_csum_fold(csum1); + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) { + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + fragment_id0 = frag_id_6to4(hdr->identification); + flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id0 = 0; + flags0 = 0; + } + + if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset)) { + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + fragment_id1 = frag_id_6to4(hdr->identification); + flags1 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id1 = 0; + flags1 = 0; + } + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip41->dst_address.as_u32 = vnet_buffer(p1)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip41->src_address.as_u32 = vnet_buffer(p1)->map_t.v6.saddr; + 
ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip41->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip41->tos = ip6_translate_tos(ip61); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset); + ip41->length = u16_net_add(ip61->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset); + ip40->fragment_id = fragment_id0; + ip41->fragment_id = fragment_id1; + ip40->flags_and_fragment_offset = flags0; + ip41->flags_and_fragment_offset = flags1; + ip40->ttl = ip60->hop_limit; + ip41->ttl = ip61->hop_limit; + ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol; + ip41->protocol = vnet_buffer(p1)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + ip41->checksum = ip4_header_checksum(ip41); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + if(vnet_buffer(p1)->map_t.mtu < p1->current_length) { + vnet_buffer(p1)->ip_frag.mtu = vnet_buffer(p1)->map_t.mtu; + vnet_buffer(p1)->ip_frag.header_offset = 0; + vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + u16 *checksum0; + ip_csum_t csum0; + ip4_header_t *ip40; + u16 fragment_id0; + u16 flags0; + ip6_mapt_tcp_udp_next_t next0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; + + p0 = vlib_get_buffer(vm, 
pi0); + ip60 = vlib_buffer_get_current(p0); + ip40 = (ip4_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + vlib_buffer_advance(p0, vnet_buffer(p0)->map_t.v6.l4_offset - sizeof(*ip40)); + checksum0 = (u16 *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.checksum_offset); + + //TODO: This can probably be optimized + csum0 = ip_csum_sub_even(*checksum0, ip60->src_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->src_address.as_u64[1]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[0]); + csum0 = ip_csum_sub_even(csum0, ip60->dst_address.as_u64[1]); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.daddr); + csum0 = ip_csum_add_even(csum0, vnet_buffer(p0)->map_t.v6.saddr); + *checksum0 = ip_csum_fold(csum0); + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset)) { + //Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + fragment_id0 = frag_id_6to4(hdr->identification); + flags0 = clib_host_to_net_u16(IP4_HEADER_FLAG_MORE_FRAGMENTS); + } else { + fragment_id0 = 0; + flags0 = 0; + } + + ip40->dst_address.as_u32 = vnet_buffer(p0)->map_t.v6.daddr; + ip40->src_address.as_u32 = vnet_buffer(p0)->map_t.v6.saddr; + ip40->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip40->tos = ip6_translate_tos(ip60); + ip40->length = u16_net_add(ip60->payload_length, + sizeof(*ip40) + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset); + ip40->fragment_id = fragment_id0; + ip40->flags_and_fragment_offset = flags0; + ip40->ttl = ip60->hop_limit; + ip40->protocol = vnet_buffer(p0)->map_t.v6.l4_protocol; + ip40->checksum = ip4_header_checksum(ip40); + + if(vnet_buffer(p0)->map_t.mtu < p0->current_length) { + //Send to fragmentation node if necessary + vnet_buffer(p0)->ip_frag.mtu = vnet_buffer(p0)->map_t.mtu; + vnet_buffer(p0)->ip_frag.header_offset = 0; + vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; + 
next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static_always_inline void +ip6_map_t_classify(vlib_buffer_t *p0, ip6_header_t *ip60, + map_domain_t *d0, i32 *src_port0, + u8 *error0, ip6_mapt_next_t *next0, + u32 l4_len0, ip6_frag_hdr_t *frag0) +{ + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset(frag0))) { + *next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + if(d0->ea_bits_len == 0 && d0->rules) { + *src_port0 = 0; + } else { + *src_port0 = ip6_map_fragment_get(ip60, frag0, d0); + *error0 = (*src_port0 != -1) ? *error0 : MAP_ERROR_FRAGMENT_DROPPED; + } + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) { + *error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) { + *error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : *error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6; + *next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + *src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) { + *error0 = l4_len0 < sizeof(icmp46_header_t) ? 
MAP_ERROR_MALFORMED : *error0; + *next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if(d0->ea_bits_len == 0 && d0->rules) { + *src_port0 = 0; + } else if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_request) { + *src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6)); + } + } else { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. + *error0 = MAP_ERROR_BAD_PROTOCOL; + } +} + +static uword +ip6_map_t (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_map_t_node.index); + vlib_combined_counter_main_t *cm = map_main.domain_counters; + u32 cpu_index = os_get_cpu_number(); + + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + +#ifdef IP6_MAP_T_DUAL_LOOP + while (n_left_from >= 4 && n_left_to_next >=2) { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip60, *ip61; + u8 error0, error1; + ip6_mapt_next_t next0, next1; + u32 l4_len0, l4_len1; + i32 src_port0, src_port1; + map_domain_t *d0, *d1; + ip6_frag_hdr_t *frag0, *frag1; + u32 saddr0, saddr1; + next0 = next1 = 0; //Because compiler whines + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + error0 = MAP_ERROR_NONE; + error1 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + p1 = vlib_get_buffer(vm, pi1); + ip60 = vlib_buffer_get_current(p0); + ip61 = vlib_buffer_get_current(p1); + + saddr0 = map_get_ip4(&ip60->src_address); + saddr1 = map_get_ip4(&ip61->src_address); + d0 = 
ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr0, + &vnet_buffer(p0)->map_t.map_domain_index, &error0); + d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr1, + &vnet_buffer(p1)->map_t.map_domain_index, &error1); + + vnet_buffer(p0)->map_t.v6.saddr = saddr0; + vnet_buffer(p1)->map_t.v6.saddr = saddr1; + vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address); + vnet_buffer(p1)->map_t.v6.daddr = ip6_map_t_embedded_address(d1, &ip61->dst_address); + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + vnet_buffer(p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; + + if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length, + &(vnet_buffer(p0)->map_t.v6.l4_protocol), + &(vnet_buffer(p0)->map_t.v6.l4_offset), + &(vnet_buffer(p0)->map_t.v6.frag_offset)))) { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + if (PREDICT_FALSE(ip6_parse(ip61, p1->current_length, + &(vnet_buffer(p1)->map_t.v6.l4_protocol), + &(vnet_buffer(p1)->map_t.v6.l4_offset), + &(vnet_buffer(p1)->map_t.v6.frag_offset)))) { + error1 = MAP_ERROR_MALFORMED; + next1 = IP6_MAPT_NEXT_DROP; + } + + src_port0 = src_port1 = -1; + l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) + + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset; + l4_len1 = (u32)clib_net_to_host_u16(ip61->payload_length) + + sizeof(*ip60) - vnet_buffer(p1)->map_t.v6.l4_offset; + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + frag1 = (ip6_frag_hdr_t *) u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset); + + ip6_map_t_classify(p0, ip60, d0, &src_port0, &error0, &next0, l4_len0, frag0); + ip6_map_t_classify(p1, ip61, d1, &src_port1, &error1, &next1, l4_len1, frag1); + + if (PREDICT_FALSE((src_port0 != -1) && ( + ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) || + ip60->src_address.as_u64[1] != map_get_sfx_net(d0, 
vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) { + error0 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE((src_port1 != -1) && ( + ip61->src_address.as_u64[0] != map_get_pfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1) || + ip61->src_address.as_u64[1] != map_get_sfx_net(d1, vnet_buffer(p1)->map_t.v6.saddr, src_port1)))) { + error1 = MAP_ERROR_SEC_CHECK; + } + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) && + (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip60, + (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset), + d0, src_port0); + } + + if (PREDICT_FALSE(vnet_buffer(p1)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset))) && + (src_port1 != -1) && (d1->ea_bits_len != 0 || !d1->rules) && (error1 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip61, + (ip6_frag_hdr_t *)u8_ptr_add(ip61, vnet_buffer(p1)->map_t.v6.frag_offset), + d1, src_port1); + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip60->payload_length)); + } + + if (PREDICT_TRUE(error1 == MAP_ERROR_NONE && next1 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p1)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip61->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; + next1 = (error1 != MAP_ERROR_NONE) ? 
IP6_MAPT_NEXT_DROP : next1; + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); + } +#endif + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip60; + u8 error0; + u32 l4_len0; + i32 src_port0; + map_domain_t *d0; + ip6_frag_hdr_t *frag0; + ip6_mapt_next_t next0 = 0; + u32 saddr; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + error0 = MAP_ERROR_NONE; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + //Save saddr in a different variable to not overwrite ip.adj_index + saddr = map_get_ip4(&ip60->src_address); + d0 = ip6_map_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], + (ip4_address_t *)&saddr, + &vnet_buffer(p0)->map_t.map_domain_index, &error0); + + //FIXME: What if d0 is null + vnet_buffer(p0)->map_t.v6.saddr = saddr; + vnet_buffer(p0)->map_t.v6.daddr = ip6_map_t_embedded_address(d0, &ip60->dst_address); + vnet_buffer(p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; + + if (PREDICT_FALSE(ip6_parse(ip60, p0->current_length, + &(vnet_buffer(p0)->map_t.v6.l4_protocol), + &(vnet_buffer(p0)->map_t.v6.l4_offset), + &(vnet_buffer(p0)->map_t.v6.frag_offset)))) { + error0 = MAP_ERROR_MALFORMED; + next0 = IP6_MAPT_NEXT_DROP; + } + + src_port0 = -1; + l4_len0 = (u32)clib_net_to_host_u16(ip60->payload_length) + + sizeof(*ip60) - vnet_buffer(p0)->map_t.v6.l4_offset; + frag0 = (ip6_frag_hdr_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset); + + + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + ip6_frag_hdr_offset(frag0))) { + src_port0 = ip6_map_fragment_get(ip60, frag0, d0); + error0 = (src_port0 != -1) ? 
error0 : MAP_ERROR_FRAGMENT_MEMORY; + next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_TCP)) { + error0 = l4_len0 < sizeof(tcp_header_t) ? MAP_ERROR_MALFORMED : error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 16; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (PREDICT_TRUE(vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_UDP)) { + error0 = l4_len0 < sizeof(udp_header_t) ? MAP_ERROR_MALFORMED : error0; + vnet_buffer(p0)->map_t.checksum_offset = vnet_buffer(p0)->map_t.v6.l4_offset + 6; + next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; + src_port0 = (i32) *((u16*)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset)); + } else if (vnet_buffer(p0)->map_t.v6.l4_protocol == IP_PROTOCOL_ICMP6) { + error0 = l4_len0 < sizeof(icmp46_header_t) ? MAP_ERROR_MALFORMED : error0; + next0 = IP6_MAPT_NEXT_MAPT_ICMP; + if (((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_reply || + ((icmp46_header_t *) u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset))->code == ICMP6_echo_request) + src_port0 = (i32) *((u16 *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.l4_offset + 6)); + } else { + //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. 
+ error0 = MAP_ERROR_BAD_PROTOCOL; + } + + //Security check + if (PREDICT_FALSE((src_port0 != -1) && ( + ip60->src_address.as_u64[0] != map_get_pfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0) || + ip60->src_address.as_u64[1] != map_get_sfx_net(d0, vnet_buffer(p0)->map_t.v6.saddr, src_port0)))) { + //Security check when src_port0 is not zero (non-first fragment, UDP or TCP) + error0 = MAP_ERROR_SEC_CHECK; + } + + //Fragmented first packet needs to be cached for following packets + if (PREDICT_FALSE(vnet_buffer(p0)->map_t.v6.frag_offset && + !ip6_frag_hdr_offset((ip6_frag_hdr_t *) + u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset))) && + (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) { + ip6_map_fragment_cache(ip60, + (ip6_frag_hdr_t *)u8_ptr_add(ip60, vnet_buffer(p0)->map_t.v6.frag_offset), + d0, src_port0); + } + + if (PREDICT_TRUE(error0 == MAP_ERROR_NONE && next0 != IP6_MAPT_NEXT_MAPT_ICMP)) { + vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_RX, cpu_index, + vnet_buffer(p0)->map_t.map_domain_index, 1, + clib_net_to_host_u16(ip60->payload_length)); + } + + next0 = (error0 != MAP_ERROR_NONE) ? 
IP6_MAPT_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, pi0, + next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +static char *map_t_error_strings[] = { +#define _(sym,string) string, + foreach_map_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { + .function = ip6_map_t_fragmented, + .name = "ip6-map-t-fragmented", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT, + .next_nodes = { + [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { + .function = ip6_map_t_icmp, + .name = "ip6-map-t-icmp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_ICMP_N_NEXT, + .next_nodes = { + [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { + .function = ip6_map_t_tcp_udp, + .name = "ip6-map-t-tcp-udp", + .vector_size = sizeof (u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT, + .next_nodes = { + [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", + }, +}; + +VLIB_REGISTER_NODE(ip6_map_t_node) = { + .function = ip6_map_t, + .name = 
"ip6-map-t", + .vector_size = sizeof(u32), + .format_trace = format_map_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = MAP_N_ERROR, + .error_strings = map_t_error_strings, + + .n_next_nodes = IP6_MAPT_N_NEXT, + .next_nodes = { + [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", + [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", + [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented", + [IP6_MAPT_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/ip6_sixrd.c b/vnet/vnet/map/ip6_sixrd.c new file mode 100644 index 00000000000..0bd0cf3a303 --- /dev/null +++ b/vnet/vnet/map/ip6_sixrd.c @@ -0,0 +1,129 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *--------------------------------------------------------------------------- + */ +/* + * Defines used for testing various optimisation schemes + */ +#define SIXRD_ENCAP_DUAL 0 + +#include "sixrd.h" + +vlib_node_registration_t ip6_sixrd_node; + +typedef enum { + IP6_SIXRD_NEXT_IP4_LOOKUP, + IP6_SIXRD_NEXT_DROP, + IP6_SIXRD_N_NEXT, +} ip6_sixrd_next_t; + +/* + * ip6_sixrd + */ +static uword +ip6_sixrd (vlib_main_t *vm, + vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, ip6_sixrd_node.index); + u32 encap = 0; + from = vlib_frame_vector_args(frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0; + vlib_buffer_t *p0; + sixrd_domain_t *d0; + u8 error0 = SIXRD_ERROR_NONE; + ip6_header_t *ip60; + ip4_header_t *ip4h0; + u32 next0 = IP6_SIXRD_NEXT_IP4_LOOKUP; + u32 sixrd_domain_index0 = ~0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next +=1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip60 = vlib_buffer_get_current(p0); + // p0->current_length = clib_net_to_host_u16(ip40->length); + d0 = ip6_sixrd_get_domain(vnet_buffer(p0)->ip.adj_index[VLIB_TX], &sixrd_domain_index0); + ASSERT(d0); + + /* SIXRD calc */ + u64 dal60 = clib_net_to_host_u64(ip60->dst_address.as_u64[0]); + u32 da40 = sixrd_get_addr(d0, dal60); + u16 len = clib_net_to_host_u16(ip60->payload_length) + 60; + if (da40 == 0) error0 = SIXRD_ERROR_UNKNOWN; + + /* construct ipv4 header */ + vlib_buffer_advance(p0, - (sizeof(ip4_header_t))); + ip4h0 = vlib_buffer_get_current(p0); + vnet_buffer(p0)->sw_if_index[VLIB_TX] = (u32)~0; + ip4h0->ip_version_and_header_length = 0x45; + ip4h0->tos = 0; + ip4h0->length = clib_host_to_net_u16(len); + ip4h0->fragment_id = 
0; + ip4h0->flags_and_fragment_offset = 0; + ip4h0->ttl = 0x40; + ip4h0->protocol = IP_PROTOCOL_IPV6; + ip4h0->src_address = d0->ip4_src; + ip4h0->dst_address.as_u32 = clib_host_to_net_u32(da40); + ip4h0->checksum = ip4_header_checksum(ip4h0); + + next0 = error0 == SIXRD_ERROR_NONE ? IP6_SIXRD_NEXT_IP4_LOOKUP : IP6_SIXRD_NEXT_DROP; + + if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED)) { + sixrd_trace_t *tr = vlib_add_trace(vm, node, p0, sizeof(*tr)); + tr->sixrd_domain_index = sixrd_domain_index0; + } + + p0->error = error_node->errors[error0]; + if (PREDICT_TRUE(error0 == SIXRD_ERROR_NONE)) encap++; + + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } + vlib_node_increment_counter(vm, ip6_sixrd_node.index, SIXRD_ERROR_ENCAPSULATED, encap); + + return frame->n_vectors; +} + +static char *sixrd_error_strings[] = { +#define _(sym,string) string, + foreach_sixrd_error +#undef _ +}; + +VLIB_REGISTER_NODE(ip6_sixrd_node) = { + .function = ip6_sixrd, + .name = "ip6-sixrd", + .vector_size = sizeof(u32), + .format_trace = format_sixrd_trace, + .n_errors = SIXRD_N_ERROR, + .error_strings = sixrd_error_strings, + .n_next_nodes = IP6_SIXRD_N_NEXT, + .next_nodes = { + [IP6_SIXRD_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_SIXRD_NEXT_DROP] = "error-drop", + }, +}; diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c new file mode 100644 index 00000000000..b0cab660876 --- /dev/null +++ b/vnet/vnet/map/map.c @@ -0,0 +1,1634 @@ +/* + * map.c : MAP support + * + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "map.h" + +/* + * This code supports the following MAP modes: + * + * Algorithmic Shared IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix > 32 + * psid_length > 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic Full IPv4 address (ea_bits_len > 0): + * ea_bits_len + ip4_prefix = 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * Algorithmic IPv4 prefix (ea_bits_len > 0): + * ea_bits_len + ip4_prefix < 32 + * psid_length = 0, ip6_prefix < 64, ip4_prefix <= 32 + * + * Independent Shared IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length > 0 + * Rule IPv6 address = 128, Rule PSID Set + * Independent Full IPv4 address (ea_bits_len = 0): + * ip4_prefix = 32 + * psid_length = 0, ip6_prefix = 128 + * Independent IPv4 prefix (ea_bits_len = 0): + * ip4_prefix < 32 + * psid_length = 0, ip6_prefix = 128 + * + */ + +/* + * This code supports MAP-T: + * + * With DMR prefix length equal to 96. + * + */ + + +i32 +ip4_get_port (ip4_header_t *ip, map_dir_e dir, u16 buffer_len) +{ + //TODO: use buffer length + if (ip->ip_version_and_header_length != 0x45 || + ip4_get_fragment_offset(ip)) + return -1; + + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER) ? 
udp->src_port : udp->dst_port; + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } else if (clib_net_to_host_u16(ip->length) >= 64) { + ip = (ip4_header_t *)(icmp + 2); + if (PREDICT_TRUE((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) { + udp_header_t *udp = (void *)(ip + 1); + return (dir == MAP_SENDER) ? udp->dst_port : udp->src_port; + } else if (ip->protocol == IP_PROTOCOL_ICMP) { + icmp46_header_t *icmp = (void *)(ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) { + return *((u16 *)(icmp + 1)); + } + } + } + } + return -1; +} + +i32 +ip6_get_port (ip6_header_t *ip6, map_dir_e dir, u16 buffer_len) +{ + u8 l4_protocol; + u16 l4_offset; + u16 frag_offset; + u8 *l4; + + if (ip6_parse(ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + //TODO: Use buffer length + + if (frag_offset && + ip6_frag_hdr_offset(((ip6_frag_hdr_t *)u8_ptr_add(ip6, frag_offset)))) + return -1; //Can't deal with non-first fragment for now + + l4 = u8_ptr_add(ip6, l4_offset); + if (l4_protocol == IP_PROTOCOL_TCP || + l4_protocol == IP_PROTOCOL_UDP) { + return (dir == MAP_SENDER) ? ((udp_header_t *)(l4))->src_port : ((udp_header_t *)(l4))->dst_port; + } else if (l4_protocol == IP_PROTOCOL_ICMP6) { + icmp46_header_t *icmp = (icmp46_header_t *)(l4); + if (icmp->type == ICMP6_echo_request) { + return (dir == MAP_SENDER) ? ((u16*)(icmp))[2] : -1; + } else if (icmp->type == ICMP6_echo_reply) { + return (dir == MAP_SENDER) ? 
-1 : ((u16*)(icmp))[2]; + } + } + return -1; +} + + +int +map_create_domain (ip4_address_t *ip4_prefix, + u8 ip4_prefix_len, + ip6_address_t *ip6_prefix, + u8 ip6_prefix_len, + ip6_address_t *ip6_src, + u8 ip6_src_len, + u8 ea_bits_len, + u8 psid_offset, + u8 psid_length, + u32 *map_domain_index, + u16 mtu, + u8 flags) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + map_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + u8 suffix_len; + uword *p; + + /* EA bits must be within the first 64 bits */ + if (ea_bits_len > 0 && (ip6_prefix_len + ea_bits_len) > 64) + return -1; + + /* Sanity check on the src prefix length */ + if (flags & MAP_DOMAIN_TRANSLATION) { + if (ip6_src_len != 96) { + clib_warning("MAP-T only supports ip6_src_len = 96 for now."); + return -1; + } + } else { + if (ip6_src_len != 128) { + clib_warning("MAP-E requires a BR address, not a prefix (ip6_src_len should be 128)."); + return -1; + } + } + + /* Get domain index */ + pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset(d, 0, sizeof (*d)); + *map_domain_index = d - mm->domains; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = *ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip6_src = *ip6_src; + d->ip6_src_len = ip6_src_len; + d->ea_bits_len = ea_bits_len; + d->psid_offset = psid_offset; + d->psid_length = psid_length; + d->mtu = mtu; + d->flags = flags; + + /* How many, and which bits to grab from the IPv4 DA */ + if (ip4_prefix_len + ea_bits_len < 32) { + d->flags |= MAP_DOMAIN_PREFIX; + suffix_len = d->suffix_shift = 32 - ip4_prefix_len - ea_bits_len; + } else { + d->suffix_shift = 0; + suffix_len = 32 - ip4_prefix_len; + } + d->suffix_mask = (1<<suffix_len) - 1; + + d->psid_shift = 16 - psid_length - psid_offset; + d->psid_mask = (1 << d->psid_length) - 1; + d->ea_shift = 64 - 
ip6_prefix_len - suffix_len - d->psid_length; + + /* Init IP adjacency */ + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP; + p = (uword *)&adj.rewrite_data[0]; + *p = (uword) (*map_domain_index); + + if (ip4_get_route(im4, 0, 0, (u8 *)ip4_prefix, ip4_prefix_len)) { + clib_warning("IPv4 route already defined: %U/%d", format_ip4_address, ip4_prefix, ip4_prefix_len); + pool_put(mm->domains, d); + return -1; + } + + /* Create ip4 adjacency */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_ADD; + args4.dst_address.as_u32 = ip4_prefix->as_u32; + args4.dst_address_length = ip4_prefix_len; + + args4.adj_index = ~0; + args4.add_adj = &adj; + args4.n_add_adj = 1; + ip4_add_del_route(im4, &args4); + + /* Multiple MAP domains may share same source IPv6 TEP */ + u32 ai = ip6_get_route(im6, 0, 0, ip6_src, ip6_src_len); + if (ai > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai); + if (adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP && + adj6->lookup_next_index != IP_LOOKUP_NEXT_MAP_T) { + clib_warning("BR source address already assigned: %U", format_ip6_address, ip6_src); + pool_put(mm->domains, d); + return -1; + } + /* Shared source */ + p = (uword *)&adj6->rewrite_data[0]; + p[0] = ~0; + + /* Add refcount, so we don't accidentially delete the route underneath someone */ + p[1]++; + } else { + /* Create ip6 adjacency. 
*/ + memset(&args6, 0, sizeof(args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_ADD; + args6.dst_address.as_u64[0] = ip6_src->as_u64[0]; + args6.dst_address.as_u64[1] = ip6_src->as_u64[1]; + args6.dst_address_length = ip6_src_len; + args6.adj_index = ~0; + args6.add_adj = &adj; + args6.n_add_adj = 1; + ip6_add_del_route(im6, &args6); + } + + /* Validate packet/byte counters */ + map_domain_counter_lock(mm); + int i; + for (i = 0; i < vec_len(mm->simple_domain_counters); i++) { + vlib_validate_simple_counter(&mm->simple_domain_counters[i], *map_domain_index); + vlib_zero_simple_counter(&mm->simple_domain_counters[i], *map_domain_index); + } + for (i = 0; i < vec_len(mm->domain_counters); i++) { + vlib_validate_combined_counter(&mm->domain_counters[i], *map_domain_index); + vlib_zero_combined_counter(&mm->domain_counters[i], *map_domain_index); + } + map_domain_counter_unlock(mm); + + return 0; +} + +/* + * map_delete_domain + */ +int +map_delete_domain (u32 map_domain_index) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + map_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + + if (pool_is_free_index(mm->domains, map_domain_index)) { + clib_warning("MAP domain delete: domain does not exist: %d", map_domain_index); + return -1; + } + + d = pool_elt_at_index(mm->domains, map_domain_index); + + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = (d->flags & MAP_DOMAIN_TRANSLATION) ? 
IP_LOOKUP_NEXT_MAP_T : IP_LOOKUP_NEXT_MAP; + + /* Delete ip4 adjacency */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_DEL; + args4.dst_address.as_u32 = d->ip4_prefix.as_u32; + args4.dst_address_length = d->ip4_prefix_len; + args4.adj_index = 0; + args4.add_adj = &adj; + args4.n_add_adj = 0; + ip4_add_del_route(im4, &args4); + + /* Delete ip6 adjacency */ + u32 ai = ip6_get_route(im6, 0, 0, &d->ip6_src, d->ip6_src_len); + if (ai > 0) { + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj6 = ip_get_adjacency(lm6, ai); + + uword *p = (uword *)&adj6->rewrite_data[0]; + /* Delete route when no other domains use this source */ + if (p[1] == 0) { + memset(&args6, 0, sizeof (args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_DEL; + args6.dst_address.as_u64[0] = d->ip6_src.as_u64[0]; + args6.dst_address.as_u64[1] = d->ip6_src.as_u64[1]; + args6.dst_address_length = d->ip6_src_len; + args6.adj_index = 0; + args6.add_adj = &adj; + args6.n_add_adj = 0; + ip6_add_del_route(im6, &args6); + } + p[1]--; + } + /* Deleting rules */ + if (d->rules) + clib_mem_free(d->rules); + + pool_put(mm->domains, d); + + return 0; +} + +int +map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t *tep, + u8 is_add) +{ + map_domain_t *d; + map_main_t *mm = &map_main; + + if (pool_is_free_index(mm->domains, map_domain_index)) { + clib_warning("MAP rule: domain does not exist: %d", map_domain_index); + return -1; + } + d = pool_elt_at_index(mm->domains, map_domain_index); + + /* Rules are only used in 1:1 independent case */ + if (d->ea_bits_len > 0) + return (-1); + + if (!d->rules) { + u32 l = (0x1 << d->psid_length) * sizeof(ip6_address_t); + d->rules = clib_mem_alloc_aligned(l, CLIB_CACHE_LINE_BYTES); + if (!d->rules) return -1; + memset(d->rules, 0, l); + } + + if (psid >= (0x1 << d->psid_length)) { + clib_warning("MAP rule: PSID outside bounds: %d [%d]", psid, 0x1 << d->psid_length); 
+ return -1; + } + + if (is_add) { + d->rules[psid] = *tep; + } else { + memset(&d->rules[psid], 0, sizeof(ip6_address_t)); + } + return 0; +} + +#ifdef MAP_SKIP_IP6_LOOKUP +static void +map_pre_resolve (ip4_address_t *ip4, ip6_address_t *ip6) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + + if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) { + mm->adj6_index = ip6_fib_lookup_with_table(im6, 0, ip6); + clib_warning("FIB lookup results in: %u", mm->adj6_index); + } + if (ip4->as_u32 != 0) { + mm->adj4_index = ip4_fib_lookup_with_table(im4, 0, ip4, 0); + clib_warning("FIB lookup results in: %u", mm->adj4_index); + } +} +#endif + +static clib_error_t * +map_security_check_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "off")) + mm->sec_check = false; + else if (unformat(line_input, "on")) + mm->sec_check = true; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + return 0; +} + +static clib_error_t * +map_security_check_frag_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "off")) + mm->sec_check_frag = false; + else if (unformat(line_input, "on")) + mm->sec_check_frag = true; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + return 0; +} + +static clib_error_t * +map_add_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip6_address_t ip6_src; + u32 ip6_prefix_len, ip4_prefix_len, map_domain_index, ip6_src_len; + u32 num_m_args = 0; + /* Optional arguments */ + u32 ea_bits_len, psid_offset = 0, psid_length = 0; + u32 mtu = 0; + u8 flags = 0; + ip6_src_len = 128; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip6-src %U/%d", unformat_ip6_address, &ip6_src, &ip6_src_len)) + num_m_args++; + else if (unformat(line_input, "ip6-src %U", unformat_ip6_address, &ip6_src)) + num_m_args++; + else if (unformat(line_input, "ea-bits-len %d", &ea_bits_len)) + num_m_args++; + else if (unformat(line_input, "psid-offset %d", &psid_offset)) + num_m_args++; + else if (unformat(line_input, "psid-len %d", &psid_length)) + num_m_args++; + else if (unformat(line_input, "mtu %d", &mtu)) + num_m_args++; + else if (unformat(line_input, "map-t")) + flags |= MAP_DOMAIN_TRANSLATION; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, 
input); + } + unformat_free(line_input); + + if (num_m_args < 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + map_create_domain(&ip4_prefix, ip4_prefix_len, + &ip6_prefix, ip6_prefix_len, &ip6_src, ip6_src_len, + ea_bits_len, psid_offset, psid_length, &map_domain_index, + mtu, flags); + + return 0; +} + +static clib_error_t * +map_del_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 num_m_args = 0; + u32 map_domain_index; + + /* Get a line of input. */ + if (! unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &map_domain_index)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 1) + return clib_error_return(0, "mandatory argument(s) missing"); + + map_delete_domain(map_domain_index); + + return 0; +} + +static clib_error_t * +map_add_rule_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip6_address_t tep; + u32 num_m_args = 0; + u32 psid, map_domain_index; + + /* Get a line of input. */ + if (! 
unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &map_domain_index)) + num_m_args++; + else if (unformat(line_input, "psid %d", &psid)) + num_m_args++; + else if (unformat(line_input, "ip6-dst %U", unformat_ip6_address, &tep)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + if (map_add_del_psid(map_domain_index, psid, &tep, 1) != 0) { + return clib_error_return(0, "Failing to add Mapping Rule"); + } + return 0; +} + +#if MAP_SKIP_IP6_LOOKUP +static clib_error_t * +map_pre_resolve_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4nh; + ip6_address_t ip6nh; + map_main_t *mm = &map_main; + + memset(&ip4nh, 0, sizeof(ip4nh)); + memset(&ip6nh, 0, sizeof(ip6nh)); + + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) + mm->preresolve_ip4 = ip4nh; + else if (unformat(line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) + mm->preresolve_ip6 = ip6nh; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + map_pre_resolve(&ip4nh, &ip6nh); + + return 0; +} +#endif + +static clib_error_t * +map_icmp_relay_source_address_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t icmp_src_address; + map_main_t *mm = &map_main; + + memset(&icmp_src_address, 0, sizeof(icmp_src_address)); + + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "%U", unformat_ip4_address, &icmp_src_address)) + mm->icmp_src_address = icmp_src_address; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static clib_error_t * +map_traffic_class_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + u32 tc = 0; + + mm->tc_copy = false; + + /* Get a line of input. 
*/ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "copy")) + mm->tc_copy = true; + else if (unformat(line_input, "%x", &tc)) + mm->tc = tc & 0xff; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static u8 * +format_map_domain (u8 *s, va_list *args) +{ + map_domain_t *d = va_arg(*args, map_domain_t *); + bool counters = va_arg(*args, int); + map_main_t *mm = &map_main; + ip6_address_t ip6_prefix; + + if (d->rules) + memset(&ip6_prefix, 0, sizeof(ip6_prefix)); + else + ip6_prefix = d->ip6_prefix; + + s = format(s, + "[%d] ip4-pfx %U/%d ip6-pfx %U/%d ip6-src %U/%d ea_bits_len %d psid-offset %d psid-len %d mtu %d %s", + d - mm->domains, + format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len, + format_ip6_address, &ip6_prefix, d->ip6_prefix_len, + format_ip6_address, &d->ip6_src, d->ip6_src_len, + d->ea_bits_len, d->psid_offset, d->psid_length, d->mtu, + (d->flags & MAP_DOMAIN_TRANSLATION) ? 
"map-t" : ""); + + if (counters) { + map_domain_counter_lock(mm); + vlib_counter_t v; + vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_TX], d - mm->domains, &v); + s = format(s, " TX: %d/%d", v.packets, v.bytes); + vlib_get_combined_counter(&mm->domain_counters[MAP_DOMAIN_COUNTER_RX], d - mm->domains, &v); + s = format(s, " RX: %d/%d", v.packets, v.bytes); + map_domain_counter_unlock(mm); + } + + if (d->rules) { + int i; + ip6_address_t dst; + for (i = 0; i < (0x1 << d->psid_length); i++) { + dst = d->rules[i]; + if (dst.as_u64[0] == 0 && dst.as_u64[1] == 0 ) + continue; + s = format(s, + " rule psid: %d ip6-dst %U\n", i, format_ip6_address, &dst); + } + } + return s; +} + +static u8 * +format_map_ip4_reass (u8 *s, va_list *args) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *r = va_arg(*args, map_ip4_reass_t *); + map_ip4_reass_key_t *k = &r->key; + f64 now = vlib_time_now(mm->vlib_main); + f64 lifetime = (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? (r->ts + lifetime - now) : -1; + s = format(s, + "ip4-reass src=%U dst=%U protocol=%d identifier=%d port=%d lifetime=%.3lf\n", + format_ip4_address, &k->src.as_u8, format_ip4_address, &k->dst.as_u8, + k->protocol, clib_net_to_host_u16(k->fragment_id), (r->port >= 0)?clib_net_to_host_u16(r->port):-1, dt); + return s; +} + +static u8 * +format_map_ip6_reass (u8 *s, va_list *args) +{ + map_main_t *mm = &map_main; + map_ip6_reass_t *r = va_arg(*args, map_ip6_reass_t *); + map_ip6_reass_key_t *k = &r->key; + f64 now = vlib_time_now(mm->vlib_main); + f64 lifetime = (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000); + f64 dt = (r->ts + lifetime > now) ? 
(r->ts + lifetime - now) : -1; + s = format(s, + "ip6-reass src=%U dst=%U protocol=%d identifier=%d lifetime=%.3lf\n", + format_ip6_address, &k->src.as_u8, format_ip6_address, &k->dst.as_u8, + k->protocol, clib_net_to_host_u32(k->fragment_id), dt); + return s; +} + +static clib_error_t * +show_map_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + map_domain_t *d; + bool counters = false; + u32 map_domain_index = ~0; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "counters")) + counters = true; + else if (unformat(line_input, "index %d", &map_domain_index)) + ; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (pool_elts(mm->domains) == 0) + vlib_cli_output(vm, "No MAP domains are configured..."); + + if (map_domain_index == ~0) { + pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); + } else { + if (pool_is_free_index(mm->domains, map_domain_index)) { + return clib_error_return(0, "MAP domain does not exists %d", map_domain_index); + } + + d = pool_elt_at_index(mm->domains, map_domain_index); + vlib_cli_output(vm, "%U", format_map_domain, d, counters); + } + + return 0; +} + +static clib_error_t * +show_map_fragments_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + map_main_t *mm = &map_main; + map_ip4_reass_t *f4; + map_ip6_reass_t *f6; + + pool_foreach(f4, mm->ip4_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip4_reass, f4);})); + pool_foreach(f6, mm->ip6_reass_pool, ({vlib_cli_output (vm, "%U", format_map_ip6_reass, f6);})); + return (0); +} + +u64 +map_error_counter_get (u32 node_index, map_error_t map_error) +{ + 
vlib_main_t *vm = vlib_get_main(); + vlib_node_runtime_t *error_node = vlib_node_get_runtime(vm, node_index); + vlib_error_main_t *em = &vm->error_main; + vlib_error_t e = error_node->errors[map_error]; + vlib_node_t *n = vlib_get_node(vm, node_index); + u32 ci; + + ci = vlib_error_get_code(e); + ASSERT (ci < n->n_errors); + ci += n->error_heap_index; + + return (em->counters[ci]); +} + +static clib_error_t * +show_map_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + map_main_t *mm = &map_main; + map_domain_t *d; + int domains = 0, rules = 0, domaincount = 0, rulecount = 0; + if (pool_elts (mm->domains) == 0) + vlib_cli_output(vm, "No MAP domains are configured..."); + + pool_foreach(d, mm->domains, ({ + if (d->rules) { + rulecount+= 0x1 << d->psid_length; + rules += sizeof(ip6_address_t) * 0x1 << d->psid_length; + } + domains += sizeof(*d); + domaincount++; + })); + + vlib_cli_output(vm, "MAP domains structure: %d\n", sizeof (map_domain_t)); + vlib_cli_output(vm, "MAP domains: %d (%d bytes)\n", domaincount, domains); + vlib_cli_output(vm, "MAP rules: %d (%d bytes)\n", rulecount, rules); + vlib_cli_output(vm, "Total: %d bytes)\n", rules + domains); + +#if MAP_SKIP_IP6_LOOKUP + vlib_cli_output(vm, "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n", + format_ip6_address, &mm->preresolve_ip6, mm->adj6_index, + format_ip4_address, &mm->preresolve_ip4, mm->adj4_index); +#endif + + if (mm->tc_copy) + vlib_cli_output(vm, "MAP traffic-class: copy"); + else + vlib_cli_output(vm, "MAP traffic-class: %x", mm->tc); + + vlib_cli_output(vm, "MAP IPv6 inbound security check: %s Fragments: %s", mm->sec_check ? "enabled" : "disabled", + mm->sec_check_frag ? 
"enabled" : "disabled"); + + + /* + * Counters + */ + vlib_combined_counter_main_t *cm = mm->domain_counters; + u64 total_pkts[MAP_N_DOMAIN_COUNTER]; + u64 total_bytes[MAP_N_DOMAIN_COUNTER]; + int which, i; + vlib_counter_t v; + + memset (total_pkts, 0, sizeof (total_pkts)); + memset (total_bytes, 0, sizeof (total_bytes)); + + map_domain_counter_lock (mm); + vec_foreach (cm, mm->domain_counters) { + which = cm - mm->domain_counters; + + for (i = 0; i < vec_len (cm->maxi); i++) { + vlib_get_combined_counter (cm, i, &v); + total_pkts[which] += v.packets; + total_bytes[which] += v.bytes; + } + } + map_domain_counter_unlock (mm); + + vlib_cli_output(vm, "Encapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_TX], + total_bytes[MAP_DOMAIN_COUNTER_TX]); + vlib_cli_output(vm, "Decapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_RX], + total_bytes[MAP_DOMAIN_COUNTER_RX]); + + vlib_cli_output(vm, "ICMP relayed packets: %d\n", vlib_get_simple_counter(&mm->icmp_relayed, 0)); + + return 0; +} + +static clib_error_t * +map_params_reass_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u32 lifetime = ~0; + f64 ht_ratio = (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1); + u32 pool_size = ~0; + u64 buffers = ~(0ull); + u8 ip4 = 0, ip6 = 0; + + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (!unformat(line_input, "lifetime %u", &lifetime) && + !unformat(line_input, "ht-ratio %lf", &ht_ratio) && + !unformat(line_input, "pool-size %u", &pool_size) && + !unformat(line_input, "buffers %llu", &buffers) && + !((unformat(line_input, "ip4")) && (ip4 = 1)) && + !((unformat(line_input, "ip6")) && (ip6 = 1))) { + unformat_free(line_input); + return clib_error_return(0, "invalid input"); + } + } + unformat_free(line_input); + + if (!ip4 && !ip6) + return clib_error_return(0, "must 
specify ip4 and/or ip6"); + + if (ip4) { + if (pool_size != ~0 && pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return(0, "invalid ip4-reass pool-size ( > %d)", MAP_IP4_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return clib_error_return(0, "invalid ip4-reass ht-ratio ( > %d)", MAP_IP4_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP4_REASS_CONF_LIFETIME_MAX) + return clib_error_return(0, "invalid ip4-reass lifetime ( > %d)", MAP_IP4_REASS_CONF_LIFETIME_MAX); + if (buffers != ~(0ull) && buffers > MAP_IP4_REASS_CONF_BUFFERS_MAX) + return clib_error_return(0, "invalid ip4-reass buffers ( > %ld)", MAP_IP4_REASS_CONF_BUFFERS_MAX); + } + + if (ip6) { + if (pool_size != ~0 && pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return clib_error_return(0, "invalid ip6-reass pool-size ( > %d)", MAP_IP6_REASS_CONF_POOL_SIZE_MAX); + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1) && ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return clib_error_return(0, "invalid ip6-reass ht-log2len ( > %d)", MAP_IP6_REASS_CONF_HT_RATIO_MAX); + if (lifetime != ~0 && lifetime > MAP_IP6_REASS_CONF_LIFETIME_MAX) + return clib_error_return(0, "invalid ip6-reass lifetime ( > %d)", MAP_IP6_REASS_CONF_LIFETIME_MAX); + if (buffers != ~(0ull) && buffers > MAP_IP6_REASS_CONF_BUFFERS_MAX) + return clib_error_return(0, "invalid ip6-reass buffers ( > %ld)", MAP_IP6_REASS_CONF_BUFFERS_MAX); + } + + if (ip4) { + u32 reass = 0, packets = 0; + if (pool_size != ~0) { + if (map_ip4_reass_conf_pool_size(pool_size, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip4-reass pool-size"); + } else { + vlib_cli_output(vm, "Setting ip4-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) { + if (map_ip4_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) { + vlib_cli_output(vm, "Could not set 
ip4-reass ht-log2len"); + } else { + vlib_cli_output(vm, "Setting ip4-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (lifetime != ~0) { + if (map_ip4_reass_conf_lifetime(lifetime)) + vlib_cli_output(vm, "Could not set ip4-reass lifetime"); + else + vlib_cli_output(vm, "Setting ip4-reass lifetime"); + } + if (buffers != ~(0ull)) { + if (map_ip4_reass_conf_buffers(buffers)) + vlib_cli_output(vm, "Could not set ip4-reass buffers"); + else + vlib_cli_output(vm, "Setting ip4-reass buffers"); + } + + if (map_main.ip4_reass_conf_buffers > + map_main.ip4_reass_conf_pool_size * MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) { + vlib_cli_output(vm, "Note: 'ip4-reass buffers' > pool-size * max-fragments-per-reassembly."); + } + } + + if (ip6) { + u32 reass = 0, packets = 0; + if (pool_size != ~0) { + if (map_ip6_reass_conf_pool_size(pool_size, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip6-reass pool-size"); + } else { + vlib_cli_output(vm, "Setting ip6-reass pool-size (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (ht_ratio != (MAP_IP4_REASS_CONF_HT_RATIO_MAX+1)) { + if (map_ip6_reass_conf_ht_ratio(ht_ratio, &reass, &packets)) { + vlib_cli_output(vm, "Could not set ip6-reass ht-log2len"); + } else { + vlib_cli_output(vm, "Setting ip6-reass ht-log2len (destroyed-reassembly=%u , dropped-fragments=%u)", reass, packets); + } + } + if (lifetime != ~0) { + if (map_ip6_reass_conf_lifetime(lifetime)) + vlib_cli_output(vm, "Could not set ip6-reass lifetime"); + else + vlib_cli_output(vm, "Setting ip6-reass lifetime"); + } + if (buffers != ~(0ull)) { + if (map_ip6_reass_conf_buffers(buffers)) + vlib_cli_output(vm, "Could not set ip6-reass buffers"); + else + vlib_cli_output(vm, "Setting ip6-reass buffers"); + } + + if (map_main.ip6_reass_conf_buffers > + map_main.ip6_reass_conf_pool_size * MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) { + vlib_cli_output(vm, "Note: 'ip6-reass buffers' > 
pool-size * max-fragments-per-reassembly."); + } + } + + return 0; +} + + +/* + * packet trace format function + */ +u8 * +format_map_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + map_trace_t *t = va_arg (*args, map_trace_t *); + u32 map_domain_index = t->map_domain_index; + u16 port = t->port; + + s = format(s, "MAP domain index: %d L4 port: %u", map_domain_index, clib_net_to_host_u16(port)); + + return s; +} + +static_always_inline map_ip4_reass_t * +map_ip4_reass_lookup(map_ip4_reass_key_t *k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip4_reass_hash_table[bucket]; + while(ri != MAP_REASS_INDEX_NONE) { + map_ip4_reass_t * r = pool_elt_at_index(mm->ip4_reass_pool, ri); + if (r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + now < r->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000)) { + return r; + } + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip4_reass_pool_index(r) (r - map_main.ip4_reass_pool) + +void +map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop) +{ + map_main_t *mm = &map_main; + map_ip4_reass_get_fragments(r, pi_to_drop); + + // Unlink in hash bucket + map_ip4_reass_t *r2 = NULL; + u32 r2i = mm->ip4_reass_hash_table[r->bucket]; + while (r2i != map_ip4_reass_pool_index(r)) { + ASSERT(r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index(mm->ip4_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) { + r2->bucket_next = r->bucket_next; + } else { + mm->ip4_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip4_reass_pool_index(r)) { + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + } else { + if(mm->ip4_reass_fifo_last == map_ip4_reass_pool_index(r)) + mm->ip4_reass_fifo_last = r->fifo_prev; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next; + pool_elt_at_index(mm->ip4_reass_pool, 
r->fifo_next)->fifo_prev = r->fifo_prev; + } + + pool_put(mm->ip4_reass_pool, r); + mm->ip4_reass_allocated--; +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop) +{ + map_ip4_reass_t * r; + map_main_t *mm = &map_main; + map_ip4_reass_key_t k = {.src.data_u32 = src, + .dst.data_u32 = dst, + .fragment_id = fragment_id, + .protocol = protocol }; + + u32 h = 0; + h = crc_u32(k.as_u32[0], h); + h = crc_u32(k.as_u32[1], h); + h = crc_u32(k.as_u32[2], h); + h = crc_u32(k.as_u32[3], h); + h = h >> (32 - mm->ip4_reass_ht_log2len); + + f64 now = vlib_time_now(mm->vlib_main); + + //Cache garbage collection + while (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + map_ip4_reass_t *last = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last); + if (last->ts + (((f64)mm->ip4_reass_conf_lifetime_ms) / 1000) < now) + map_ip4_reass_free(last, pi_to_drop); + else + break; + } + + if ((r = map_ip4_reass_lookup(&k, h, now))) + return r; + + if (mm->ip4_reass_allocated >= mm->ip4_reass_conf_pool_size) + return NULL; + + pool_get(mm->ip4_reass_pool, r); + mm->ip4_reass_allocated++; + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + r->fragments[i] = ~0; + + u32 ri = map_ip4_reass_pool_index(r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip4_reass_hash_table[h]; + mm->ip4_reass_hash_table[h] = ri; + + //Link in fifo + if(mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + r->fifo_next = pool_elt_at_index(mm->ip4_reass_pool, mm->ip4_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip4_reass_fifo_last; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index(mm->ip4_reass_pool, r->fifo_next)->fifo_prev = ri; + } else { + r->fifo_next = r->fifo_prev = ri; + mm->ip4_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + r->port = -1; +#ifdef MAP_IP4_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; 
+#endif + + return r; +} + +int +map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi) +{ + if (map_main.ip4_reass_buffered_counter >= map_main.ip4_reass_conf_buffers) + return -1; + + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i] == ~0) { + r->fragments[i] = pi; + map_main.ip4_reass_buffered_counter++; + return 0; + } + return -1; +} + +static_always_inline map_ip6_reass_t * +map_ip6_reass_lookup(map_ip6_reass_key_t *k, u32 bucket, f64 now) +{ + map_main_t *mm = &map_main; + u32 ri = mm->ip6_reass_hash_table[bucket]; + while(ri != MAP_REASS_INDEX_NONE) { + map_ip6_reass_t * r = pool_elt_at_index(mm->ip6_reass_pool, ri); + if(now < r->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) && + r->key.as_u64[0] == k->as_u64[0] && + r->key.as_u64[1] == k->as_u64[1] && + r->key.as_u64[2] == k->as_u64[2] && + r->key.as_u64[3] == k->as_u64[3] && + r->key.as_u64[4] == k->as_u64[4]) + return r; + ri = r->bucket_next; + } + return NULL; +} + +#define map_ip6_reass_pool_index(r) (r - map_main.ip6_reass_pool) + +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop) +{ + map_main_t *mm = &map_main; + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i].pi != ~0) { + vec_add1(*pi_to_drop, r->fragments[i].pi); + r->fragments[i].pi = ~0; + map_main.ip6_reass_buffered_counter--; + } + + // Unlink in hash bucket + map_ip6_reass_t *r2 = NULL; + u32 r2i = mm->ip6_reass_hash_table[r->bucket]; + while (r2i != map_ip6_reass_pool_index(r)) { + ASSERT(r2i != MAP_REASS_INDEX_NONE); + r2 = pool_elt_at_index(mm->ip6_reass_pool, r2i); + r2i = r2->bucket_next; + } + if (r2) { + r2->bucket_next = r->bucket_next; + } else { + mm->ip6_reass_hash_table[r->bucket] = r->bucket_next; + } + + // Unlink in list + if (r->fifo_next == map_ip6_reass_pool_index(r)) { + //Single element in the list, list is now empty + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } else { + if (mm->ip6_reass_fifo_last == 
map_ip6_reass_pool_index(r)) //First element + mm->ip6_reass_fifo_last = r->fifo_prev; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = r->fifo_next; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_next)->fifo_prev = r->fifo_prev; + } + + // Free from pool if necessary + pool_put(mm->ip6_reass_pool, r); + mm->ip6_reass_allocated--; +} + +map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop) +{ + map_ip6_reass_t * r; + map_main_t *mm = &map_main; + map_ip6_reass_key_t k = { + .src = *src, + .dst = *dst, + .fragment_id = fragment_id, + .protocol = protocol }; + + u32 h = 0; + int i; + for (i=0; i<10; i++) + h = crc_u32(k.as_u32[i], h); + h = h >> (32 - mm->ip6_reass_ht_log2len); + + f64 now = vlib_time_now(mm->vlib_main); + + //Cache garbage collection + while (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + map_ip6_reass_t *last = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last); + if (last->ts + (((f64)mm->ip6_reass_conf_lifetime_ms) / 1000) < now) + map_ip6_reass_free(last, pi_to_drop); + else + break; + } + + if ((r = map_ip6_reass_lookup(&k, h, now))) + return r; + + if (mm->ip6_reass_allocated >= mm->ip6_reass_conf_pool_size) + return NULL; + + pool_get(mm->ip6_reass_pool, r); + mm->ip6_reass_allocated++; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + r->fragments[i].pi = ~0; + r->fragments[i].next_data_len = 0; + r->fragments[i].next_data_offset = 0; + } + + u32 ri = map_ip6_reass_pool_index(r); + + //Link in new bucket + r->bucket = h; + r->bucket_next = mm->ip6_reass_hash_table[h]; + mm->ip6_reass_hash_table[h] = ri; + + //Link in fifo + if(mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + r->fifo_next = pool_elt_at_index(mm->ip6_reass_pool, mm->ip6_reass_fifo_last)->fifo_next; + r->fifo_prev = mm->ip6_reass_fifo_last; + pool_elt_at_index(mm->ip6_reass_pool, r->fifo_prev)->fifo_next = ri; + pool_elt_at_index(mm->ip6_reass_pool, 
r->fifo_next)->fifo_prev = ri; + } else { + r->fifo_next = r->fifo_prev = ri; + mm->ip6_reass_fifo_last = ri; + } + + //Set other fields + r->ts = now; + r->key = k; + r->ip4_header.ip_version_and_header_length = 0; +#ifdef MAP_IP6_REASS_COUNT_BYTES + r->expected_total = 0xffff; + r->forwarded = 0; +#endif + return r; +} + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len) +{ + map_ip6_fragment_t *f = NULL, *prev_f = NULL; + u16 copied_len = (data_len > 20) ? 20 : data_len; + + if (map_main.ip6_reass_buffered_counter >= map_main.ip6_reass_conf_buffers) + return -1; + + //Lookup for fragments for the current buffer + //and the one before that + int i; + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) { + if (data_offset && r->fragments[i].next_data_offset == data_offset) { + prev_f = &r->fragments[i]; // This is buffer for previous packet + } else if (r->fragments[i].next_data_offset == next_data_offset) { + f = &r->fragments[i]; // This is a buffer for the current packet + } else if (r->fragments[i].next_data_offset == 0) { //Available + if (f == NULL) + f = &r->fragments[i]; + else if (prev_f == NULL) + prev_f = &r->fragments[i]; + } + } + + if (!f || f->pi != ~0) + return -1; + + if (data_offset) { + if (!prev_f) + return -1; + + memcpy(prev_f->next_data, data_start, copied_len); + prev_f->next_data_len = copied_len; + prev_f->next_data_offset = data_offset; + } else { + if (((ip4_header_t *)data_start)->ip_version_and_header_length != 0x45) + return -1; + + if (r->ip4_header.ip_version_and_header_length == 0) + memcpy(&r->ip4_header, data_start, sizeof(ip4_header_t)); + } + + if(data_len > 20) { + f->next_data_offset = next_data_offset; + f->pi = pi; + map_main.ip6_reass_buffered_counter++; + } + return 0; +} + +void map_ip4_reass_reinit(u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + int i; + + if(dropped_packets) + *dropped_packets = 
mm->ip4_reass_buffered_counter; + if(trashed_reass) + *trashed_reass = mm->ip4_reass_allocated; + if (mm->ip4_reass_fifo_last != MAP_REASS_INDEX_NONE) { + u16 ri = mm->ip4_reass_fifo_last; + do { + map_ip4_reass_t *r = pool_elt_at_index(mm->ip4_reass_pool, ri); + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i] != ~0) + map_ip4_drop_pi(r->fragments[i]); + + ri = r->fifo_next; + pool_put(mm->ip4_reass_pool, r); + } while (ri != mm->ip4_reass_fifo_last); + } + + vec_free(mm->ip4_reass_hash_table); + vec_resize(mm->ip4_reass_hash_table, 1 << mm->ip4_reass_ht_log2len); + for (i=0; i<(1 << mm->ip4_reass_ht_log2len); i++) + mm->ip4_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free(mm->ip4_reass_pool); + pool_alloc(mm->ip4_reass_pool, mm->ip4_reass_conf_pool_size); + + mm->ip4_reass_allocated = 0; + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + mm->ip4_reass_buffered_counter = 0; +} + +u8 map_get_ht_log2len(f32 ht_ratio, u16 pool_size) +{ + u32 desired_size = (u32)(pool_size * ht_ratio); + u8 i; + for (i=1; i<31; i++) + if ((1 << i) >= desired_size) + return i; + return 4; +} + +int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP4_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip4_reass_lock(); + mm->ip4_reass_conf_ht_ratio = ht_ratio; + mm->ip4_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip4_reass_conf_pool_size); + map_ip4_reass_reinit(trashed_reass, dropped_packets); + map_ip4_reass_unlock(); + return 0; +} + +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > MAP_IP4_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip4_reass_lock(); + mm->ip4_reass_conf_pool_size = pool_size; + map_ip4_reass_reinit(trashed_reass, dropped_packets); + map_ip4_reass_unlock(); + return 0; +} + +int map_ip4_reass_conf_lifetime(u16 lifetime_ms) +{ + 
map_main.ip4_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +int map_ip4_reass_conf_buffers(u32 buffers) +{ + map_main.ip4_reass_conf_buffers = buffers; + return 0; +} + +void map_ip6_reass_reinit(u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if(dropped_packets) + *dropped_packets = mm->ip6_reass_buffered_counter; + if(trashed_reass) + *trashed_reass = mm->ip6_reass_allocated; + int i; + if (mm->ip6_reass_fifo_last != MAP_REASS_INDEX_NONE) { + u16 ri = mm->ip6_reass_fifo_last; + do { + map_ip6_reass_t *r = pool_elt_at_index(mm->ip6_reass_pool, ri); + for (i=0; i<MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if (r->fragments[i].pi != ~0) + map_ip6_drop_pi(r->fragments[i].pi); + + ri = r->fifo_next; + pool_put(mm->ip6_reass_pool, r); + } while (ri != mm->ip6_reass_fifo_last); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + } + + vec_free(mm->ip6_reass_hash_table); + vec_resize(mm->ip6_reass_hash_table, 1 << mm->ip6_reass_ht_log2len); + for(i=0; i<(1 << mm->ip6_reass_ht_log2len); i++) + mm->ip6_reass_hash_table[i] = MAP_REASS_INDEX_NONE; + pool_free(mm->ip6_reass_pool); + pool_alloc(mm->ip6_reass_pool, mm->ip4_reass_conf_pool_size); + + mm->ip6_reass_allocated = 0; + mm->ip6_reass_buffered_counter = 0; +} + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (ht_ratio > MAP_IP6_REASS_CONF_HT_RATIO_MAX) + return -1; + + map_ip6_reass_lock(); + mm->ip6_reass_conf_ht_ratio = ht_ratio; + mm->ip6_reass_ht_log2len = map_get_ht_log2len(ht_ratio, mm->ip6_reass_conf_pool_size); + map_ip6_reass_reinit(trashed_reass, dropped_packets); + map_ip6_reass_unlock(); + return 0; +} + +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets) +{ + map_main_t *mm = &map_main; + if (pool_size > MAP_IP6_REASS_CONF_POOL_SIZE_MAX) + return -1; + + map_ip6_reass_lock(); + mm->ip6_reass_conf_pool_size = pool_size; + 
map_ip6_reass_reinit(trashed_reass, dropped_packets); + map_ip6_reass_unlock(); + return 0; +} + +int map_ip6_reass_conf_lifetime(u16 lifetime_ms) +{ + map_main.ip6_reass_conf_lifetime_ms = lifetime_ms; + return 0; +} + +int map_ip6_reass_conf_buffers(u32 buffers) +{ + map_main.ip6_reass_conf_buffers = buffers; + return 0; +} + +VLIB_CLI_COMMAND(map_ip4_reass_lifetime_command, static) = { + .path = "map params reassembly", + .short_help = "[ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]", + .function = map_params_reass_command_fn, +}; + +VLIB_CLI_COMMAND(map_traffic_class_command, static) = { + .path = "map params traffic-class", + .short_help = + "traffic-class {0x0-0xff | copy}", + .function = map_traffic_class_command_fn, +}; + +VLIB_CLI_COMMAND(map_pre_resolve_command, static) = { + .path = "map params pre-resolve", + .short_help = + "pre-resolve {ip4-nh <address>} | {ip6-nh <address>}", + .function = map_pre_resolve_command_fn, +}; + +VLIB_CLI_COMMAND(map_security_check_command, static) = { + .path = "map params security-check", + .short_help = + "security-check on|off", + .function = map_security_check_command_fn, +}; + +VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = { + .path = "map params icmp-source-address", + .short_help = + "icmp-source-address <ip4-address>", + .function = map_icmp_relay_source_address_command_fn, +}; + +VLIB_CLI_COMMAND(map_security_check_frag_command, static) = { + .path = "map params security-check fragments", + .short_help = + "fragments on|off", + .function = map_security_check_frag_command_fn, +}; + +VLIB_CLI_COMMAND(map_add_domain_command, static) = { + .path = "map add domain", + .short_help = + "map add domain ip4-pfx <ip4-pfx> ip6-pfx <ip6-pfx> ip6-src <ip6-pfx> " + "ea-bits-len <n> psid-offset <n> psid-len <n> [map-t] [mtu <mtu>]", + .function = map_add_domain_command_fn, +}; + +VLIB_CLI_COMMAND(map_add_rule_command, static) = { + .path = "map add 
rule", + .short_help = + "map add rule index <domain> psid <psid> ip6-dst <ip6-addr>", + .function = map_add_rule_command_fn, +}; + +VLIB_CLI_COMMAND(map_del_command, static) = { + .path = "map del domain", + .short_help = + "map del domain index <domain>", + .function = map_del_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_domain_command, static) = { + .path = "show map domain", + .function = show_map_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_stats_command, static) = { + .path = "show map stats", + .function = show_map_stats_command_fn, +}; + +VLIB_CLI_COMMAND(show_map_fragments_command, static) = { + .path = "show map fragments", + .function = show_map_fragments_command_fn, +}; + +/* + * map_init + */ +clib_error_t *map_init (vlib_main_t *vm) +{ + map_main_t *mm = &map_main; + mm->vnet_main = vnet_get_main(); + mm->vlib_main = vm; + +#ifdef MAP_SKIP_IP6_LOOKUP + memset(&mm->preresolve_ip4, 0, sizeof(mm->preresolve_ip4)); + memset(&mm->preresolve_ip6, 0, sizeof(mm->preresolve_ip6)); + mm->adj4_index = 0; + mm->adj6_index = 0; +#endif + + /* traffic class */ + mm->tc = 0; + mm->tc_copy = true; + + /* Inbound security check */ + mm->sec_check = true; + mm->sec_check_frag = false; + + vec_validate(mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1); + mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx"; + mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx"; + + vlib_validate_simple_counter(&mm->icmp_relayed, 0); + vlib_zero_simple_counter(&mm->icmp_relayed, 0); + + /* IP4 virtual reassembly */ + mm->ip4_reass_hash_table = 0; + mm->ip4_reass_pool = 0; + mm->ip4_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + mm->ip4_reass_conf_ht_ratio = MAP_IP4_REASS_HT_RATIO_DEFAULT; + mm->ip4_reass_conf_lifetime_ms = MAP_IP4_REASS_LIFETIME_DEFAULT; + mm->ip4_reass_conf_pool_size = MAP_IP4_REASS_POOL_SIZE_DEFAULT; + mm->ip4_reass_conf_buffers = MAP_IP4_REASS_BUFFERS_DEFAULT; + mm->ip4_reass_ht_log2len = 
map_get_ht_log2len(mm->ip4_reass_conf_ht_ratio, mm->ip4_reass_conf_pool_size); + mm->ip4_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip4_reass_reinit(NULL, NULL); + + /* IP6 virtual reassembly */ + mm->ip6_reass_hash_table = 0; + mm->ip6_reass_pool = 0; + mm->ip6_reass_lock = clib_mem_alloc_aligned(CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); + mm->ip6_reass_conf_ht_ratio = MAP_IP6_REASS_HT_RATIO_DEFAULT; + mm->ip6_reass_conf_lifetime_ms = MAP_IP6_REASS_LIFETIME_DEFAULT; + mm->ip6_reass_conf_pool_size = MAP_IP6_REASS_POOL_SIZE_DEFAULT; + mm->ip6_reass_conf_buffers = MAP_IP6_REASS_BUFFERS_DEFAULT; + mm->ip6_reass_ht_log2len = map_get_ht_log2len(mm->ip6_reass_conf_ht_ratio, mm->ip6_reass_conf_pool_size); + mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; + map_ip6_reass_reinit(NULL, NULL); + + return 0; +} + +VLIB_INIT_FUNCTION(map_init); diff --git a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h new file mode 100644 index 00000000000..ae58cdb9120 --- /dev/null +++ b/vnet/vnet/map/map.h @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <stdbool.h> +#include <vppinfra/error.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vlib/vlib.h> + +#define MAP_SKIP_IP6_LOOKUP 1 + +typedef enum { + MAP_SENDER, + MAP_RECEIVER +} map_dir_e; + +int map_create_domain(ip4_address_t *ip4_prefix, u8 ip4_prefix_len, + ip6_address_t *ip6_prefix, u8 ip6_prefix_len, + ip6_address_t *ip6_src, u8 ip6_src_len, + u8 ea_bits_len, u8 psid_offset, u8 psid_length, + u32 *map_domain_index, u16 mtu, u8 flags); +int map_delete_domain(u32 map_domain_index); +int map_add_del_psid(u32 map_domain_index, u16 psid, ip6_address_t *tep, u8 is_add); +u8 *format_map_trace(u8 *s, va_list *args); +i32 ip4_get_port(ip4_header_t *ip, map_dir_e dir, u16 buffer_len); +i32 ip6_get_port(ip6_header_t *ip6, map_dir_e dir, u16 buffer_len); +u16 ip4_map_get_port (ip4_header_t *ip, map_dir_e dir); + +typedef enum __attribute__ ((__packed__)) { + MAP_DOMAIN_PREFIX = 1 << 0, + MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T +} map_domain_flags_e; + +/** + * IP4 reassembly logic: + * One virtually reassembled flow requires a map_ip4_reass_t structure in order + * to keep the first-fragment port number and, optionally, cache out of sequence + * packets. + * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. + * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. + * When a new structure needs to be used, it is allocated from available ones. + * If there is no structure available, the oldest in use is selected and used if and + * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. + * In case no structure can be allocated, the fragment is dropped. 
+ */ + +#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly + +#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 + +#define MAP_IP6_REASS_COUNT_BYTES +#define MAP_IP4_REASS_COUNT_BYTES + +//#define IP6_MAP_T_OVERRIDE_TOS 0 + +/* + * This structure _MUST_ be no larger than a single cache line (64 bytes). + * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive. + */ +typedef struct { + ip6_address_t ip6_src; + ip6_address_t ip6_prefix; + ip6_address_t *rules; + u32 suffix_mask; + ip4_address_t ip4_prefix; + u16 psid_mask; + u16 mtu; + map_domain_flags_e flags; + u8 ip6_prefix_len; + u8 ip6_src_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + + /* helpers */ + u8 psid_shift; + u8 suffix_shift; + u8 ea_shift; + + /* not used by forwarding */ + u8 ip4_prefix_len; +} map_domain_t; + +#define MAP_REASS_INDEX_NONE ((u16)0xffff) + +/* + * Hash key, padded out to 16 bytes for fast compare + */ +typedef union { + CLIB_PACKED (struct { + ip4_address_t src; + ip4_address_t dst; + u16 fragment_id; + u8 protocol; + }); + u64 as_u64[2]; + u32 as_u32[4]; +} map_ip4_reass_key_t; + +typedef struct { + map_ip4_reass_key_t key; + f64 ts; +#ifdef MAP_IP4_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + i32 port; + u16 bucket; + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip4_reass_t; + +/* + * MAP domain counters + */ +typedef enum { + /* Simple counters */ 
+ MAP_DOMAIN_IPV4_FRAGMENT = 0, + /* Combined counters */ + MAP_DOMAIN_COUNTER_RX = 0, + MAP_DOMAIN_COUNTER_TX, + MAP_N_DOMAIN_COUNTER +} map_domain_counter_t; + +/* + * main_main_t + */ +typedef union { + CLIB_PACKED (struct { + ip6_address_t src; + ip6_address_t dst; + u32 fragment_id; + u8 protocol; + }); + u64 as_u64[5]; + u32 as_u32[10]; +} map_ip6_reass_key_t; + +typedef struct { + u32 pi; //Cached packet or ~0 + u16 next_data_offset; //The data offset of the additional 20 bytes or ~0 + u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment) + u8 next_data[20]; //The 20 additional bytes +} map_ip6_fragment_t; + +typedef struct { + map_ip6_reass_key_t key; + f64 ts; +#ifdef MAP_IP6_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + u16 bucket; //What hash bucket this element is linked in + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + ip4_header_t ip4_header; + map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip6_reass_t; + +typedef struct { + /* pool of MAP domains */ + map_domain_t *domains; + + /* MAP Domain packet/byte counters indexed by map domain index */ + vlib_simple_counter_main_t *simple_domain_counters; + vlib_combined_counter_main_t *domain_counters; + volatile u32 *counter_lock; + + /* Global counters */ + vlib_simple_counter_main_t icmp_relayed; + +#ifdef MAP_SKIP_IP6_LOOKUP + /* pre-presolve */ + u32 adj6_index, adj4_index; + ip4_address_t preresolve_ip4; + ip6_address_t preresolve_ip6; +#endif + + /* Traffic class: zero, copy (~0) or fixed value */ + u8 tc; + bool tc_copy; + bool sec_check; + bool sec_check_frag; + + /* ICMPv6 -> ICMPv4 relay parameters */ + ip4_address_t icmp_src_address; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* + * IPv4 encap and decap reassembly + */ + //Conf + f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip4_reass_conf_pool_size; //Max number of allocated reass 
structures + u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly + + //Runtime + map_ip4_reass_t *ip4_reass_pool; + u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip4_reass_allocated; + u16 *ip4_reass_hash_table; + u16 ip4_reass_fifo_last; + volatile u32 *ip4_reass_lock; + + //Counters + u32 ip4_reass_buffered_counter; + + /* + * IPv6 decap reassembly + */ + //Conf + f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly + + //Runtime + map_ip6_reass_t *ip6_reass_pool; + u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip6_reass_allocated; + u16 *ip6_reass_hash_table; + u16 ip6_reass_fifo_last; + volatile u32 *ip6_reass_lock; + + //Counters + u32 ip6_reass_buffered_counter; + +} map_main_t; + +/* + * TODO: Remove SEC_CHECK / TRANSLATED_4TO6 / TRANSLATED_6TO4 + */ +#define foreach_map_error \ + /* Must be first. 
*/ \ + _(NONE, "valid MAP packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(WRONG_ICMP_TYPE, "wrong icmp type") \ + _(SEC_CHECK, "security check failed") \ + _(ENCAP_SEC_CHECK, "encap security check failed") \ + _(DECAP_SEC_CHECK, "decap security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(ICMP_RELAY, "unable to relay ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_DOMAIN, "no domain") \ + _(FRAGMENTED, "packet is a fragment") \ + _(FRAGMENT_MEMORY, "could not cache fragment") \ + _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ + _(FRAGMENT_DROPPED, "dropped cached fragment") \ + _(MALFORMED, "malformed packet") + +typedef enum { +#define _(sym,str) MAP_ERROR_##sym, + foreach_map_error +#undef _ + MAP_N_ERROR, + } map_error_t; + +u64 map_error_counter_get(u32 node_index, map_error_t map_error); + +typedef struct { + u32 map_domain_index; + u16 port; +} map_trace_t; + +map_main_t map_main; + +vlib_node_registration_t ip4_map_node; +vlib_node_registration_t ip6_map_node; + +vlib_node_registration_t ip4_map_t_node; +vlib_node_registration_t ip4_map_t_fragmented_node; +vlib_node_registration_t ip4_map_t_tcp_udp_node; +vlib_node_registration_t ip4_map_t_icmp_node; + +vlib_node_registration_t ip6_map_t_node; +vlib_node_registration_t ip6_map_t_fragmented_node; +vlib_node_registration_t ip6_map_t_tcp_udp_node; +vlib_node_registration_t ip6_map_t_icmp_node; + +/* + * map_get_pfx + */ +static_always_inline u64 +map_get_pfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[0]); + + u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask; + u64 ea = d->ea_bits_len == 0 ? 
0 : (((u64) suffix << d->psid_length)) | psid; + + return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift; +} + +static_always_inline u64 +map_get_pfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +/* + * map_get_sfx + */ +static_always_inline u64 +map_get_sfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + /* Shared 1:1 mode. */ + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[1]); + if (d->ip6_prefix_len == 128) + return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); + + /* IPv4 prefix */ + if (d->flags & MAP_DOMAIN_PREFIX) + return (u64) (addr & ~d->suffix_mask) << 16; + + /* Shared or full IPv4 address */ + return ((u64) addr << 16) | psid; +} + +static_always_inline u64 +map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +static_always_inline u32 +map_get_ip4 (ip6_address_t *addr) +{ + return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); +} + +/* + * Get the MAP domain from an IPv4 lookup adjacency. + */ +static_always_inline map_domain_t * +ip4_map_get_domain (u32 adj_index, u32 *map_domain_index) +{ + map_main_t *mm = &map_main; + ip_lookup_main_t *lm = &ip4_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *map_domain_index = p[0]; + return pool_elt_at_index(mm->domains, p[0]); +} + +/* + * Get the MAP domain from an IPv6 lookup adjacency. + * If the IPv6 address or prefix is not shared, no lookup is required. + * The IPv4 address is used otherwise. 
+ */ +static_always_inline map_domain_t * +ip6_map_get_domain (u32 adj_index, ip4_address_t *addr, + u32 *map_domain_index, u8 *error) +{ + map_main_t *mm = &map_main; + ip4_main_t *im4 = &ip4_main; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm6, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *map_domain_index = p[0]; + if (p[0] != ~0) + return pool_elt_at_index(mm->domains, p[0]); + + u32 ai = ip4_fib_lookup_with_table(im4, 0, addr, 0); + ip_adjacency_t *adj4 = ip_get_adjacency (lm4, ai); + if (PREDICT_TRUE(adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP || + adj4->lookup_next_index == IP_LOOKUP_NEXT_MAP_T)) { + uword *p = (uword *)adj4->rewrite_data; + *map_domain_index = p[0]; + return pool_elt_at_index(mm->domains, *map_domain_index); + } + *error = MAP_ERROR_NO_DOMAIN; + return NULL; +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); + +#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {} +#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0) + +static_always_inline void +map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) +{ + int i; + for (i=0; i<MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY; i++) + if(r->fragments[i] != ~0) { + vec_add1(*pi, r->fragments[i]); + r->fragments[i] = ~0; + map_main.ip4_reass_buffered_counter--; + } +} + +int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); + +map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop); + +#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {} +#define map_ip6_reass_unlock() do 
{CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0) + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len); + +void map_ip4_drop_pi(u32 pi); + +int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip4_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip4_reass_conf_buffers(u32 buffers); +#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) + +void map_ip6_drop_pi(u32 pi); + + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100 +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip6_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip6_reass_conf_buffers(u32 buffers); +#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) + +static_always_inline +int ip6_parse(const ip6_header_t *ip6, u32 buff_len, + u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset) +{ + if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr; + *frag_hdr_offset = sizeof(*ip6); + *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t); + } else { + *l4_protocol = ip6->protocol; + *frag_hdr_offset = 0; + *l4_offset = sizeof(*ip6); + } + + return (buff_len < (*l4_offset + 4)) || + (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6))); +} + + +#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) +#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) + +#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) + 
+static_always_inline void +ip4_map_t_embedded_address (map_domain_t *d, + ip6_address_t *ip6, const ip4_address_t *ip4) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + ip6->as_u64[0] = d->ip6_src.as_u64[0]; + ip6->as_u32[2] = d->ip6_src.as_u32[2]; + ip6->as_u32[3] = ip4->as_u32; +} + +static_always_inline u32 +ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + return addr->as_u32[3]; +} + +static inline void +map_domain_counter_lock (map_main_t *mm) +{ + if (mm->counter_lock) + while (__sync_lock_test_and_set(mm->counter_lock, 1)) + /* zzzz */ ; +} +static inline void +map_domain_counter_unlock (map_main_t *mm) +{ + if (mm->counter_lock) + *mm->counter_lock = 0; +} + + +static_always_inline void +map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, + vlib_node_runtime_t *node, vlib_error_t *error, + u32 next) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + //Deal with fragments that are ready + from = pi_vector; + n_left_from = vec_len(pi_vector); + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0); + p0->error = *error; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } +} diff --git a/vnet/vnet/map/map_doc.md b/vnet/vnet/map/map_doc.md new file mode 100644 index 00000000000..230c52dfafd --- /dev/null +++ b/vnet/vnet/map/map_doc.md @@ -0,0 +1,69 @@ +# VPP MAP and Lw4o6 implementation + +This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations. 
+Everything that is not directly obvious should come here. + + + +## MAP-E Virtual Reassembly + +The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments. + +Fragment caching and handling is not always necessary. It is performed when: +* An IPv4 fragment is received and the destination IPv4 address is shared. +* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on. +* An IPv6 fragment is received. + +There are 3 dedicated nodes: +* ip4-map-reass +* ip6-map-ip4-reass +* ip6-map-ip6-reass + +ip4-map sends all fragments to ip4-map-reass. +ip6-map sends all inner-fragments to ip6-map-ip4-reass. +ip6-map sends all outer-fragments to ip6-map-ip6-reass. + +IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes. + +An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received. 
+ +#### Virtual Reassembly configuration + +IPv4 and IPv6 virtual reassembly support the following configuration: + map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>] + +lifetime: + The time in milliseconds a reassembly structure is considered valid. The longer the lifetime, the more reliable reassembly is, but the more likely it is to exhaust the pool of reassembly structures. The IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases. + +buffers: + The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool. + +pool-size: + The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total. + +ht-ratio: + The number of buckets in the hash-table is pool-size * ht-ratio, rounded up to a power of two. + + +Any time pool-size or ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost. + + +##### Additional considerations + +Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of losing state for fragments that are far apart. + +Let: +R be the packet rate at which fragments are received. +F be the number of fragments per packet. + +Assuming the first fragment is always received last, we should have: +buffers > lifetime * R / F * (F - 1) +pool-size > lifetime * R/F + +This is a worst case. Receiving the first fragment earlier helps reduce the number of required buffers.
Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reduce 'pool-size'. + +But the formula shows that it is challenging to forward a significant number of fragmented packets at high rates. For instance, with a lifetime of 1 second and two fragments per packet, a 5Mpps packet rate would require buffering up to 2.5 million fragments. + +If you want to do that, be prepared to configure a lot of buffers. + + diff --git a/vnet/vnet/map/sixrd.c b/vnet/vnet/map/sixrd.c new file mode 100644 index 00000000000..26b4eea9a86 --- /dev/null +++ b/vnet/vnet/map/sixrd.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "sixrd.h" + +/* + * This code supports the following sixrd modes: + * + * 32 EA bits (Complete IPv4 address is embedded): + * ea_bits_len = 32 + * IPv4 suffix is embedded: + * ea_bits_len = < 32 + * No embedded address bits (1:1 mode): + * ea_bits_len = 0 + */ + +int +sixrd_create_domain (ip6_address_t *ip6_prefix, + u8 ip6_prefix_len, + ip4_address_t *ip4_prefix, + u8 ip4_prefix_len, + ip4_address_t *ip4_src, + u32 *sixrd_domain_index, + u16 mtu) +{ + sixrd_main_t *mm = &sixrd_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + sixrd_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + u32 *p; + + /* Get domain index */ + pool_get_aligned(mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset(d, 0, sizeof (*d)); + *sixrd_domain_index = d - mm->domains; + + /* Init domain struct */ + d->ip4_prefix.as_u32 = ip4_prefix->as_u32; + d->ip4_prefix_len = ip4_prefix_len; + d->ip6_prefix = *ip6_prefix; + d->ip6_prefix_len = ip6_prefix_len; + d->ip4_src = *ip4_src; + d->mtu = mtu; + + if (ip4_prefix_len < 32) + d->shift = 64 - ip6_prefix_len + (32 - ip4_prefix_len); + + /* Init IP adjacency */ + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD; + p = (u32 *)&adj.rewrite_data[0]; + *p = (u32) (*sixrd_domain_index); + + /* Create ip6 adjacency */ + memset(&args6, 0, sizeof(args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_ADD; + args6.dst_address.as_u64[0] = ip6_prefix->as_u64[0]; + args6.dst_address.as_u64[1] = ip6_prefix->as_u64[1]; + args6.dst_address_length = ip6_prefix_len; + args6.adj_index = ~0; + args6.add_adj = &adj; + args6.n_add_adj = 1; + ip6_add_del_route(im6, &args6); + + /* Multiple SIXRD domains may share same source IPv4 TEP */ + uword *q = ip4_get_route(im4, 0, 0, (u8 *)ip4_src, 32); + if (q) { + u32 ai = q[0]; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj4 = 
ip_get_adjacency(lm4, ai); + if (adj4->lookup_next_index != IP_LOOKUP_NEXT_SIXRD) { + clib_warning("BR source address already assigned: %U", format_ip4_address, ip4_src); + pool_put(mm->domains, d); + return -1; + } + /* Shared source */ + p = (u32 *)&adj4->rewrite_data[0]; + p[0] = ~0; + + /* Add refcount, so we don't accidentially delete the route underneath someone */ + p[1]++; + } else { + /* Create ip4 adjacency. */ + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_ADD; + args4.dst_address.as_u32 = ip4_src->as_u32; + args4.dst_address_length = 32; + args4.adj_index = ~0; + args4.add_adj = &adj; + args4.n_add_adj = 1; + ip4_add_del_route(im4, &args4); + } + + return 0; +} + +/* + * sixrd_delete_domain + */ +int +sixrd_delete_domain (u32 sixrd_domain_index) +{ + sixrd_main_t *mm = &sixrd_main; + ip4_main_t *im4 = &ip4_main; + ip6_main_t *im6 = &ip6_main; + sixrd_domain_t *d; + ip_adjacency_t adj; + ip4_add_del_route_args_t args4; + ip6_add_del_route_args_t args6; + + if (pool_is_free_index(mm->domains, sixrd_domain_index)) { + clib_warning("SIXRD domain delete: domain does not exist: %d", sixrd_domain_index); + return -1; + } + + d = pool_elt_at_index(mm->domains, sixrd_domain_index); + + memset(&adj, 0, sizeof(adj)); + adj.explicit_fib_index = ~0; + adj.lookup_next_index = IP_LOOKUP_NEXT_SIXRD; + + /* Delete ip6 adjacency */ + memset(&args6, 0, sizeof (args6)); + args6.table_index_or_table_id = 0; + args6.flags = IP6_ROUTE_FLAG_DEL; + args6.dst_address.as_u64[0] = d->ip6_prefix.as_u64[0]; + args6.dst_address.as_u64[1] = d->ip6_prefix.as_u64[1]; + args6.dst_address_length = d->ip6_prefix_len; + args6.adj_index = 0; + args6.add_adj = &adj; + args6.n_add_adj = 0; + ip6_add_del_route(im6, &args6); + + /* Delete ip4 adjacency */ + uword *q = ip4_get_route(im4, 0, 0, (u8 *)&d->ip4_src, 32); + if (q) { + u32 ai = q[0]; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_adjacency_t *adj4 = ip_get_adjacency(lm4, 
ai); + + u32 *p = (u32 *)&adj4->rewrite_data[0]; + /* Delete route when no other domains use this source */ + if (p[1] == 0) { + memset(&args4, 0, sizeof(args4)); + args4.table_index_or_table_id = 0; + args4.flags = IP4_ROUTE_FLAG_DEL; + args4.dst_address.as_u32 = d->ip4_prefix.as_u32; + args4.dst_address_length = d->ip4_prefix_len; + args4.adj_index = 0; + args4.add_adj = &adj; + args4.n_add_adj = 0; + ip4_add_del_route(im4, &args4); + } + p[1]--; + } + + pool_put(mm->domains, d); + + return 0; +} + +static clib_error_t * +sixrd_add_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t ip4_prefix; + ip6_address_t ip6_prefix; + ip4_address_t ip4_src; + u32 ip6_prefix_len, ip4_prefix_len, sixrd_domain_index; + u32 num_m_args = 0; + /* Optional arguments */ + u32 mtu = 0; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "ip6-pfx %U/%d", unformat_ip6_address, &ip6_prefix, &ip6_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip4-pfx %U/%d", unformat_ip4_address, &ip4_prefix, &ip4_prefix_len)) + num_m_args++; + else if (unformat(line_input, "ip4-src %U", unformat_ip4_address, &ip4_src)) + num_m_args++; + else if (unformat(line_input, "mtu %d", &mtu)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args < 3) + return clib_error_return(0, "mandatory argument(s) missing"); + + sixrd_create_domain(&ip6_prefix, ip6_prefix_len, &ip4_prefix, ip4_prefix_len, + &ip4_src, &sixrd_domain_index, mtu); + + return 0; +} + +static clib_error_t * +sixrd_del_domain_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = 
&_line_input; + u32 num_m_args = 0; + u32 sixrd_domain_index; + + /* Get a line of input. */ + if (! unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "index %d", &sixrd_domain_index)) + num_m_args++; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + if (num_m_args != 1) + return clib_error_return(0, "mandatory argument(s) missing"); + + sixrd_delete_domain(sixrd_domain_index); + + return 0; +} + +static u8 * +format_sixrd_domain (u8 *s, va_list *args) +{ + sixrd_domain_t *d = va_arg(*args, sixrd_domain_t *); + sixrd_main_t *mm = &sixrd_main; + + s = format(s, + "[%d] ip6-pfx %U/%d ip4-pfx %U/%d ip4-src %U mtu %d", + d - mm->domains, + format_ip6_address, &d->ip6_prefix, d->ip6_prefix_len, + format_ip4_address, &d->ip4_prefix, d->ip4_prefix_len, + format_ip4_address, &d->ip4_src, d->mtu); + + return s; +} + +static clib_error_t * +show_sixrd_domain_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + sixrd_main_t *mm = &sixrd_main; + sixrd_domain_t *d; + + if (pool_elts(mm->domains) == 0) + vlib_cli_output(vm, "No SIXRD domains are configured..."); + + pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_sixrd_domain, d);})); + + return 0; + +} + +static clib_error_t * +show_sixrd_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd) +{ + sixrd_main_t *mm = &sixrd_main; + sixrd_domain_t *d; + int domains = 0, domaincount = 0; + if (pool_elts (mm->domains) == 0) + vlib_cli_output (vm, "No SIXRD domains are configured..."); + + pool_foreach(d, mm->domains, ({ + domains += sizeof(*d); + domaincount++; + })); + + vlib_cli_output(vm, "SIXRD domains structure: %d\n", sizeof (sixrd_domain_t)); + vlib_cli_output(vm, "SIXRD domains: %d (%d bytes)\n", domaincount, domains); + + return 0; +} + +/* + * packet 
trace format function + */ +u8 * +format_sixrd_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED(vlib_main_t *vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED(vlib_node_t *node) = va_arg (*args, vlib_node_t *); + sixrd_trace_t *t = va_arg (*args, sixrd_trace_t *); + u32 sixrd_domain_index = t->sixrd_domain_index; + + s = format(s, "SIXRD domain index: %d", sixrd_domain_index); + + return s; +} + +VLIB_CLI_COMMAND(sixrd_add_domain_command, static) = { + .path = "sixrd add domain", + .short_help = + "sixrd add domain ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> ip4-src <ip4-addr>", + .function = sixrd_add_domain_command_fn, +}; + +VLIB_CLI_COMMAND(sixrd_del_command, static) = { + .path = "sixrd del domain", + .short_help = + "sixrd del domain index <domain>", + .function = sixrd_del_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_sixrd_domain_command, static) = { + .path = "show sixrd domain", + .function = show_sixrd_domain_command_fn, +}; + +VLIB_CLI_COMMAND(show_sixrd_stats_command, static) = { + .path = "show sixrd stats", + .function = show_sixrd_stats_command_fn, +}; + +/* + * sixrd_init + */ +clib_error_t *sixrd_init (vlib_main_t *vm) +{ + sixrd_main_t *mm = &sixrd_main; + + mm->vnet_main = vnet_get_main(); + mm->vlib_main = vm; + + return 0; +} + +VLIB_INIT_FUNCTION(sixrd_init); diff --git a/vnet/vnet/map/sixrd.h b/vnet/vnet/map/sixrd.h new file mode 100644 index 00000000000..d741cb278b5 --- /dev/null +++ b/vnet/vnet/map/sixrd.h @@ -0,0 +1,144 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +#include <stdbool.h> +#include <vppinfra/error.h> +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> + +vlib_node_registration_t ip6_sixrd_node; +vlib_node_registration_t ip4_sixrd_node; + +int sixrd_create_domain(ip6_address_t *ip6_prefix, u8 ip6_prefix_len, + ip4_address_t *ip4_prefix, u8 ip4_prefix_len, + ip4_address_t *ip4_src, u32 *sixrd_domain_index, u16 mtu); +int sixrd_delete_domain(u32 sixrd_domain_index); +u8 *format_sixrd_trace(u8 *s, va_list *args); + +typedef struct { + ip6_address_t ip6_prefix; + ip4_address_t ip4_prefix; + ip4_address_t ip4_src; + u8 ip6_prefix_len; + u8 ip4_prefix_len; + + /* helpers */ + u8 shift; + + u16 mtu; +} sixrd_domain_t; + +typedef struct { + /* pool of SIXRD domains */ + sixrd_domain_t *domains; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} sixrd_main_t; + +#define foreach_sixrd_error \ + /* Must be first. 
*/ \ + _(NONE, "valid SIXRD packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(WRONG_ICMP_TYPE, "wrong icmp type") \ + _(SEC_CHECK, "security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_DOMAIN, "no domain") \ + _(ENCAPSULATED, "encapsulated") \ + _(DECAPSULATED, "decapsulated") \ + _(TRANSLATED_4TO6, "translated 4 to 6") \ + _(TRANSLATED_6TO4, "translated 6 to 4") \ + _(FRAGMENT, "fragment handling error") \ + _(FRAGMENT_QUEUED, "dropped, missing first fragment") \ + _(FRAGMENTED, "packets requiring fragmentation") \ + _(FRAGMENT_PARTS, "fragment parts") \ + _(MALFORMED, "malformed packet") + +typedef enum { +#define _(sym,str) SIXRD_ERROR_##sym, + foreach_sixrd_error +#undef _ + SIXRD_N_ERROR, + } sixrd_error_t; + +typedef struct { + u32 sixrd_domain_index; +} sixrd_trace_t; + +sixrd_main_t sixrd_main; + +/* + * sixrd_get_addr + */ +static_always_inline u32 +sixrd_get_addr (sixrd_domain_t *d, u64 dal) +{ + + /* 1:1 mode */ + if (d->ip4_prefix_len == 32) return (d->ip4_prefix.as_u32); + + /* Grab 32 - ip4_prefix_len bits out of IPv6 address from offset ip6_prefix_len */ + return (d->ip4_prefix.as_u32 | (u32)(dal >> d->shift)); +} + +/* + * Get the SIXRD domain from an IPv6 lookup adjacency. + */ +static_always_inline sixrd_domain_t * +ip6_sixrd_get_domain (u32 adj_index, u32 *sixrd_domain_index) +{ + sixrd_main_t *mm = &sixrd_main; + ip_lookup_main_t *lm = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *sixrd_domain_index = p[0]; + return pool_elt_at_index(mm->domains, p[0]); +} + +/* + * Get the SIXRD domain from an IPv4 lookup adjacency. + * If the IPv4 address is not shared, no lookup is required. + * The IPv6 address is used otherwise. 
+ */ +static_always_inline sixrd_domain_t * +ip4_sixrd_get_domain (u32 adj_index, ip6_address_t *addr, + u32 *sixrd_domain_index, u8 *error) +{ + sixrd_main_t *mm = &sixrd_main; + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm4 = &ip4_main.lookup_main; + ip_lookup_main_t *lm6 = &ip6_main.lookup_main; + ip_adjacency_t *adj = ip_get_adjacency(lm4, adj_index); + ASSERT(adj); + uword *p = (uword *)adj->rewrite_data; + ASSERT(p); + *sixrd_domain_index = p[0]; + if (p[0] != ~0) + return pool_elt_at_index(mm->domains, p[0]); + + u32 ai = ip6_fib_lookup_with_table(im6, 0, addr); + ip_adjacency_t *adj6 = ip_get_adjacency (lm6, ai); + if (PREDICT_TRUE(adj6->lookup_next_index == IP_LOOKUP_NEXT_SIXRD)) { + uword *p = (uword *)adj6->rewrite_data; + *sixrd_domain_index = p[0]; + return pool_elt_at_index(mm->domains, *sixrd_domain_index); + } + *error = SIXRD_ERROR_NO_DOMAIN; + return NULL; +} |